1 /*
   2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  33 #include "gc_implementation/g1/g1RemSet.hpp"
  34 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  35 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  36 #include "gc_implementation/shared/vmGCOperations.hpp"
  37 #include "memory/genOopClosures.inline.hpp"
  38 #include "memory/referencePolicy.hpp"
  39 #include "memory/resourceArea.hpp"
  40 #include "oops/oop.inline.hpp"
  41 #include "runtime/handles.inline.hpp"
  42 #include "runtime/java.hpp"
  43 
  44 //
  45 // CMS Bit Map Wrapper
  46 
  47 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  48   _bm((uintptr_t*)NULL,0),
  49   _shifter(shifter) {
  50   _bmStartWord = (HeapWord*)(rs.base());
  51   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  52   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  53                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  54 
  55   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  56   // For now we'll just commit all of the bit map up front.
  57   // Later on we'll try to be more parsimonious with swap.
  58   guarantee(_virtual_space.initialize(brs, brs.size()),
  59             "couldn't reserve backing store for CMS bit map");
  60   assert(_virtual_space.committed_size() == brs.size(),
  61          "didn't reserve backing store for all of CMS bit map?");
  62   _bm.set_map((uintptr_t*)_virtual_space.low());
  63   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  64          _bmWordSize, "inconsistency in bit map sizing");
  65   _bm.set_size(_bmWordSize >> _shifter);
  66 }
  67 
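     // Return the address of the first marked word at or after addr, but
     // not beyond limit (or the end of the bitmap if limit is NULL). addr
     // is first rounded up to a possible object boundary.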
  68 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  69                                                HeapWord* limit) const {
  70   // First we must round addr *up* to a possible object boundary.
  71   addr = (HeapWord*)align_size_up((intptr_t)addr,
  72                                   HeapWordSize << _shifter);
  73   size_t addrOffset = heapWordToOffset(addr);
  74   if (limit == NULL) {
  75     limit = _bmStartWord + _bmWordSize;
  76   }
  77   size_t limitOffset = heapWordToOffset(limit);
  78   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  79   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  80   assert(nextAddr >= addr, "get_next_one postcondition");
  81   assert(nextAddr == limit || isMarked(nextAddr),
  82          "get_next_one postcondition");
  83   return nextAddr;
  84 }
  85 
  86 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  87                                                  HeapWord* limit) const {
  88   size_t addrOffset = heapWordToOffset(addr);
  89   if (limit == NULL) {
  90     limit = _bmStartWord + _bmWordSize;
  91   }
  92   size_t limitOffset = heapWordToOffset(limit);
  93   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  94   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  95   assert(nextAddr >= addr, "get_next_one postcondition");
  96   assert(nextAddr == limit || !isMarked(nextAddr),
  97          "get_next_one postcondition");
  98   return nextAddr;
  99 }
 100 
 101 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 102   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 103   return (int) (diff >> _shifter);
 104 }
 105 
 106 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 107   HeapWord* left  = MAX2(_bmStartWord, mr.start());
 108   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
 109   if (right > left) {
 110     // Right-open interval [leftOffset, rightOffset).
 111     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
 112   } else {
 113     return true;
 114   }
 115 }
 116 
 117 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
 118                                              size_t    from_start_index,
 119                                              HeapWord* to_start_word,
 120                                              size_t    word_num) {
 121   _bm.mostly_disjoint_range_union(from_bitmap,
 122                                   from_start_index,
 123                                   heapWordToOffset(to_start_word),
 124                                   word_num);
 125 }
 126 
 127 #ifndef PRODUCT
 128 bool CMBitMapRO::covers(ReservedSpace rs) const {
 129   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 130   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 131          "size inconsistency");
 132   return _bmStartWord == (HeapWord*)(rs.base()) &&
 133          _bmWordSize  == rs.size()>>LogHeapWordSize;
 134 }
 135 #endif
 136 
 137 void CMBitMap::clearAll() {
 138   _bm.clear();
 139   return;
 140 }
 141 
 142 void CMBitMap::markRange(MemRegion mr) {
 143   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 144   assert(!mr.is_empty(), "unexpected empty region");
 145   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 146           ((HeapWord *) mr.end())),
 147          "markRange memory region end is not card aligned");
 148   // convert address range into offset range
 149   _bm.at_put_range(heapWordToOffset(mr.start()),
 150                    heapWordToOffset(mr.end()), true);
 151 }
 152 
 153 void CMBitMap::clearRange(MemRegion mr) {
 154   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 155   assert(!mr.is_empty(), "unexpected empty region");
 156   // convert address range into offset range
 157   _bm.at_put_range(heapWordToOffset(mr.start()),
 158                    heapWordToOffset(mr.end()), false);
 159 }
 160 
 161 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 162                                             HeapWord* end_addr) {
 163   HeapWord* start = getNextMarkedWordAddress(addr);
 164   start = MIN2(start, end_addr);
 165   HeapWord* end   = getNextUnmarkedWordAddress(start);
 166   end = MIN2(end, end_addr);
 167   assert(start <= end, "Consistency check");
 168   MemRegion mr(start, end);
 169   if (!mr.is_empty()) {
 170     clearRange(mr);
 171   }
 172   return mr;
 173 }
 174 
 175 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 176   _base(NULL), _cm(cm)
 177 #ifdef ASSERT
 178   , _drain_in_progress(false)
 179   , _drain_in_progress_yields(false)
 180 #endif
 181 {}
 182 
 183 void CMMarkStack::allocate(size_t size) {
 184   _base = NEW_C_HEAP_ARRAY(oop, size);
 185   if (_base == NULL) {
 186     vm_exit_during_initialization("Failed to allocate "
 187                                   "CM mark stack");
 188   }
 189   _index = 0;
 190   _capacity = (jint) size;
 191   _oops_do_bound = -1;
 192   NOT_PRODUCT(_max_depth = 0);
 193 }
 194 
 195 CMMarkStack::~CMMarkStack() {
 196   if (_base != NULL) {
 197     FREE_C_HEAP_ARRAY(oop, _base);
 198   }
 199 }
 200 
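     // Lock-free push of a single entry: claim a slot by CAS-ing _index
     // and retry on failure. If the stack is full, the overflow flag is
     // set and the entry is not pushed.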
 201 void CMMarkStack::par_push(oop ptr) {
 202   while (true) {
 203     if (isFull()) {
 204       _overflow = true;
 205       return;
 206     }
 207     // Otherwise...
 208     jint index = _index;
 209     jint next_index = index+1;
 210     jint res = Atomic::cmpxchg(next_index, &_index, index);
 211     if (res == index) {
 212       _base[index] = ptr;
 213       // Note that we don't maintain this atomically.  We could, but it
 214       // doesn't seem necessary.
 215       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 216       return;
 217     }
 218     // Otherwise, we need to try again.
 219   }
 220 }
 221 
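     // Lock-free bulk push: claim n consecutive slots with a single CAS on
     // _index and copy the entries in. Sets the overflow flag if the
     // entries do not fit.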
 222 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 223   while (true) {
 224     if (isFull()) {
 225       _overflow = true;
 226       return;
 227     }
 228     // Otherwise...
 229     jint index = _index;
 230     jint next_index = index + n;
 231     if (next_index > _capacity) {
 232       _overflow = true;
 233       return;
 234     }
 235     jint res = Atomic::cmpxchg(next_index, &_index, index);
 236     if (res == index) {
 237       for (int i = 0; i < n; i++) {
 238         int ind = index + i;
 239         assert(ind < _capacity, "By overflow test above.");
 240         _base[ind] = ptr_arr[i];
 241       }
 242       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 243       return;
 244     }
 245     // Otherwise, we need to try again.
 246   }
 247 }
 248 
 249 
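     // Bulk push under ParGCRareEvent_lock (rare paths only); pairs with
     // par_pop_arr below, which takes the same lock.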
 250 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 251   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 252   jint start = _index;
 253   jint next_index = start + n;
 254   if (next_index > _capacity) {
 255     _overflow = true;
 256     return;
 257   }
 258   // Otherwise.
 259   _index = next_index;
 260   for (int i = 0; i < n; i++) {
 261     int ind = start + i;
 262     assert(ind < _capacity, "By overflow test above.");
 263     _base[ind] = ptr_arr[i];
 264   }
 265 }
 266 
 267 
 268 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 269   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 270   jint index = _index;
 271   if (index == 0) {
 272     *n = 0;
 273     return false;
 274   } else {
 275     int k = MIN2(max, index);
 276     jint new_ind = index - k;
 277     for (int j = 0; j < k; j++) {
 278       ptr_arr[j] = _base[new_ind + j];
 279     }
 280     _index = new_ind;
 281     *n = k;
 282     return true;
 283   }
 284 }
 285 
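     // An illustrative (hypothetical) caller draining the stack in chunks
     // might look like:
     //   oop buffer[64];
     //   int n;
     //   while (stack->par_pop_arr(buffer, 64, &n)) {
     //     for (int i = 0; i < n; i++) {
     //       process(buffer[i]);   // process() is a placeholder
     //     }
     //   }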
 286 
 287 CMRegionStack::CMRegionStack() : _base(NULL) {}
 288 
 289 void CMRegionStack::allocate(size_t size) {
 290   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 291   if (_base == NULL) {
 292     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 293   }
 294   _index = 0;
 295   _capacity = (jint) size;
 296 }
 297 
 298 CMRegionStack::~CMRegionStack() {
 299   if (_base != NULL) {
 300     FREE_C_HEAP_ARRAY(MemRegion, _base);
 301   }
 302 }
 303 
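     // Lock-free push of a region, analogous to CMMarkStack::par_push.
     // Sets the overflow flag if the region stack is full.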
 304 void CMRegionStack::push_lock_free(MemRegion mr) {
 305   assert(mr.word_size() > 0, "Precondition");
 306   while (true) {
 307     jint index = _index;
 308 
 309     if (index >= _capacity) {
 310       _overflow = true;
 311       return;
 312     }
 313     // Otherwise...
 314     jint next_index = index+1;
 315     jint res = Atomic::cmpxchg(next_index, &_index, index);
 316     if (res == index) {
 317       _base[index] = mr;
 318       return;
 319     }
 320     // Otherwise, we need to try again.
 321   }
 322 }
 323 
 324 // Lock-free pop of the region stack. Called during the concurrent
 325 // marking / remark phases. Should only be called in tandem with
 326 // other lock-free pops.
 327 MemRegion CMRegionStack::pop_lock_free() {
 328   while (true) {
 329     jint index = _index;
 330 
 331     if (index == 0) {
 332       return MemRegion();
 333     }
 334     // Otherwise...
 335     jint next_index = index-1;
 336     jint res = Atomic::cmpxchg(next_index, &_index, index);
 337     if (res == index) {
 338       MemRegion mr = _base[next_index];
 339       if (mr.start() != NULL) {
 340         assert(mr.end() != NULL, "invariant");
 341         assert(mr.word_size() > 0, "invariant");
 342         return mr;
 343       } else {
 344         // that entry was invalidated... let's skip it
 345         assert(mr.end() == NULL, "invariant");
 346       }
 347     }
 348     // Otherwise, we need to try again.
 349   }
 350 }
 351 
 352 #if 0
 353 // The routines that manipulate the region stack with a lock are
 354 // not currently used. They should be retained, however, as a
 355 // diagnostic aid.
 356 
 357 void CMRegionStack::push_with_lock(MemRegion mr) {
 358   assert(mr.word_size() > 0, "Precondition");
 359   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 360 
 361   if (isFull()) {
 362     _overflow = true;
 363     return;
 364   }
 365 
 366   _base[_index] = mr;
 367   _index += 1;
 368 }
 369 
 370 MemRegion CMRegionStack::pop_with_lock() {
 371   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 372 
 373   while (true) {
 374     if (_index == 0) {
 375       return MemRegion();
 376     }
 377     _index -= 1;
 378 
 379     MemRegion mr = _base[_index];
 380     if (mr.start() != NULL) {
 381       assert(mr.end() != NULL, "invariant");
 382       assert(mr.word_size() > 0, "invariant");
 383       return mr;
 384     } else {
 385       // that entry was invalidated... let's skip it
 386       assert(mr.end() == NULL, "invariant");
 387     }
 388   }
 389 }
 390 #endif
 391 
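     // Replace entries (up to the recorded bound) whose region now points
     // into the collection set with an empty MemRegion so that later pops
     // skip them. Returns true if at least one entry was invalidated.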
 392 bool CMRegionStack::invalidate_entries_into_cset() {
 393   bool result = false;
 394   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 395   for (int i = 0; i < _oops_do_bound; ++i) {
 396     MemRegion mr = _base[i];
 397     if (mr.start() != NULL) {
 398       assert(mr.end() != NULL, "invariant");
 399       assert(mr.word_size() > 0, "invariant");
 400       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 401       assert(hr != NULL, "invariant");
 402       if (hr->in_collection_set()) {
 403         // The region points into the collection set
 404         _base[i] = MemRegion();
 405         result = true;
 406       }
 407     } else {
 408       // that entry was invalidated... let's skip it
 409       assert(mr.end() == NULL, "invariant");
 410     }
 411   }
 412   return result;
 413 }
 414 
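     // Pop entries and apply cl to the oops each popped object contains
     // until the stack is empty, optionally yielding between objects.
     // Returns false if the drain stopped early because of a yield.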
 415 template<class OopClosureClass>
 416 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 417   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 418          || SafepointSynchronize::is_at_safepoint(),
 419          "Drain recursion must be yield-safe.");
 420   bool res = true;
 421   debug_only(_drain_in_progress = true);
 422   debug_only(_drain_in_progress_yields = yield_after);
 423   while (!isEmpty()) {
 424     oop newOop = pop();
 425     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 426     assert(newOop->is_oop(), "Expected an oop");
 427     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 428            "only grey objects on this stack");
 429     // iterate over the oops in this oop, marking and pushing
 430     // the ones in CMS generation.
 431     newOop->oop_iterate(cl);
 432     if (yield_after && _cm->do_yield_check()) {
 433       res = false;
 434       break;
 435     }
 436   }
 437   debug_only(_drain_in_progress = false);
 438   return res;
 439 }
 440 
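     // Apply f to the first _oops_do_bound entries; the bound must have
     // been set beforehand and is reset to -1 afterwards.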
 441 void CMMarkStack::oops_do(OopClosure* f) {
 442   if (_index == 0) return;
 443   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 444          "Bound must be set.");
 445   for (int i = 0; i < _oops_do_bound; i++) {
 446     f->do_oop(&_base[i]);
 447   }
 448   _oops_do_bound = -1;
 449 }
 450 
 451 bool ConcurrentMark::not_yet_marked(oop obj) const {
 452   return (_g1h->is_obj_ill(obj)
 453           || (_g1h->is_in_permanent(obj)
 454               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 455 }
 456 
 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 459 #endif // _MSC_VER
 460 
 461 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 462                                int max_regions) :
 463   _markBitMap1(rs, MinObjAlignment - 1),
 464   _markBitMap2(rs, MinObjAlignment - 1),
 465 
 466   _parallel_marking_threads(0),
 467   _sleep_factor(0.0),
 468   _marking_task_overhead(1.0),
 469   _cleanup_sleep_factor(0.0),
 470   _cleanup_task_overhead(1.0),
 471   _cleanup_list("Cleanup List"),
 472   _region_bm(max_regions, false /* in_resource_area*/),
 473   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 474            CardTableModRefBS::card_shift,
 475            false /* in_resource_area*/),
 476   _prevMarkBitMap(&_markBitMap1),
 477   _nextMarkBitMap(&_markBitMap2),
 478   _at_least_one_mark_complete(false),
 479 
 480   _markStack(this),
 481   _regionStack(),
 482   // _finger set in set_non_marking_state
 483 
 484   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 485   // _active_tasks set in set_non_marking_state
 486   // _tasks set inside the constructor
 487   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 488   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 489 
 490   _has_overflown(false),
 491   _concurrent(false),
 492   _has_aborted(false),
 493   _restart_for_overflow(false),
 494   _concurrent_marking_in_progress(false),
 495   _should_gray_objects(false),
 496 
 497   // _verbose_level set below
 498 
 499   _init_times(),
 500   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 501   _cleanup_times(),
 502   _total_counting_time(0.0),
 503   _total_rs_scrub_time(0.0),
 504 
 505   _parallel_workers(NULL) {
 506   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 507   if (verbose_level < no_verbose) {
 508     verbose_level = no_verbose;
 509   }
 510   if (verbose_level > high_verbose) {
 511     verbose_level = high_verbose;
 512   }
 513   _verbose_level = verbose_level;
 514 
 515   if (verbose_low()) {
 516     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 517                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 518   }
 519 
 520   _markStack.allocate(MarkStackSize);
 521   _regionStack.allocate(G1MarkRegionStackSize);
 522 
 523   // Create & start a ConcurrentMark thread.
 524   _cmThread = new ConcurrentMarkThread(this);
 525   assert(cmThread() != NULL, "CM Thread should have been created");
 526   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 527 
 528   _g1h = G1CollectedHeap::heap();
 529   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 530   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 531   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 532 
 533   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 534   satb_qs.set_buffer_size(G1SATBBufferSize);
 535 
 536   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 537   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 538 
 539   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 540   _active_tasks = _max_task_num;
 541   for (int i = 0; i < (int) _max_task_num; ++i) {
 542     CMTaskQueue* task_queue = new CMTaskQueue();
 543     task_queue->initialize();
 544     _task_queues->register_queue(i, task_queue);
 545 
 546     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 547     _accum_task_vtime[i] = 0.0;
 548   }
 549 
 550   if (ConcGCThreads > ParallelGCThreads) {
 551     vm_exit_during_initialization("Can't have more ConcGCThreads "
 552                                   "than ParallelGCThreads.");
 553   }
 554   if (ParallelGCThreads == 0) {
 555     // if we are not running with any parallel GC threads we will not
 556     // spawn any marking threads either
 557     _parallel_marking_threads =   0;
 558     _sleep_factor             = 0.0;
 559     _marking_task_overhead    = 1.0;
 560   } else {
 561     if (ConcGCThreads > 0) {
 562       // notice that ConcGCThreads overrides G1MarkingOverheadPercent
 563       // if both are set
 564 
 565       _parallel_marking_threads = ConcGCThreads;
 566       _sleep_factor             = 0.0;
 567       _marking_task_overhead    = 1.0;
 568     } else if (G1MarkingOverheadPercent > 0) {
 569       // we will calculate the number of parallel marking threads
 570       // based on a target overhead with respect to the soft real-time
 571       // goal
 572 
 573       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 574       double overall_cm_overhead =
 575         (double) MaxGCPauseMillis * marking_overhead /
 576         (double) GCPauseIntervalMillis;
 577       double cpu_ratio = 1.0 / (double) os::processor_count();
 578       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 579       double marking_task_overhead =
 580         overall_cm_overhead / marking_thread_num *
 581                                                 (double) os::processor_count();
 582       double sleep_factor =
 583                          (1.0 - marking_task_overhead) / marking_task_overhead;
 584 
 585       _parallel_marking_threads = (size_t) marking_thread_num;
 586       _sleep_factor             = sleep_factor;
 587       _marking_task_overhead    = marking_task_overhead;
 588     } else {
 589       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
 590       _sleep_factor             = 0.0;
 591       _marking_task_overhead    = 1.0;
 592     }
 593 
 594     if (parallel_marking_threads() > 1) {
 595       _cleanup_task_overhead = 1.0;
 596     } else {
 597       _cleanup_task_overhead = marking_task_overhead();
 598     }
 599     _cleanup_sleep_factor =
 600                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 601 
 602 #if 0
 603     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 604     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 605     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 606     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 607     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 608 #endif
 609 
 610     guarantee(parallel_marking_threads() > 0, "peace of mind");
 611     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 612          (int) _parallel_marking_threads, false, true);
 613     if (_parallel_workers == NULL) {
 614       vm_exit_during_initialization("Failed necessary allocation.");
 615     } else {
 616       _parallel_workers->initialize_workers();
 617     }
 618   }
 619 
 620   // so that the call below can read a sensible value
 621   _heap_start = (HeapWord*) rs.base();
 622   set_non_marking_state();
 623 }
 624 
 625 void ConcurrentMark::update_g1_committed(bool force) {
 626   // If concurrent marking is not in progress, then we do not need to
 627   // update _heap_end. This has a subtle and important
 628   // side-effect. Imagine that two evacuation pauses happen between
 629   // marking completion and remark. The first one can grow the
 630   // heap (hence now the finger is below the heap end). Then, the
 631   // second one could unnecessarily push regions on the region
 632   // stack. This causes the invariant that the region stack is empty
 633   // at the beginning of remark to be false. By ensuring that we do
 634   // not observe heap expansions after marking is complete, we avoid
 635   // this problem.
 636   if (!concurrent_marking_in_progress() && !force) return;
 637 
 638   MemRegion committed = _g1h->g1_committed();
 639   assert(committed.start() == _heap_start, "start shouldn't change");
 640   HeapWord* new_end = committed.end();
 641   if (new_end > _heap_end) {
 642     // The heap has been expanded.
 643 
 644     _heap_end = new_end;
 645   }
 646   // Notice that the heap can also shrink. However, this only happens
 647   // during a Full GC (at least currently) and the entire marking
 648   // phase will bail out and the task will not be restarted. So, let's
 649   // do nothing.
 650 }
 651 
 652 void ConcurrentMark::reset() {
 653   // Starting values for these two. This should be called in a STW
 654   // phase. Any future g1_committed expansions that CM will be notified
 655   // of will happen at the end of evacuation pauses, when tasks are
 656   // inactive.
 657   MemRegion committed = _g1h->g1_committed();
 658   _heap_start = committed.start();
 659   _heap_end   = committed.end();
 660 
 661   // Separated the asserts so that we know which one fires.
 662   assert(_heap_start != NULL, "heap bounds should look ok");
 663   assert(_heap_end != NULL, "heap bounds should look ok");
 664   assert(_heap_start < _heap_end, "heap bounds should look ok");
 665 
 666   // reset all the marking data structures and any necessary flags
 667   clear_marking_state();
 668 
 669   if (verbose_low()) {
 670     gclog_or_tty->print_cr("[global] resetting");
 671   }
 672 
 673   // We do reset all of them, since different phases will use
 674   // different number of active threads. So, it's easiest to have all
 675   // of them ready.
 676   for (int i = 0; i < (int) _max_task_num; ++i) {
 677     _tasks[i]->reset(_nextMarkBitMap);
 678   }
 679 
 680   // we need this to make sure that the flag is on during the evac
 681   // pause with initial mark piggy-backed
 682   set_concurrent_marking_in_progress();
 683 }
 684 
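     // Configure the marking state for a phase with the given number of
     // active tasks: resize the terminator and both overflow barriers and
     // propagate the concurrent flag to all tasks.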
 685 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 686   assert(active_tasks <= _max_task_num, "we should not have more");
 687 
 688   _active_tasks = active_tasks;
 689   // Need to update the three data structures below according to the
 690   // number of active threads for this phase.
 691   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 692   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 693   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 694 
 695   _concurrent = concurrent;
 696   // We propagate this to all tasks, not just the active ones.
 697   for (int i = 0; i < (int) _max_task_num; ++i)
 698     _tasks[i]->set_concurrent(concurrent);
 699 
 700   if (concurrent) {
 701     set_concurrent_marking_in_progress();
 702   } else {
 703     // We currently assume that the concurrent flag has been set to
 704     // false before we start remark. At this point we should also be
 705     // in a STW phase.
 706     assert(!concurrent_marking_in_progress(), "invariant");
 707     assert(_finger == _heap_end, "only way to get here");
 708     update_g1_committed(true);
 709   }
 710 }
 711 
 712 void ConcurrentMark::set_non_marking_state() {
 713   // We set the global marking state to some default values when we're
 714   // not doing marking.
 715   clear_marking_state();
 716   _active_tasks = 0;
 717   clear_concurrent_marking_in_progress();
 718 }
 719 
 720 ConcurrentMark::~ConcurrentMark() {
 721   for (int i = 0; i < (int) _max_task_num; ++i) {
 722     delete _task_queues->queue(i);
 723     delete _tasks[i];
 724   }
 725   delete _task_queues;
 726   FREE_C_HEAP_ARRAY(CMTask*, _tasks);
 727 }
 728 
 729 // Clear the next marking bitmap in preparation for the next marking
 730 // cycle. This is called outside of a pause, so it clears the bitmap in
 731 // chunks and offers to yield between chunks.
 732 //
 733 
 734 void ConcurrentMark::clearNextBitmap() {
 735   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 736   G1CollectorPolicy* g1p = g1h->g1_policy();
 737 
 738   // Make sure that the concurrent mark thread looks to still be in
 739   // the current cycle.
 740   guarantee(cmThread()->during_cycle(), "invariant");
 741 
 742   // We are finishing up the current cycle by clearing the next
 743   // marking bitmap and getting it ready for the next cycle. During
 744   // this time no other cycle can start. So, let's make sure that this
 745   // is the case.
 746   guarantee(!g1h->mark_in_progress(), "invariant");
 747 
 748   // clear the mark bitmap (no grey objects to start with).
 749   // We need to do this in chunks and offer to yield in between
 750   // each chunk.
 751   HeapWord* start  = _nextMarkBitMap->startWord();
 752   HeapWord* end    = _nextMarkBitMap->endWord();
 753   HeapWord* cur    = start;
 754   size_t chunkSize = M;
 755   while (cur < end) {
 756     HeapWord* next = cur + chunkSize;
 757     if (next > end) {
 758       next = end;
 759     }
 760     MemRegion mr(cur,next);
 761     _nextMarkBitMap->clearRange(mr);
 762     cur = next;
 763     do_yield_check();
 764 
 765     // Repeat the asserts from above. We'll do them as asserts here to
 766     // minimize their overhead on the product. However, we'll have
 767     // them as guarantees at the beginning / end of the bitmap
 768     // clearing to get some checking in the product.
 769     assert(cmThread()->during_cycle(), "invariant");
 770     assert(!g1h->mark_in_progress(), "invariant");
 771   }
 772 
 773   // Repeat the asserts from above.
 774   guarantee(cmThread()->during_cycle(), "invariant");
 775   guarantee(!g1h->mark_in_progress(), "invariant");
 776 }
 777 
 778 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 779 public:
 780   bool doHeapRegion(HeapRegion* r) {
 781     if (!r->continuesHumongous()) {
 782       r->note_start_of_marking(true);
 783     }
 784     return false;
 785   }
 786 };
 787 
 788 void ConcurrentMark::checkpointRootsInitialPre() {
 789   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 790   G1CollectorPolicy* g1p = g1h->g1_policy();
 791 
 792   _has_aborted = false;
 793 
 794 #ifndef PRODUCT
 795   if (G1PrintReachableAtInitialMark) {
 796     print_reachable("at-cycle-start",
 797                     VerifyOption_G1UsePrevMarking, true /* all */);
 798   }
 799 #endif
 800 
 801   // Initialise marking structures. This has to be done in a STW phase.
 802   reset();
 803 }
 804 
 805 
 806 void ConcurrentMark::checkpointRootsInitialPost() {
 807   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 808 
 809   // If we force an overflow during remark, the remark operation will
 810   // actually abort and we'll restart concurrent marking. If we always
 811   // force an overflow during remark we'll never actually complete the
 812   // marking phase. So, we initialize this here, at the start of the
 813   // cycle, so that the number of remaining forced overflows decreases at
 814   // every remark and we'll eventually not need to cause one.
 815   force_overflow_stw()->init();
 816 
 817   // For each region note start of marking.
 818   NoteStartOfMarkHRClosure startcl;
 819   g1h->heap_region_iterate(&startcl);
 820 
 821   // Start weak-reference discovery.
 822   ReferenceProcessor* rp = g1h->ref_processor();
 823   rp->verify_no_references_recorded();
 824   rp->enable_discovery(); // enable ("weak") refs discovery
 825   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 826 
 827   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 828   // This is the start of the marking cycle; we expect all
 829   // threads to have SATB queues with active set to false.
 830   satb_mq_set.set_active_all_threads(true, /* new active value */
 831                                      false /* expected_active */);
 832 
 833   // update_g1_committed() will be called at the end of an evac pause
 834   // when marking is on. So, it's also called at the end of the
 835   // initial-mark pause to update the heap end, if the heap expands
 836   // during it. No need to call it here.
 837 }
 838 
 839 /*
 840  * Notice that in the next two methods, we actually leave the STS
 841  * during the barrier sync and join it immediately afterwards. If we
 842  * do not do this, the following deadlock can occur: one thread could
 843  * be in the barrier sync code, waiting for the other thread to also
 844  * sync up, whereas another one could be trying to yield, while also
 845  * waiting for the other threads to sync up too.
 846  *
 847  * Note, however, that this code is also used during remark and in
 848  * this case we should not attempt to leave / enter the STS, otherwise
 849  * we'll either hit an assert (debug / fastdebug) or deadlock
 850  * (product). So we should only leave / enter the STS if we are
 851  * operating concurrently.
 852  *
 853  * Because the thread that does the sync barrier has left the STS, it
 854  * is possible for it to be suspended while a Full GC or an evacuation
 855  * pause occurs. This is actually safe, since entering the sync
 856  * barrier is one of the last things do_marking_step() does, and it
 857  * doesn't manipulate any data structures afterwards.
 858  */
 859 
 860 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 861   if (verbose_low()) {
 862     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 863   }
 864 
 865   if (concurrent()) {
 866     ConcurrentGCThread::stsLeave();
 867   }
 868   _first_overflow_barrier_sync.enter();
 869   if (concurrent()) {
 870     ConcurrentGCThread::stsJoin();
 871   }
 872   // at this point everyone should have synced up and not be doing any
 873   // more work
 874 
 875   if (verbose_low()) {
 876     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 877   }
 878 
 879   // let task 0 do this
 880   if (task_num == 0) {
 881     // task 0 is responsible for clearing the global data structures
 882     // We should be here because of an overflow. During STW we should
 883     // not clear the overflow flag since we rely on it being true when
 884     // we exit this method to abort the pause and restart concurrent
 885     // marking.
 886     clear_marking_state(concurrent() /* clear_overflow */);
 887     force_overflow()->update();
 888 
 889     if (PrintGC) {
 890       gclog_or_tty->date_stamp(PrintGCDateStamps);
 891       gclog_or_tty->stamp(PrintGCTimeStamps);
 892       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 893     }
 894   }
 895 
 896   // after this, each task should reset its own data structures and
 897   // then go into the second barrier
 898 }
 899 
 900 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 901   if (verbose_low()) {
 902     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 903   }
 904 
 905   if (concurrent()) {
 906     ConcurrentGCThread::stsLeave();
 907   }
 908   _second_overflow_barrier_sync.enter();
 909   if (concurrent()) {
 910     ConcurrentGCThread::stsJoin();
 911   }
 912   // at this point everything should be re-initialised and ready to go
 913 
 914   if (verbose_low()) {
 915     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 916   }
 917 }
 918 
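     // Testing support (non-product builds): G1ConcMarkForceOverflow makes
     // the first few marking rounds artificially force a global-stack
     // overflow so that the overflow / restart path gets exercised.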
 919 #ifndef PRODUCT
 920 void ForceOverflowSettings::init() {
 921   _num_remaining = G1ConcMarkForceOverflow;
 922   _force = false;
 923   update();
 924 }
 925 
 926 void ForceOverflowSettings::update() {
 927   if (_num_remaining > 0) {
 928     _num_remaining -= 1;
 929     _force = true;
 930   } else {
 931     _force = false;
 932   }
 933 }
 934 
 935 bool ForceOverflowSettings::should_force() {
 936   if (_force) {
 937     _force = false;
 938     return true;
 939   } else {
 940     return false;
 941   }
 942 }
 943 #endif // !PRODUCT
 944 
 945 void ConcurrentMark::grayRoot(oop p) {
 946   HeapWord* addr = (HeapWord*) p;
 947   // We can't really check against _heap_start and _heap_end, since it
 948   // is possible during an evacuation pause with piggy-backed
 949   // initial-mark that the committed space is expanded during the
 950   // pause without CM observing this change. So the assertion below
 951   // is a bit conservative, but better than nothing.
 952   assert(_g1h->g1_committed().contains(addr),
 953          "address should be within the heap bounds");
 954 
 955   if (!_nextMarkBitMap->isMarked(addr)) {
 956     _nextMarkBitMap->parMark(addr);
 957   }
 958 }
 959 
 960 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 961   // The objects on the region have already been marked "in bulk" by
 962   // the caller. We only need to decide whether to push the region on
 963   // the region stack or not.
 964 
 965   if (!concurrent_marking_in_progress() || !_should_gray_objects) {
 966     // We're done with marking and waiting for remark. We do not need to
 967     // push anything else on the region stack.
 968     return;
 969   }
 970 
 971   HeapWord* finger = _finger;
 972 
 973   if (verbose_low()) {
 974     gclog_or_tty->print_cr("[global] attempting to push "
 975                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
 976                            PTR_FORMAT, mr.start(), mr.end(), finger);
 977   }
 978 
 979   if (mr.start() < finger) {
 980     // The finger is always heap region aligned and it is not possible
 981     // for mr to span heap regions.
 982     assert(mr.end() <= finger, "invariant");
 983 
 984     // Separated the asserts so that we know which one fires.
 985     assert(mr.start() <= mr.end(),
 986            "region boundaries should fall within the committed space");
 987     assert(_heap_start <= mr.start(),
 988            "region boundaries should fall within the committed space");
 989     assert(mr.end() <= _heap_end,
 990            "region boundaries should fall within the committed space");
 991     if (verbose_low()) {
 992       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
 993                              "below the finger, pushing it",
 994                              mr.start(), mr.end());
 995     }
 996 
 997     if (!region_stack_push_lock_free(mr)) {
 998       if (verbose_low()) {
 999         gclog_or_tty->print_cr("[global] region stack has overflown.");
1000       }
1001     }
1002   }
1003 }
1004 
1005 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1006   // The object is not marked by the caller. We need to at least mark
1007   // it and maybe push it on the stack.
1008 
1009   HeapWord* addr = (HeapWord*)p;
1010   if (!_nextMarkBitMap->isMarked(addr)) {
1011     // We definitely need to mark it, irrespective of whether we bail out
1012     // because we're done with marking.
1013     if (_nextMarkBitMap->parMark(addr)) {
1014       if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1015         // If we're done with concurrent marking and we're waiting for
1016         // remark, then we're not pushing anything on the stack.
1017         return;
1018       }
1019 
1020       // No OrderAccess::store_load() is needed. It is implicit in the
1021       // CAS done in parMark(addr) above
1022       HeapWord* finger = _finger;
1023 
1024       if (addr < finger) {
1025         if (!mark_stack_push(oop(addr))) {
1026           if (verbose_low()) {
1027             gclog_or_tty->print_cr("[global] global stack overflow "
1028                                    "during parMark");
1029           }
1030         }
1031       }
1032     }
1033   }
1034 }
1035 
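     // The gang task that drives concurrent marking: each worker repeatedly
     // invokes do_marking_step() on its CMTask, yielding and sleeping
     // (according to the sleep factor) between steps until marking finishes
     // or is aborted.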
1036 class CMConcurrentMarkingTask: public AbstractGangTask {
1037 private:
1038   ConcurrentMark*       _cm;
1039   ConcurrentMarkThread* _cmt;
1040 
1041 public:
1042   void work(int worker_i) {
1043     assert(Thread::current()->is_ConcurrentGC_thread(),
1044            "this should only be done by a conc GC thread");
1045     ResourceMark rm;
1046 
1047     double start_vtime = os::elapsedVTime();
1048 
1049     ConcurrentGCThread::stsJoin();
1050 
1051     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1052     CMTask* the_task = _cm->task(worker_i);
1053     the_task->record_start_time();
1054     if (!_cm->has_aborted()) {
1055       do {
1056         double start_vtime_sec = os::elapsedVTime();
1057         double start_time_sec = os::elapsedTime();
1058         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1059 
1060         the_task->do_marking_step(mark_step_duration_ms,
1061                                   true /* do_stealing    */,
1062                                   true /* do_termination */);
1063 
1064         double end_time_sec = os::elapsedTime();
1065         double end_vtime_sec = os::elapsedVTime();
1066         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1067         double elapsed_time_sec = end_time_sec - start_time_sec;
1068         _cm->clear_has_overflown();
1069 
1070         bool ret = _cm->do_yield_check(worker_i);
1071 
1072         jlong sleep_time_ms;
1073         if (!_cm->has_aborted() && the_task->has_aborted()) {
1074           sleep_time_ms =
1075             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1076           ConcurrentGCThread::stsLeave();
1077           os::sleep(Thread::current(), sleep_time_ms, false);
1078           ConcurrentGCThread::stsJoin();
1079         }
1080         double end_time2_sec = os::elapsedTime();
1081         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1082 
1083 #if 0
1084           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1085                                  "overhead %1.4lf",
1086                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1087                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1088           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1089                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1090 #endif
1091       } while (!_cm->has_aborted() && the_task->has_aborted());
1092     }
1093     the_task->record_end_time();
1094     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1095 
1096     ConcurrentGCThread::stsLeave();
1097 
1098     double end_vtime = os::elapsedVTime();
1099     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1100   }
1101 
1102   CMConcurrentMarkingTask(ConcurrentMark* cm,
1103                           ConcurrentMarkThread* cmt) :
1104       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1105 
1106   ~CMConcurrentMarkingTask() { }
1107 };
1108 
1109 void ConcurrentMark::markFromRoots() {
1110   // we might be tempted to assert that:
1111   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1112   //        "inconsistent argument?");
1113   // However that wouldn't be right, because it's possible that
1114   // a safepoint is indeed in progress as a younger generation
1115   // stop-the-world GC happens even as we mark in this generation.
1116 
1117   _restart_for_overflow = false;
1118 
1119   size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1120   force_overflow_conc()->init();
1121   set_phase(active_workers, true /* concurrent */);
1122 
1123   CMConcurrentMarkingTask markingTask(this, cmThread());
1124   if (parallel_marking_threads() > 0) {
1125     _parallel_workers->run_task(&markingTask);
1126   } else {
1127     markingTask.work(0);
1128   }
1129   print_stats();
1130 }
1131 
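     // The remark pause: finish marking at a safepoint, process weak
     // references, and either complete the marking cycle or schedule a
     // restart if the global mark stack overflowed.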
1132 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1133   // world is stopped at this checkpoint
1134   assert(SafepointSynchronize::is_at_safepoint(),
1135          "world should be stopped");
1136   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1137 
1138   // If a full collection has happened, we shouldn't do this.
1139   if (has_aborted()) {
1140     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1141     return;
1142   }
1143 
1144   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1145 
1146   if (VerifyDuringGC) {
1147     HandleMark hm;  // handle scope
1148     gclog_or_tty->print(" VerifyDuringGC:(before)");
1149     Universe::heap()->prepare_for_verify();
1150     Universe::verify(/* allow dirty */ true,
1151                      /* silent      */ false,
1152                      /* option      */ VerifyOption_G1UsePrevMarking);
1153   }
1154 
1155   G1CollectorPolicy* g1p = g1h->g1_policy();
1156   g1p->record_concurrent_mark_remark_start();
1157 
1158   double start = os::elapsedTime();
1159 
1160   checkpointRootsFinalWork();
1161 
1162   double mark_work_end = os::elapsedTime();
1163 
1164   weakRefsWork(clear_all_soft_refs);
1165 
1166   if (has_overflown()) {
1167     // Oops.  We overflowed.  Restart concurrent marking.
1168     _restart_for_overflow = true;
1169     // Clear the flag. We do not need it any more.
1170     clear_has_overflown();
1171     if (G1TraceMarkStackOverflow) {
1172       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1173     }
1174   } else {
1175     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1176     // We're done with marking.
1177     // This is the end of the marking cycle; we expect all
1178     // threads to have SATB queues with active set to true.
1179     satb_mq_set.set_active_all_threads(false, /* new active value */
1180                                        true /* expected_active */);
1181 
1182     if (VerifyDuringGC) {
1183       HandleMark hm;  // handle scope
1184       gclog_or_tty->print(" VerifyDuringGC:(after)");
1185       Universe::heap()->prepare_for_verify();
1186       Universe::verify(/* allow dirty */ true,
1187                        /* silent      */ false,
1188                        /* option      */ VerifyOption_G1UseNextMarking);
1189     }
1190     assert(!restart_for_overflow(), "sanity");
1191   }
1192 
1193   // Reset the marking state if marking completed
1194   if (!restart_for_overflow()) {
1195     set_non_marking_state();
1196   }
1197 
1198 #if VERIFY_OBJS_PROCESSED
1199   _scan_obj_cl.objs_processed = 0;
1200   ThreadLocalObjQueue::objs_enqueued = 0;
1201 #endif
1202 
1203   // Statistics
1204   double now = os::elapsedTime();
1205   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1206   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1207   _remark_times.add((now - start) * 1000.0);
1208 
1209   g1p->record_concurrent_mark_remark_end();
1210 }
1211 
1212 #define CARD_BM_TEST_MODE 0
1213 
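     // Walks heap regions and, using the mark bitmap, accumulates the
     // number of live bytes per region while setting the corresponding bits
     // in the region and card liveness bitmaps.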
1214 class CalcLiveObjectsClosure: public HeapRegionClosure {
1215 
1216   CMBitMapRO* _bm;
1217   ConcurrentMark* _cm;
1218   bool _changed;
1219   bool _yield;
1220   size_t _words_done;
1221   size_t _tot_live;
1222   size_t _tot_used;
1223   size_t _regions_done;
1224   double _start_vtime_sec;
1225 
1226   BitMap* _region_bm;
1227   BitMap* _card_bm;
1228   intptr_t _bottom_card_num;
1229   bool _final;
1230 
1231   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1232     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1233 #if CARD_BM_TEST_MODE
1234       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1235 #else
1236       _card_bm->par_at_put(i - _bottom_card_num, 1);
1237 #endif
1238     }
1239   }
1240 
1241 public:
1242   CalcLiveObjectsClosure(bool final,
1243                          CMBitMapRO *bm, ConcurrentMark *cm,
1244                          BitMap* region_bm, BitMap* card_bm) :
1245     _bm(bm), _cm(cm), _changed(false), _yield(true),
1246     _words_done(0), _tot_live(0), _tot_used(0),
1247     _region_bm(region_bm), _card_bm(card_bm),_final(final),
1248     _regions_done(0), _start_vtime_sec(0.0)
1249   {
1250     _bottom_card_num =
1251       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1252                CardTableModRefBS::card_shift);
1253   }
1254 
1255   // It takes a region that's not empty (i.e., it has at least one
1256   // live object in it) and sets its corresponding bit on the region
1257   // bitmap to 1. If the region is "starts humongous" it will also set
1258   // to 1 the bits on the region bitmap that correspond to its
1259   // associated "continues humongous" regions.
1260   void set_bit_for_region(HeapRegion* hr) {
1261     assert(!hr->continuesHumongous(), "should have filtered those out");
1262 
1263     size_t index = hr->hrs_index();
1264     if (!hr->startsHumongous()) {
1265       // Normal (non-humongous) case: just set the bit.
1266       _region_bm->par_at_put((BitMap::idx_t) index, true);
1267     } else {
1268       // Starts humongous case: calculate how many regions are part of
1269       // this humongous region and then set the bit range. It might
1270       // have been a bit more efficient to look at the object that
1271       // spans these humongous regions to calculate their number from
1272       // the object's size. However, it's a good idea to calculate
1273       // this based on the metadata itself, and not the region
1274       // contents, so that this code is not aware of what goes into
1275       // the humongous regions (in case this changes in the future).
1276       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1277       size_t end_index = index + 1;
1278       while (end_index < g1h->n_regions()) {
1279         HeapRegion* chr = g1h->region_at(end_index);
1280         if (!chr->continuesHumongous()) break;
1281         end_index += 1;
1282       }
1283       _region_bm->par_at_put_range((BitMap::idx_t) index,
1284                                    (BitMap::idx_t) end_index, true);
1285     }
1286   }
1287 
1288   bool doHeapRegion(HeapRegion* hr) {
1289     if (!_final && _regions_done == 0) {
1290       _start_vtime_sec = os::elapsedVTime();
1291     }
1292 
1293     if (hr->continuesHumongous()) {
1294       // We will ignore these here and process them when their
1295       // associated "starts humongous" region is processed (see
1296       // set_bit_for_region()). Note that we cannot rely on their
1297       // associated "starts humongous" region to have their bit set to
1298       // 1 since, due to the region chunking in the parallel region
1299       // iteration, a "continues humongous" region might be visited
1300       // before its associated "starts humongous".
1301       return false;
1302     }
1303 
1304     HeapWord* nextTop = hr->next_top_at_mark_start();
1305     HeapWord* start   = hr->top_at_conc_mark_count();
1306     assert(hr->bottom() <= start && start <= hr->end() &&
1307            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1308            start <= nextTop,
1309            "Preconditions.");
1310     // Record the number of words we'll examine.
1311     size_t words_done = (nextTop - start);
1312     // Find the first marked object at or after "start".
1313     start = _bm->getNextMarkedWordAddress(start, nextTop);
1314     size_t marked_bytes = 0;
1315 
1316     // Below, the term "card num" means the result of shifting an address
1317     // by the card shift -- address 0 corresponds to card number 0.  One
1318     // must subtract the card num of the bottom of the heap to obtain a
1319     // card table index.
1320     // The first card num of the sequence of live cards currently being
1321     // constructed.  -1 ==> no sequence.
1322     intptr_t start_card_num = -1;
1323     // The last card num of the sequence of live cards currently being
1324     // constructed.  -1 ==> no sequence.
1325     intptr_t last_card_num = -1;
1326 
1327     while (start < nextTop) {
1328       if (_yield && _cm->do_yield_check()) {
1329         // We yielded.  It might be for a full collection, in which case
1330         // all bets are off; terminate the traversal.
1331         if (_cm->has_aborted()) {
1332           _changed = false;
1333           return true;
1334         } else {
1335           // Otherwise, it might be a collection pause, and the region
1336           // we're looking at might be in the collection set.  We'll
1337           // abandon this region.
1338           return false;
1339         }
1340       }
1341       oop obj = oop(start);
1342       int obj_sz = obj->size();
1343       // The card num of the start of the current object.
1344       intptr_t obj_card_num =
1345         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1346 
1347       HeapWord* obj_last = start + obj_sz - 1;
1348       intptr_t obj_last_card_num =
1349         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1350 
1351       if (obj_card_num != last_card_num) {
1352         if (start_card_num == -1) {
1353           assert(last_card_num == -1, "Both or neither.");
1354           start_card_num = obj_card_num;
1355         } else {
1356           assert(last_card_num != -1, "Both or neither.");
1357           assert(obj_card_num >= last_card_num, "Inv");
1358           if ((obj_card_num - last_card_num) > 1) {
1359             // Mark the last run, and start a new one.
1360             mark_card_num_range(start_card_num, last_card_num);
1361             start_card_num = obj_card_num;
1362           }
1363         }
1364 #if CARD_BM_TEST_MODE
1365         /*
1366         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1367                                obj_card_num - _bottom_card_num,
1368                                obj_last_card_num - _bottom_card_num);
1369         */
1370         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1371           _card_bm->par_at_put(j - _bottom_card_num, 1);
1372         }
1373 #endif
1374       }
1375       // In any case, we set the last card num.
1376       last_card_num = obj_last_card_num;
1377 
1378       marked_bytes += (size_t)obj_sz * HeapWordSize;
1379       // Find the next marked object after this one.
1380       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1381       _changed = true;
1382     }
1383     // Handle the last range, if any.
1384     if (start_card_num != -1) {
1385       mark_card_num_range(start_card_num, last_card_num);
1386     }
1387     if (_final) {
1388       // Mark the allocated-since-marking portion...
1389       HeapWord* tp = hr->top();
1390       if (nextTop < tp) {
1391         start_card_num =
1392           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1393         last_card_num =
1394           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1395         mark_card_num_range(start_card_num, last_card_num);
1396         // This definitely means the region has live objects.
1397         set_bit_for_region(hr);
1398       }
1399     }
1400 
1401     hr->add_to_marked_bytes(marked_bytes);
1402     // Update the live region bitmap.
1403     if (marked_bytes > 0) {
1404       set_bit_for_region(hr);
1405     }
1406     hr->set_top_at_conc_mark_count(nextTop);
1407     _tot_live += hr->next_live_bytes();
1408     _tot_used += hr->used();
1409     _words_done = words_done;
1410 
1411     if (!_final) {
1412       ++_regions_done;
1413       if (_regions_done % 10 == 0) {
1414         double end_vtime_sec = os::elapsedVTime();
1415         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1416         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1417           jlong sleep_time_ms =
1418             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1419           os::sleep(Thread::current(), sleep_time_ms, false);
1420           _start_vtime_sec = end_vtime_sec;
1421         }
1422       }
1423     }
1424 
1425     return false;
1426   }
1427 
1428   bool changed() { return _changed;  }
1429   void reset()   { _changed = false; _words_done = 0; }
1430   void no_yield() { _yield = false; }
1431   size_t words_done() { return _words_done; }
1432   size_t tot_live() { return _tot_live; }
1433   size_t tot_used() { return _tot_used; }
1434 };
1435 
1436 
1437 void ConcurrentMark::calcDesiredRegions() {
1438   _region_bm.clear();
1439   _card_bm.clear();
1440   CalcLiveObjectsClosure calccl(false /*final*/,
1441                                 nextMarkBitMap(), this,
1442                                 &_region_bm, &_card_bm);
1443   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1444   g1h->heap_region_iterate(&calccl);
1445 
1446   do {
1447     calccl.reset();
1448     g1h->heap_region_iterate(&calccl);
1449   } while (calccl.changed());
1450 }
1451 
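     // Parallel task that runs CalcLiveObjectsClosure in "final" mode over
     // (chunks of) the heap and gathers per-worker live and used byte
     // totals.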
1452 class G1ParFinalCountTask: public AbstractGangTask {
1453 protected:
1454   G1CollectedHeap* _g1h;
1455   CMBitMap* _bm;
1456   size_t _n_workers;
1457   size_t *_live_bytes;
1458   size_t *_used_bytes;
1459   BitMap* _region_bm;
1460   BitMap* _card_bm;
1461 public:
1462   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1463                       BitMap* region_bm, BitMap* card_bm)
1464     : AbstractGangTask("G1 final counting"), _g1h(g1h),
1465       _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
1466     if (ParallelGCThreads > 0) {
1467       _n_workers = _g1h->workers()->total_workers();
1468     } else {
1469       _n_workers = 1;
1470     }
1471     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1472     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1473   }
1474 
1475   ~G1ParFinalCountTask() {
1476     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1477     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1478   }
1479 
1480   void work(int i) {
1481     CalcLiveObjectsClosure calccl(true /*final*/,
1482                                   _bm, _g1h->concurrent_mark(),
1483                                   _region_bm, _card_bm);
1484     calccl.no_yield();
1485     if (G1CollectedHeap::use_parallel_gc_threads()) {
1486       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1487                                             HeapRegion::FinalCountClaimValue);
1488     } else {
1489       _g1h->heap_region_iterate(&calccl);
1490     }
1491     assert(calccl.complete(), "Shouldn't have yielded!");
1492 
1493     assert((size_t) i < _n_workers, "invariant");
1494     _live_bytes[i] = calccl.tot_live();
1495     _used_bytes[i] = calccl.tot_used();
1496   }
1497   size_t live_bytes()  {
1498     size_t live_bytes = 0;
1499     for (size_t i = 0; i < _n_workers; ++i)
1500       live_bytes += _live_bytes[i];
1501     return live_bytes;
1502   }
1503   size_t used_bytes()  {
1504     size_t used_bytes = 0;
1505     for (size_t i = 0; i < _n_workers; ++i)
1506       used_bytes += _used_bytes[i];
1507     return used_bytes;
1508   }
1509 };
1510 
1511 class G1ParNoteEndTask;
1512 
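     // Per-worker closure applied to the heap regions at the end of marking.
     // For each region it notes the end of marking, accumulates the maximum
     // live bytes, and frees regions that turn out to be completely empty,
     // collecting them in a worker-local cleanup list.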
1513 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1514   G1CollectedHeap* _g1;
1515   int _worker_num;
1516   size_t _max_live_bytes;
1517   size_t _regions_claimed;
1518   size_t _freed_bytes;
1519   FreeRegionList* _local_cleanup_list;
1520   HumongousRegionSet* _humongous_proxy_set;
1521   HRRSCleanupTask* _hrrs_cleanup_task;
1522   double _claimed_region_time;
1523   double _max_region_time;
1524 
1525 public:
1526   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1527                              int worker_num,
1528                              FreeRegionList* local_cleanup_list,
1529                              HumongousRegionSet* humongous_proxy_set,
1530                              HRRSCleanupTask* hrrs_cleanup_task);
1531   size_t freed_bytes() { return _freed_bytes; }
1532 
1533   bool doHeapRegion(HeapRegion *r);
1534 
1535   size_t max_live_bytes() { return _max_live_bytes; }
1536   size_t regions_claimed() { return _regions_claimed; }
1537   double claimed_region_time_sec() { return _claimed_region_time; }
1538   double max_region_time_sec() { return _max_region_time; }
1539 };
1540 
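     // Gang task that applies G1NoteEndOfConcMarkClosure to the heap regions
     // in parallel and then, under a lock, merges the per-worker totals and
     // appends the locally freed regions to the global cleanup list.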
1541 class G1ParNoteEndTask: public AbstractGangTask {
1542   friend class G1NoteEndOfConcMarkClosure;
1543 
1544 protected:
1545   G1CollectedHeap* _g1h;
1546   size_t _max_live_bytes;
1547   size_t _freed_bytes;
1548   FreeRegionList* _cleanup_list;
1549 
1550 public:
1551   G1ParNoteEndTask(G1CollectedHeap* g1h,
1552                    FreeRegionList* cleanup_list) :
1553     AbstractGangTask("G1 note end"), _g1h(g1h),
1554     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1555 
1556   void work(int i) {
1557     double start = os::elapsedTime();
1558     FreeRegionList local_cleanup_list("Local Cleanup List");
1559     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1560     HRRSCleanupTask hrrs_cleanup_task;
1561     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1562                                            &humongous_proxy_set,
1563                                            &hrrs_cleanup_task);
1564     if (G1CollectedHeap::use_parallel_gc_threads()) {
1565       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1566                                             HeapRegion::NoteEndClaimValue);
1567     } else {
1568       _g1h->heap_region_iterate(&g1_note_end);
1569     }
1570     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1571 
1572     // Now update the lists
1573     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1574                                             NULL /* free_list */,
1575                                             &humongous_proxy_set,
1576                                             true /* par */);
1577     {
1578       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1579       _max_live_bytes += g1_note_end.max_live_bytes();
1580       _freed_bytes += g1_note_end.freed_bytes();
1581 
1582       // If we iterated over the global cleanup list at the end of
1583       // cleanup to do this printing, we could not guarantee to only
1584       // generate output for the newly-reclaimed regions (the list
1585       // might not be empty at the beginning of cleanup; we might
1586       // still be working on its previous contents). So we do the
1587       // printing here, before we append the new regions to the global
1588       // cleanup list.
1589 
1590       G1HRPrinter* hr_printer = _g1h->hr_printer();
1591       if (hr_printer->is_active()) {
1592         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1593         while (iter.more_available()) {
1594           HeapRegion* hr = iter.get_next();
1595           hr_printer->cleanup(hr);
1596         }
1597       }
1598 
1599       _cleanup_list->add_as_tail(&local_cleanup_list);
1600       assert(local_cleanup_list.is_empty(), "post-condition");
1601 
1602       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1603     }
1604     double end = os::elapsedTime();
1605     if (G1PrintParCleanupStats) {
1606       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1607                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1608                           i, start, end, (end-start)*1000.0,
1609                           g1_note_end.regions_claimed(),
1610                           g1_note_end.claimed_region_time_sec()*1000.0,
1611                           g1_note_end.max_region_time_sec()*1000.0);
1612     }
1613   }
1614   size_t max_live_bytes() { return _max_live_bytes; }
1615   size_t freed_bytes() { return _freed_bytes; }
1616 };
1617 
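     // Gang task that scrubs the remembered sets in parallel, using the
     // region and card liveness bitmaps produced by the counting phase.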
1618 class G1ParScrubRemSetTask: public AbstractGangTask {
1619 protected:
1620   G1RemSet* _g1rs;
1621   BitMap* _region_bm;
1622   BitMap* _card_bm;
1623 public:
1624   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1625                        BitMap* region_bm, BitMap* card_bm) :
1626     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1627     _region_bm(region_bm), _card_bm(card_bm)
1628   {}
1629 
1630   void work(int i) {
1631     if (G1CollectedHeap::use_parallel_gc_threads()) {
1632       _g1rs->scrub_par(_region_bm, _card_bm, i,
1633                        HeapRegion::ScrubRemSetClaimValue);
1634     } else {
1635       _g1rs->scrub(_region_bm, _card_bm);
1636     }
1637   }
1638 
1639 };
1640 
1641 G1NoteEndOfConcMarkClosure::
1642 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1643                            int worker_num,
1644                            FreeRegionList* local_cleanup_list,
1645                            HumongousRegionSet* humongous_proxy_set,
1646                            HRRSCleanupTask* hrrs_cleanup_task)
1647   : _g1(g1), _worker_num(worker_num),
1648     _max_live_bytes(0), _regions_claimed(0),
1649     _freed_bytes(0),
1650     _claimed_region_time(0.0), _max_region_time(0.0),
1651     _local_cleanup_list(local_cleanup_list),
1652     _humongous_proxy_set(humongous_proxy_set),
1653     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1654 
1655 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
1656   // We use a claim value of zero here because all regions
1657   // were claimed with value 1 in the FinalCount task.
1658   hr->reset_gc_time_stamp();
1659   if (!hr->continuesHumongous()) {
1660     double start = os::elapsedTime();
1661     _regions_claimed++;
1662     hr->note_end_of_marking();
1663     _max_live_bytes += hr->max_live_bytes();
1664     _g1->free_region_if_empty(hr,
1665                               &_freed_bytes,
1666                               _local_cleanup_list,
1667                               _humongous_proxy_set,
1668                               _hrrs_cleanup_task,
1669                               true /* par */);
1670     double region_time = (os::elapsedTime() - start);
1671     _claimed_region_time += region_time;
1672     if (region_time > _max_region_time) {
1673       _max_region_time = region_time;
1674     }
1675   }
1676   return false;
1677 }
1678 
1679 void ConcurrentMark::cleanup() {
1680   // world is stopped at this checkpoint
1681   assert(SafepointSynchronize::is_at_safepoint(),
1682          "world should be stopped");
1683   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1684 
1685   // If a full collection has happened, we shouldn't do this.
1686   if (has_aborted()) {
1687     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1688     return;
1689   }
1690 
1691   g1h->verify_region_sets_optional();
1692 
1693   if (VerifyDuringGC) {
1694     HandleMark hm;  // handle scope
1695     gclog_or_tty->print(" VerifyDuringGC:(before)");
1696     Universe::heap()->prepare_for_verify();
1697     Universe::verify(/* allow dirty */ true,
1698                      /* silent      */ false,
1699                      /* option      */ VerifyOption_G1UsePrevMarking);
1700   }
1701 
1702   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1703   g1p->record_concurrent_mark_cleanup_start();
1704 
1705   double start = os::elapsedTime();
1706 
1707   HeapRegionRemSet::reset_for_cleanup_tasks();
1708 
1709   // Do counting once more with the world stopped for good measure.
1710   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1711                                         &_region_bm, &_card_bm);
1712   if (G1CollectedHeap::use_parallel_gc_threads()) {
1713     assert(g1h->check_heap_region_claim_values(
1714                                                HeapRegion::InitialClaimValue),
1715            "sanity check");
1716 
1717     int n_workers = g1h->workers()->total_workers();
1718     g1h->set_par_threads(n_workers);
1719     g1h->workers()->run_task(&g1_par_count_task);
1720     g1h->set_par_threads(0);
1721 
1722     assert(g1h->check_heap_region_claim_values(
1723                                              HeapRegion::FinalCountClaimValue),
1724            "sanity check");
1725   } else {
1726     g1_par_count_task.work(0);
1727   }
1728 
1729   size_t known_garbage_bytes =
1730     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1731   g1p->set_known_garbage_bytes(known_garbage_bytes);
1732 
1733   size_t start_used_bytes = g1h->used();
1734   _at_least_one_mark_complete = true;
1735   g1h->set_marking_complete();
1736 
1737   ergo_verbose4(ErgoConcCycles,
1738            "finish cleanup",
1739            ergo_format_byte("occupancy")
1740            ergo_format_byte("capacity")
1741            ergo_format_byte_perc("known garbage"),
1742            start_used_bytes, g1h->capacity(),
1743            known_garbage_bytes,
1744            ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
1745 
1746   double count_end = os::elapsedTime();
1747   double this_final_counting_time = (count_end - start);
1748   if (G1PrintParCleanupStats) {
1749     gclog_or_tty->print_cr("Cleanup:");
1750     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1751                            this_final_counting_time*1000.0);
1752   }
1753   _total_counting_time += this_final_counting_time;
1754 
1755   if (G1PrintRegionLivenessInfo) {
1756     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1757     _g1h->heap_region_iterate(&cl);
1758   }
1759 
1760   // Install newly created mark bitMap as "prev".
1761   swapMarkBitMaps();
1762 
1763   g1h->reset_gc_time_stamp();
1764 
1765   // Note end of marking in all heap regions.
1766   double note_end_start = os::elapsedTime();
1767   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1768   if (G1CollectedHeap::use_parallel_gc_threads()) {
1769     int n_workers = g1h->workers()->total_workers();
1770     g1h->set_par_threads(n_workers);
1771     g1h->workers()->run_task(&g1_par_note_end_task);
1772     g1h->set_par_threads(0);
1773 
1774     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1775            "sanity check");
1776   } else {
1777     g1_par_note_end_task.work(0);
1778   }
1779 
1780   if (!cleanup_list_is_empty()) {
1781     // The cleanup list is not empty, so we'll have to process it
1782     // concurrently. Notify anyone else that might be wanting free
1783     // regions that there will be more free regions coming soon.
1784     g1h->set_free_regions_coming();
1785   }
1786   double note_end_end = os::elapsedTime();
1787   if (G1PrintParCleanupStats) {
1788     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1789                            (note_end_end - note_end_start)*1000.0);
1790   }
1791 
1792 
1793   // The rem set scrubbing must come before record_concurrent_mark_cleanup_end()
1794   // below, since it affects the metric by which we sort the heap regions.
1795   if (G1ScrubRemSets) {
1796     double rs_scrub_start = os::elapsedTime();
1797     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1798     if (G1CollectedHeap::use_parallel_gc_threads()) {
1799       int n_workers = g1h->workers()->total_workers();
1800       g1h->set_par_threads(n_workers);
1801       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1802       g1h->set_par_threads(0);
1803 
1804       assert(g1h->check_heap_region_claim_values(
1805                                             HeapRegion::ScrubRemSetClaimValue),
1806              "sanity check");
1807     } else {
1808       g1_par_scrub_rs_task.work(0);
1809     }
1810 
1811     double rs_scrub_end = os::elapsedTime();
1812     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1813     _total_rs_scrub_time += this_rs_scrub_time;
1814   }
1815 
1816   // this will also free any regions totally full of garbage objects,
1817   // and sort the regions.
1818   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1819                         g1_par_note_end_task.freed_bytes(),
1820                         g1_par_note_end_task.max_live_bytes());
1821 
1822   // Statistics.
1823   double end = os::elapsedTime();
1824   _cleanup_times.add((end - start) * 1000.0);
1825 
1826   // G1CollectedHeap::heap()->print();
1827   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1828   // G1CollectedHeap::heap()->get_gc_time_stamp());
1829 
1830   if (PrintGC || PrintGCDetails) {
1831     g1h->print_size_transition(gclog_or_tty,
1832                                start_used_bytes,
1833                                g1h->used(),
1834                                g1h->capacity());
1835   }
1836 
1837   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1838   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1839 
1840   // We need to make this a "collection" so that any collection pause that
1841   // races with it goes around and waits for completeCleanup to finish.
1842   g1h->increment_total_collections();
1843 
1844   if (VerifyDuringGC) {
1845     HandleMark hm;  // handle scope
1846     gclog_or_tty->print(" VerifyDuringGC:(after)");
1847     Universe::heap()->prepare_for_verify();
1848     Universe::verify(/* allow dirty */ true,
1849                      /* silent      */ false,
1850                      /* option      */ VerifyOption_G1UsePrevMarking);
1851   }
1852 
1853   g1h->verify_region_sets_optional();
1854 }
1855 
1856 void ConcurrentMark::completeCleanup() {
1857   if (has_aborted()) return;
1858 
1859   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1860 
1861   _cleanup_list.verify_optional();
1862   FreeRegionList tmp_free_list("Tmp Free List");
1863 
1864   if (G1ConcRegionFreeingVerbose) {
1865     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1866                            "cleanup list has "SIZE_FORMAT" entries",
1867                            _cleanup_list.length());
1868   }
1869 
1870   // No one else should be accessing the _cleanup_list at this point,
1871   // so it's not necessary to take any locks.
1872   while (!_cleanup_list.is_empty()) {
1873     HeapRegion* hr = _cleanup_list.remove_head();
1874     assert(hr != NULL, "the list was not empty");
1875     hr->par_clear();
1876     tmp_free_list.add_as_tail(hr);
1877 
1878     // Instead of adding one region at a time to the secondary_free_list,
1879     // we accumulate them in the local list and move them a few at a
1880     // time. This also cuts down on the number of notify_all() calls
1881     // we do during this process. We'll also append the local list when
1882     // _cleanup_list is empty (which means we just removed the last
1883     // region from the _cleanup_list).
1884     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1885         _cleanup_list.is_empty()) {
1886       if (G1ConcRegionFreeingVerbose) {
1887         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1888                                "appending "SIZE_FORMAT" entries to the "
1889                                "secondary_free_list, clean list still has "
1890                                SIZE_FORMAT" entries",
1891                                tmp_free_list.length(),
1892                                _cleanup_list.length());
1893       }
1894 
1895       {
1896         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1897         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1898         SecondaryFreeList_lock->notify_all();
1899       }
1900 
1901       if (G1StressConcRegionFreeing) {
1902         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1903           os::sleep(Thread::current(), (jlong) 1, false);
1904         }
1905       }
1906     }
1907   }
1908   assert(tmp_free_list.is_empty(), "post-condition");
1909 }
1910 
1911 // Support closures for reference processing in G1
1912 
1913 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1914   HeapWord* addr = (HeapWord*)obj;
1915   return addr != NULL &&
1916          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1917 }
1918 
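     // 'Keep Alive' closure used during reference processing: an object that
     // lies in the G1 heap and is not yet considered live is marked on the
     // supplied bitmap and pushed on to the global mark stack.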
1919 class G1CMKeepAliveClosure: public OopClosure {
1920   G1CollectedHeap* _g1;
1921   ConcurrentMark*  _cm;
1922   CMBitMap*        _bitMap;
1923  public:
1924   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1925                        CMBitMap* bitMap) :
1926     _g1(g1), _cm(cm),
1927     _bitMap(bitMap) {}
1928 
1929   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1930   virtual void do_oop(      oop* p) { do_oop_work(p); }
1931 
1932   template <class T> void do_oop_work(T* p) {
1933     oop obj = oopDesc::load_decode_heap_oop(p);
1934     HeapWord* addr = (HeapWord*)obj;
1935 
1936     if (_cm->verbose_high()) {
1937       gclog_or_tty->print_cr("\t[0] we're looking at location "
1938                              "*"PTR_FORMAT" = "PTR_FORMAT,
1939                              p, (void*) obj);
1940     }
1941 
1942     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
1943       _bitMap->mark(addr);
1944       _cm->mark_stack_push(obj);
1945     }
1946   }
1947 };
1948 
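     // Drains the global mark stack, applying the keep-alive closure above to
     // the entries that are popped off it.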
1949 class G1CMDrainMarkingStackClosure: public VoidClosure {
1950   CMMarkStack*                  _markStack;
1951   CMBitMap*                     _bitMap;
1952   G1CMKeepAliveClosure*         _oopClosure;
1953  public:
1954   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1955                                G1CMKeepAliveClosure* oopClosure) :
1956     _bitMap(bitMap),
1957     _markStack(markStack),
1958     _oopClosure(oopClosure)
1959   {}
1960 
1961   void do_void() {
1962     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1963   }
1964 };
1965 
1966 // 'Keep Alive' closure used by parallel reference processing.
1967 // An instance of this closure is used in the parallel reference processing
1968 // code rather than an instance of G1CMKeepAliveClosure. We could have used
1969 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
1970 // only placed on the discovered ref lists once, so we can mark and push with
1971 // no need to check whether the object has already been marked. Using the
1972 // G1CMKeepAliveClosure would mean, however, having all the worker threads
1973 // operating on the global mark stack. This means that an individual
1974 // worker would be doing lock-free pushes while it processes its own
1975 // discovered ref list followed by drain call. If the discovered ref lists
1976 // are unbalanced then this could cause interference with the other
1977 // workers. Using a CMTask (and its embedded local data structures)
1978 // avoids that potential interference.
1979 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
1980   ConcurrentMark*  _cm;
1981   CMTask*          _task;
1982   CMBitMap*        _bitMap;
1983   int              _ref_counter_limit;
1984   int              _ref_counter;
1985  public:
1986   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
1987                                   CMTask* task,
1988                                   CMBitMap* bitMap) :
1989     _cm(cm), _task(task), _bitMap(bitMap),
1990     _ref_counter_limit(G1RefProcDrainInterval)
1991   {
1992     assert(_ref_counter_limit > 0, "sanity");
1993     _ref_counter = _ref_counter_limit;
1994   }
1995 
1996   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1997   virtual void do_oop(      oop* p) { do_oop_work(p); }
1998 
1999   template <class T> void do_oop_work(T* p) {
2000     if (!_cm->has_overflown()) {
2001       oop obj = oopDesc::load_decode_heap_oop(p);
2002       if (_cm->verbose_high()) {
2003         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2004                                "*"PTR_FORMAT" = "PTR_FORMAT,
2005                                _task->task_id(), p, (void*) obj);
2006       }
2007 
2008       _task->deal_with_reference(obj);
2009       _ref_counter--;
2010 
2011       if (_ref_counter == 0) {
2012         // We have dealt with _ref_counter_limit references, pushing them and objects
2013         // reachable from them on to the local stack (and possibly the global stack).
2014         // Call do_marking_step() to process these entries. We call the routine in a
2015         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2016         // with the entries that we've pushed as a result of the deal_with_reference
2017         // calls above) or we overflow.
2018         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2019         // while there may still be some work to do. (See the comment at the
2020         // beginning of CMTask::do_marking_step() for those conditions - one of which
2021         // is reaching the specified time target.) It is only when
2022         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2023         // that the marking has completed.
2024         do {
2025           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2026           _task->do_marking_step(mark_step_duration_ms,
2027                                  false /* do_stealing    */,
2028                                  false /* do_termination */);
2029         } while (_task->has_aborted() && !_cm->has_overflown());
2030         _ref_counter = _ref_counter_limit;
2031       }
2032     } else {
2033       if (_cm->verbose_high()) {
2034          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2035       }
2036     }
2037   }
2038 };
2039 
2040 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2041   ConcurrentMark* _cm;
2042   CMTask* _task;
2043  public:
2044   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2045     _cm(cm), _task(task)
2046   {}
2047 
2048   void do_void() {
2049     do {
2050       if (_cm->verbose_high()) {
2051         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2052                                _task->task_id());
2053       }
2054 
2055       // We call CMTask::do_marking_step() to completely drain the local and
2056       // global marking stacks. The routine is called in a loop, which we'll
2057       // exit if there's nothing more to do (i.e. we've completely drained the
2058       // entries that were pushed as a result of applying the
2059       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2060       // lists above) or we overflow the global marking stack.
2061       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2062       // while there may still be some work to do. (See the comment at the
2063       // beginning of CMTask::do_marking_step() for those conditions - one of which
2064       // is reaching the specified time target.) It is only when
2065       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2066       // that the marking has completed.
2067 
2068       _task->do_marking_step(1000000000.0 /* something very large */,
2069                              true /* do_stealing    */,
2070                              true /* do_termination */);
2071     } while (_task->has_aborted() && !_cm->has_overflown());
2072   }
2073 };
2074 
2075 // Implementation of AbstractRefProcTaskExecutor for G1
2076 class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2077 private:
2078   G1CollectedHeap* _g1h;
2079   ConcurrentMark*  _cm;
2080   CMBitMap*        _bitmap;
2081   WorkGang*        _workers;
2082   int              _active_workers;
2083 
2084 public:
2085   G1RefProcTaskExecutor(G1CollectedHeap* g1h,
2086                         ConcurrentMark* cm,
2087                         CMBitMap* bitmap,
2088                         WorkGang* workers,
2089                         int n_workers) :
2090     _g1h(g1h), _cm(cm), _bitmap(bitmap),
2091     _workers(workers), _active_workers(n_workers)
2092   { }
2093 
2094   // Executes the given task using concurrent marking worker threads.
2095   virtual void execute(ProcessTask& task);
2096   virtual void execute(EnqueueTask& task);
2097 };
2098 
2099 class G1RefProcTaskProxy: public AbstractGangTask {
2100   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2101   ProcessTask&     _proc_task;
2102   G1CollectedHeap* _g1h;
2103   ConcurrentMark*  _cm;
2104   CMBitMap*        _bitmap;
2105 
2106 public:
2107   G1RefProcTaskProxy(ProcessTask& proc_task,
2108                      G1CollectedHeap* g1h,
2109                      ConcurrentMark* cm,
2110                      CMBitMap* bitmap) :
2111     AbstractGangTask("Process reference objects in parallel"),
2112     _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
2113   {}
2114 
2115   virtual void work(int i) {
2116     CMTask* marking_task = _cm->task(i);
2117     G1CMIsAliveClosure g1_is_alive(_g1h);
2118     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
2119     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2120 
2121     _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2122   }
2123 };
2124 
2125 void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) {
2126   assert(_workers != NULL, "Need parallel worker threads.");
2127 
2128   G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2129 
2130   // We need to reset the phase for each task execution so that
2131   // the termination protocol of CMTask::do_marking_step works.
2132   _cm->set_phase(_active_workers, false /* concurrent */);
2133   _g1h->set_par_threads(_active_workers);
2134   _workers->run_task(&proc_task_proxy);
2135   _g1h->set_par_threads(0);
2136 }
2137 
2138 class G1RefEnqueueTaskProxy: public AbstractGangTask {
2139   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2140   EnqueueTask& _enq_task;
2141 
2142 public:
2143   G1RefEnqueueTaskProxy(EnqueueTask& enq_task) :
2144     AbstractGangTask("Enqueue reference objects in parallel"),
2145     _enq_task(enq_task)
2146   { }
2147 
2148   virtual void work(int i) {
2149     _enq_task.work(i);
2150   }
2151 };
2152 
2153 void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2154   assert(_workers != NULL, "Need parallel worker threads.");
2155 
2156   G1RefEnqueueTaskProxy enq_task_proxy(enq_task);
2157 
2158   _g1h->set_par_threads(_active_workers);
2159   _workers->run_task(&enq_task_proxy);
2160   _g1h->set_par_threads(0);
2161 }
2162 
2163 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2164   ResourceMark rm;
2165   HandleMark   hm;
2166   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
2167   ReferenceProcessor* rp = g1h->ref_processor();
2168 
2169   // See the comment in G1CollectedHeap::ref_processing_init()
2170   // about how reference processing currently works in G1.
2171 
2172   // Process weak references.
2173   rp->setup_policy(clear_all_soft_refs);
2174   assert(_markStack.isEmpty(), "mark stack should be empty");
2175 
2176   G1CMIsAliveClosure   g1_is_alive(g1h);
2177   G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2178   G1CMDrainMarkingStackClosure
2179     g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2180   // We use the work gang from the G1CollectedHeap and we utilize all
2181   // the worker threads.
2182   int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
2183   active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2184 
2185   G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2186                                           g1h->workers(), active_workers);
2187 
2188 
2189   if (rp->processing_is_mt()) {
2190     // Set the degree of MT here.  If the discovery is done MT, there
2191     // may have been a different number of threads doing the discovery
2192     // and a different number of discovered lists may have Ref objects.
2193     // That is OK as long as the Reference lists are balanced (see
2194     // balance_all_queues() and balance_queues()).
2195     rp->set_active_mt_degree(active_workers);
2196 
2197     rp->process_discovered_references(&g1_is_alive,
2198                                       &g1_keep_alive,
2199                                       &g1_drain_mark_stack,
2200                                       &par_task_executor);
2201 
2202     // The work routines of the parallel keep_alive and drain_marking_stack
2203     // will set the has_overflown flag if we overflow the global marking
2204     // stack.
2205   } else {
2206     rp->process_discovered_references(&g1_is_alive,
2207                                       &g1_keep_alive,
2208                                       &g1_drain_mark_stack,
2209                                       NULL);
2210 
2211   }
2212 
2213   assert(_markStack.overflow() || _markStack.isEmpty(),
2214       "mark stack should be empty (unless it overflowed)");
2215   if (_markStack.overflow()) {
2216     // Should have been done already when we tried to push an
2217     // entry on to the global mark stack. But let's do it again.
2218     set_has_overflown();
2219   }
2220 
2221   if (rp->processing_is_mt()) {
2222     assert(rp->num_q() == active_workers, "why not");
2223     rp->enqueue_discovered_references(&par_task_executor);
2224   } else {
2225     rp->enqueue_discovered_references();
2226   }
2227 
2228   rp->verify_no_references_recorded();
2229   assert(!rp->discovery_enabled(), "should have been disabled");
2230 
2231   // Now clean up stale oops in StringTable
2232   StringTable::unlink(&g1_is_alive);
2233   // Clean up unreferenced symbols in symbol table.
2234   SymbolTable::unlink();
2235 }
2236 
2237 void ConcurrentMark::swapMarkBitMaps() {
2238   CMBitMapRO* temp = _prevMarkBitMap;
2239   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2240   _nextMarkBitMap  = (CMBitMap*)  temp;
2241 }
2242 
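     // Gang task used during remark: each active worker runs its CMTask's
     // do_marking_step() (with stealing and termination enabled) in a loop
     // until the task either completes or the global mark stack overflows.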
2243 class CMRemarkTask: public AbstractGangTask {
2244 private:
2245   ConcurrentMark *_cm;
2246 
2247 public:
2248   void work(int worker_i) {
2249     // Since all available tasks are actually started, we should
2250     // only proceed if we're supposed to be active.
2251     if ((size_t)worker_i < _cm->active_tasks()) {
2252       CMTask* task = _cm->task(worker_i);
2253       task->record_start_time();
2254       do {
2255         task->do_marking_step(1000000000.0 /* something very large */,
2256                               true /* do_stealing    */,
2257                               true /* do_termination */);
2258       } while (task->has_aborted() && !_cm->has_overflown());
2259       // If we overflow, then we do not want to restart. We instead
2260       // want to abort remark and do concurrent marking again.
2261       task->record_end_time();
2262     }
2263   }
2264 
2265   CMRemarkTask(ConcurrentMark* cm) :
2266     AbstractGangTask("Par Remark"), _cm(cm) { }
2267 };
2268 
2269 void ConcurrentMark::checkpointRootsFinalWork() {
2270   ResourceMark rm;
2271   HandleMark   hm;
2272   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2273 
2274   g1h->ensure_parsability(false);
2275 
2276   if (G1CollectedHeap::use_parallel_gc_threads()) {
2277     G1CollectedHeap::StrongRootsScope srs(g1h);
2278     // this is remark, so we'll use up all available threads
2279     int active_workers = ParallelGCThreads;
2280     set_phase(active_workers, false /* concurrent */);
2281 
2282     CMRemarkTask remarkTask(this);
2283     // We will start all available threads, even if we decide that the
2284     // active_workers will be fewer. The extra ones will just bail out
2285     // immediately.
2286     int n_workers = g1h->workers()->total_workers();
2287     g1h->set_par_threads(n_workers);
2288     g1h->workers()->run_task(&remarkTask);
2289     g1h->set_par_threads(0);
2290   } else {
2291     G1CollectedHeap::StrongRootsScope srs(g1h);
2292     // this is remark, so we'll use up all available threads
2293     int active_workers = 1;
2294     set_phase(active_workers, false /* concurrent */);
2295 
2296     CMRemarkTask remarkTask(this);
2297     // We will start all available threads, even if we decide that the
2298     // active_workers will be fewer. The extra ones will just bail out
2299     // immediately.
2300     remarkTask.work(0);
2301   }
2302   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2303   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2304 
2305   print_stats();
2306 
2307 #if VERIFY_OBJS_PROCESSED
2308   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2309     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2310                            _scan_obj_cl.objs_processed,
2311                            ThreadLocalObjQueue::objs_enqueued);
2312     guarantee(_scan_obj_cl.objs_processed ==
2313               ThreadLocalObjQueue::objs_enqueued,
2314               "Different number of objs processed and enqueued.");
2315   }
2316 #endif
2317 }
2318 
2319 #ifndef PRODUCT
2320 
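     // Debug-only (non-PRODUCT) closures used by print_reachable() below to
     // dump reachable objects, their fields, and their marking / TAMS status
     // to a file.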
2321 class PrintReachableOopClosure: public OopClosure {
2322 private:
2323   G1CollectedHeap* _g1h;
2324   outputStream*    _out;
2325   VerifyOption     _vo;
2326   bool             _all;
2327 
2328 public:
2329   PrintReachableOopClosure(outputStream* out,
2330                            VerifyOption  vo,
2331                            bool          all) :
2332     _g1h(G1CollectedHeap::heap()),
2333     _out(out), _vo(vo), _all(all) { }
2334 
2335   void do_oop(narrowOop* p) { do_oop_work(p); }
2336   void do_oop(      oop* p) { do_oop_work(p); }
2337 
2338   template <class T> void do_oop_work(T* p) {
2339     oop         obj = oopDesc::load_decode_heap_oop(p);
2340     const char* str = NULL;
2341     const char* str2 = "";
2342 
2343     if (obj == NULL) {
2344       str = "";
2345     } else if (!_g1h->is_in_g1_reserved(obj)) {
2346       str = " O";
2347     } else {
2348       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2349       guarantee(hr != NULL, "invariant");
2350       bool over_tams = false;
2351       bool marked = false;
2352 
2353       switch (_vo) {
2354         case VerifyOption_G1UsePrevMarking:
2355           over_tams = hr->obj_allocated_since_prev_marking(obj);
2356           marked = _g1h->isMarkedPrev(obj);
2357           break;
2358         case VerifyOption_G1UseNextMarking:
2359           over_tams = hr->obj_allocated_since_next_marking(obj);
2360           marked = _g1h->isMarkedNext(obj);
2361           break;
2362         case VerifyOption_G1UseMarkWord:
2363           marked = obj->is_gc_marked();
2364           break;
2365         default:
2366           ShouldNotReachHere();
2367       }
2368 
2369       if (over_tams) {
2370         str = " >";
2371         if (marked) {
2372           str2 = " AND MARKED";
2373         }
2374       } else if (marked) {
2375         str = " M";
2376       } else {
2377         str = " NOT";
2378       }
2379     }
2380 
2381     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2382                    p, (void*) obj, str, str2);
2383   }
2384 };
2385 
2386 class PrintReachableObjectClosure : public ObjectClosure {
2387 private:
2388   G1CollectedHeap* _g1h;
2389   outputStream*    _out;
2390   VerifyOption     _vo;
2391   bool             _all;
2392   HeapRegion*      _hr;
2393 
2394 public:
2395   PrintReachableObjectClosure(outputStream* out,
2396                               VerifyOption  vo,
2397                               bool          all,
2398                               HeapRegion*   hr) :
2399     _g1h(G1CollectedHeap::heap()),
2400     _out(out), _vo(vo), _all(all), _hr(hr) { }
2401 
2402   void do_object(oop o) {
2403     bool over_tams = false;
2404     bool marked = false;
2405 
2406     switch (_vo) {
2407       case VerifyOption_G1UsePrevMarking:
2408         over_tams = _hr->obj_allocated_since_prev_marking(o);
2409         marked = _g1h->isMarkedPrev(o);
2410         break;
2411       case VerifyOption_G1UseNextMarking:
2412         over_tams = _hr->obj_allocated_since_next_marking(o);
2413         marked = _g1h->isMarkedNext(o);
2414         break;
2415       case VerifyOption_G1UseMarkWord:
2416         marked = o->is_gc_marked();
2417         break;
2418       default:
2419         ShouldNotReachHere();
2420     }
2421     bool print_it = _all || over_tams || marked;
2422 
2423     if (print_it) {
2424       _out->print_cr(" "PTR_FORMAT"%s",
2425                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2426       PrintReachableOopClosure oopCl(_out, _vo, _all);
2427       o->oop_iterate(&oopCl);
2428     }
2429   }
2430 };
2431 
2432 class PrintReachableRegionClosure : public HeapRegionClosure {
2433 private:
2434   outputStream* _out;
2435   VerifyOption  _vo;
2436   bool          _all;
2437 
2438 public:
2439   bool doHeapRegion(HeapRegion* hr) {
2440     HeapWord* b = hr->bottom();
2441     HeapWord* e = hr->end();
2442     HeapWord* t = hr->top();
2443     HeapWord* p = NULL;
2444 
2445     switch (_vo) {
2446       case VerifyOption_G1UsePrevMarking:
2447         p = hr->prev_top_at_mark_start();
2448         break;
2449       case VerifyOption_G1UseNextMarking:
2450         p = hr->next_top_at_mark_start();
2451         break;
2452       case VerifyOption_G1UseMarkWord:
2453         // When we are verifying marking using the mark word
2454         // TAMS has no relevance.
2455         assert(p == NULL, "post-condition");
2456         break;
2457       default:
2458         ShouldNotReachHere();
2459     }
2460     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2461                    "TAMS: "PTR_FORMAT, b, e, t, p);
2462     _out->cr();
2463 
2464     HeapWord* from = b;
2465     HeapWord* to   = t;
2466 
2467     if (to > from) {
2468       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2469       _out->cr();
2470       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2471       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2472       _out->cr();
2473     }
2474 
2475     return false;
2476   }
2477 
2478   PrintReachableRegionClosure(outputStream* out,
2479                               VerifyOption  vo,
2480                               bool          all) :
2481     _out(out), _vo(vo), _all(all) { }
2482 };
2483 
2484 static const char* verify_option_to_tams(VerifyOption vo) {
2485   switch (vo) {
2486     case VerifyOption_G1UsePrevMarking:
2487       return "PTAMS";
2488     case VerifyOption_G1UseNextMarking:
2489       return "NTAMS";
2490     default:
2491       return "NONE";
2492   }
2493 }
2494 
2495 void ConcurrentMark::print_reachable(const char* str,
2496                                      VerifyOption vo,
2497                                      bool all) {
2498   gclog_or_tty->cr();
2499   gclog_or_tty->print_cr("== Doing heap dump... ");
2500 
2501   if (G1PrintReachableBaseFile == NULL) {
2502     gclog_or_tty->print_cr("  #### error: no base file defined");
2503     return;
2504   }
2505 
2506   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2507       (JVM_MAXPATHLEN - 1)) {
2508     gclog_or_tty->print_cr("  #### error: file name too long");
2509     return;
2510   }
2511 
2512   char file_name[JVM_MAXPATHLEN];
2513   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2514   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2515 
2516   fileStream fout(file_name);
2517   if (!fout.is_open()) {
2518     gclog_or_tty->print_cr("  #### error: could not open file");
2519     return;
2520   }
2521 
2522   outputStream* out = &fout;
2523   out->print_cr("-- USING %s", verify_option_to_tams(vo));
2524   out->cr();
2525 
2526   out->print_cr("--- ITERATING OVER REGIONS");
2527   out->cr();
2528   PrintReachableRegionClosure rcl(out, vo, all);
2529   _g1h->heap_region_iterate(&rcl);
2530   out->cr();
2531 
2532   gclog_or_tty->print_cr("  done");
2533   gclog_or_tty->flush();
2534 }
2535 
2536 #endif // PRODUCT
2537 
2538 // This note is for drainAllSATBBuffers and the code in between.
2539 // In the future we could reuse a task to do this work during an
2540 // evacuation pause (since now tasks are not active and can be claimed
2541 // during an evacuation pause). This was a late change to the code and
2542 // is currently not being taken advantage of.
2543 
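     // Object closure applied to the objects recorded in SATB buffers; it
     // simply forwards each object to ConcurrentMark::deal_with_reference().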
2544 class CMGlobalObjectClosure : public ObjectClosure {
2545 private:
2546   ConcurrentMark* _cm;
2547 
2548 public:
2549   void do_object(oop obj) {
2550     _cm->deal_with_reference(obj);
2551   }
2552 
2553   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2554 };
2555 
2556 void ConcurrentMark::deal_with_reference(oop obj) {
2557   if (verbose_high()) {
2558     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2559                            (void*) obj);
2560   }
2561 
2562   HeapWord* objAddr = (HeapWord*) obj;
2563   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2564   if (_g1h->is_in_g1_reserved(objAddr)) {
2565     assert(obj != NULL, "null check is implicit");
2566     if (!_nextMarkBitMap->isMarked(objAddr)) {
2567       // Only get the containing region if the object is not marked on the
2568       // bitmap (otherwise, it's a waste of time since we won't do
2569       // anything with it).
2570       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2571       if (!hr->obj_allocated_since_next_marking(obj)) {
2572         if (verbose_high()) {
2573           gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2574                                  "marked", (void*) obj);
2575         }
2576 
2577         // we need to mark it first
2578         if (_nextMarkBitMap->parMark(objAddr)) {
2579           // No OrderAccess::store_load() is needed. It is implicit in the
2580           // CAS done in parMark(objAddr) above.
2581           HeapWord* finger = _finger;
2582           if (objAddr < finger) {
2583             if (verbose_high()) {
2584               gclog_or_tty->print_cr("[global] below the global finger "
2585                                      "("PTR_FORMAT"), pushing it", finger);
2586             }
2587             if (!mark_stack_push(obj)) {
2588               if (verbose_low()) {
2589                 gclog_or_tty->print_cr("[global] global stack overflow during "
2590                                        "deal_with_reference");
2591               }
2592             }
2593           }
2594         }
2595       }
2596     }
2597   }
2598 }
2599 
2600 void ConcurrentMark::drainAllSATBBuffers() {
2601   CMGlobalObjectClosure oc(this);
2602   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2603   satb_mq_set.set_closure(&oc);
2604 
2605   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2606     if (verbose_medium()) {
2607       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2608     }
2609   }
2610 
2611   // no need to check whether we should do this, as this is only
2612   // called during an evacuation pause
2613   satb_mq_set.iterate_closure_all_threads();
2614 
2615   satb_mq_set.set_closure(NULL);
2616   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2617 }
2618 
2619 void ConcurrentMark::markPrev(oop p) {
2620   // Note we are overriding the read-only view of the prev map here, via
2621   // the cast.
2622   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2623 }
2624 
2625 void ConcurrentMark::clear(oop p) {
2626   assert(p != NULL && p->is_oop(), "expected an oop");
2627   HeapWord* addr = (HeapWord*)p;
2628   assert(addr >= _nextMarkBitMap->startWord() &&
2629          addr < _nextMarkBitMap->endWord(), "in a region");
2630 
2631   _nextMarkBitMap->clear(addr);
2632 }
2633 
2634 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2635   // Note we are overriding the read-only view of the prev map here, via
2636   // the cast.
2637   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2638   _nextMarkBitMap->clearRange(mr);
2639 }
2640 
2641 HeapRegion*
2642 ConcurrentMark::claim_region(int task_num) {
2643   // "checkpoint" the finger
2644   HeapWord* finger = _finger;
2645 
2646   // _heap_end will not change underneath our feet; it only changes at
2647   // yield points.
2648   while (finger < _heap_end) {
2649     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2650 
2651     // Note on how this code handles humongous regions. In the
2652     // normal case the finger will reach the start of a "starts
2653     // humongous" (SH) region. Its end will either be the end of the
2654     // last "continues humongous" (CH) region in the sequence, or the
2655     // standard end of the SH region (if the SH is the only region in
2656     // the sequence). That way claim_region() will skip over the CH
2657     // regions. However, there is a subtle race between a CM thread
2658     // executing this method and a mutator thread doing a humongous
2659     // object allocation. The two are not mutually exclusive as the CM
2660     // thread does not need to hold the Heap_lock when it gets
2661     // here. So there is a chance that claim_region() will come across
2662     // a free region that's in the process of becoming a SH or a CH
2663     // region. In the former case, it will either
2664     //   a) Miss the update to the region's end, in which case it will
2665     //      visit every subsequent CH region, will find their bitmaps
2666     //      empty, and do nothing, or
2667     //   b) Will observe the update of the region's end (in which case
2668     //      it will skip the subsequent CH regions).
2669     // If it comes across a region that suddenly becomes CH, the
2670     // scenario will be similar to b). So, the race between
2671     // claim_region() and a humongous object allocation might force us
2672     // to do a bit of unnecessary work (due to some unnecessary bitmap
2673     // iterations) but it should not introduce any correctness issues.
2674     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2675     HeapWord*   bottom        = curr_region->bottom();
2676     HeapWord*   end           = curr_region->end();
2677     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2678 
2679     if (verbose_low()) {
2680       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2681                              "["PTR_FORMAT", "PTR_FORMAT"), "
2682                              "limit = "PTR_FORMAT,
2683                              task_num, curr_region, bottom, end, limit);
2684     }
2685 
2686     // Is the gap between reading the finger and doing the CAS too long?
2687     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2688     if (res == finger) {
2689       // we succeeded
2690 
2691       // notice that _finger == end cannot be guaranteed here since
2692       // someone else might have moved the finger even further.
2693       assert(_finger >= end, "the finger should have moved forward");
2694 
2695       if (verbose_low()) {
2696         gclog_or_tty->print_cr("[%d] we were successful with region = "
2697                                PTR_FORMAT, task_num, curr_region);
2698       }
2699 
2700       if (limit > bottom) {
2701         if (verbose_low()) {
2702           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2703                                  "returning it ", task_num, curr_region);
2704         }
2705         return curr_region;
2706       } else {
2707         assert(limit == bottom,
2708                "the region limit should be at bottom");
2709         if (verbose_low()) {
2710           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2711                                  "returning NULL", task_num, curr_region);
2712         }
2713         // we return NULL and the caller should try calling
2714         // claim_region() again.
2715         return NULL;
2716       }
2717     } else {
2718       assert(_finger > finger, "the finger should have moved forward");
2719       if (verbose_low()) {
2720         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2721                                "global finger = "PTR_FORMAT", "
2722                                "our finger = "PTR_FORMAT,
2723                                task_num, _finger, finger);
2724       }
2725 
2726       // read it again
2727       finger = _finger;
2728     }
2729   }
2730 
2731   return NULL;
2732 }
2733 
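     // Drop any aborted region recorded by a CMTask if that region points
     // into the collection set; returns true if at least one was dropped.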
2734 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2735   bool result = false;
2736   for (int i = 0; i < (int)_max_task_num; ++i) {
2737     CMTask* the_task = _tasks[i];
2738     MemRegion mr = the_task->aborted_region();
2739     if (mr.start() != NULL) {
2740       assert(mr.end() != NULL, "invariant");
2741       assert(mr.word_size() > 0, "invariant");
2742       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2743       assert(hr != NULL, "invariant");
2744       if (hr->in_collection_set()) {
2745         // The region points into the collection set
2746         the_task->set_aborted_region(MemRegion());
2747         result = true;
2748       }
2749     }
2750   }
2751   return result;
2752 }
2753 
2754 bool ConcurrentMark::has_aborted_regions() {
2755   for (int i = 0; i < (int)_max_task_num; ++i) {
2756     CMTask* the_task = _tasks[i];
2757     MemRegion mr = the_task->aborted_region();
2758     if (mr.start() != NULL) {
2759       assert(mr.end() != NULL, "invariant");
2760       assert(mr.word_size() > 0, "invariant");
2761       return true;
2762     }
2763   }
2764   return false;
2765 }
2766 
2767 void ConcurrentMark::oops_do(OopClosure* cl) {
2768   if (_markStack.size() > 0 && verbose_low()) {
2769     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2770                            "size = %d", _markStack.size());
2771   }
2772   // we first iterate over the contents of the mark stack...
2773   _markStack.oops_do(cl);
2774 
2775   for (int i = 0; i < (int)_max_task_num; ++i) {
2776     OopTaskQueue* queue = _task_queues->queue((int)i);
2777 
2778     if (queue->size() > 0 && verbose_low()) {
2779       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2780                              "size = %d", i, queue->size());
2781     }
2782 
2783     // ...then over the contents of all the task queues.
2784     queue->oops_do(cl);
2785   }
2786 
2787   // Invalidate any entries in the region stack that point into
2788   // the collection set.
2789   if (_regionStack.invalidate_entries_into_cset()) {
2790     // otherwise, any gray objects copied during the evacuation pause
2791     // might not be visited.
2792     assert(_should_gray_objects, "invariant");
2793   }
2794 
2795   // Invalidate any aborted regions, recorded in the individual CM
2796   // tasks, that point into the collection set.
2797   if (invalidate_aborted_regions_in_cset()) {
2798     // otherwise, any gray objects copied during the evacuation pause
2799     // might not be visited.
2800     assert(_should_gray_objects, "invariant");
2801   }
2802 
2803 }
2804 
2805 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2806   _markStack.setEmpty();
2807   _markStack.clear_overflow();
2808   _regionStack.setEmpty();
2809   _regionStack.clear_overflow();
2810   if (clear_overflow) {
2811     clear_has_overflown();
2812   } else {
2813     assert(has_overflown(), "pre-condition");
2814   }
2815   _finger = _heap_start;
2816 
2817   for (int i = 0; i < (int)_max_task_num; ++i) {
2818     OopTaskQueue* queue = _task_queues->queue(i);
2819     queue->set_empty();
2820     // Clear any partial regions from the CMTasks
2821     _tasks[i]->clear_aborted_region();
2822   }
2823 }
2824 
2825 void ConcurrentMark::print_stats() {
2826   if (verbose_stats()) {
2827     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2828     for (size_t i = 0; i < _active_tasks; ++i) {
2829       _tasks[i]->print_stats();
2830       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2831     }
2832   }
2833 }
2834 
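     // Oop closure used to complete marking of objects in collection set
     // regions: an unmarked object in the collection set is marked on the
     // next bitmap and pushed on a fixed-size local stack (aborting if the
     // stack fills up), while an object outside the collection set is handed
     // to ConcurrentMark::deal_with_reference().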
2835 class CSMarkOopClosure: public OopClosure {
2836   friend class CSMarkBitMapClosure;
2837 
2838   G1CollectedHeap* _g1h;
2839   CMBitMap*        _bm;
2840   ConcurrentMark*  _cm;
2841   oop*             _ms;
2842   jint*            _array_ind_stack;
2843   int              _ms_size;
2844   int              _ms_ind;
2845   int              _array_increment;
2846 
2847   bool push(oop obj, int arr_ind = 0) {
2848     if (_ms_ind == _ms_size) {
2849       gclog_or_tty->print_cr("Mark stack is full.");
2850       return false;
2851     }
2852     _ms[_ms_ind] = obj;
2853     if (obj->is_objArray()) {
2854       _array_ind_stack[_ms_ind] = arr_ind;
2855     }
2856     _ms_ind++;
2857     return true;
2858   }
2859 
2860   oop pop() {
2861     if (_ms_ind == 0) {
2862       return NULL;
2863     } else {
2864       _ms_ind--;
2865       return _ms[_ms_ind];
2866     }
2867   }
2868 
2869   template <class T> bool drain() {
2870     while (_ms_ind > 0) {
2871       oop obj = pop();
2872       assert(obj != NULL, "Since index was non-zero.");
2873       if (obj->is_objArray()) {
2874         jint arr_ind = _array_ind_stack[_ms_ind];
2875         objArrayOop aobj = objArrayOop(obj);
2876         jint len = aobj->length();
2877         jint next_arr_ind = arr_ind + _array_increment;
2878         if (next_arr_ind < len) {
2879           push(obj, next_arr_ind);
2880         }
2881         // Now process this portion of this one.
2882         int lim = MIN2(next_arr_ind, len);
2883         for (int j = arr_ind; j < lim; j++) {
2884           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2885         }
2886 
2887       } else {
2888         obj->oop_iterate(this);
2889       }
2890       if (abort()) return false;
2891     }
2892     return true;
2893   }
2894 
2895 public:
2896   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2897     _g1h(G1CollectedHeap::heap()),
2898     _cm(cm),
2899     _bm(cm->nextMarkBitMap()),
2900     _ms_size(ms_size), _ms_ind(0),
2901     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2902     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2903     _array_increment(MAX2(ms_size/8, 16))
2904   {}
2905 
2906   ~CSMarkOopClosure() {
2907     FREE_C_HEAP_ARRAY(oop, _ms);
2908     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2909   }
2910 
2911   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2912   virtual void do_oop(      oop* p) { do_oop_work(p); }
2913 
2914   template <class T> void do_oop_work(T* p) {
2915     T heap_oop = oopDesc::load_heap_oop(p);
2916     if (oopDesc::is_null(heap_oop)) return;
2917     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2918     if (obj->is_forwarded()) {
2919       // If the object has already been forwarded, we have to make sure
2920       // that it's marked.  So follow the forwarding pointer.  Note that
2921       // this does the right thing for self-forwarding pointers in the
2922       // evacuation failure case.
2923       obj = obj->forwardee();
2924     }
2925     HeapRegion* hr = _g1h->heap_region_containing(obj);
2926     if (hr != NULL) {
2927       if (hr->in_collection_set()) {
2928         if (_g1h->is_obj_ill(obj)) {
2929           _bm->mark((HeapWord*)obj);
2930           if (!push(obj)) {
2931             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2932             set_abort();
2933           }
2934         }
2935       } else {
2936         // Outside the collection set; we need to gray it
2937         _cm->deal_with_reference(obj);
2938       }
2939     }
2940   }
2941 };
2942 
2943 class CSMarkBitMapClosure: public BitMapClosure {
2944   G1CollectedHeap* _g1h;
2945   CMBitMap*        _bitMap;
2946   ConcurrentMark*  _cm;
2947   CSMarkOopClosure _oop_cl;
2948 public:
2949   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2950     _g1h(G1CollectedHeap::heap()),
2951     _bitMap(cm->nextMarkBitMap()),
2952     _oop_cl(cm, ms_size)
2953   {}
2954 
2955   ~CSMarkBitMapClosure() {}
2956 
2957   bool do_bit(size_t offset) {
2958     // convert offset into a HeapWord*
2959     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2960     assert(_bitMap->endWord() && addr < _bitMap->endWord(),
2961            "address out of range");
2962     assert(_bitMap->isMarked(addr), "tautology");
2963     oop obj = oop(addr);
2964     if (!obj->is_forwarded()) {
2965       if (!_oop_cl.push(obj)) return false;
2966       if (UseCompressedOops) {
2967         if (!_oop_cl.drain<narrowOop>()) return false;
2968       } else {
2969         if (!_oop_cl.drain<oop>()) return false;
2970       }
2971     }
2972     // Otherwise...
2973     return true;
2974   }
2975 };
2976 
2977 
2978 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2979   CMBitMap* _bm;
2980   CSMarkBitMapClosure _bit_cl;
2981   enum SomePrivateConstants {
2982     MSSize = 1000
2983   };
2984   bool _completed;
2985 public:
2986   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2987     _bm(cm->nextMarkBitMap()),
2988     _bit_cl(cm, MSSize),
2989     _completed(true)
2990   {}
2991 
2992   ~CompleteMarkingInCSHRClosure() {}
2993 
2994   bool doHeapRegion(HeapRegion* r) {
2995     if (!r->evacuation_failed()) {
2996       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2997       if (!mr.is_empty()) {
2998         if (!_bm->iterate(&_bit_cl, mr)) {
2999           _completed = false;
3000           return true;
3001         }
3002       }
3003     }
3004     return false;
3005   }
3006 
3007   bool completed() { return _completed; }
3008 };
3009 
3010 class ClearMarksInHRClosure: public HeapRegionClosure {
3011   CMBitMap* _bm;
3012 public:
3013   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
3014 
3015   bool doHeapRegion(HeapRegion* r) {
3016     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
3017       MemRegion usedMR = r->used_region();
3018       _bm->clearRange(usedMR);
3019     }
3020     return false;
3021   }
3022 };
3023 
3024 void ConcurrentMark::complete_marking_in_collection_set() {
3025   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
3026 
3027   if (!g1h->mark_in_progress()) {
3028     g1h->g1_policy()->record_mark_closure_time(0.0);
3029     return;
3030   }
3031 
3032   int i = 1;
3033   double start = os::elapsedTime();
3034   while (true) {
3035     i++;
3036     CompleteMarkingInCSHRClosure cmplt(this);
3037     g1h->collection_set_iterate(&cmplt);
3038     if (cmplt.completed()) break;
3039   }
3040   double end_time = os::elapsedTime();
3041   double elapsed_time_ms = (end_time - start) * 1000.0;
3042   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3043 
3044   ClearMarksInHRClosure clr(nextMarkBitMap());
3045   g1h->collection_set_iterate(&clr);
3046 }
3047 
3048 // The next two methods deal with the following optimisation. Some
3049 // objects are gray by being marked and located above the finger. If
3050 // they are copied, during an evacuation pause, below the finger then
3051 // they need to be pushed on the stack. The observation is that, if
3052 // there are no regions in the collection set located above the
3053 // finger, then the above cannot happen, hence we do not need to
3054 // explicitly gray any objects when copying them to below the
3055 // finger. The global stack will be scanned to ensure that, if it
3056 // points to objects being copied, their location will be
3057 // updated. There is a tricky situation with the gray objects in the
3058 // region stack that are being copied, however. See the comment in
3059 // newCSet().
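     //
     // Illustrative summary (an editorial sketch, not code used by the VM):
     // assuming newCSet() runs before the registerCSetRegion() calls for a
     // given pause, the combined effect of the two methods below is that
     //
     //   _should_gray_objects == !region_stack_empty()
     //                           || has_aborted_regions()
     //                           || (some CSet region ends above _min_finger)
     //
     // i.e. copied objects only need to be explicitly grayed when there is
     // region stack work, or a collection set region lies above the lowest
     // of the global and per-task fingers.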
3060 
3061 void ConcurrentMark::newCSet() {
3062   if (!concurrent_marking_in_progress()) {
3063     // nothing to do if marking is not in progress
3064     return;
3065   }
3066 
3067   // find what the lowest finger is among the global and local fingers
3068   _min_finger = _finger;
3069   for (int i = 0; i < (int)_max_task_num; ++i) {
3070     CMTask* task = _tasks[i];
3071     HeapWord* task_finger = task->finger();
3072     if (task_finger != NULL && task_finger < _min_finger) {
3073       _min_finger = task_finger;
3074     }
3075   }
3076 
3077   _should_gray_objects = false;
3078 
3079   // This fixes a very subtle and frustrating bug. It might be the case
3080   // that, during an evacuation pause, heap regions that contain
3081   // objects that are gray (by being in regions contained in the
3082   // region stack) are included in the collection set. Since such gray
3083   // objects will be moved, and because it's not easy to redirect
3084   // region stack entries to point to a new location (because objects
3085   // in one region might be scattered to multiple regions after they
3086   // are copied), one option is to ensure that all marked objects
3087   // copied during a pause are pushed on the stack. Notice, however,
3088   // that this problem can only happen when the region stack is not
3089   // empty during an evacuation pause. So, we make the fix a bit less
3090   // conservative and ensure that regions are pushed on the stack,
3091   // irrespective of whether all collection set regions are below the
3092   // finger, if the region stack is not empty. This is expected to be
3093   // a rare case, so I don't think it's necessary to be smarter about it.
3094   if (!region_stack_empty() || has_aborted_regions()) {
3095     _should_gray_objects = true;
3096   }
3097 }
3098 
3099 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3100   if (!concurrent_marking_in_progress()) return;
3101 
3102   HeapWord* region_end = hr->end();
3103   if (region_end > _min_finger) {
3104     _should_gray_objects = true;
3105   }
3106 }
3107 
3108 // Resets the region fields of active CMTasks whose values point
3109 // into the collection set.
3110 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3111   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3112   assert(parallel_marking_threads() <= _max_task_num, "sanity");
3113 
3114   for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3115     CMTask* task = _tasks[i];
3116     HeapWord* task_finger = task->finger();
3117     if (task_finger != NULL) {
3118       assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3119       HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3120       if (finger_region->in_collection_set()) {
3121         // The task's current region is in the collection set.
3122         // This region will be evacuated in the current GC and
3123         // the region fields in the task will be stale.
3124         task->giveup_current_region();
3125       }
3126     }
3127   }
3128 }
3129 
3130 // abandon current marking iteration due to a Full GC
3131 void ConcurrentMark::abort() {
3132   // Clear all marks to force marking thread to do nothing
3133   _nextMarkBitMap->clearAll();
3134   // Empty mark stack
3135   clear_marking_state();
3136   for (int i = 0; i < (int)_max_task_num; ++i) {
3137     _tasks[i]->clear_region_fields();
3138   }
3139   _has_aborted = true;
3140 
3141   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3142   satb_mq_set.abandon_partial_marking();
3143   // This can be called either during or outside marking; we'll read
3144   // the expected_active value from the SATB queue set.
3145   satb_mq_set.set_active_all_threads(
3146                                  false, /* new active value */
3147                                  satb_mq_set.is_active() /* expected_active */);
3148 }
3149 
3150 static void print_ms_time_info(const char* prefix, const char* name,
3151                                NumberSeq& ns) {
3152   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3153                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3154   if (ns.num() > 0) {
3155     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3156                            prefix, ns.sd(), ns.maximum());
3157   }
3158 }
3159 
3160 void ConcurrentMark::print_summary_info() {
3161   gclog_or_tty->print_cr(" Concurrent marking:");
3162   print_ms_time_info("  ", "init marks", _init_times);
3163   print_ms_time_info("  ", "remarks", _remark_times);
3164   {
3165     print_ms_time_info("     ", "final marks", _remark_mark_times);
3166     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3168   }
3169   print_ms_time_info("  ", "cleanups", _cleanup_times);
3170   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3171                          _total_counting_time,
3172                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3173                           (double)_cleanup_times.num()
3174                          : 0.0));
3175   if (G1ScrubRemSets) {
3176     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3177                            _total_rs_scrub_time,
3178                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3179                             (double)_cleanup_times.num()
3180                            : 0.0));
3181   }
3182   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3183                          (_init_times.sum() + _remark_times.sum() +
3184                           _cleanup_times.sum())/1000.0);
3185   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3186                 "(%8.2f s marking, %8.2f s counting).",
3187                 cmThread()->vtime_accum(),
3188                 cmThread()->vtime_mark_accum(),
3189                 cmThread()->vtime_count_accum());
3190 }
3191 
3192 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3193   _parallel_workers->print_worker_threads_on(st);
3194 }
3195 
3196 // Closures
3197 // XXX: there seems to be a lot of code duplication here;
3198 // should refactor and consolidate the shared code.
3199 
3203 // We take a break if someone is trying to stop the world.
3204 bool ConcurrentMark::do_yield_check(int worker_i) {
3205   if (should_yield()) {
3206     if (worker_i == 0) {
3207       _g1h->g1_policy()->record_concurrent_pause();
3208     }
3209     cmThread()->yield();
3210     if (worker_i == 0) {
3211       _g1h->g1_policy()->record_concurrent_pause_end();
3212     }
3213     return true;
3214   } else {
3215     return false;
3216   }
3217 }
3218 
3219 bool ConcurrentMark::should_yield() {
3220   return cmThread()->should_yield();
3221 }
3222 
3223 bool ConcurrentMark::containing_card_is_marked(void* p) {
3224   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3225   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3226 }
3227 
3228 bool ConcurrentMark::containing_cards_are_marked(void* start,
3229                                                  void* last) {
3230   return containing_card_is_marked(start) &&
3231          containing_card_is_marked(last);
3232 }
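     // Worked example (illustrative only, assuming the usual 512-byte cards,
     // i.e. CardTableModRefBS::card_shift == 9): an address 0x1200 bytes past
     // the start of the reserved region gives offset 4608, and 4608 >> 9 is
     // card index 9, so containing_card_is_marked() tests bit 9 of _card_bm.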
3233 
3234 #ifndef PRODUCT
3235 // for debugging purposes
3236 void ConcurrentMark::print_finger() {
3237   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3238                          _heap_start, _heap_end, _finger);
3239   for (int i = 0; i < (int) _max_task_num; ++i) {
3240     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3241   }
3242   gclog_or_tty->print_cr("");
3243 }
3244 #endif
3245 
3246 void CMTask::scan_object(oop obj) {
3247   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3248 
3249   if (_cm->verbose_high()) {
3250     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3251                            _task_id, (void*) obj);
3252   }
3253 
3254   size_t obj_size = obj->size();
3255   _words_scanned += obj_size;
3256 
3257   obj->oop_iterate(_cm_oop_closure);
3258   statsOnly( ++_objs_scanned );
3259   check_limits();
3260 }
3261 
3262 // Closure for iteration over bitmaps
3263 class CMBitMapClosure : public BitMapClosure {
3264 private:
3265   // the bitmap that is being iterated over
3266   CMBitMap*                   _nextMarkBitMap;
3267   ConcurrentMark*             _cm;
3268   CMTask*                     _task;
3269   // true if we're scanning a heap region claimed by the task (so that
3270   // we move the finger along), false if we're not, i.e. currently when
3271   // scanning a heap region popped from the region stack (so that we
3272   // do not move the task finger along; it'd be a mistake if we did so).
3273   bool                        _scanning_heap_region;
3274 
3275 public:
3276   CMBitMapClosure(CMTask *task,
3277                   ConcurrentMark* cm,
3278                   CMBitMap* nextMarkBitMap)
3279     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3280 
3281   void set_scanning_heap_region(bool scanning_heap_region) {
3282     _scanning_heap_region = scanning_heap_region;
3283   }
3284 
3285   bool do_bit(size_t offset) {
3286     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3287     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3288     assert( addr < _cm->finger(), "invariant");
3289 
3290     if (_scanning_heap_region) {
3291       statsOnly( _task->increase_objs_found_on_bitmap() );
3292       assert(addr >= _task->finger(), "invariant");
3293       // We move that task's local finger along.
3294       _task->move_finger_to(addr);
3295     } else {
3296       // We move the task's region finger along.
3297       _task->move_region_finger_to(addr);
3298     }
3299 
3300     _task->scan_object(oop(addr));
3301     // we only partially drain the local queue and global stack
3302     _task->drain_local_queue(true);
3303     _task->drain_global_stack(true);
3304 
3305     // if the has_aborted flag has been raised, we need to bail out of
3306     // the iteration
3307     return !_task->has_aborted();
3308   }
3309 };
3310 
3311 // Closure for iterating over objects, currently only used for
3312 // processing SATB buffers.
3313 class CMObjectClosure : public ObjectClosure {
3314 private:
3315   CMTask* _task;
3316 
3317 public:
3318   void do_object(oop obj) {
3319     _task->deal_with_reference(obj);
3320   }
3321 
3322   CMObjectClosure(CMTask* task) : _task(task) { }
3323 };
3324 
3325 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3326                                ConcurrentMark* cm,
3327                                CMTask* task)
3328   : _g1h(g1h), _cm(cm), _task(task) {
3329   assert(_ref_processor == NULL, "should be initialized to NULL");
3330 
3331   if (G1UseConcMarkReferenceProcessing) {
3332     _ref_processor = g1h->ref_processor();
3333     assert(_ref_processor != NULL, "should not be NULL");
3334   }
3335 }
3336 
3337 void CMTask::setup_for_region(HeapRegion* hr) {
3338   // Separated the asserts so that we know which one fires.
3339   assert(hr != NULL,
3340         "claim_region() should have filtered out continues humongous regions");
3341   assert(!hr->continuesHumongous(),
3342         "claim_region() should have filtered out continues humongous regions");
3343 
3344   if (_cm->verbose_low()) {
3345     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3346                            _task_id, hr);
3347   }
3348 
3349   _curr_region  = hr;
3350   _finger       = hr->bottom();
3351   update_region_limit();
3352 }
3353 
3354 void CMTask::update_region_limit() {
3355   HeapRegion* hr            = _curr_region;
3356   HeapWord* bottom          = hr->bottom();
3357   HeapWord* limit           = hr->next_top_at_mark_start();
3358 
3359   if (limit == bottom) {
3360     if (_cm->verbose_low()) {
3361       gclog_or_tty->print_cr("[%d] found an empty region "
3362                              "["PTR_FORMAT", "PTR_FORMAT")",
3363                              _task_id, bottom, limit);
3364     }
3365     // The region was collected underneath our feet.
3366     // We set the finger to bottom to ensure that the bitmap
3367     // iteration that will follow this will not do anything.
3368     // (this is not a condition that holds when we set the region up,
3369     // as the region is not supposed to be empty in the first place)
3370     _finger = bottom;
3371   } else if (limit >= _region_limit) {
3372     assert(limit >= _finger, "peace of mind");
3373   } else {
3374     assert(limit < _region_limit, "only way to get here");
3375     // This can happen under some pretty unusual circumstances.  An
3376     // evacuation pause empties the region underneath our feet (NTAMS
3377     // at bottom). We then do some allocation in the region (NTAMS
3378     // stays at bottom), followed by the region being used as a GC
3379     // alloc region (NTAMS will move to top() and the objects
3380     // originally below it will be grayed). All objects now marked in
3381     // the region are explicitly grayed, if below the global finger,
3382     // and we do not need in fact to scan anything else. So, we simply
3383     // set _finger to be limit to ensure that the bitmap iteration
3384     // doesn't do anything.
3385     _finger = limit;
3386   }
3387 
3388   _region_limit = limit;
3389 }
3390 
3391 void CMTask::giveup_current_region() {
3392   assert(_curr_region != NULL, "invariant");
3393   if (_cm->verbose_low()) {
3394     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3395                            _task_id, _curr_region);
3396   }
3397   clear_region_fields();
3398 }
3399 
3400 void CMTask::clear_region_fields() {
3401   // Values for these three fields that indicate that we're not
3402   // holding on to a region.
3403   _curr_region   = NULL;
3404   _finger        = NULL;
3405   _region_limit  = NULL;
3406 
3407   _region_finger = NULL;
3408 }
3409 
3410 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3411   if (cm_oop_closure == NULL) {
3412     assert(_cm_oop_closure != NULL, "invariant");
3413   } else {
3414     assert(_cm_oop_closure == NULL, "invariant");
3415   }
3416   _cm_oop_closure = cm_oop_closure;
3417 }
3418 
3419 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3420   guarantee(nextMarkBitMap != NULL, "invariant");
3421 
3422   if (_cm->verbose_low()) {
3423     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3424   }
3425 
3426   _nextMarkBitMap                = nextMarkBitMap;
3427   clear_region_fields();
3428   assert(_aborted_region.is_empty(), "should have been cleared");
3429 
3430   _calls                         = 0;
3431   _elapsed_time_ms               = 0.0;
3432   _termination_time_ms           = 0.0;
3433   _termination_start_time_ms     = 0.0;
3434 
3435 #if _MARKING_STATS_
3436   _local_pushes                  = 0;
3437   _local_pops                    = 0;
3438   _local_max_size                = 0;
3439   _objs_scanned                  = 0;
3440   _global_pushes                 = 0;
3441   _global_pops                   = 0;
3442   _global_max_size               = 0;
3443   _global_transfers_to           = 0;
3444   _global_transfers_from         = 0;
3445   _region_stack_pops             = 0;
3446   _regions_claimed               = 0;
3447   _objs_found_on_bitmap          = 0;
3448   _satb_buffers_processed        = 0;
3449   _steal_attempts                = 0;
3450   _steals                        = 0;
3451   _aborted                       = 0;
3452   _aborted_overflow              = 0;
3453   _aborted_cm_aborted            = 0;
3454   _aborted_yield                 = 0;
3455   _aborted_timed_out             = 0;
3456   _aborted_satb                  = 0;
3457   _aborted_termination           = 0;
3458 #endif // _MARKING_STATS_
3459 }
3460 
3461 bool CMTask::should_exit_termination() {
3462   regular_clock_call();
3463   // This is called when we are in the termination protocol. We should
3464   // quit if, for some reason, this task wants to abort or the global
3465   // stack is not empty (this means that we can get work from it).
3466   return !_cm->mark_stack_empty() || has_aborted();
3467 }
3468 
3469 void CMTask::reached_limit() {
3470   assert(_words_scanned >= _words_scanned_limit ||
3471          _refs_reached >= _refs_reached_limit ,
3472          "shouldn't have been called otherwise");
3473   regular_clock_call();
3474 }
3475 
3476 void CMTask::regular_clock_call() {
3477   if (has_aborted()) return;
3478 
3479   // First, we need to recalculate the words scanned and refs reached
3480   // limits for the next clock call.
3481   recalculate_limits();
3482 
3483   // During the regular clock call we do the following
3484 
3485   // (1) If an overflow has been flagged, then we abort.
3486   if (_cm->has_overflown()) {
3487     set_has_aborted();
3488     return;
3489   }
3490 
3491   // If we are not concurrent (i.e. we're doing remark) we don't need
3492   // to check anything else. The other steps are only needed during
3493   // the concurrent marking phase.
3494   if (!concurrent()) return;
3495 
3496   // (2) If marking has been aborted for Full GC, then we also abort.
3497   if (_cm->has_aborted()) {
3498     set_has_aborted();
3499     statsOnly( ++_aborted_cm_aborted );
3500     return;
3501   }
3502 
3503   double curr_time_ms = os::elapsedVTime() * 1000.0;
3504 
3505   // (3) If marking stats are enabled, then we update the clock call statistics.
3506 #if _MARKING_STATS_
3507   if (_words_scanned >= _words_scanned_limit) {
3508     ++_clock_due_to_scanning;
3509   }
3510   if (_refs_reached >= _refs_reached_limit) {
3511     ++_clock_due_to_marking;
3512   }
3513 
3514   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3515   _interval_start_time_ms = curr_time_ms;
3516   _all_clock_intervals_ms.add(last_interval_ms);
3517 
3518   if (_cm->verbose_medium()) {
3519       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3520                         "scanned = %d%s, refs reached = %d%s",
3521                         _task_id, last_interval_ms,
3522                         _words_scanned,
3523                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3524                         _refs_reached,
3525                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3526   }
3527 #endif // _MARKING_STATS_
3528 
3529   // (4) We check whether we should yield. If we have to, then we abort.
3530   if (_cm->should_yield()) {
3531     // We should yield. To do this we abort the task. The caller is
3532     // responsible for yielding.
3533     set_has_aborted();
3534     statsOnly( ++_aborted_yield );
3535     return;
3536   }
3537 
3538   // (5) We check whether we've reached our time quota. If we have,
3539   // then we abort.
3540   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3541   if (elapsed_time_ms > _time_target_ms) {
3542     set_has_aborted();
3543     _has_timed_out = true;
3544     statsOnly( ++_aborted_timed_out );
3545     return;
3546   }
3547 
3548   // (6) Finally, we check whether there are enough completed SATB
3549   // buffers available for processing. If there are, we abort.
3550   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3551   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3552     if (_cm->verbose_low()) {
3553       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3554                              _task_id);
3555     }
3556     // We do need to process SATB buffers, so we'll abort and restart
3557     // the marking task to do so.
3558     set_has_aborted();
3559     statsOnly( ++_aborted_satb );
3560     return;
3561   }
3562 }
3563 
3564 void CMTask::recalculate_limits() {
3565   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3566   _words_scanned_limit      = _real_words_scanned_limit;
3567 
3568   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3569   _refs_reached_limit       = _real_refs_reached_limit;
3570 }
3571 
3572 void CMTask::decrease_limits() {
3573   // This is called when we believe that we're going to do an infrequent
3574   // operation which will increase the per byte scanned cost (i.e. move
3575   // entries to/from the global stack). It basically tries to decrease the
3576   // scanning limit so that the clock is called earlier.
3577 
3578   if (_cm->verbose_medium()) {
3579     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3580   }
3581 
3582   _words_scanned_limit = _real_words_scanned_limit -
3583     3 * words_scanned_period / 4;
3584   _refs_reached_limit  = _real_refs_reached_limit -
3585     3 * refs_reached_period / 4;
3586 }
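     // Illustrative arithmetic (editorial note): if the last recalculation set
     // _real_words_scanned_limit to S + P (S words scanned at that point,
     // period P), the adjusted limit S + P - 3P/4 == S + P/4 makes the next
     // regular_clock_call() fire after only a quarter of the normal scanning
     // period; the refs-reached limit is pulled in the same way.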
3587 
3588 void CMTask::move_entries_to_global_stack() {
3589   // local array where we'll store the entries that will be popped
3590   // from the local queue
3591   oop buffer[global_stack_transfer_size];
3592 
3593   int n = 0;
3594   oop obj;
3595   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3596     buffer[n] = obj;
3597     ++n;
3598   }
3599 
3600   if (n > 0) {
3601     // we popped at least one entry from the local queue
3602 
3603     statsOnly( ++_global_transfers_to; _local_pops += n );
3604 
3605     if (!_cm->mark_stack_push(buffer, n)) {
3606       if (_cm->verbose_low()) {
3607         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3608                                _task_id);
3609       }
3610       set_has_aborted();
3611     } else {
3612       // the transfer was successful
3613 
3614       if (_cm->verbose_medium()) {
3615         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3616                                _task_id, n);
3617       }
3618       statsOnly( int tmp_size = _cm->mark_stack_size();
3619                  if (tmp_size > _global_max_size) {
3620                    _global_max_size = tmp_size;
3621                  }
3622                  _global_pushes += n );
3623     }
3624   }
3625 
3626   // this operation was quite expensive, so decrease the limits
3627   decrease_limits();
3628 }
3629 
3630 void CMTask::get_entries_from_global_stack() {
3631   // local array where we'll store the entries that will be popped
3632   // from the global stack.
3633   oop buffer[global_stack_transfer_size];
3634   int n;
3635   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3636   assert(n <= global_stack_transfer_size,
3637          "we should not pop more than the given limit");
3638   if (n > 0) {
3639     // yes, we did actually pop at least one entry
3640 
3641     statsOnly( ++_global_transfers_from; _global_pops += n );
3642     if (_cm->verbose_medium()) {
3643       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3644                              _task_id, n);
3645     }
3646     for (int i = 0; i < n; ++i) {
3647       bool success = _task_queue->push(buffer[i]);
3648       // We only call this when the local queue is empty or under a
3649       // given target limit. So, we do not expect this push to fail.
3650       assert(success, "invariant");
3651     }
3652 
3653     statsOnly( int tmp_size = _task_queue->size();
3654                if (tmp_size > _local_max_size) {
3655                  _local_max_size = tmp_size;
3656                }
3657                _local_pushes += n );
3658   }
3659 
3660   // this operation was quite expensive, so decrease the limits
3661   decrease_limits();
3662 }
3663 
3664 void CMTask::drain_local_queue(bool partially) {
3665   if (has_aborted()) return;
3666 
3667   // Decide what the target size is, depending on whether we're going to
3668   // drain it partially (so that other tasks can steal if they run out
3669   // of things to do) or totally (at the very end).
3670   size_t target_size;
3671   if (partially) {
3672     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3673   } else {
3674     target_size = 0;
3675   }
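       // For example (hypothetical values): if max_elems() were 16k and
       // GCDrainStackTargetSize were 64, a partial drain would stop once at
       // most 64 entries remain, leaving entries available for other tasks
       // to steal, whereas a total drain (target_size == 0) empties the queue.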
3676 
3677   if (_task_queue->size() > target_size) {
3678     if (_cm->verbose_high()) {
3679       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3680                              _task_id, target_size);
3681     }
3682 
3683     oop obj;
3684     bool ret = _task_queue->pop_local(obj);
3685     while (ret) {
3686       statsOnly( ++_local_pops );
3687 
3688       if (_cm->verbose_high()) {
3689         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3690                                (void*) obj);
3691       }
3692 
3693       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3694       assert(!_g1h->is_on_master_free_list(
3695                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3696 
3697       scan_object(obj);
3698 
3699       if (_task_queue->size() <= target_size || has_aborted()) {
3700         ret = false;
3701       } else {
3702         ret = _task_queue->pop_local(obj);
3703       }
3704     }
3705 
3706     if (_cm->verbose_high()) {
3707       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3708                              _task_id, _task_queue->size());
3709     }
3710   }
3711 }
3712 
3713 void CMTask::drain_global_stack(bool partially) {
3714   if (has_aborted()) return;
3715 
3716   // We have a policy to drain the local queue before we attempt to
3717   // drain the global stack.
3718   assert(partially || _task_queue->size() == 0, "invariant");
3719 
3720   // Decide what the target size is, depending on whether we're going to
3721   // drain it partially (so that other tasks can steal if they run out
3722   // of things to do) or totally (at the very end).  Notice that,
3723   // because we move entries from the global stack in chunks or
3724   // because another task might be doing the same, we might in fact
3725   // drop below the target. But, this is not a problem.
3726   size_t target_size;
3727   if (partially) {
3728     target_size = _cm->partial_mark_stack_size_target();
3729   } else {
3730     target_size = 0;
3731   }
3732 
3733   if (_cm->mark_stack_size() > target_size) {
3734     if (_cm->verbose_low()) {
3735       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3736                              _task_id, target_size);
3737     }
3738 
3739     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3740       get_entries_from_global_stack();
3741       drain_local_queue(partially);
3742     }
3743 
3744     if (_cm->verbose_low()) {
3745       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3746                              _task_id, _cm->mark_stack_size());
3747     }
3748   }
3749 }
3750 
3751 // The SATB queue code makes several assumptions about whether the par
3752 // or non-par versions of the methods should be called. This is why some
3753 // of the code is replicated. We should really get rid of the
3754 // single-threaded version of the code to simplify things.
3755 void CMTask::drain_satb_buffers() {
3756   if (has_aborted()) return;
3757 
3758   // We set this so that the regular clock knows that we're in the
3759   // middle of draining buffers and doesn't set the abort flag when it
3760   // notices that SATB buffers are available for draining. It would be
3761   // very counterproductive if it did that. :-)
3762   _draining_satb_buffers = true;
3763 
3764   CMObjectClosure oc(this);
3765   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3766   if (G1CollectedHeap::use_parallel_gc_threads()) {
3767     satb_mq_set.set_par_closure(_task_id, &oc);
3768   } else {
3769     satb_mq_set.set_closure(&oc);
3770   }
3771 
3772   // This keeps claiming and applying the closure to completed buffers
3773   // until we run out of buffers or we need to abort.
3774   if (G1CollectedHeap::use_parallel_gc_threads()) {
3775     while (!has_aborted() &&
3776            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3777       if (_cm->verbose_medium()) {
3778         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3779       }
3780       statsOnly( ++_satb_buffers_processed );
3781       regular_clock_call();
3782     }
3783   } else {
3784     while (!has_aborted() &&
3785            satb_mq_set.apply_closure_to_completed_buffer()) {
3786       if (_cm->verbose_medium()) {
3787         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3788       }
3789       statsOnly( ++_satb_buffers_processed );
3790       regular_clock_call();
3791     }
3792   }
3793 
3794   if (!concurrent() && !has_aborted()) {
3795     // We should only do this during remark.
3796     if (G1CollectedHeap::use_parallel_gc_threads()) {
3797       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3798     } else {
3799       satb_mq_set.iterate_closure_all_threads();
3800     }
3801   }
3802 
3803   _draining_satb_buffers = false;
3804 
3805   assert(has_aborted() ||
3806          concurrent() ||
3807          satb_mq_set.completed_buffers_num() == 0, "invariant");
3808 
3809   if (G1CollectedHeap::use_parallel_gc_threads()) {
3810     satb_mq_set.set_par_closure(_task_id, NULL);
3811   } else {
3812     satb_mq_set.set_closure(NULL);
3813   }
3814 
3815   // again, this was a potentially expensive operation, decrease the
3816   // limits to get the regular clock call early
3817   decrease_limits();
3818 }
3819 
3820 void CMTask::drain_region_stack(BitMapClosure* bc) {
3821   if (has_aborted()) return;
3822 
3823   assert(_region_finger == NULL,
3824          "it should be NULL when we're not scanning a region");
3825 
3826   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3827     if (_cm->verbose_low()) {
3828       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3829                              _task_id, _cm->region_stack_size());
3830     }
3831 
3832     MemRegion mr;
3833 
3834     if (!_aborted_region.is_empty()) {
3835       mr = _aborted_region;
3836       _aborted_region = MemRegion();
3837 
3838       if (_cm->verbose_low()) {
3839         gclog_or_tty->print_cr("[%d] scanning aborted region "
3840                                "[ " PTR_FORMAT ", " PTR_FORMAT " )",
3841                                _task_id, mr.start(), mr.end());
3842       }
3843     } else {
3844       mr = _cm->region_stack_pop_lock_free();
3845       // it returns MemRegion() if the pop fails
3846       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3847     }
3848 
3849     while (mr.start() != NULL) {
3850       if (_cm->verbose_medium()) {
3851         gclog_or_tty->print_cr("[%d] we are scanning region "
3852                                "["PTR_FORMAT", "PTR_FORMAT")",
3853                                _task_id, mr.start(), mr.end());
3854       }
3855 
3856       assert(mr.end() <= _cm->finger(),
3857              "otherwise the region shouldn't be on the stack");
3858       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3859       if (_nextMarkBitMap->iterate(bc, mr)) {
3860         assert(!has_aborted(),
3861                "cannot abort the task without aborting the bitmap iteration");
3862 
3863         // We finished iterating over the region without aborting.
3864         regular_clock_call();
3865         if (has_aborted()) {
3866           mr = MemRegion();
3867         } else {
3868           mr = _cm->region_stack_pop_lock_free();
3869           // it returns MemRegion() if the pop fails
3870           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3871         }
3872       } else {
3873         assert(has_aborted(), "currently the only way to do so");
3874 
3875         // The only way to abort the bitmap iteration is to return
3876         // false from the do_bit() method. However, inside the
3877         // do_bit() method we move the _region_finger to point to the
3878         // object currently being looked at. So, if we bail out, we
3879         // have definitely set _region_finger to something non-null.
3880         assert(_region_finger != NULL, "invariant");
3881 
3882         // Make sure that any previously aborted region has been
3883         // cleared.
3884         assert(_aborted_region.is_empty(), "aborted region not cleared");
3885 
3886         // The iteration was actually aborted. So now _region_finger
3887         // points to the address of the object we last scanned. If we
3888         // leave it there, when we restart this task, we will rescan
3889         // the object. It is easy to avoid this. We move the finger by
3890         // enough to point to the next possible object header (the
3891         // bitmap knows by how much we need to move it as it knows its
3892         // granularity).
3893         MemRegion newRegion =
3894           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3895 
3896         if (!newRegion.is_empty()) {
3897           if (_cm->verbose_low()) {
3898             gclog_or_tty->print_cr("[%d] recording unscanned region"
3899                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3900                                    _task_id,
3901                                    newRegion.start(), newRegion.end());
3902           }
3903           // Now record the part of the region we didn't scan to
3904           // make sure this task scans it later.
3905           _aborted_region = newRegion;
3906         }
3907         // break from while
3908         mr = MemRegion();
3909       }
3910       _region_finger = NULL;
3911     }
3912 
3913     if (_cm->verbose_low()) {
3914       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3915                              _task_id, _cm->region_stack_size());
3916     }
3917   }
3918 }
3919 
3920 void CMTask::print_stats() {
3921   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3922                          _task_id, _calls);
3923   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3924                          _elapsed_time_ms, _termination_time_ms);
3925   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3926                          _step_times_ms.num(), _step_times_ms.avg(),
3927                          _step_times_ms.sd());
3928   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3929                          _step_times_ms.maximum(), _step_times_ms.sum());
3930 
3931 #if _MARKING_STATS_
3932   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3933                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3934                          _all_clock_intervals_ms.sd());
3935   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3936                          _all_clock_intervals_ms.maximum(),
3937                          _all_clock_intervals_ms.sum());
3938   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3939                          _clock_due_to_scanning, _clock_due_to_marking);
3940   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3941                          _objs_scanned, _objs_found_on_bitmap);
3942   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3943                          _local_pushes, _local_pops, _local_max_size);
3944   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3945                          _global_pushes, _global_pops, _global_max_size);
3946   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3947                          _global_transfers_to,_global_transfers_from);
3948   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
3949                          _regions_claimed, _region_stack_pops);
3950   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3951   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3952                          _steal_attempts, _steals);
3953   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3954   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3955                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3956   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3957                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3958 #endif // _MARKING_STATS_
3959 }
3960 
3961 /*****************************************************************************
3962 
3963     The do_marking_step(time_target_ms) method is the building block
3964     of the parallel marking framework. It can be called in parallel
3965     with other invocations of do_marking_step() on different tasks
3966     (but only one per task, obviously) and concurrently with the
3967     mutator threads, or during remark, hence it eliminates the need
3968     for two versions of the code. When called during remark, it will
3969     pick up from where the task left off during the concurrent marking
3970     phase. Interestingly, tasks are also claimable during evacuation
3971     pauses, since do_marking_step() ensures that it aborts before
3972     it needs to yield.
3973 
3974     The data structures that it uses to do marking work are the
3975     following:
3976 
3977       (1) Marking Bitmap. If there are gray objects that appear only
3978       on the bitmap (this happens either when dealing with an overflow
3979       or when the initial marking phase has simply marked the roots
3980       and didn't push them on the stack), then tasks claim heap
3981       regions whose bitmap they then scan to find gray objects. A
3982       global finger indicates where the end of the last claimed region
3983       is. A local finger indicates how far into the region a task has
3984       scanned. The two fingers are used to determine how to gray an
3985       object (i.e. whether simply marking it is OK, as it will be
3986       visited by a task in the future, or whether it needs to be also
3987       pushed on a stack).
3988 
3989       (2) Local Queue. The local queue of the task which is accessed
3990       reasonably efficiently by the task. Other tasks can steal from
3991       it when they run out of work. Throughout the marking phase, a
3992       task attempts to keep its local queue short but not totally
3993       empty, so that entries are available for stealing by other
3994       tasks. Only when there is no more work will a task totally
3995       drain its local queue.
3996 
3997       (3) Global Mark Stack. This handles local queue overflow. During
3998       marking, only sets of entries are moved between it and the local
3999       queues, as access to it requires a mutex, and more fine-grained
4000       interaction with it might cause contention. If it
4001       overflows, then the marking phase should restart and iterate
4002       over the bitmap to identify gray objects. Throughout the marking
4003       phase, tasks attempt to keep the global mark stack at a small
4004       length but not totally empty, so that entries are available for
4005       popping by other tasks. Only when there is no more work will
4006       tasks totally drain the global mark stack.
4007 
4008       (4) Global Region Stack. Entries on it correspond to areas of
4009       the bitmap that need to be scanned since they contain gray
4010       objects. Pushes on the region stack only happen during
4011       evacuation pauses and typically correspond to areas covered by
4012       GC LABs. If it overflows, then the marking phase should restart
4013       and iterate over the bitmap to identify gray objects. Tasks will
4014       try to totally drain the region stack as soon as possible.
4015 
4016       (5) SATB Buffer Queue. This is where completed SATB buffers are
4017       made available. Buffers are regularly removed from this queue
4018       and scanned for roots, so that the queue doesn't get too
4019       long. During remark, all completed buffers are processed, as
4020       well as the filled in parts of any uncompleted buffers.
4021 
4022     The do_marking_step() method tries to abort when the time target
4023     has been reached. There are a few other cases when the
4024     do_marking_step() method also aborts:
4025 
4026       (1) When the marking phase has been aborted (after a Full GC).
4027 
4028       (2) When a global overflow (either on the global stack or the
4029       region stack) has been triggered. Before the task aborts, it
4030       will actually sync up with the other tasks to ensure that all
4031       the marking data structures (local queues, stacks, fingers etc.)
4032       are re-initialised so that when do_marking_step() completes,
4033       the marking phase can immediately restart.
4034 
4035       (3) When enough completed SATB buffers are available. The
4036       do_marking_step() method only tries to drain SATB buffers right
4037       at the beginning. So, if enough buffers are available, the
4038       marking step aborts and the SATB buffers are processed at
4039       the beginning of the next invocation.
4040 
4041       (4) To yield. When we have to yield, we abort and yield
4042       right at the end of do_marking_step(). This saves us from a lot
4043       of hassle, as by yielding we might allow a Full GC. If this
4044       happens then objects will be compacted underneath our feet, the
4045       heap might shrink, etc. We save checking for this by just
4046       aborting and doing the yield right at the end.
4047 
4048     From the above it follows that the do_marking_step() method should
4049     be called in a loop (or, otherwise, regularly) until it completes.
4050 
4051     If a marking step completes without its has_aborted() flag being
4052     true, it means it has completed the current marking phase (and
4053     also all other marking tasks have done so and have all synced up).
4054 
4055     A method called regular_clock_call() is invoked "regularly" (in
4056     sub-ms intervals) throughout marking. It is this clock method that
4057     checks all the abort conditions which were mentioned above and
4058     decides when the task should abort. A work-based scheme is used to
4059     trigger this clock method: when the number of object words the
4060     marking phase has scanned or the number of references the marking
4061     phase has visited reach a given limit. Additional invocations of
4062     the clock method have been planted in a few other strategic places
4063     too. The initial reason for the clock method was to avoid calling
4064     vtime too regularly, as it is quite expensive. So, once it was in
4065     place, it was natural to piggy-back all the other conditions on it
4066     too and not constantly check them throughout the code.
4067 
4068  *****************************************************************************/
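     //
     // A minimal sketch (editorial illustration, not the VM's actual driver
     // code) of the calling pattern described above, assuming a CMTask*
     // "task" owned by a ConcurrentMark* "cm":
     //
     //   do {
     //     task->do_marking_step(10.0   /* illustrative time target, ms */,
     //                           true   /* do_stealing */,
     //                           true   /* do_termination */);
     //     // ...yield here if requested; the step aborts before it must yield
     //   } while (task->has_aborted() && !cm->has_aborted());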
4069 
4070 void CMTask::do_marking_step(double time_target_ms,
4071                              bool do_stealing,
4072                              bool do_termination) {
4073   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4074   assert(concurrent() == _cm->concurrent(), "they should be the same");
4075 
4076   assert(concurrent() || _cm->region_stack_empty(),
4077          "the region stack should have been cleared before remark");
4078   assert(concurrent() || !_cm->has_aborted_regions(),
4079          "aborted regions should have been cleared before remark");
4080   assert(_region_finger == NULL,
4081          "this should be non-null only when a region is being scanned");
4082 
4083   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4084   assert(_task_queues != NULL, "invariant");
4085   assert(_task_queue != NULL, "invariant");
4086   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4087 
4088   assert(!_claimed,
4089          "only one thread should claim this task at any one time");
4090 
4091   // OK, this doesn't safeguard against all possible scenarios, as it is
4092   // possible for two threads to set the _claimed flag at the same
4093   // time. But it is only for debugging purposes anyway and it will
4094   // catch most problems.
4095   _claimed = true;
4096 
4097   _start_time_ms = os::elapsedVTime() * 1000.0;
4098   statsOnly( _interval_start_time_ms = _start_time_ms );
4099 
4100   double diff_prediction_ms =
4101     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4102   _time_target_ms = time_target_ms - diff_prediction_ms;
4103 
4104   // set up the variables that are used in the work-based scheme to
4105   // call the regular clock method
4106   _words_scanned = 0;
4107   _refs_reached  = 0;
4108   recalculate_limits();
4109 
4110   // clear all flags
4111   clear_has_aborted();
4112   _has_timed_out = false;
4113   _draining_satb_buffers = false;
4114 
4115   ++_calls;
4116 
4117   if (_cm->verbose_low()) {
4118     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4119                            "target = %1.2lfms >>>>>>>>>>",
4120                            _task_id, _calls, _time_target_ms);
4121   }
4122 
4123   // Set up the bitmap and oop closures. Anything that uses them is
4124   // eventually called from this method, so it is OK to allocate these
4125   // statically.
4126   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4127   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4128   set_cm_oop_closure(&cm_oop_closure);
4129 
4130   if (_cm->has_overflown()) {
4131     // This can happen if the region stack or the mark stack overflows
4132     // during a GC pause and this task, after a yield point,
4133     // restarts. We have to abort as we need to get into the overflow
4134     // protocol which happens right at the end of this task.
4135     set_has_aborted();
4136   }
4137 
4138   // First drain any available SATB buffers. After this, we will not
4139   // look at SATB buffers before the next invocation of this method.
4140   // If enough completed SATB buffers are queued up, the regular clock
4141   // will abort this task so that it restarts.
4142   drain_satb_buffers();
4143   // ...then partially drain the local queue and the global stack
4144   drain_local_queue(true);
4145   drain_global_stack(true);
4146 
4147   // Then totally drain the region stack.  We will not look at
4148   // it again before the next invocation of this method. Entries on
4149   // the region stack are only added during evacuation pauses, for
4150   // which we have to yield. When we do, we abort the task anyway so
4151   // it will look at the region stack again when it restarts.
4152   bitmap_closure.set_scanning_heap_region(false);
4153   drain_region_stack(&bitmap_closure);
4154   // ...then partially drain the local queue and the global stack
4155   drain_local_queue(true);
4156   drain_global_stack(true);
4157 
4158   do {
4159     if (!has_aborted() && _curr_region != NULL) {
4160       // This means that we're already holding on to a region.
4161       assert(_finger != NULL, "if region is not NULL, then the finger "
4162              "should not be NULL either");
4163 
4164       // We might have restarted this task after an evacuation pause
4165       // which might have evacuated the region we're holding on to
4166       // underneath our feet. Let's read its limit again to make sure
4167       // that we do not iterate over a region of the heap that
4168       // contains garbage (update_region_limit() will also move
4169       // _finger to the start of the region if it is found empty).
4170       update_region_limit();
4171       // We will start from _finger not from the start of the region,
4172       // as we might be restarting this task after aborting half-way
4173       // through scanning this region. In this case, _finger points to
4174       // the address where we last found a marked object. If this is a
4175       // fresh region, _finger points to start().
4176       MemRegion mr = MemRegion(_finger, _region_limit);
4177 
4178       if (_cm->verbose_low()) {
4179         gclog_or_tty->print_cr("[%d] we're scanning part "
4180                                "["PTR_FORMAT", "PTR_FORMAT") "
4181                                "of region "PTR_FORMAT,
4182                                _task_id, _finger, _region_limit, _curr_region);
4183       }
4184 
4185       // Let's iterate over the bitmap of the part of the
4186       // region that is left.
4187       bitmap_closure.set_scanning_heap_region(true);
4188       if (mr.is_empty() ||
4189           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4190         // We successfully completed iterating over the region. Now,
4191         // let's give up the region.
4192         giveup_current_region();
4193         regular_clock_call();
4194       } else {
4195         assert(has_aborted(), "currently the only way to do so");
4196         // The only way to abort the bitmap iteration is to return
4197         // false from the do_bit() method. However, inside the
4198         // do_bit() method we move the _finger to point to the
4199         // object currently being looked at. So, if we bail out, we
4200         // have definitely set _finger to something non-null.
4201         assert(_finger != NULL, "invariant");
4202 
4203         // Region iteration was actually aborted. So now _finger
4204         // points to the address of the object we last scanned. If we
4205         // leave it there, when we restart this task, we will rescan
4206         // the object. It is easy to avoid this. We move the finger by
4207         // enough to point to the next possible object header (the
4208         // bitmap knows by how much we need to move it as it knows its
4209         // granularity).
4210         assert(_finger < _region_limit, "invariant");
4211         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4212         // Check if bitmap iteration was aborted while scanning the last object
4213         if (new_finger >= _region_limit) {
4214             giveup_current_region();
4215         } else {
4216             move_finger_to(new_finger);
4217         }
4218       }
4219     }
4220     // At this point we have either completed iterating over the
4221     // region we were holding on to, or we have aborted.
4222 
4223     // We then partially drain the local queue and the global stack.
4224     // (Do we really need this?)
4225     drain_local_queue(true);
4226     drain_global_stack(true);
4227 
4228     // Read the note on the claim_region() method on why it might
4229     // return NULL with potentially more regions available for
4230     // claiming and why we have to check out_of_regions() to determine
4231     // whether we're done or not.
4232     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4233       // We are going to try to claim a new region. We should have
4234       // given up on the previous one.
4235       // Separated the asserts so that we know which one fires.
4236       assert(_curr_region  == NULL, "invariant");
4237       assert(_finger       == NULL, "invariant");
4238       assert(_region_limit == NULL, "invariant");
4239       if (_cm->verbose_low()) {
4240         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4241       }
4242       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4243       if (claimed_region != NULL) {
4244         // Yes, we managed to claim one
4245         statsOnly( ++_regions_claimed );
4246 
4247         if (_cm->verbose_low()) {
4248           gclog_or_tty->print_cr("[%d] we successfully claimed "
4249                                  "region "PTR_FORMAT,
4250                                  _task_id, claimed_region);
4251         }
4252 
4253         setup_for_region(claimed_region);
4254         assert(_curr_region == claimed_region, "invariant");
4255       }
4256       // It is important to call the regular clock here. It might take
4257       // a while to claim a region if, for example, we hit a large
4258       // block of empty regions. So we need to call the regular clock
4259       // method once round the loop to make sure it's called
4260       // frequently enough.
4261       regular_clock_call();
4262     }
4263 
4264     if (!has_aborted() && _curr_region == NULL) {
4265       assert(_cm->out_of_regions(),
4266              "at this point we should be out of regions");
4267     }
4268   } while (_curr_region != NULL && !has_aborted());
4269 
4270   if (!has_aborted()) {
4271     // We cannot check whether the global stack is empty, since other
4272     // tasks might be pushing objects to it concurrently. We also cannot
4273     // check if the region stack is empty because if a thread is aborting
4274     // it can push a partially done region back.
4275     assert(_cm->out_of_regions(),
4276            "at this point we should be out of regions");
4277 
4278     if (_cm->verbose_low()) {
4279       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4280     }
4281 
4282     // Try to reduce the number of available SATB buffers so that
4283     // remark has less work to do.
4284     drain_satb_buffers();
4285   }
4286 
4287   // Since we've done everything else, we can now totally drain the
4288   // local queue and global stack.
4289   drain_local_queue(false);
4290   drain_global_stack(false);
4291 
4292   // Attempt to steal work from other tasks' queues.
4293   if (do_stealing && !has_aborted()) {
4294     // We have not aborted. This means that we have finished all that
4295     // we could. Let's try to do some stealing...
4296 
4297     // We cannot check whether the global stack is empty, since other
4298     // tasks might be pushing objects to it concurrently. We also cannot
4299     // check if the region stack is empty because if a thread is aborting
4300     // it can push a partially done region back.
4301     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4302            "only way to reach here");
4303 
4304     if (_cm->verbose_low()) {
4305       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4306     }
4307 
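         // Note on try_stealing() below: judging by its use here, it tries to
         // pop an entry off another task's queue (with _hash_seed presumably
         // used to randomize victim selection); on success it stores the
         // stolen oop in obj and returns true.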
4308     while (!has_aborted()) {
4309       oop obj;
4310       statsOnly( ++_steal_attempts );
4311 
4312       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4313         if (_cm->verbose_medium()) {
4314           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4315                                  _task_id, (void*) obj);
4316         }
4317 
4318         statsOnly( ++_steals );
4319 
4320         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4321                "any stolen object should be marked");
4322         scan_object(obj);
4323 
4324         // And since we're towards the end, let's totally drain the
4325         // local queue and global stack.
4326         drain_local_queue(false);
4327         drain_global_stack(false);
4328       } else {
4329         break;
4330       }
4331     }
4332   }
4333 
4334   // If we are about to wrap up and go into termination, check if we
4335   // should raise the overflow flag.
4336   if (do_termination && !has_aborted()) {
4337     if (_cm->force_overflow()->should_force()) {
4338       _cm->set_has_overflown();
4339       regular_clock_call();
4340     }
4341   }
4342 
4343   // We still haven't aborted. Now, let's try to get into the
4344   // termination protocol.
4345   if (do_termination && !has_aborted()) {
4346     // We cannot check whether the global stack is empty, since other
4347     // tasks might be concurrently pushing objects on it. We also cannot
4348     // check if the region stack is empty because if a thread is aborting
4349     // it can push a partially done region back.
4350     // Separated the asserts so that we know which one fires.
4351     assert(_cm->out_of_regions(), "only way to reach here");
4352     assert(_task_queue->size() == 0, "only way to reach here");
4353 
4354     if (_cm->verbose_low()) {
4355       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4356     }
4357 
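         // (Assumption, based on the usage in this method: os::elapsedVTime()
         // reports this thread's virtual time in seconds, so multiplying by
         // 1000.0 converts it to milliseconds.)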
4358     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4359     // The CMTask class also extends the TerminatorTerminator class,
4360     // hence its should_exit_termination() method will also decide
4361     // whether to exit the termination protocol or not.
4362     bool finished = _cm->terminator()->offer_termination(this);
4363     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4364     _termination_time_ms +=
4365       termination_end_time_ms - _termination_start_time_ms;
4366 
4367     if (finished) {
4368       // We're all done.
4369 
4370       if (_task_id == 0) {
4371         // let's allow task 0 to do this
4372         if (concurrent()) {
4373           assert(_cm->concurrent_marking_in_progress(), "invariant");
4374           // we need to set this to false before the next
4375           // safepoint. This way we ensure that the marking phase
4376           // doesn't observe any more heap expansions.
4377           _cm->clear_concurrent_marking_in_progress();
4378         }
4379       }
4380 
4381       // We can now guarantee that the global stack is empty, since
4382       // all other tasks have finished. We separated the guarantees so
4383       // that, if a condition is false, we can immediately find out
4384       // which one.
4385       guarantee(_cm->out_of_regions(), "only way to reach here");
4386       guarantee(_aborted_region.is_empty(), "only way to reach here");
4387       guarantee(_cm->region_stack_empty(), "only way to reach here");
4388       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4389       guarantee(_task_queue->size() == 0, "only way to reach here");
4390       guarantee(!_cm->has_overflown(), "only way to reach here");
4391       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4392       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4393 
4394       if (_cm->verbose_low()) {
4395         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4396       }
4397     } else {
4398       // Apparently there's more work to do. Let's abort this task; it
4399       // will be restarted and we can hopefully find more things to do.
4400 
4401       if (_cm->verbose_low()) {
4402         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4403                                _task_id);
4404       }
4405 
4406       set_has_aborted();
4407       statsOnly( ++_aborted_termination );
4408     }
4409   }
4410 
4411   // Mainly for debugging purposes: make sure that the pointer to the
4412   // closure that was allocated in this frame doesn't accidentally
4413   // escape it.
4414   set_cm_oop_closure(NULL);
4415   double end_time_ms = os::elapsedVTime() * 1000.0;
4416   double elapsed_time_ms = end_time_ms - _start_time_ms;
4417   // Update the step history.
4418   _step_times_ms.add(elapsed_time_ms);
4419 
4420   if (has_aborted()) {
4421     // The task was aborted for some reason.
4422 
4423     statsOnly( ++_aborted );
4424 
4425     if (_has_timed_out) {
4426       double diff_ms = elapsed_time_ms - _time_target_ms;
4427       // Keep statistics of how well we did with respect to hitting
4428       // our target only if we actually timed out (if we aborted for
4429       // other reasons, then the results might get skewed).
4430       _marking_step_diffs_ms.add(diff_ms);
4431     }
4432 
4433     if (_cm->has_overflown()) {
4434       // This is the interesting one. We aborted because a global
4435       // overflow was raised. This means we have to restart the
4436       // marking phase and start iterating over regions. However, in
4437       // order to do this we have to make sure that all tasks stop
4438       // what they are doing and re-initialise in a safe manner. We
4439       // will achieve this with the use of two barrier sync points.
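           // A rough sketch of the protocol, as described here and below:
           //   1. every task enters the first barrier, so no task is still
           //      doing marking work;
           //   2. local (and, via task 0, global) marking state is
           //      re-initialised;
           //   3. every task enters the second barrier, after which marking
           //      can safely restart.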
4440 
4441       if (_cm->verbose_low()) {
4442         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4443       }
4444 
4445       _cm->enter_first_sync_barrier(_task_id);
4446       // When we exit this sync barrier we know that all tasks have
4447       // stopped doing marking work. So, it's now safe to
4448       // re-initialise our data structures. At the end of this method,
4449       // task 0 will clear the global data structures.
4450 
4451       statsOnly( ++_aborted_overflow );
4452 
4453       // We clear the local state of this task...
4454       clear_region_fields();
4455 
4456       // ...and enter the second barrier.
4457       _cm->enter_second_sync_barrier(_task_id);
4458       // At this point everything has been re-initialised and we're
4459       // ready to restart.
4460     }
4461 
4462     if (_cm->verbose_low()) {
4463       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4464                              "elapsed = %1.2lfms <<<<<<<<<<",
4465                              _task_id, _time_target_ms, elapsed_time_ms);
4466       if (_cm->has_aborted()) {
4467         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4468                                _task_id);
4469       }
4470     }
4471   } else {
4472     if (_cm->verbose_low()) {
4473       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4474                              "elapsed = %1.2lfms <<<<<<<<<<",
4475                              _task_id, _time_target_ms, elapsed_time_ms);
4476     }
4477   }
4478 
4479   _claimed = false;
4480 }
4481 
4482 CMTask::CMTask(int task_id,
4483                ConcurrentMark* cm,
4484                CMTaskQueue* task_queue,
4485                CMTaskQueueSet* task_queues)
4486   : _g1h(G1CollectedHeap::heap()),
4487     _task_id(task_id), _cm(cm),
4488     _claimed(false),
4489     _nextMarkBitMap(NULL), _hash_seed(17),
4490     _task_queue(task_queue),
4491     _task_queues(task_queues),
4492     _cm_oop_closure(NULL),
4493     _aborted_region(MemRegion()) {
4494   guarantee(task_queue != NULL, "invariant");
4495   guarantee(task_queues != NULL, "invariant");
4496 
4497   statsOnly( _clock_due_to_scanning = 0;
4498              _clock_due_to_marking  = 0 );
4499 
4500   _marking_step_diffs_ms.add(0.5);
4501 }
4502 
4503 // These are formatting macros that are used below to ensure
4504 // consistent formatting. The *_H_* versions are used to format the
4505 // header for a particular value and they should be kept consistent
4506 // with the corresponding macro. Also note that most of the macros add
4507 // the necessary white space (as a prefix) which makes them a bit
4508 // easier to compose.
4509 
4510 // All the output lines are prefixed with this string so that they can
4511 // be identified easily in a large log file.
4512 #define G1PPRL_LINE_PREFIX            "###"
4513 
4514 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4515 #ifdef _LP64
4516 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4517 #else // _LP64
4518 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4519 #endif // _LP64
4520 
4521 // For per-region info
4522 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4523 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4524 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4525 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4526 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4527 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4528 
4529 // For summary info
4530 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4531 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4532 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4533 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
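     // As an illustration of how these compose (not used by the code):
     // adjacent string literals are concatenated by the preprocessor, so
     //   G1PPRL_LINE_PREFIX G1PPRL_TYPE_H_FORMAT
     // expands to the single format string "###   %4s"; the print_cr()
     // calls below build their full format strings this way.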
4534 
4535 G1PrintRegionLivenessInfoClosure::
4536 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4537   : _out(out),
4538     _total_used_bytes(0), _total_capacity_bytes(0),
4539     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4540     _hum_used_bytes(0), _hum_capacity_bytes(0),
4541     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4542   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4543   MemRegion g1_committed = g1h->g1_committed();
4544   MemRegion g1_reserved = g1h->g1_reserved();
4545   double now = os::elapsedTime();
4546 
4547   // Print the header of the output.
4548   _out->cr();
4549   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4550   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4551                  G1PPRL_SUM_ADDR_FORMAT("committed")
4552                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4553                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4554                  g1_committed.start(), g1_committed.end(),
4555                  g1_reserved.start(), g1_reserved.end(),
4556                  (size_t)HeapRegion::GrainBytes);
4557   _out->print_cr(G1PPRL_LINE_PREFIX);
4558   _out->print_cr(G1PPRL_LINE_PREFIX
4559                  G1PPRL_TYPE_H_FORMAT
4560                  G1PPRL_ADDR_BASE_H_FORMAT
4561                  G1PPRL_BYTE_H_FORMAT
4562                  G1PPRL_BYTE_H_FORMAT
4563                  G1PPRL_BYTE_H_FORMAT
4564                  G1PPRL_DOUBLE_H_FORMAT,
4565                  "type", "address-range",
4566                  "used", "prev-live", "next-live", "gc-eff");
4567 }
4568 
4569 // It takes a pointer to one of the _hum_* fields as a parameter. It
4570 // deduces the corresponding value for a region in a humongous region
4571 // series (either the region size, or what's left if the _hum_* field
4572 // is less than the region size), and updates the _hum_* field accordingly.
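     // A worked example (assuming a 1 MB region size and a humongous series
     // whose "starts humongous" region reported 2.5 MB for the whole series):
     // three successive calls return 1 MB, 1 MB and 0.5 MB, while *hum_bytes
     // drops from 2.5 MB to 1.5 MB, then 0.5 MB, then 0.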
4573 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4574   size_t bytes = 0;
4575   // The > 0 check is to deal with the prev and next live bytes, which
4576   // could be 0.
4577   if (*hum_bytes > 0) {
4578     bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
4579     *hum_bytes -= bytes;
4580   }
4581   return bytes;
4582 }
4583 
4584 // It deduces the values for a region in a humongous region series
4585 // from the _hum_* fields and updates those accordingly. It assumes
4586 // that the _hum_* fields have already been set up from the "starts
4587 // humongous" region and that we visit the regions in address order.
4588 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4589                                                      size_t* capacity_bytes,
4590                                                      size_t* prev_live_bytes,
4591                                                      size_t* next_live_bytes) {
4592   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4593   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4594   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4595   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4596   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4597 }
4598 
4599 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4600   const char* type = "";
4601   HeapWord* bottom       = r->bottom();
4602   HeapWord* end          = r->end();
4603   size_t capacity_bytes  = r->capacity();
4604   size_t used_bytes      = r->used();
4605   size_t prev_live_bytes = r->live_bytes();
4606   size_t next_live_bytes = r->next_live_bytes();
4607   double gc_eff          = r->gc_efficiency();
4608   if (r->used() == 0) {
4609     type = "FREE";
4610   } else if (r->is_survivor()) {
4611     type = "SURV";
4612   } else if (r->is_young()) {
4613     type = "EDEN";
4614   } else if (r->startsHumongous()) {
4615     type = "HUMS";
4616 
4617     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4618            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4619            "they should have been zeroed after the last time we used them");
4620     // Set up the _hum_* fields.
4621     _hum_capacity_bytes  = capacity_bytes;
4622     _hum_used_bytes      = used_bytes;
4623     _hum_prev_live_bytes = prev_live_bytes;
4624     _hum_next_live_bytes = next_live_bytes;
4625     get_hum_bytes(&used_bytes, &capacity_bytes,
4626                   &prev_live_bytes, &next_live_bytes);
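         // Cap the printed address range at a single region; presumably the
         // end() of a "starts humongous" region can extend beyond the first
         // region of the series (the "continues humongous" case below asserts
         // the single-region invariant explicitly).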
4627     end = bottom + HeapRegion::GrainWords;
4628   } else if (r->continuesHumongous()) {
4629     type = "HUMC";
4630     get_hum_bytes(&used_bytes, &capacity_bytes,
4631                   &prev_live_bytes, &next_live_bytes);
4632     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4633   } else {
4634     type = "OLD";
4635   }
4636 
4637   _total_used_bytes      += used_bytes;
4638   _total_capacity_bytes  += capacity_bytes;
4639   _total_prev_live_bytes += prev_live_bytes;
4640   _total_next_live_bytes += next_live_bytes;
4641 
4642   // Print a line for this particular region.
4643   _out->print_cr(G1PPRL_LINE_PREFIX
4644                  G1PPRL_TYPE_FORMAT
4645                  G1PPRL_ADDR_BASE_FORMAT
4646                  G1PPRL_BYTE_FORMAT
4647                  G1PPRL_BYTE_FORMAT
4648                  G1PPRL_BYTE_FORMAT
4649                  G1PPRL_DOUBLE_FORMAT,
4650                  type, bottom, end,
4651                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4652 
4653   return false;
4654 }
4655 
4656 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4657   // Print the footer of the output.
4658   _out->print_cr(G1PPRL_LINE_PREFIX);
4659   _out->print_cr(G1PPRL_LINE_PREFIX
4660                  " SUMMARY"
4661                  G1PPRL_SUM_MB_FORMAT("capacity")
4662                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4663                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4664                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4665                  bytes_to_mb(_total_capacity_bytes),
4666                  bytes_to_mb(_total_used_bytes),
4667                  perc(_total_used_bytes, _total_capacity_bytes),
4668                  bytes_to_mb(_total_prev_live_bytes),
4669                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4670                  bytes_to_mb(_total_next_live_bytes),
4671                  perc(_total_next_live_bytes, _total_capacity_bytes));
4672   _out->cr();
4673 }