1 /*
   2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  32 #include "gc_implementation/g1/g1RemSet.hpp"
  33 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  34 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  35 #include "gc_implementation/shared/vmGCOperations.hpp"
  36 #include "memory/genOopClosures.inline.hpp"
  37 #include "memory/referencePolicy.hpp"
  38 #include "memory/resourceArea.hpp"
  39 #include "oops/oop.inline.hpp"
  40 #include "runtime/handles.inline.hpp"
  41 #include "runtime/java.hpp"
  42 
  43 //
// Concurrent Mark (CM) Bit Map Wrapper
  45 
  46 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  47   _bm((uintptr_t*)NULL,0),
  48   _shifter(shifter) {
  49   _bmStartWord = (HeapWord*)(rs.base());
  50   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
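  // One bit in the map covers 2^_shifter heap words, so we need
  // (_bmWordSize >> _shifter) bits, i.e.
  // (_bmWordSize >> (_shifter + LogBitsPerByte)) bytes of backing store
  // (plus one byte to cover rounding).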
  51   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  52                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  53 
  guarantee(brs.is_reserved(), "couldn't allocate CM bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CM bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CM bit map?");
  61   _bm.set_map((uintptr_t*)_virtual_space.low());
  62   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  63          _bmWordSize, "inconsistency in bit map sizing");
  64   _bm.set_size(_bmWordSize >> _shifter);
  65 }
  66 
  67 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  68                                                HeapWord* limit) const {
  69   // First we must round addr *up* to a possible object boundary.
  70   addr = (HeapWord*)align_size_up((intptr_t)addr,
  71                                   HeapWordSize << _shifter);
  72   size_t addrOffset = heapWordToOffset(addr);
  73   if (limit == NULL) {
  74     limit = _bmStartWord + _bmWordSize;
  75   }
  76   size_t limitOffset = heapWordToOffset(limit);
  77   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  78   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  79   assert(nextAddr >= addr, "get_next_one postcondition");
  80   assert(nextAddr == limit || isMarked(nextAddr),
  81          "get_next_one postcondition");
  82   return nextAddr;
  83 }
  84 
  85 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  86                                                  HeapWord* limit) const {
  87   size_t addrOffset = heapWordToOffset(addr);
  88   if (limit == NULL) {
  89     limit = _bmStartWord + _bmWordSize;
  90   }
  91   size_t limitOffset = heapWordToOffset(limit);
  92   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  93   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  94   assert(nextAddr >= addr, "get_next_one postcondition");
  95   assert(nextAddr == limit || !isMarked(nextAddr),
  96          "get_next_one postcondition");
  97   return nextAddr;
  98 }
  99 
 100 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 101   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 102   return (int) (diff >> _shifter);
 103 }
 104 
 105 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 106   HeapWord* left  = MAX2(_bmStartWord, mr.start());
 107   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
 108   if (right > left) {
 109     // Right-open interval [leftOffset, rightOffset).
 110     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
 111   } else {
 112     return true;
 113   }
 114 }
 115 
 116 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
 117                                              size_t    from_start_index,
 118                                              HeapWord* to_start_word,
 119                                              size_t    word_num) {
 120   _bm.mostly_disjoint_range_union(from_bitmap,
 121                                   from_start_index,
 122                                   heapWordToOffset(to_start_word),
 123                                   word_num);
 124 }
 125 
 126 #ifndef PRODUCT
 127 bool CMBitMapRO::covers(ReservedSpace rs) const {
 128   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 129   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 130          "size inconsistency");
 131   return _bmStartWord == (HeapWord*)(rs.base()) &&
 132          _bmWordSize  == rs.size()>>LogHeapWordSize;
 133 }
 134 #endif
 135 
 136 void CMBitMap::clearAll() {
 137   _bm.clear();
 138   return;
 139 }
 140 
 141 void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 143   assert(!mr.is_empty(), "unexpected empty region");
 144   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 145           ((HeapWord *) mr.end())),
 146          "markRange memory region end is not card aligned");
 147   // convert address range into offset range
 148   _bm.at_put_range(heapWordToOffset(mr.start()),
 149                    heapWordToOffset(mr.end()), true);
 150 }
 151 
 152 void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 154   assert(!mr.is_empty(), "unexpected empty region");
 155   // convert address range into offset range
 156   _bm.at_put_range(heapWordToOffset(mr.start()),
 157                    heapWordToOffset(mr.end()), false);
 158 }
 159 
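// Finds the first contiguous run of marked words in [addr, end_addr), clears
// the corresponding bits, and returns the run as a MemRegion (empty if there
// is no marked word in the range).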
 160 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 161                                             HeapWord* end_addr) {
 162   HeapWord* start = getNextMarkedWordAddress(addr);
 163   start = MIN2(start, end_addr);
 164   HeapWord* end   = getNextUnmarkedWordAddress(start);
 165   end = MIN2(end, end_addr);
 166   assert(start <= end, "Consistency check");
 167   MemRegion mr(start, end);
 168   if (!mr.is_empty()) {
 169     clearRange(mr);
 170   }
 171   return mr;
 172 }
 173 
 174 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 175   _base(NULL), _cm(cm)
 176 #ifdef ASSERT
 177   , _drain_in_progress(false)
 178   , _drain_in_progress_yields(false)
 179 #endif
 180 {}
 181 
 182 void CMMarkStack::allocate(size_t size) {
 183   _base = NEW_C_HEAP_ARRAY(oop, size);
 184   if (_base == NULL) {
 185     vm_exit_during_initialization("Failed to allocate "
 186                                   "CM region mark stack");
 187   }
 188   _index = 0;
 189   _capacity = (jint) size;
 190   _oops_do_bound = -1;
 191   NOT_PRODUCT(_max_depth = 0);
 192 }
 193 
 194 CMMarkStack::~CMMarkStack() {
 195   if (_base != NULL) {
 196     FREE_C_HEAP_ARRAY(oop, _base);
 197   }
 198 }
 199 
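// Lock-free push: a slot is claimed by CAS-ing _index forward; only the
// thread whose CAS succeeds writes _base[index], so the store after the CAS
// needs no further synchronization. Sets _overflow instead of pushing when
// the stack is full.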
 200 void CMMarkStack::par_push(oop ptr) {
 201   while (true) {
 202     if (isFull()) {
 203       _overflow = true;
 204       return;
 205     }
 206     // Otherwise...
 207     jint index = _index;
 208     jint next_index = index+1;
 209     jint res = Atomic::cmpxchg(next_index, &_index, index);
 210     if (res == index) {
 211       _base[index] = ptr;
 212       // Note that we don't maintain this atomically.  We could, but it
 213       // doesn't seem necessary.
 214       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 215       return;
 216     }
 217     // Otherwise, we need to try again.
 218   }
 219 }
 220 
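// Lock-free bulk push: claims n consecutive slots with a single CAS on
// _index and then fills them in. Sets _overflow (and pushes nothing) if the
// claim would exceed _capacity.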
 221 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 222   while (true) {
 223     if (isFull()) {
 224       _overflow = true;
 225       return;
 226     }
 227     // Otherwise...
 228     jint index = _index;
 229     jint next_index = index + n;
 230     if (next_index > _capacity) {
 231       _overflow = true;
 232       return;
 233     }
 234     jint res = Atomic::cmpxchg(next_index, &_index, index);
 235     if (res == index) {
 236       for (int i = 0; i < n; i++) {
 237         int ind = index + i;
 238         assert(ind < _capacity, "By overflow test above.");
 239         _base[ind] = ptr_arr[i];
 240       }
 241       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 242       return;
 243     }
 244     // Otherwise, we need to try again.
 245   }
 246 }
 247 
 248 
 249 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 250   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 251   jint start = _index;
 252   jint next_index = start + n;
 253   if (next_index > _capacity) {
 254     _overflow = true;
 255     return;
 256   }
 257   // Otherwise.
 258   _index = next_index;
 259   for (int i = 0; i < n; i++) {
 260     int ind = start + i;
 261     assert(ind < _capacity, "By overflow test above.");
 262     _base[ind] = ptr_arr[i];
 263   }
 264 }
 265 
 266 
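// Pops up to 'max' entries from the top of the stack into ptr_arr, reporting
// how many were taken via *n; returns false if the stack was empty.
// Serialized with par_push_arr via ParGCRareEvent_lock.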
 267 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 268   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 269   jint index = _index;
 270   if (index == 0) {
 271     *n = 0;
 272     return false;
 273   } else {
 274     int k = MIN2(max, index);
 275     jint new_ind = index - k;
 276     for (int j = 0; j < k; j++) {
 277       ptr_arr[j] = _base[new_ind + j];
 278     }
 279     _index = new_ind;
 280     *n = k;
 281     return true;
 282   }
 283 }
 284 
 285 
 286 CMRegionStack::CMRegionStack() : _base(NULL) {}
 287 
 288 void CMRegionStack::allocate(size_t size) {
 289   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 290   if (_base == NULL) {
 291     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 292   }
 293   _index = 0;
 294   _capacity = (jint) size;
 295 }
 296 
 297 CMRegionStack::~CMRegionStack() {
 298   if (_base != NULL) {
    FREE_C_HEAP_ARRAY(MemRegion, _base);
 300   }
 301 }
 302 
 303 void CMRegionStack::push_lock_free(MemRegion mr) {
 304   assert(mr.word_size() > 0, "Precondition");
 305   while (true) {
 306     jint index = _index;
 307 
 308     if (index >= _capacity) {
 309       _overflow = true;
 310       return;
 311     }
 312     // Otherwise...
 313     jint next_index = index+1;
 314     jint res = Atomic::cmpxchg(next_index, &_index, index);
 315     if (res == index) {
 316       _base[index] = mr;
 317       return;
 318     }
 319     // Otherwise, we need to try again.
 320   }
 321 }
 322 
 323 // Lock-free pop of the region stack. Called during the concurrent
 324 // marking / remark phases. Should only be called in tandem with
 325 // other lock-free pops.
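// Entries that have been nulled out by invalidate_entries_into_cset() are
// treated as invalidated and skipped.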
 326 MemRegion CMRegionStack::pop_lock_free() {
 327   while (true) {
 328     jint index = _index;
 329 
 330     if (index == 0) {
 331       return MemRegion();
 332     }
 333     // Otherwise...
 334     jint next_index = index-1;
 335     jint res = Atomic::cmpxchg(next_index, &_index, index);
 336     if (res == index) {
 337       MemRegion mr = _base[next_index];
 338       if (mr.start() != NULL) {
 339         assert(mr.end() != NULL, "invariant");
 340         assert(mr.word_size() > 0, "invariant");
 341         return mr;
 342       } else {
 343         // that entry was invalidated... let's skip it
 344         assert(mr.end() == NULL, "invariant");
 345       }
 346     }
 347     // Otherwise, we need to try again.
 348   }
 349 }
 350 
 351 #if 0
 352 // The routines that manipulate the region stack with a lock are
 353 // not currently used. They should be retained, however, as a
 354 // diagnostic aid.
 355 
 356 void CMRegionStack::push_with_lock(MemRegion mr) {
 357   assert(mr.word_size() > 0, "Precondition");
 358   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 359 
 360   if (isFull()) {
 361     _overflow = true;
 362     return;
 363   }
 364 
 365   _base[_index] = mr;
 366   _index += 1;
 367 }
 368 
 369 MemRegion CMRegionStack::pop_with_lock() {
 370   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 371 
 372   while (true) {
 373     if (_index == 0) {
 374       return MemRegion();
 375     }
 376     _index -= 1;
 377 
 378     MemRegion mr = _base[_index];
 379     if (mr.start() != NULL) {
 380       assert(mr.end() != NULL, "invariant");
 381       assert(mr.word_size() > 0, "invariant");
 382       return mr;
 383     } else {
 384       // that entry was invalidated... let's skip it
 385       assert(mr.end() == NULL, "invariant");
 386     }
 387   }
 388 }
 389 #endif
 390 
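// Walks the region stack entries up to _oops_do_bound and replaces any entry
// whose region now lies in the collection set with an empty MemRegion, so
// that later pops will skip it. Returns true if anything was invalidated.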
 391 bool CMRegionStack::invalidate_entries_into_cset() {
 392   bool result = false;
 393   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 394   for (int i = 0; i < _oops_do_bound; ++i) {
 395     MemRegion mr = _base[i];
 396     if (mr.start() != NULL) {
 397       assert(mr.end() != NULL, "invariant");
 398       assert(mr.word_size() > 0, "invariant");
 399       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 400       assert(hr != NULL, "invariant");
 401       if (hr->in_collection_set()) {
 402         // The region points into the collection set
 403         _base[i] = MemRegion();
 404         result = true;
 405       }
 406     } else {
 407       // that entry was invalidated... let's skip it
 408       assert(mr.end() == NULL, "invariant");
 409     }
 410   }
 411   return result;
 412 }
 413 
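// Repeatedly pops entries and applies 'cl' to the oops contained in each
// popped object, optionally yielding between iterations. Returns false if
// the drain was cut short by a yield request.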
 414 template<class OopClosureClass>
 415 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 416   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 417          || SafepointSynchronize::is_at_safepoint(),
 418          "Drain recursion must be yield-safe.");
 419   bool res = true;
 420   debug_only(_drain_in_progress = true);
 421   debug_only(_drain_in_progress_yields = yield_after);
 422   while (!isEmpty()) {
 423     oop newOop = pop();
 424     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 425     assert(newOop->is_oop(), "Expected an oop");
 426     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 427            "only grey objects on this stack");
 428     // iterate over the oops in this oop, marking and pushing
    // the ones in the G1 heap.
 430     newOop->oop_iterate(cl);
 431     if (yield_after && _cm->do_yield_check()) {
 432       res = false;
 433       break;
 434     }
 435   }
 436   debug_only(_drain_in_progress = false);
 437   return res;
 438 }
 439 
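// Applies 'f' to the first _oops_do_bound entries of the stack; the bound
// must have been set by the caller beforehand and is reset afterwards.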
 440 void CMMarkStack::oops_do(OopClosure* f) {
 441   if (_index == 0) return;
 442   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 443          "Bound must be set.");
 444   for (int i = 0; i < _oops_do_bound; i++) {
 445     f->do_oop(&_base[i]);
 446   }
 447   _oops_do_bound = -1;
 448 }
 449 
 450 bool ConcurrentMark::not_yet_marked(oop obj) const {
 451   return (_g1h->is_obj_ill(obj)
 452           || (_g1h->is_in_permanent(obj)
 453               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 454 }
 455 
 456 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 457 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 458 #endif // _MSC_VER
 459 
 460 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 461                                int max_regions) :
 462   _markBitMap1(rs, MinObjAlignment - 1),
 463   _markBitMap2(rs, MinObjAlignment - 1),
 464 
 465   _parallel_marking_threads(0),
 466   _sleep_factor(0.0),
 467   _marking_task_overhead(1.0),
 468   _cleanup_sleep_factor(0.0),
 469   _cleanup_task_overhead(1.0),
 470   _cleanup_list("Cleanup List"),
 471   _region_bm(max_regions, false /* in_resource_area*/),
 472   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 473            CardTableModRefBS::card_shift,
 474            false /* in_resource_area*/),
 475   _prevMarkBitMap(&_markBitMap1),
 476   _nextMarkBitMap(&_markBitMap2),
 477   _at_least_one_mark_complete(false),
 478 
 479   _markStack(this),
 480   _regionStack(),
 481   // _finger set in set_non_marking_state
 482 
 483   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 484   // _active_tasks set in set_non_marking_state
 485   // _tasks set inside the constructor
 486   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 487   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 488 
 489   _has_overflown(false),
 490   _concurrent(false),
 491   _has_aborted(false),
 492   _restart_for_overflow(false),
 493   _concurrent_marking_in_progress(false),
 494   _should_gray_objects(false),
 495 
 496   // _verbose_level set below
 497 
 498   _init_times(),
 499   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 500   _cleanup_times(),
 501   _total_counting_time(0.0),
 502   _total_rs_scrub_time(0.0),
 503 
 504   _parallel_workers(NULL) {
 505   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 506   if (verbose_level < no_verbose) {
 507     verbose_level = no_verbose;
 508   }
 509   if (verbose_level > high_verbose) {
 510     verbose_level = high_verbose;
 511   }
 512   _verbose_level = verbose_level;
 513 
 514   if (verbose_low()) {
 515     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 516                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 517   }
 518 
 519   _markStack.allocate(MarkStackSize);
 520   _regionStack.allocate(G1MarkRegionStackSize);
 521 
 522   // Create & start a ConcurrentMark thread.
 523   _cmThread = new ConcurrentMarkThread(this);
 524   assert(cmThread() != NULL, "CM Thread should have been created");
 525   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 526 
 527   _g1h = G1CollectedHeap::heap();
 528   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 529   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 530   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 531 
 532   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 533   satb_qs.set_buffer_size(G1SATBBufferSize);
 534 
 535   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 536   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 537 
 538   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 539   _active_tasks = _max_task_num;
 540   for (int i = 0; i < (int) _max_task_num; ++i) {
 541     CMTaskQueue* task_queue = new CMTaskQueue();
 542     task_queue->initialize();
 543     _task_queues->register_queue(i, task_queue);
 544 
 545     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 546     _accum_task_vtime[i] = 0.0;
 547   }
 548 
 549   if (ConcGCThreads > ParallelGCThreads) {
 550     vm_exit_during_initialization("Can't have more ConcGCThreads "
 551                                   "than ParallelGCThreads.");
 552   }
 553   if (ParallelGCThreads == 0) {
 554     // if we are not running with any parallel GC threads we will not
 555     // spawn any marking threads either
 556     _parallel_marking_threads =   0;
 557     _sleep_factor             = 0.0;
 558     _marking_task_overhead    = 1.0;
 559   } else {
 560     if (ConcGCThreads > 0) {
      // Note that ConcGCThreads overrides G1MarkingOverheadPercent
      // if both are set.
 563 
 564       _parallel_marking_threads = ConcGCThreads;
 565       _sleep_factor             = 0.0;
 566       _marking_task_overhead    = 1.0;
 567     } else if (G1MarkingOverheadPercent > 0) {
 568       // we will calculate the number of parallel marking threads
 569       // based on a target overhead with respect to the soft real-time
 570       // goal
 571 
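      // marking_thread_num is the number of whole CPUs needed to realize
      // overall_cm_overhead; marking_task_overhead is then each thread's
      // duty cycle on its CPU, and sleep_factor is the ratio of sleep time
      // to work time that produces that duty cycle.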
 572       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 573       double overall_cm_overhead =
 574         (double) MaxGCPauseMillis * marking_overhead /
 575         (double) GCPauseIntervalMillis;
 576       double cpu_ratio = 1.0 / (double) os::processor_count();
 577       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 578       double marking_task_overhead =
 579         overall_cm_overhead / marking_thread_num *
 580                                                 (double) os::processor_count();
 581       double sleep_factor =
 582                          (1.0 - marking_task_overhead) / marking_task_overhead;
 583 
 584       _parallel_marking_threads = (size_t) marking_thread_num;
 585       _sleep_factor             = sleep_factor;
 586       _marking_task_overhead    = marking_task_overhead;
 587     } else {
 588       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
 589       _sleep_factor             = 0.0;
 590       _marking_task_overhead    = 1.0;
 591     }
 592 
 593     if (parallel_marking_threads() > 1) {
 594       _cleanup_task_overhead = 1.0;
 595     } else {
 596       _cleanup_task_overhead = marking_task_overhead();
 597     }
 598     _cleanup_sleep_factor =
 599                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 600 
 601 #if 0
 602     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 603     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 604     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 605     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 606     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 607 #endif
 608 
 609     guarantee(parallel_marking_threads() > 0, "peace of mind");
 610     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 611          (int) _parallel_marking_threads, false, true);
 612     if (_parallel_workers == NULL) {
 613       vm_exit_during_initialization("Failed necessary allocation.");
 614     } else {
 615       _parallel_workers->initialize_workers();
 616     }
 617   }
 618 
 619   // so that the call below can read a sensible value
 620   _heap_start = (HeapWord*) rs.base();
 621   set_non_marking_state();
 622 }
 623 
 624 void ConcurrentMark::update_g1_committed(bool force) {
 625   // If concurrent marking is not in progress, then we do not need to
 626   // update _heap_end. This has a subtle and important
 627   // side-effect. Imagine that two evacuation pauses happen between
 628   // marking completion and remark. The first one can grow the
 629   // heap (hence now the finger is below the heap end). Then, the
 630   // second one could unnecessarily push regions on the region
 631   // stack. This causes the invariant that the region stack is empty
 632   // at the beginning of remark to be false. By ensuring that we do
 633   // not observe heap expansions after marking is complete, then we do
 634   // not have this problem.
 635   if (!concurrent_marking_in_progress() && !force) return;
 636 
 637   MemRegion committed = _g1h->g1_committed();
 638   assert(committed.start() == _heap_start, "start shouldn't change");
 639   HeapWord* new_end = committed.end();
 640   if (new_end > _heap_end) {
 641     // The heap has been expanded.
 642 
 643     _heap_end = new_end;
 644   }
 645   // Notice that the heap can also shrink. However, this only happens
 646   // during a Full GC (at least currently) and the entire marking
 647   // phase will bail out and the task will not be restarted. So, let's
 648   // do nothing.
 649 }
 650 
 651 void ConcurrentMark::reset() {
 652   // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions;
  // these will happen at the end of evacuation pauses, when tasks are
  // inactive.
 656   MemRegion committed = _g1h->g1_committed();
 657   _heap_start = committed.start();
 658   _heap_end   = committed.end();
 659 
 660   // Separated the asserts so that we know which one fires.
 661   assert(_heap_start != NULL, "heap bounds should look ok");
 662   assert(_heap_end != NULL, "heap bounds should look ok");
 663   assert(_heap_start < _heap_end, "heap bounds should look ok");
 664 
 665   // reset all the marking data structures and any necessary flags
 666   clear_marking_state();
 667 
 668   if (verbose_low()) {
 669     gclog_or_tty->print_cr("[global] resetting");
 670   }
 671 
  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
 674   // of them ready.
 675   for (int i = 0; i < (int) _max_task_num; ++i) {
 676     _tasks[i]->reset(_nextMarkBitMap);
 677   }
 678 
 679   // we need this to make sure that the flag is on during the evac
 680   // pause with initial mark piggy-backed
 681   set_concurrent_marking_in_progress();
 682 }
 683 
 684 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 685   assert(active_tasks <= _max_task_num, "we should not have more");
 686 
 687   _active_tasks = active_tasks;
 688   // Need to update the three data structures below according to the
 689   // number of active threads for this phase.
 690   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 691   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 692   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 693 
 694   _concurrent = concurrent;
 695   // We propagate this to all tasks, not just the active ones.
 696   for (int i = 0; i < (int) _max_task_num; ++i)
 697     _tasks[i]->set_concurrent(concurrent);
 698 
 699   if (concurrent) {
 700     set_concurrent_marking_in_progress();
 701   } else {
 702     // We currently assume that the concurrent flag has been set to
 703     // false before we start remark. At this point we should also be
 704     // in a STW phase.
 705     assert(!concurrent_marking_in_progress(), "invariant");
 706     assert(_finger == _heap_end, "only way to get here");
 707     update_g1_committed(true);
 708   }
 709 }
 710 
 711 void ConcurrentMark::set_non_marking_state() {
 712   // We set the global marking state to some default values when we're
 713   // not doing marking.
 714   clear_marking_state();
 715   _active_tasks = 0;
 716   clear_concurrent_marking_in_progress();
 717 }
 718 
 719 ConcurrentMark::~ConcurrentMark() {
 720   for (int i = 0; i < (int) _max_task_num; ++i) {
 721     delete _task_queues->queue(i);
 722     delete _tasks[i];
 723   }
 724   delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
 726 }
 727 
// This closure is used to mark refs into the G1 heap
// from external roots in the CM bit map.
 730 // Called at the first checkpoint.
 731 //
 732 
 733 void ConcurrentMark::clearNextBitmap() {
 734   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 735   G1CollectorPolicy* g1p = g1h->g1_policy();
 736 
 737   // Make sure that the concurrent mark thread looks to still be in
 738   // the current cycle.
 739   guarantee(cmThread()->during_cycle(), "invariant");
 740 
 741   // We are finishing up the current cycle by clearing the next
 742   // marking bitmap and getting it ready for the next cycle. During
 743   // this time no other cycle can start. So, let's make sure that this
 744   // is the case.
 745   guarantee(!g1h->mark_in_progress(), "invariant");
 746 
 747   // clear the mark bitmap (no grey objects to start with).
 748   // We need to do this in chunks and offer to yield in between
 749   // each chunk.
 750   HeapWord* start  = _nextMarkBitMap->startWord();
 751   HeapWord* end    = _nextMarkBitMap->endWord();
 752   HeapWord* cur    = start;
 753   size_t chunkSize = M;
 754   while (cur < end) {
 755     HeapWord* next = cur + chunkSize;
 756     if (next > end) {
 757       next = end;
 758     }
 759     MemRegion mr(cur,next);
 760     _nextMarkBitMap->clearRange(mr);
 761     cur = next;
 762     do_yield_check();
 763 
 764     // Repeat the asserts from above. We'll do them as asserts here to
 765     // minimize their overhead on the product. However, we'll have
 766     // them as guarantees at the beginning / end of the bitmap
 767     // clearing to get some checking in the product.
 768     assert(cmThread()->during_cycle(), "invariant");
 769     assert(!g1h->mark_in_progress(), "invariant");
 770   }
 771 
 772   // Repeat the asserts from above.
 773   guarantee(cmThread()->during_cycle(), "invariant");
 774   guarantee(!g1h->mark_in_progress(), "invariant");
 775 }
 776 
 777 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 778 public:
 779   bool doHeapRegion(HeapRegion* r) {
 780     if (!r->continuesHumongous()) {
 781       r->note_start_of_marking(true);
 782     }
 783     return false;
 784   }
 785 };
 786 
 787 void ConcurrentMark::checkpointRootsInitialPre() {
 788   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 789   G1CollectorPolicy* g1p = g1h->g1_policy();
 790 
 791   _has_aborted = false;
 792 
 793 #ifndef PRODUCT
 794   if (G1PrintReachableAtInitialMark) {
 795     print_reachable("at-cycle-start",
 796                     VerifyOption_G1UsePrevMarking, true /* all */);
 797   }
 798 #endif
 799 
 800   // Initialise marking structures. This has to be done in a STW phase.
 801   reset();
 802 }
 803 
 804 class CMMarkRootsClosure: public OopsInGenClosure {
 805 private:
 806   ConcurrentMark*  _cm;
 807   G1CollectedHeap* _g1h;
 808   bool             _do_barrier;
 809 
 810 public:
 811   CMMarkRootsClosure(ConcurrentMark* cm,
 812                      G1CollectedHeap* g1h,
 813                      bool do_barrier) : _cm(cm), _g1h(g1h),
 814                                         _do_barrier(do_barrier) { }
 815 
 816   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
 817   virtual void do_oop(      oop* p) { do_oop_work(p); }
 818 
 819   template <class T> void do_oop_work(T* p) {
 820     T heap_oop = oopDesc::load_heap_oop(p);
 821     if (!oopDesc::is_null(heap_oop)) {
 822       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
 823       assert(obj->is_oop() || obj->mark() == NULL,
 824              "expected an oop, possibly with mark word displaced");
 825       HeapWord* addr = (HeapWord*)obj;
 826       if (_g1h->is_in_g1_reserved(addr)) {
 827         _cm->grayRoot(obj);
 828       }
 829     }
 830     if (_do_barrier) {
 831       assert(!_g1h->is_in_g1_reserved(p),
 832              "Should be called on external roots");
 833       do_barrier(p);
 834     }
 835   }
 836 };
 837 
 838 void ConcurrentMark::checkpointRootsInitialPost() {
 839   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 840 
 841   // If we force an overflow during remark, the remark operation will
 842   // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the number of remaining forced overflows decreases at
  // every remark and we'll eventually not need to cause one.
 847   force_overflow_stw()->init();
 848 
 849   // For each region note start of marking.
 850   NoteStartOfMarkHRClosure startcl;
 851   g1h->heap_region_iterate(&startcl);
 852 
 853   // Start weak-reference discovery.
 854   ReferenceProcessor* rp = g1h->ref_processor();
 855   rp->verify_no_references_recorded();
 856   rp->enable_discovery(); // enable ("weak") refs discovery
 857   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 858 
 859   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 862   satb_mq_set.set_active_all_threads(true, /* new active value */
 863                                      false /* expected_active */);
 864 
 865   // update_g1_committed() will be called at the end of an evac pause
 866   // when marking is on. So, it's also called at the end of the
 867   // initial-mark pause to update the heap end, if the heap expands
 868   // during it. No need to call it here.
 869 }
 870 
 871 // Checkpoint the roots into this generation from outside
 872 // this generation. [Note this initial checkpoint need only
 873 // be approximate -- we'll do a catch up phase subsequently.]
 874 void ConcurrentMark::checkpointRootsInitial() {
 875   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
 876   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 877 
 878   double start = os::elapsedTime();
 879 
 880   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
 881   g1p->record_concurrent_mark_init_start();
 882   checkpointRootsInitialPre();
 883 
 884   // YSR: when concurrent precleaning is in place, we'll
 885   // need to clear the cached card table here
 886 
 887   ResourceMark rm;
 888   HandleMark  hm;
 889 
 890   g1h->ensure_parsability(false);
 891   g1h->perm_gen()->save_marks();
 892 
 893   CMMarkRootsClosure notOlder(this, g1h, false);
 894   CMMarkRootsClosure older(this, g1h, true);
 895 
 896   g1h->set_marking_started();
 897   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
 898 
 899   g1h->process_strong_roots(true,    // activate StrongRootsScope
 900                             false,   // fake perm gen collection
 901                             SharedHeap::SO_AllClasses,
 902                             &notOlder, // Regular roots
 903                             NULL,     // do not visit active blobs
 904                             &older    // Perm Gen Roots
 905                             );
 906   checkpointRootsInitialPost();
 907 
 908   // Statistics.
 909   double end = os::elapsedTime();
 910   _init_times.add((end - start) * 1000.0);
 911 
 912   g1p->record_concurrent_mark_init_end();
 913 }
 914 
 915 /*
 916  * Notice that in the next two methods, we actually leave the STS
 917  * during the barrier sync and join it immediately afterwards. If we
 918  * do not do this, the following deadlock can occur: one thread could
 919  * be in the barrier sync code, waiting for the other thread to also
 920  * sync up, whereas another one could be trying to yield, while also
 921  * waiting for the other threads to sync up too.
 922  *
 923  * Note, however, that this code is also used during remark and in
 924  * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that a Full GC or an evacuation pause could occur while
 * it is suspended. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 934  */
 935 
 936 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 937   if (verbose_low()) {
 938     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 939   }
 940 
 941   if (concurrent()) {
 942     ConcurrentGCThread::stsLeave();
 943   }
 944   _first_overflow_barrier_sync.enter();
 945   if (concurrent()) {
 946     ConcurrentGCThread::stsJoin();
 947   }
 948   // at this point everyone should have synced up and not be doing any
 949   // more work
 950 
 951   if (verbose_low()) {
 952     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 953   }
 954 
 955   // let task 0 do this
 956   if (task_num == 0) {
 957     // task 0 is responsible for clearing the global data structures
 958     // We should be here because of an overflow. During STW we should
 959     // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
 961     // marking.
 962     clear_marking_state(concurrent() /* clear_overflow */);
 963     force_overflow()->update();
 964 
 965     if (PrintGC) {
 966       gclog_or_tty->date_stamp(PrintGCDateStamps);
 967       gclog_or_tty->stamp(PrintGCTimeStamps);
 968       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 969     }
 970   }
 971 
  // after this, each task should reset its own data structures and
  // then go into the second barrier
 974 }
 975 
 976 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 977   if (verbose_low()) {
 978     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 979   }
 980 
 981   if (concurrent()) {
 982     ConcurrentGCThread::stsLeave();
 983   }
 984   _second_overflow_barrier_sync.enter();
 985   if (concurrent()) {
 986     ConcurrentGCThread::stsJoin();
 987   }
 988   // at this point everything should be re-initialised and ready to go
 989 
 990   if (verbose_low()) {
 991     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 992   }
 993 }
 994 
 995 #ifndef PRODUCT
 996 void ForceOverflowSettings::init() {
 997   _num_remaining = G1ConcMarkForceOverflow;
 998   _force = false;
 999   update();
1000 }
1001 
1002 void ForceOverflowSettings::update() {
1003   if (_num_remaining > 0) {
1004     _num_remaining -= 1;
1005     _force = true;
1006   } else {
1007     _force = false;
1008   }
1009 }
1010 
1011 bool ForceOverflowSettings::should_force() {
1012   if (_force) {
1013     _force = false;
1014     return true;
1015   } else {
1016     return false;
1017   }
1018 }
1019 #endif // !PRODUCT
1020 
1021 void ConcurrentMark::grayRoot(oop p) {
1022   HeapWord* addr = (HeapWord*) p;
1023   // We can't really check against _heap_start and _heap_end, since it
1024   // is possible during an evacuation pause with piggy-backed
1025   // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
1028   assert(_g1h->g1_committed().contains(addr),
1029          "address should be within the heap bounds");
1030 
1031   if (!_nextMarkBitMap->isMarked(addr)) {
1032     _nextMarkBitMap->parMark(addr);
1033   }
1034 }
1035 
1036 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
1037   // The objects on the region have already been marked "in bulk" by
1038   // the caller. We only need to decide whether to push the region on
1039   // the region stack or not.
1040 
1041   if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1042     // We're done with marking and waiting for remark. We do not need to
1043     // push anything else on the region stack.
1044     return;
1045   }
1046 
1047   HeapWord* finger = _finger;
1048 
1049   if (verbose_low()) {
1050     gclog_or_tty->print_cr("[global] attempting to push "
1051                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
1052                            PTR_FORMAT, mr.start(), mr.end(), finger);
1053   }
1054 
1055   if (mr.start() < finger) {
1056     // The finger is always heap region aligned and it is not possible
1057     // for mr to span heap regions.
1058     assert(mr.end() <= finger, "invariant");
1059 
1060     // Separated the asserts so that we know which one fires.
1061     assert(mr.start() <= mr.end(),
1062            "region boundaries should fall within the committed space");
1063     assert(_heap_start <= mr.start(),
1064            "region boundaries should fall within the committed space");
1065     assert(mr.end() <= _heap_end,
1066            "region boundaries should fall within the committed space");
1067     if (verbose_low()) {
1068       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1069                              "below the finger, pushing it",
1070                              mr.start(), mr.end());
1071     }
1072 
1073     if (!region_stack_push_lock_free(mr)) {
1074       if (verbose_low()) {
1075         gclog_or_tty->print_cr("[global] region stack has overflown.");
1076       }
1077     }
1078   }
1079 }
1080 
1081 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1082   // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.
1084 
1085   HeapWord* addr = (HeapWord*)p;
1086   if (!_nextMarkBitMap->isMarked(addr)) {
1087     // We definitely need to mark it, irrespective whether we bail out
1088     // because we're done with marking.
1089     if (_nextMarkBitMap->parMark(addr)) {
1090       if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1091         // If we're done with concurrent marking and we're waiting for
1092         // remark, then we're not pushing anything on the stack.
1093         return;
1094       }
1095 
      // No OrderAccess::store_load() is needed. It is implicit in the
1097       // CAS done in parMark(addr) above
1098       HeapWord* finger = _finger;
1099 
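      // Objects at or above the finger will be visited when the finger
      // reaches them, so only objects already below the finger need to be
      // pushed on the mark stack explicitly.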
1100       if (addr < finger) {
1101         if (!mark_stack_push(oop(addr))) {
1102           if (verbose_low()) {
1103             gclog_or_tty->print_cr("[global] global stack overflow "
1104                                    "during parMark");
1105           }
1106         }
1107       }
1108     }
1109   }
1110 }
1111 
1112 class CMConcurrentMarkingTask: public AbstractGangTask {
1113 private:
1114   ConcurrentMark*       _cm;
1115   ConcurrentMarkThread* _cmt;
1116 
1117 public:
1118   void work(int worker_i) {
1119     assert(Thread::current()->is_ConcurrentGC_thread(),
1120            "this should only be done by a conc GC thread");
1121     ResourceMark rm;
1122 
1123     double start_vtime = os::elapsedVTime();
1124 
1125     ConcurrentGCThread::stsJoin();
1126 
1127     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1128     CMTask* the_task = _cm->task(worker_i);
1129     the_task->record_start_time();
1130     if (!_cm->has_aborted()) {
1131       do {
1132         double start_vtime_sec = os::elapsedVTime();
1133         double start_time_sec = os::elapsedTime();
1134         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1135 
1136         the_task->do_marking_step(mark_step_duration_ms,
1137                                   true /* do_stealing    */,
1138                                   true /* do_termination */);
1139 
1140         double end_time_sec = os::elapsedTime();
1141         double end_vtime_sec = os::elapsedVTime();
1142         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1143         double elapsed_time_sec = end_time_sec - start_time_sec;
1144         _cm->clear_has_overflown();
1145 
1146         bool ret = _cm->do_yield_check(worker_i);
1147 
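        // If this task aborted its marking step (but marking as a whole has
        // not aborted), sleep in proportion to the virtual time just spent
        // marking, scaled by sleep_factor, so that the thread's CPU usage
        // stays close to the configured marking overhead.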
1148         jlong sleep_time_ms;
1149         if (!_cm->has_aborted() && the_task->has_aborted()) {
1150           sleep_time_ms =
1151             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1152           ConcurrentGCThread::stsLeave();
1153           os::sleep(Thread::current(), sleep_time_ms, false);
1154           ConcurrentGCThread::stsJoin();
1155         }
1156         double end_time2_sec = os::elapsedTime();
1157         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1158 
1159 #if 0
1160           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1161                                  "overhead %1.4lf",
1162                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1163                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1164           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1165                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1166 #endif
1167       } while (!_cm->has_aborted() && the_task->has_aborted());
1168     }
1169     the_task->record_end_time();
1170     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1171 
1172     ConcurrentGCThread::stsLeave();
1173 
1174     double end_vtime = os::elapsedVTime();
1175     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1176   }
1177 
1178   CMConcurrentMarkingTask(ConcurrentMark* cm,
1179                           ConcurrentMarkThread* cmt) :
1180       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1181 
1182   ~CMConcurrentMarkingTask() { }
1183 };
1184 
1185 void ConcurrentMark::markFromRoots() {
1186   // we might be tempted to assert that:
1187   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1188   //        "inconsistent argument?");
1189   // However that wouldn't be right, because it's possible that
1190   // a safepoint is indeed in progress as a younger generation
1191   // stop-the-world GC happens even as we mark in this generation.
1192 
1193   _restart_for_overflow = false;
1194 
1195   size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1196   force_overflow_conc()->init();
1197   set_phase(active_workers, true /* concurrent */);
1198 
1199   CMConcurrentMarkingTask markingTask(this, cmThread());
1200   if (parallel_marking_threads() > 0) {
1201     _parallel_workers->run_task(&markingTask);
1202   } else {
1203     markingTask.work(0);
1204   }
1205   print_stats();
1206 }
1207 
1208 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1209   // world is stopped at this checkpoint
1210   assert(SafepointSynchronize::is_at_safepoint(),
1211          "world should be stopped");
1212   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1213 
1214   // If a full collection has happened, we shouldn't do this.
1215   if (has_aborted()) {
1216     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1217     return;
1218   }
1219 
1220   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1221 
1222   if (VerifyDuringGC) {
1223     HandleMark hm;  // handle scope
1224     gclog_or_tty->print(" VerifyDuringGC:(before)");
1225     Universe::heap()->prepare_for_verify();
1226     Universe::verify(/* allow dirty */ true,
1227                      /* silent      */ false,
1228                      /* option      */ VerifyOption_G1UsePrevMarking);
1229   }
1230 
1231   G1CollectorPolicy* g1p = g1h->g1_policy();
1232   g1p->record_concurrent_mark_remark_start();
1233 
1234   double start = os::elapsedTime();
1235 
1236   checkpointRootsFinalWork();
1237 
1238   double mark_work_end = os::elapsedTime();
1239 
1240   weakRefsWork(clear_all_soft_refs);
1241 
1242   if (has_overflown()) {
1243     // Oops.  We overflowed.  Restart concurrent marking.
1244     _restart_for_overflow = true;
1245     // Clear the flag. We do not need it any more.
1246     clear_has_overflown();
1247     if (G1TraceMarkStackOverflow) {
1248       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1249     }
1250   } else {
1251     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1252     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1255     satb_mq_set.set_active_all_threads(false, /* new active value */
1256                                        true /* expected_active */);
1257 
1258     if (VerifyDuringGC) {
1259       HandleMark hm;  // handle scope
1260       gclog_or_tty->print(" VerifyDuringGC:(after)");
1261       Universe::heap()->prepare_for_verify();
1262       Universe::verify(/* allow dirty */ true,
1263                        /* silent      */ false,
1264                        /* option      */ VerifyOption_G1UseNextMarking);
1265     }
1266     assert(!restart_for_overflow(), "sanity");
1267   }
1268 
1269   // Reset the marking state if marking completed
1270   if (!restart_for_overflow()) {
1271     set_non_marking_state();
1272   }
1273 
1274 #if VERIFY_OBJS_PROCESSED
1275   _scan_obj_cl.objs_processed = 0;
1276   ThreadLocalObjQueue::objs_enqueued = 0;
1277 #endif
1278 
1279   // Statistics
1280   double now = os::elapsedTime();
1281   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1282   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1283   _remark_times.add((now - start) * 1000.0);
1284 
1285   g1p->record_concurrent_mark_remark_end();
1286 }
1287 
1288 #define CARD_BM_TEST_MODE 0
1289 
1290 class CalcLiveObjectsClosure: public HeapRegionClosure {
1291 
1292   CMBitMapRO* _bm;
1293   ConcurrentMark* _cm;
1294   bool _changed;
1295   bool _yield;
1296   size_t _words_done;
1297   size_t _tot_live;
1298   size_t _tot_used;
1299   size_t _regions_done;
1300   double _start_vtime_sec;
1301 
1302   BitMap* _region_bm;
1303   BitMap* _card_bm;
1304   intptr_t _bottom_card_num;
1305   bool _final;
1306 
1307   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1308     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1309 #if CARD_BM_TEST_MODE
1310       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1311 #else
1312       _card_bm->par_at_put(i - _bottom_card_num, 1);
1313 #endif
1314     }
1315   }
1316 
1317 public:
1318   CalcLiveObjectsClosure(bool final,
1319                          CMBitMapRO *bm, ConcurrentMark *cm,
1320                          BitMap* region_bm, BitMap* card_bm) :
1321     _bm(bm), _cm(cm), _changed(false), _yield(true),
1322     _words_done(0), _tot_live(0), _tot_used(0),
1323     _region_bm(region_bm), _card_bm(card_bm),_final(final),
1324     _regions_done(0), _start_vtime_sec(0.0)
1325   {
1326     _bottom_card_num =
1327       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1328                CardTableModRefBS::card_shift);
1329   }
1330 
  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
1333   // bitmap to 1. If the region is "starts humongous" it will also set
1334   // to 1 the bits on the region bitmap that correspond to its
1335   // associated "continues humongous" regions.
1336   void set_bit_for_region(HeapRegion* hr) {
1337     assert(!hr->continuesHumongous(), "should have filtered those out");
1338 
1339     size_t index = hr->hrs_index();
1340     if (!hr->startsHumongous()) {
1341       // Normal (non-humongous) case: just set the bit.
1342       _region_bm->par_at_put((BitMap::idx_t) index, true);
1343     } else {
1344       // Starts humongous case: calculate how many regions are part of
1345       // this humongous region and then set the bit range. It might
1346       // have been a bit more efficient to look at the object that
1347       // spans these humongous regions to calculate their number from
1348       // the object's size. However, it's a good idea to calculate
1349       // this based on the metadata itself, and not the region
1350       // contents, so that this code is not aware of what goes into
1351       // the humongous regions (in case this changes in the future).
1352       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1353       size_t end_index = index + 1;
1354       while (end_index < g1h->n_regions()) {
1355         HeapRegion* chr = g1h->region_at(end_index);
1356         if (!chr->continuesHumongous()) break;
1357         end_index += 1;
1358       }
1359       _region_bm->par_at_put_range((BitMap::idx_t) index,
1360                                    (BitMap::idx_t) end_index, true);
1361     }
1362   }
1363 
1364   bool doHeapRegion(HeapRegion* hr) {
1365     if (!_final && _regions_done == 0) {
1366       _start_vtime_sec = os::elapsedVTime();
1367     }
1368 
1369     if (hr->continuesHumongous()) {
1370       // We will ignore these here and process them when their
1371       // associated "starts humongous" region is processed (see
      // set_bit_for_region()). Note that we cannot rely on their
1373       // associated "starts humongous" region to have their bit set to
1374       // 1 since, due to the region chunking in the parallel region
1375       // iteration, a "continues humongous" region might be visited
1376       // before its associated "starts humongous".
1377       return false;
1378     }
1379 
1380     HeapWord* nextTop = hr->next_top_at_mark_start();
1381     HeapWord* start   = hr->top_at_conc_mark_count();
1382     assert(hr->bottom() <= start && start <= hr->end() &&
1383            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1384            start <= nextTop,
1385            "Preconditions.");
    // Otherwise, record the number of words we'll examine.
1387     size_t words_done = (nextTop - start);
1388     // Find the first marked object at or after "start".
1389     start = _bm->getNextMarkedWordAddress(start, nextTop);
1390     size_t marked_bytes = 0;
1391 
1392     // Below, the term "card num" means the result of shifting an address
1393     // by the card shift -- address 0 corresponds to card number 0.  One
1394     // must subtract the card num of the bottom of the heap to obtain a
1395     // card table index.
1396     // The first card num of the sequence of live cards currently being
1397     // constructed.  -1 ==> no sequence.
1398     intptr_t start_card_num = -1;
1399     // The last card num of the sequence of live cards currently being
1400     // constructed.  -1 ==> no sequence.
1401     intptr_t last_card_num = -1;
1402 
1403     while (start < nextTop) {
1404       if (_yield && _cm->do_yield_check()) {
1405         // We yielded.  It might be for a full collection, in which case
1406         // all bets are off; terminate the traversal.
1407         if (_cm->has_aborted()) {
1408           _changed = false;
1409           return true;
1410         } else {
1411           // Otherwise, it might be a collection pause, and the region
1412           // we're looking at might be in the collection set.  We'll
1413           // abandon this region.
1414           return false;
1415         }
1416       }
1417       oop obj = oop(start);
1418       int obj_sz = obj->size();
1419       // The card num of the start of the current object.
1420       intptr_t obj_card_num =
1421         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1422 
1423       HeapWord* obj_last = start + obj_sz - 1;
1424       intptr_t obj_last_card_num =
1425         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1426 
1427       if (obj_card_num != last_card_num) {
1428         if (start_card_num == -1) {
1429           assert(last_card_num == -1, "Both or neither.");
1430           start_card_num = obj_card_num;
1431         } else {
1432           assert(last_card_num != -1, "Both or neither.");
1433           assert(obj_card_num >= last_card_num, "Inv");
1434           if ((obj_card_num - last_card_num) > 1) {
1435             // Mark the last run, and start a new one.
1436             mark_card_num_range(start_card_num, last_card_num);
1437             start_card_num = obj_card_num;
1438           }
1439         }
1440 #if CARD_BM_TEST_MODE
1441         /*
1442         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1443                                obj_card_num - _bottom_card_num,
1444                                obj_last_card_num - _bottom_card_num);
1445         */
1446         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1447           _card_bm->par_at_put(j - _bottom_card_num, 1);
1448         }
1449 #endif
1450       }
1451       // In any case, we set the last card num.
1452       last_card_num = obj_last_card_num;
1453 
1454       marked_bytes += (size_t)obj_sz * HeapWordSize;
1455       // Find the next marked object after this one.
1456       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1457       _changed = true;
1458     }
1459     // Handle the last range, if any.
1460     if (start_card_num != -1) {
1461       mark_card_num_range(start_card_num, last_card_num);
1462     }
1463     if (_final) {
1464       // Mark the allocated-since-marking portion...
1465       HeapWord* tp = hr->top();
1466       if (nextTop < tp) {
1467         start_card_num =
1468           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1469         last_card_num =
1470           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1471         mark_card_num_range(start_card_num, last_card_num);
1472         // This definitely means the region has live objects.
1473         set_bit_for_region(hr);
1474       }
1475     }
1476 
1477     hr->add_to_marked_bytes(marked_bytes);
1478     // Update the live region bitmap.
1479     if (marked_bytes > 0) {
1480       set_bit_for_region(hr);
1481     }
1482     hr->set_top_at_conc_mark_count(nextTop);
1483     _tot_live += hr->next_live_bytes();
1484     _tot_used += hr->used();
1485     _words_done = words_done;
1486 
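    // During the concurrent (non-final) pass we throttle ourselves:
    // roughly every 10 regions we check how much vtime this pass has
    // consumed and, if it is more than 10 ms, sleep for that amount
    // scaled by the cleanup sleep factor before continuing.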
1487     if (!_final) {
1488       ++_regions_done;
1489       if (_regions_done % 10 == 0) {
1490         double end_vtime_sec = os::elapsedVTime();
1491         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1492         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1493           jlong sleep_time_ms =
1494             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1495           os::sleep(Thread::current(), sleep_time_ms, false);
1496           _start_vtime_sec = end_vtime_sec;
1497         }
1498       }
1499     }
1500 
1501     return false;
1502   }
1503 
1504   bool changed() { return _changed;  }
1505   void reset()   { _changed = false; _words_done = 0; }
1506   void no_yield() { _yield = false; }
1507   size_t words_done() { return _words_done; }
1508   size_t tot_live() { return _tot_live; }
1509   size_t tot_used() { return _tot_used; }
1510 };
1511 
1512 
1513 void ConcurrentMark::calcDesiredRegions() {
1514   _region_bm.clear();
1515   _card_bm.clear();
1516   CalcLiveObjectsClosure calccl(false /*final*/,
1517                                 nextMarkBitMap(), this,
1518                                 &_region_bm, &_card_bm);
1519   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1520   g1h->heap_region_iterate(&calccl);
1521 
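  // Re-apply the closure until a pass over the heap records no change,
  // i.e. counts no additional live words. doHeapRegion() sets _changed
  // whenever it counts something in a region and advances the region's
  // top_at_conc_mark_count so that a later pass does not re-count it.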
1522   do {
1523     calccl.reset();
1524     g1h->heap_region_iterate(&calccl);
1525   } while (calccl.changed());
1526 }
1527 
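// Parallel task that repeats the liveness counting at the Cleanup pause,
// this time with CalcLiveObjectsClosure in "final" mode and yielding
// disabled. Each worker records the live and used byte totals for the
// regions it claims; live_bytes() and used_bytes() sum the per-worker
// results.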
1528 class G1ParFinalCountTask: public AbstractGangTask {
1529 protected:
1530   G1CollectedHeap* _g1h;
1531   CMBitMap* _bm;
1532   size_t _n_workers;
1533   size_t *_live_bytes;
1534   size_t *_used_bytes;
1535   BitMap* _region_bm;
1536   BitMap* _card_bm;
1537 public:
1538   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1539                       BitMap* region_bm, BitMap* card_bm)
1540     : AbstractGangTask("G1 final counting"), _g1h(g1h),
1541       _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
1542     if (ParallelGCThreads > 0) {
1543       _n_workers = _g1h->workers()->total_workers();
1544     } else {
1545       _n_workers = 1;
1546     }
1547     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1548     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1549   }
1550 
1551   ~G1ParFinalCountTask() {
1552     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1553     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1554   }
1555 
1556   void work(int i) {
1557     CalcLiveObjectsClosure calccl(true /*final*/,
1558                                   _bm, _g1h->concurrent_mark(),
1559                                   _region_bm, _card_bm);
1560     calccl.no_yield();
1561     if (G1CollectedHeap::use_parallel_gc_threads()) {
1562       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1563                                             HeapRegion::FinalCountClaimValue);
1564     } else {
1565       _g1h->heap_region_iterate(&calccl);
1566     }
1567     assert(calccl.complete(), "Shouldn't have yielded!");
1568 
1569     assert((size_t) i < _n_workers, "invariant");
1570     _live_bytes[i] = calccl.tot_live();
1571     _used_bytes[i] = calccl.tot_used();
1572   }
1573   size_t live_bytes()  {
1574     size_t live_bytes = 0;
1575     for (size_t i = 0; i < _n_workers; ++i)
1576       live_bytes += _live_bytes[i];
1577     return live_bytes;
1578   }
1579   size_t used_bytes()  {
1580     size_t used_bytes = 0;
1581     for (size_t i = 0; i < _n_workers; ++i)
1582       used_bytes += _used_bytes[i];
1583     return used_bytes;
1584   }
1585 };
1586 
1587 class G1ParNoteEndTask;
1588 
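// Region closure applied after marking (see doHeapRegion() below): for
// each non-"continues humongous" region it notes the end of marking,
// accumulates the region's max_live_bytes and claim timing, and asks the
// heap to free the region if it turned out to be completely empty, in
// which case the region ends up on the worker-local cleanup list.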
1589 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1590   G1CollectedHeap* _g1;
1591   int _worker_num;
1592   size_t _max_live_bytes;
1593   size_t _regions_claimed;
1594   size_t _freed_bytes;
1595   FreeRegionList* _local_cleanup_list;
1596   HumongousRegionSet* _humongous_proxy_set;
1597   HRRSCleanupTask* _hrrs_cleanup_task;
1598   double _claimed_region_time;
1599   double _max_region_time;
1600 
1601 public:
1602   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1603                              int worker_num,
1604                              FreeRegionList* local_cleanup_list,
1605                              HumongousRegionSet* humongous_proxy_set,
1606                              HRRSCleanupTask* hrrs_cleanup_task);
1607   size_t freed_bytes() { return _freed_bytes; }
1608 
1609   bool doHeapRegion(HeapRegion *r);
1610 
1611   size_t max_live_bytes() { return _max_live_bytes; }
1612   size_t regions_claimed() { return _regions_claimed; }
1613   double claimed_region_time_sec() { return _claimed_region_time; }
1614   double max_region_time_sec() { return _max_region_time; }
1615 };
1616 
1617 class G1ParNoteEndTask: public AbstractGangTask {
1618   friend class G1NoteEndOfConcMarkClosure;
1619 
1620 protected:
1621   G1CollectedHeap* _g1h;
1622   size_t _max_live_bytes;
1623   size_t _freed_bytes;
1624   FreeRegionList* _cleanup_list;
1625 
1626 public:
1627   G1ParNoteEndTask(G1CollectedHeap* g1h,
1628                    FreeRegionList* cleanup_list) :
1629     AbstractGangTask("G1 note end"), _g1h(g1h),
1630     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1631 
1632   void work(int i) {
1633     double start = os::elapsedTime();
1634     FreeRegionList local_cleanup_list("Local Cleanup List");
1635     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1636     HRRSCleanupTask hrrs_cleanup_task;
1637     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1638                                            &humongous_proxy_set,
1639                                            &hrrs_cleanup_task);
1640     if (G1CollectedHeap::use_parallel_gc_threads()) {
1641       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1642                                             HeapRegion::NoteEndClaimValue);
1643     } else {
1644       _g1h->heap_region_iterate(&g1_note_end);
1645     }
1646     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1647 
1648     // Now update the lists
1649     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1650                                             NULL /* free_list */,
1651                                             &humongous_proxy_set,
1652                                             true /* par */);
1653     {
1654       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1655       _max_live_bytes += g1_note_end.max_live_bytes();
1656       _freed_bytes += g1_note_end.freed_bytes();
1657 
1658       // If we iterate over the global cleanup list at the end of
1659       // cleanup to do this printing we will not guarantee to only
1660       // generate output for the newly-reclaimed regions (the list
1661       // might not be empty at the beginning of cleanup; we might
1662       // still be working on its previous contents). So we do the
1663       // printing here, before we append the new regions to the global
1664       // cleanup list.
1665 
1666       G1HRPrinter* hr_printer = _g1h->hr_printer();
1667       if (hr_printer->is_active()) {
1668         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1669         while (iter.more_available()) {
1670           HeapRegion* hr = iter.get_next();
1671           hr_printer->cleanup(hr);
1672         }
1673       }
1674 
1675       _cleanup_list->add_as_tail(&local_cleanup_list);
1676       assert(local_cleanup_list.is_empty(), "post-condition");
1677 
1678       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1679     }
1680     double end = os::elapsedTime();
1681     if (G1PrintParCleanupStats) {
1682       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1683                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1684                           i, start, end, (end-start)*1000.0,
1685                           g1_note_end.regions_claimed(),
1686                           g1_note_end.claimed_region_time_sec()*1000.0,
1687                           g1_note_end.max_region_time_sec()*1000.0);
1688     }
1689   }
1690   size_t max_live_bytes() { return _max_live_bytes; }
1691   size_t freed_bytes() { return _freed_bytes; }
1692 };
1693 
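// Parallel task that scrubs the remembered sets: using the region and
// card liveness bitmaps built during counting, remembered set entries
// that only refer to dead cards or regions can be dropped (see
// G1RemSet::scrub() / scrub_par()).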
1694 class G1ParScrubRemSetTask: public AbstractGangTask {
1695 protected:
1696   G1RemSet* _g1rs;
1697   BitMap* _region_bm;
1698   BitMap* _card_bm;
1699 public:
1700   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1701                        BitMap* region_bm, BitMap* card_bm) :
1702     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1703     _region_bm(region_bm), _card_bm(card_bm)
1704   {}
1705 
1706   void work(int i) {
1707     if (G1CollectedHeap::use_parallel_gc_threads()) {
1708       _g1rs->scrub_par(_region_bm, _card_bm, i,
1709                        HeapRegion::ScrubRemSetClaimValue);
1710     } else {
1711       _g1rs->scrub(_region_bm, _card_bm);
1712     }
1713   }
1714 
1715 };
1716 
1717 G1NoteEndOfConcMarkClosure::
1718 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1719                            int worker_num,
1720                            FreeRegionList* local_cleanup_list,
1721                            HumongousRegionSet* humongous_proxy_set,
1722                            HRRSCleanupTask* hrrs_cleanup_task)
1723   : _g1(g1), _worker_num(worker_num),
1724     _max_live_bytes(0), _regions_claimed(0),
1725     _freed_bytes(0),
1726     _claimed_region_time(0.0), _max_region_time(0.0),
1727     _local_cleanup_list(local_cleanup_list),
1728     _humongous_proxy_set(humongous_proxy_set),
1729     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1730 
1731 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
1732   // We use a claim value of zero here because all regions
1733   // were claimed with value 1 in the FinalCount task.
1734   hr->reset_gc_time_stamp();
1735   if (!hr->continuesHumongous()) {
1736     double start = os::elapsedTime();
1737     _regions_claimed++;
1738     hr->note_end_of_marking();
1739     _max_live_bytes += hr->max_live_bytes();
1740     _g1->free_region_if_empty(hr,
1741                               &_freed_bytes,
1742                               _local_cleanup_list,
1743                               _humongous_proxy_set,
1744                               _hrrs_cleanup_task,
1745                               true /* par */);
1746     double region_time = (os::elapsedTime() - start);
1747     _claimed_region_time += region_time;
1748     if (region_time > _max_region_time) {
1749       _max_region_time = region_time;
1750     }
1751   }
1752   return false;
1753 }
1754 
1755 void ConcurrentMark::cleanup() {
1756   // world is stopped at this checkpoint
1757   assert(SafepointSynchronize::is_at_safepoint(),
1758          "world should be stopped");
1759   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1760 
1761   // If a full collection has happened, we shouldn't do this.
1762   if (has_aborted()) {
1763     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1764     return;
1765   }
1766 
1767   g1h->verify_region_sets_optional();
1768 
1769   if (VerifyDuringGC) {
1770     HandleMark hm;  // handle scope
1771     gclog_or_tty->print(" VerifyDuringGC:(before)");
1772     Universe::heap()->prepare_for_verify();
1773     Universe::verify(/* allow dirty */ true,
1774                      /* silent      */ false,
1775                      /* option      */ VerifyOption_G1UsePrevMarking);
1776   }
1777 
1778   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1779   g1p->record_concurrent_mark_cleanup_start();
1780 
1781   double start = os::elapsedTime();
1782 
1783   HeapRegionRemSet::reset_for_cleanup_tasks();
1784 
1785   // Do counting once more with the world stopped for good measure.
1786   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1787                                         &_region_bm, &_card_bm);
1788   if (G1CollectedHeap::use_parallel_gc_threads()) {
1789     assert(g1h->check_heap_region_claim_values(
1790                                                HeapRegion::InitialClaimValue),
1791            "sanity check");
1792 
1793     int n_workers = g1h->workers()->total_workers();
1794     g1h->set_par_threads(n_workers);
1795     g1h->workers()->run_task(&g1_par_count_task);
1796     g1h->set_par_threads(0);
1797 
1798     assert(g1h->check_heap_region_claim_values(
1799                                              HeapRegion::FinalCountClaimValue),
1800            "sanity check");
1801   } else {
1802     g1_par_count_task.work(0);
1803   }
1804 
1805   size_t known_garbage_bytes =
1806     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1807 #if 0
1808   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1809                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1810                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1811                          (double) known_garbage_bytes / (double) (1024 * 1024));
1812 #endif // 0
1813   g1p->set_known_garbage_bytes(known_garbage_bytes);
1814 
1815   size_t start_used_bytes = g1h->used();
1816   _at_least_one_mark_complete = true;
1817   g1h->set_marking_complete();
1818 
1819   double count_end = os::elapsedTime();
1820   double this_final_counting_time = (count_end - start);
1821   if (G1PrintParCleanupStats) {
1822     gclog_or_tty->print_cr("Cleanup:");
1823     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1824                            this_final_counting_time*1000.0);
1825   }
1826   _total_counting_time += this_final_counting_time;
1827 
1828   if (G1PrintRegionLivenessInfo) {
1829     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1830     _g1h->heap_region_iterate(&cl);
1831   }
1832 
1833   // Install newly created mark bitMap as "prev".
1834   swapMarkBitMaps();
1835 
1836   g1h->reset_gc_time_stamp();
1837 
1838   // Note end of marking in all heap regions.
1839   double note_end_start = os::elapsedTime();
1840   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1841   if (G1CollectedHeap::use_parallel_gc_threads()) {
1842     int n_workers = g1h->workers()->total_workers();
1843     g1h->set_par_threads(n_workers);
1844     g1h->workers()->run_task(&g1_par_note_end_task);
1845     g1h->set_par_threads(0);
1846 
1847     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1848            "sanity check");
1849   } else {
1850     g1_par_note_end_task.work(0);
1851   }
1852 
1853   if (!cleanup_list_is_empty()) {
1854     // The cleanup list is not empty, so we'll have to process it
1855     // concurrently. Notify anyone else that might be wanting free
1856     // regions that there will be more free regions coming soon.
1857     g1h->set_free_regions_coming();
1858   }
1859   double note_end_end = os::elapsedTime();
1860   if (G1PrintParCleanupStats) {
1861     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1862                            (note_end_end - note_end_start)*1000.0);
1863   }
1864 
1865 
1866   // We have to do the remembered set scrubbing (if enabled) before the call
1867   // below, since it affects the metric by which we sort the heap regions.
1868   if (G1ScrubRemSets) {
1869     double rs_scrub_start = os::elapsedTime();
1870     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1871     if (G1CollectedHeap::use_parallel_gc_threads()) {
1872       int n_workers = g1h->workers()->total_workers();
1873       g1h->set_par_threads(n_workers);
1874       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1875       g1h->set_par_threads(0);
1876 
1877       assert(g1h->check_heap_region_claim_values(
1878                                             HeapRegion::ScrubRemSetClaimValue),
1879              "sanity check");
1880     } else {
1881       g1_par_scrub_rs_task.work(0);
1882     }
1883 
1884     double rs_scrub_end = os::elapsedTime();
1885     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1886     _total_rs_scrub_time += this_rs_scrub_time;
1887   }
1888 
1889   // this will also free any regions totally full of garbage objects,
1890   // and sort the regions.
1891   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1892                         g1_par_note_end_task.freed_bytes(),
1893                         g1_par_note_end_task.max_live_bytes());
1894 
1895   // Statistics.
1896   double end = os::elapsedTime();
1897   _cleanup_times.add((end - start) * 1000.0);
1898 
1899   // G1CollectedHeap::heap()->print();
1900   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1901   // G1CollectedHeap::heap()->get_gc_time_stamp());
1902 
1903   if (PrintGC || PrintGCDetails) {
1904     g1h->print_size_transition(gclog_or_tty,
1905                                start_used_bytes,
1906                                g1h->used(),
1907                                g1h->capacity());
1908   }
1909 
1910   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1911   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1912 
1913   // We need to make this be a "collection" so any collection pause that
1914   // races with it goes around and waits for completeCleanup to finish.
1915   g1h->increment_total_collections();
1916 
1917   if (VerifyDuringGC) {
1918     HandleMark hm;  // handle scope
1919     gclog_or_tty->print(" VerifyDuringGC:(after)");
1920     Universe::heap()->prepare_for_verify();
1921     Universe::verify(/* allow dirty */ true,
1922                      /* silent      */ false,
1923                      /* option      */ VerifyOption_G1UsePrevMarking);
1924   }
1925 
1926   g1h->verify_region_sets_optional();
1927 }
1928 
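// Performed concurrently after the cleanup pause: drain the _cleanup_list,
// clearing each freed region and handing regions over to the heap's
// secondary free list in batches (of G1SecondaryFreeListAppendLength), so
// that waiting allocators are notified a few regions at a time rather
// than once per region.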
1929 void ConcurrentMark::completeCleanup() {
1930   if (has_aborted()) return;
1931 
1932   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1933 
1934   _cleanup_list.verify_optional();
1935   FreeRegionList tmp_free_list("Tmp Free List");
1936 
1937   if (G1ConcRegionFreeingVerbose) {
1938     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1939                            "cleanup list has "SIZE_FORMAT" entries",
1940                            _cleanup_list.length());
1941   }
1942 
1943   // No one else should be accessing the _cleanup_list at this point,
1944   // so it's not necessary to take any locks.
1945   while (!_cleanup_list.is_empty()) {
1946     HeapRegion* hr = _cleanup_list.remove_head();
1947     assert(hr != NULL, "the list was not empty");
1948     hr->par_clear();
1949     tmp_free_list.add_as_tail(hr);
1950 
1951     // Instead of adding one region at a time to the secondary_free_list,
1952     // we accumulate them in the local list and move them a few at a
1953     // time. This also cuts down on the number of notify_all() calls
1954     // we do during this process. We'll also append the local list when
1955     // _cleanup_list is empty (which means we just removed the last
1956     // region from the _cleanup_list).
1957     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1958         _cleanup_list.is_empty()) {
1959       if (G1ConcRegionFreeingVerbose) {
1960         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1961                                "appending "SIZE_FORMAT" entries to the "
1962                                "secondary_free_list, clean list still has "
1963                                SIZE_FORMAT" entries",
1964                                tmp_free_list.length(),
1965                                _cleanup_list.length());
1966       }
1967 
1968       {
1969         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1970         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1971         SecondaryFreeList_lock->notify_all();
1972       }
1973 
1974       if (G1StressConcRegionFreeing) {
1975         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1976           os::sleep(Thread::current(), (jlong) 1, false);
1977         }
1978       }
1979     }
1980   }
1981   assert(tmp_free_list.is_empty(), "post-condition");
1982 }
1983 
1984 // Support closures for reference processing in G1
1985 
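// Liveness predicate handed to the reference processor: an object is
// considered alive if it lies outside the G1 reserved heap (and so is not
// subject to this marking) or if it is not "ill", i.e. not dead with
// respect to the marking currently in progress.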
1986 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1987   HeapWord* addr = (HeapWord*)obj;
1988   return addr != NULL &&
1989          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1990 }
1991 
1992 class G1CMKeepAliveClosure: public OopClosure {
1993   G1CollectedHeap* _g1;
1994   ConcurrentMark*  _cm;
1995   CMBitMap*        _bitMap;
1996  public:
1997   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1998                        CMBitMap* bitMap) :
1999     _g1(g1), _cm(cm),
2000     _bitMap(bitMap) {}
2001 
2002   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2003   virtual void do_oop(      oop* p) { do_oop_work(p); }
2004 
2005   template <class T> void do_oop_work(T* p) {
2006     oop obj = oopDesc::load_decode_heap_oop(p);
2007     HeapWord* addr = (HeapWord*)obj;
2008 
2009     if (_cm->verbose_high()) {
2010       gclog_or_tty->print_cr("\t[0] we're looking at location "
2011                              "*"PTR_FORMAT" = "PTR_FORMAT,
2012                              p, (void*) obj);
2013     }
2014 
2015     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2016       _bitMap->mark(addr);
2017       _cm->mark_stack_push(obj);
2018     }
2019   }
2020 };
2021 
2022 class G1CMDrainMarkingStackClosure: public VoidClosure {
2023   CMMarkStack*                  _markStack;
2024   CMBitMap*                     _bitMap;
2025   G1CMKeepAliveClosure*         _oopClosure;
2026  public:
2027   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
2028                                G1CMKeepAliveClosure* oopClosure) :
2029     _bitMap(bitMap),
2030     _markStack(markStack),
2031     _oopClosure(oopClosure)
2032   {}
2033 
2034   void do_void() {
2035     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
2036   }
2037 };
2038 
2039 // 'Keep Alive' closure used by parallel reference processing.
2040 // An instance of this closure is used in the parallel reference processing
2041 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2042 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2043 // placed on to discovered ref lists once so we can mark and push with no
2044 // need to check whether the object has already been marked. Using the
2045 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2046 // operating on the global mark stack. This means that an individual
2047 // worker would be doing lock-free pushes while it processes its own
2048 // discovered ref list followed by drain call. If the discovered ref lists
2049 // are unbalanced then this could cause interference with the other
2050 // workers. Using a CMTask (and its embedded local data structures)
2051 // avoids that potential interference.
2052 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2053   ConcurrentMark*  _cm;
2054   CMTask*          _task;
2055   CMBitMap*        _bitMap;
2056   int              _ref_counter_limit;
2057   int              _ref_counter;
2058  public:
2059   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
2060                                   CMTask* task,
2061                                   CMBitMap* bitMap) :
2062     _cm(cm), _task(task), _bitMap(bitMap),
2063     _ref_counter_limit(G1RefProcDrainInterval)
2064   {
2065     assert(_ref_counter_limit > 0, "sanity");
2066     _ref_counter = _ref_counter_limit;
2067   }
2068 
2069   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2070   virtual void do_oop(      oop* p) { do_oop_work(p); }
2071 
2072   template <class T> void do_oop_work(T* p) {
2073     if (!_cm->has_overflown()) {
2074       oop obj = oopDesc::load_decode_heap_oop(p);
2075       if (_cm->verbose_high()) {
2076         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2077                                "*"PTR_FORMAT" = "PTR_FORMAT,
2078                                _task->task_id(), p, (void*) obj);
2079       }
2080 
2081       _task->deal_with_reference(obj);
2082       _ref_counter--;
2083 
2084       if (_ref_counter == 0) {
2085         // We have dealt with _ref_counter_limit references, pushing them and objects
2086         // reachable from them on to the local stack (and possibly the global stack).
2087         // Call do_marking_step() to process these entries. We call the routine in a
2088         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2089         // with the entries that we've pushed as a result of the deal_with_reference
2090         // calls above) or we overflow.
2091         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2092         // while there may still be some work to do. (See the comment at the
2093         // beginning of CMTask::do_marking_step() for those conditions - one of which
2094         // is reaching the specified time target.) It is only when
2095         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2096         // that the marking has completed.
2097         do {
2098           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2099           _task->do_marking_step(mark_step_duration_ms,
2100                                  false /* do_stealing    */,
2101                                  false /* do_termination */);
2102         } while (_task->has_aborted() && !_cm->has_overflown());
2103         _ref_counter = _ref_counter_limit;
2104       }
2105     } else {
2106       if (_cm->verbose_high()) {
2107          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2108       }
2109     }
2110   }
2111 };
2112 
2113 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2114   ConcurrentMark* _cm;
2115   CMTask* _task;
2116  public:
2117   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2118     _cm(cm), _task(task)
2119   {}
2120 
2121   void do_void() {
2122     do {
2123       if (_cm->verbose_high()) {
2124         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2125                                _task->task_id());
2126       }
2127 
2128       // We call CMTask::do_marking_step() to completely drain the local and
2129       // global marking stacks. The routine is called in a loop, which we'll
2130       // exit if there's nothing more to do (i.e. we've completely drained the
2131       // entries that were pushed as a result of applying the
2132       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2133       // lists above) or we overflow the global marking stack.
2134       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2135       // while there may still be some work to do. (See the comment at the
2136       // beginning of CMTask::do_marking_step() for those conditions - one of which
2137       // is reaching the specified time target.) It is only when
2138       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2139       // that the marking has completed.
2140 
2141       _task->do_marking_step(1000000000.0 /* something very large */,
2142                              true /* do_stealing    */,
2143                              true /* do_termination */);
2144     } while (_task->has_aborted() && !_cm->has_overflown());
2145   }
2146 };
2147 
2148 // Implementation of AbstractRefProcTaskExecutor for G1
2149 class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2150 private:
2151   G1CollectedHeap* _g1h;
2152   ConcurrentMark*  _cm;
2153   CMBitMap*        _bitmap;
2154   WorkGang*        _workers;
2155   int              _active_workers;
2156 
2157 public:
2158   G1RefProcTaskExecutor(G1CollectedHeap* g1h,
2159                         ConcurrentMark* cm,
2160                         CMBitMap* bitmap,
2161                         WorkGang* workers,
2162                         int n_workers) :
2163     _g1h(g1h), _cm(cm), _bitmap(bitmap),
2164     _workers(workers), _active_workers(n_workers)
2165   { }
2166 
2167   // Executes the given task using concurrent marking worker threads.
2168   virtual void execute(ProcessTask& task);
2169   virtual void execute(EnqueueTask& task);
2170 };
2171 
2172 class G1RefProcTaskProxy: public AbstractGangTask {
2173   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2174   ProcessTask&     _proc_task;
2175   G1CollectedHeap* _g1h;
2176   ConcurrentMark*  _cm;
2177   CMBitMap*        _bitmap;
2178 
2179 public:
2180   G1RefProcTaskProxy(ProcessTask& proc_task,
2181                      G1CollectedHeap* g1h,
2182                      ConcurrentMark* cm,
2183                      CMBitMap* bitmap) :
2184     AbstractGangTask("Process reference objects in parallel"),
2185     _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
2186   {}
2187 
2188   virtual void work(int i) {
2189     CMTask* marking_task = _cm->task(i);
2190     G1CMIsAliveClosure g1_is_alive(_g1h);
2191     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
2192     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2193 
2194     _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2195   }
2196 };
2197 
2198 void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) {
2199   assert(_workers != NULL, "Need parallel worker threads.");
2200 
2201   G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2202 
2203   // We need to reset the phase for each task execution so that
2204   // the termination protocol of CMTask::do_marking_step works.
2205   _cm->set_phase(_active_workers, false /* concurrent */);
2206   _g1h->set_par_threads(_active_workers);
2207   _workers->run_task(&proc_task_proxy);
2208   _g1h->set_par_threads(0);
2209 }
2210 
2211 class G1RefEnqueueTaskProxy: public AbstractGangTask {
2212   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2213   EnqueueTask& _enq_task;
2214 
2215 public:
2216   G1RefEnqueueTaskProxy(EnqueueTask& enq_task) :
2217     AbstractGangTask("Enqueue reference objects in parallel"),
2218     _enq_task(enq_task)
2219   { }
2220 
2221   virtual void work(int i) {
2222     _enq_task.work(i);
2223   }
2224 };
2225 
2226 void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2227   assert(_workers != NULL, "Need parallel worker threads.");
2228 
2229   G1RefEnqueueTaskProxy enq_task_proxy(enq_task);
2230 
2231   _g1h->set_par_threads(_active_workers);
2232   _workers->run_task(&enq_task_proxy);
2233   _g1h->set_par_threads(0);
2234 }
2235 
2236 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2237   ResourceMark rm;
2238   HandleMark   hm;
2239   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
2240   ReferenceProcessor* rp = g1h->ref_processor();
2241 
2242   // See the comment in G1CollectedHeap::ref_processing_init()
2243   // about how reference processing currently works in G1.
2244 
2245   // Process weak references.
2246   rp->setup_policy(clear_all_soft_refs);
2247   assert(_markStack.isEmpty(), "mark stack should be empty");
2248 
2249   G1CMIsAliveClosure   g1_is_alive(g1h);
2250   G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2251   G1CMDrainMarkingStackClosure
2252     g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2253   // We use the work gang from the G1CollectedHeap and we utilize all
2254   // the worker threads.
2255   int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
2256   active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2257 
2258   G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2259                                           g1h->workers(), active_workers);
2260 
2261 
2262   if (rp->processing_is_mt()) {
2263     // Set the degree of MT here.  If the discovery is done MT, there
2264     // may have been a different number of threads doing the discovery
2265     // and a different number of discovered lists may have Ref objects.
2266     // That is OK as long as the Reference lists are balanced (see
2267     // balance_all_queues() and balance_queues()).
2268     rp->set_active_mt_degree(active_workers);
2269 
2270     rp->process_discovered_references(&g1_is_alive,
2271                                       &g1_keep_alive,
2272                                       &g1_drain_mark_stack,
2273                                       &par_task_executor);
2274 
2275     // The work routines of the parallel keep_alive and drain_marking_stack
2276     // will set the has_overflown flag if we overflow the global marking
2277     // stack.
2278   } else {
2279     rp->process_discovered_references(&g1_is_alive,
2280                                       &g1_keep_alive,
2281                                       &g1_drain_mark_stack,
2282                                       NULL);
2283 
2284   }
2285 
2286   assert(_markStack.overflow() || _markStack.isEmpty(),
2287       "mark stack should be empty (unless it overflowed)");
2288   if (_markStack.overflow()) {
2289     // Should have been done already when we tried to push an
2290     // entry on to the global mark stack. But let's do it again.
2291     set_has_overflown();
2292   }
2293 
2294   if (rp->processing_is_mt()) {
2295     assert(rp->num_q() == active_workers, "why not");
2296     rp->enqueue_discovered_references(&par_task_executor);
2297   } else {
2298     rp->enqueue_discovered_references();
2299   }
2300 
2301   rp->verify_no_references_recorded();
2302   assert(!rp->discovery_enabled(), "should have been disabled");
2303 
2304   // Now clean up stale oops in StringTable
2305   StringTable::unlink(&g1_is_alive);
2306   // Clean up unreferenced symbols in symbol table.
2307   SymbolTable::unlink();
2308 }
2309 
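// Once marking has completed, the "next" bitmap we have just built
// becomes the "prev" bitmap (the one used to answer liveness queries
// between marking cycles), and the old "prev" bitmap will be reused as
// "next" for the following cycle.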
2310 void ConcurrentMark::swapMarkBitMaps() {
2311   CMBitMapRO* temp = _prevMarkBitMap;
2312   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2313   _nextMarkBitMap  = (CMBitMap*)  temp;
2314 }
2315 
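// Gang task used during the remark pause: each active worker repeatedly
// calls CMTask::do_marking_step() (with stealing and termination enabled)
// until the step completes without aborting, or the global mark stack
// overflows, in which case remark is abandoned and concurrent marking
// will be restarted.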
2316 class CMRemarkTask: public AbstractGangTask {
2317 private:
2318   ConcurrentMark *_cm;
2319 
2320 public:
2321   void work(int worker_i) {
2322     // Since all available tasks are actually started, we should
2323     // only proceed if we're supposed to be active.
2324     if ((size_t)worker_i < _cm->active_tasks()) {
2325       CMTask* task = _cm->task(worker_i);
2326       task->record_start_time();
2327       do {
2328         task->do_marking_step(1000000000.0 /* something very large */,
2329                               true /* do_stealing    */,
2330                               true /* do_termination */);
2331       } while (task->has_aborted() && !_cm->has_overflown());
2332       // If we overflow, then we do not want to restart. We instead
2333       // want to abort remark and do concurrent marking again.
2334       task->record_end_time();
2335     }
2336   }
2337 
2338   CMRemarkTask(ConcurrentMark* cm) :
2339     AbstractGangTask("Par Remark"), _cm(cm) { }
2340 };
2341 
2342 void ConcurrentMark::checkpointRootsFinalWork() {
2343   ResourceMark rm;
2344   HandleMark   hm;
2345   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2346 
2347   g1h->ensure_parsability(false);
2348 
2349   if (G1CollectedHeap::use_parallel_gc_threads()) {
2350     G1CollectedHeap::StrongRootsScope srs(g1h);
2351     // this is remark, so we'll use up all available threads
2352     int active_workers = ParallelGCThreads;
2353     set_phase(active_workers, false /* concurrent */);
2354 
2355     CMRemarkTask remarkTask(this);
2356     // We will start all available threads, even if we decide that the
2357     // active_workers will be fewer. The extra ones will just bail out
2358     // immediately.
2359     int n_workers = g1h->workers()->total_workers();
2360     g1h->set_par_threads(n_workers);
2361     g1h->workers()->run_task(&remarkTask);
2362     g1h->set_par_threads(0);
2363   } else {
2364     G1CollectedHeap::StrongRootsScope srs(g1h);
2365     // this is remark, so we'll use up all available threads
2366     int active_workers = 1;
2367     set_phase(active_workers, false /* concurrent */);
2368 
2369     CMRemarkTask remarkTask(this);
2370     // We will start all available threads, even if we decide that the
2371     // active_workers will be fewer. The extra ones will just bail out
2372     // immediately.
2373     remarkTask.work(0);
2374   }
2375   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2376   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2377 
2378   print_stats();
2379 
2380 #if VERIFY_OBJS_PROCESSED
2381   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2382     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2383                            _scan_obj_cl.objs_processed,
2384                            ThreadLocalObjQueue::objs_enqueued);
2385     guarantee(_scan_obj_cl.objs_processed ==
2386               ThreadLocalObjQueue::objs_enqueued,
2387               "Different number of objs processed and enqueued.");
2388   }
2389 #endif
2390 }
2391 
2392 #ifndef PRODUCT
2393 
2394 class PrintReachableOopClosure: public OopClosure {
2395 private:
2396   G1CollectedHeap* _g1h;
2397   outputStream*    _out;
2398   VerifyOption     _vo;
2399   bool             _all;
2400 
2401 public:
2402   PrintReachableOopClosure(outputStream* out,
2403                            VerifyOption  vo,
2404                            bool          all) :
2405     _g1h(G1CollectedHeap::heap()),
2406     _out(out), _vo(vo), _all(all) { }
2407 
2408   void do_oop(narrowOop* p) { do_oop_work(p); }
2409   void do_oop(      oop* p) { do_oop_work(p); }
2410 
2411   template <class T> void do_oop_work(T* p) {
2412     oop         obj = oopDesc::load_decode_heap_oop(p);
2413     const char* str = NULL;
2414     const char* str2 = "";
2415 
2416     if (obj == NULL) {
2417       str = "";
2418     } else if (!_g1h->is_in_g1_reserved(obj)) {
2419       str = " O";
2420     } else {
2421       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2422       guarantee(hr != NULL, "invariant");
2423       bool over_tams = false;
2424       bool marked = false;
2425 
2426       switch (_vo) {
2427         case VerifyOption_G1UsePrevMarking:
2428           over_tams = hr->obj_allocated_since_prev_marking(obj);
2429           marked = _g1h->isMarkedPrev(obj);
2430           break;
2431         case VerifyOption_G1UseNextMarking:
2432           over_tams = hr->obj_allocated_since_next_marking(obj);
2433           marked = _g1h->isMarkedNext(obj);
2434           break;
2435         case VerifyOption_G1UseMarkWord:
2436           marked = obj->is_gc_marked();
2437           break;
2438         default:
2439           ShouldNotReachHere();
2440       }
2441 
2442       if (over_tams) {
2443         str = " >";
2444         if (marked) {
2445           str2 = " AND MARKED";
2446         }
2447       } else if (marked) {
2448         str = " M";
2449       } else {
2450         str = " NOT";
2451       }
2452     }
2453 
2454     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2455                    p, (void*) obj, str, str2);
2456   }
2457 };
2458 
2459 class PrintReachableObjectClosure : public ObjectClosure {
2460 private:
2461   G1CollectedHeap* _g1h;
2462   outputStream*    _out;
2463   VerifyOption     _vo;
2464   bool             _all;
2465   HeapRegion*      _hr;
2466 
2467 public:
2468   PrintReachableObjectClosure(outputStream* out,
2469                               VerifyOption  vo,
2470                               bool          all,
2471                               HeapRegion*   hr) :
2472     _g1h(G1CollectedHeap::heap()),
2473     _out(out), _vo(vo), _all(all), _hr(hr) { }
2474 
2475   void do_object(oop o) {
2476     bool over_tams = false;
2477     bool marked = false;
2478 
2479     switch (_vo) {
2480       case VerifyOption_G1UsePrevMarking:
2481         over_tams = _hr->obj_allocated_since_prev_marking(o);
2482         marked = _g1h->isMarkedPrev(o);
2483         break;
2484       case VerifyOption_G1UseNextMarking:
2485         over_tams = _hr->obj_allocated_since_next_marking(o);
2486         marked = _g1h->isMarkedNext(o);
2487         break;
2488       case VerifyOption_G1UseMarkWord:
2489         marked = o->is_gc_marked();
2490         break;
2491       default:
2492         ShouldNotReachHere();
2493     }
2494     bool print_it = _all || over_tams || marked;
2495 
2496     if (print_it) {
2497       _out->print_cr(" "PTR_FORMAT"%s",
2498                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2499       PrintReachableOopClosure oopCl(_out, _vo, _all);
2500       o->oop_iterate(&oopCl);
2501     }
2502   }
2503 };
2504 
2505 class PrintReachableRegionClosure : public HeapRegionClosure {
2506 private:
2507   outputStream* _out;
2508   VerifyOption  _vo;
2509   bool          _all;
2510 
2511 public:
2512   bool doHeapRegion(HeapRegion* hr) {
2513     HeapWord* b = hr->bottom();
2514     HeapWord* e = hr->end();
2515     HeapWord* t = hr->top();
2516     HeapWord* p = NULL;
2517 
2518     switch (_vo) {
2519       case VerifyOption_G1UsePrevMarking:
2520         p = hr->prev_top_at_mark_start();
2521         break;
2522       case VerifyOption_G1UseNextMarking:
2523         p = hr->next_top_at_mark_start();
2524         break;
2525       case VerifyOption_G1UseMarkWord:
2526         // When we are verifying marking using the mark word
2527         // TAMS has no relevance.
2528         assert(p == NULL, "post-condition");
2529         break;
2530       default:
2531         ShouldNotReachHere();
2532     }
2533     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2534                    "TAMS: "PTR_FORMAT, b, e, t, p);
2535     _out->cr();
2536 
2537     HeapWord* from = b;
2538     HeapWord* to   = t;
2539 
2540     if (to > from) {
2541       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2542       _out->cr();
2543       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2544       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2545       _out->cr();
2546     }
2547 
2548     return false;
2549   }
2550 
2551   PrintReachableRegionClosure(outputStream* out,
2552                               VerifyOption  vo,
2553                               bool          all) :
2554     _out(out), _vo(vo), _all(all) { }
2555 };
2556 
2557 static const char* verify_option_to_tams(VerifyOption vo) {
2558   switch (vo) {
2559     case VerifyOption_G1UsePrevMarking:
2560       return "PTAMS";
2561     case VerifyOption_G1UseNextMarking:
2562       return "NTAMS";
2563     default:
2564       return "NONE";
2565   }
2566 }
2567 
2568 void ConcurrentMark::print_reachable(const char* str,
2569                                      VerifyOption vo,
2570                                      bool all) {
2571   gclog_or_tty->cr();
2572   gclog_or_tty->print_cr("== Doing heap dump... ");
2573 
2574   if (G1PrintReachableBaseFile == NULL) {
2575     gclog_or_tty->print_cr("  #### error: no base file defined");
2576     return;
2577   }
2578 
2579   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2580       (JVM_MAXPATHLEN - 1)) {
2581     gclog_or_tty->print_cr("  #### error: file name too long");
2582     return;
2583   }
2584 
2585   char file_name[JVM_MAXPATHLEN];
2586   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2587   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2588 
2589   fileStream fout(file_name);
2590   if (!fout.is_open()) {
2591     gclog_or_tty->print_cr("  #### error: could not open file");
2592     return;
2593   }
2594 
2595   outputStream* out = &fout;
2596   out->print_cr("-- USING %s", verify_option_to_tams(vo));
2597   out->cr();
2598 
2599   out->print_cr("--- ITERATING OVER REGIONS");
2600   out->cr();
2601   PrintReachableRegionClosure rcl(out, vo, all);
2602   _g1h->heap_region_iterate(&rcl);
2603   out->cr();
2604 
2605   gclog_or_tty->print_cr("  done");
2606   gclog_or_tty->flush();
2607 }
2608 
2609 #endif // PRODUCT
2610 
2611 // This note is for drainAllSATBBuffers and the code in between.
2612 // In the future we could reuse a task to do this work during an
2613 // evacuation pause (since now tasks are not active and can be claimed
2614 // during an evacuation pause). This was a late change to the code and
2615 // is currently not being taken advantage of.
2616 
2617 class CMGlobalObjectClosure : public ObjectClosure {
2618 private:
2619   ConcurrentMark* _cm;
2620 
2621 public:
2622   void do_object(oop obj) {
2623     _cm->deal_with_reference(obj);
2624   }
2625 
2626   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2627 };
2628 
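// Marks the given object on the "next" bitmap if it is in the heap, not
// already marked, and not allocated since the current marking started. If
// the object lies below the global finger it is also pushed on the global
// mark stack, since the finger-based traversal has already passed its
// region and would otherwise never visit it.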
2629 void ConcurrentMark::deal_with_reference(oop obj) {
2630   if (verbose_high()) {
2631     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2632                            (void*) obj);
2633   }
2634 
2635   HeapWord* objAddr = (HeapWord*) obj;
2636   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2637   if (_g1h->is_in_g1_reserved(objAddr)) {
2638     assert(obj != NULL, "null check is implicit");
2639     if (!_nextMarkBitMap->isMarked(objAddr)) {
2640       // Only get the containing region if the object is not marked on the
2641       // bitmap (otherwise, it's a waste of time since we won't do
2642       // anything with it).
2643       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2644       if (!hr->obj_allocated_since_next_marking(obj)) {
2645         if (verbose_high()) {
2646           gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2647                                  "marked", (void*) obj);
2648         }
2649 
2650         // we need to mark it first
2651         if (_nextMarkBitMap->parMark(objAddr)) {
2652           // No OrderAccess:store_load() is needed. It is implicit in the
2653           // CAS done in parMark(objAddr) above
2654           HeapWord* finger = _finger;
2655           if (objAddr < finger) {
2656             if (verbose_high()) {
2657               gclog_or_tty->print_cr("[global] below the global finger "
2658                                      "("PTR_FORMAT"), pushing it", finger);
2659             }
2660             if (!mark_stack_push(obj)) {
2661               if (verbose_low()) {
2662                 gclog_or_tty->print_cr("[global] global stack overflow during "
2663                                        "deal_with_reference");
2664               }
2665             }
2666           }
2667         }
2668       }
2669     }
2670   }
2671 }
2672 
2673 void ConcurrentMark::drainAllSATBBuffers() {
2674   CMGlobalObjectClosure oc(this);
2675   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2676   satb_mq_set.set_closure(&oc);
2677 
2678   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2679     if (verbose_medium()) {
2680       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2681     }
2682   }
2683 
2684   // no need to check whether we should do this, as this is only
2685   // called during an evacuation pause
2686   satb_mq_set.iterate_closure_all_threads();
2687 
2688   satb_mq_set.set_closure(NULL);
2689   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2690 }
2691 
2692 void ConcurrentMark::markPrev(oop p) {
2693   // Note we are overriding the read-only view of the prev map here, via
2694   // the cast.
2695   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2696 }
2697 
2698 void ConcurrentMark::clear(oop p) {
2699   assert(p != NULL && p->is_oop(), "expected an oop");
2700   HeapWord* addr = (HeapWord*)p;
2701   assert(addr >= _nextMarkBitMap->startWord() &&
2702          addr < _nextMarkBitMap->endWord(), "in a region");
2703 
2704   _nextMarkBitMap->clear(addr);
2705 }
2706 
2707 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2708   // Note we are overriding the read-only view of the prev map here, via
2709   // the cast.
2710   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2711   _nextMarkBitMap->clearRange(mr);
2712 }
2713 
2714 HeapRegion*
2715 ConcurrentMark::claim_region(int task_num) {
2716   // "checkpoint" the finger
2717   HeapWord* finger = _finger;
2718 
2719   // _heap_end will not change underneath our feet; it only changes at
2720   // yield points.
2721   while (finger < _heap_end) {
2722     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2723 
2724     // Note on how this code handles humongous regions. In the
2725     // normal case the finger will reach the start of a "starts
2726     // humongous" (SH) region. Its end will either be the end of the
2727     // last "continues humongous" (CH) region in the sequence, or the
2728     // standard end of the SH region (if the SH is the only region in
2729     // the sequence). That way claim_region() will skip over the CH
2730     // regions. However, there is a subtle race between a CM thread
2731     // executing this method and a mutator thread doing a humongous
2732     // object allocation. The two are not mutually exclusive as the CM
2733     // thread does not need to hold the Heap_lock when it gets
2734     // here. So there is a chance that claim_region() will come across
2735     // a free region that's in the process of becoming a SH or a CH
2736     // region. In the former case, it will either
2737     //   a) Miss the update to the region's end, in which case it will
2738     //      visit every subsequent CH region, will find their bitmaps
2739     //      empty, and do nothing, or
2740     //   b) Observe the update of the region's end (in which case it
2741     //      will skip the subsequent CH regions).
2742     // If it comes across a region that suddenly becomes CH, the
2743     // scenario will be similar to b). So, the race between
2744     // claim_region() and a humongous object allocation might force us
2745     // to do a bit of unnecessary work (due to some unnecessary bitmap
2746     // iterations) but it should not introduce any correctness issues.
2747     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2748     HeapWord*   bottom        = curr_region->bottom();
2749     HeapWord*   end           = curr_region->end();
2750     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2751 
2752     if (verbose_low()) {
2753       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2754                              "["PTR_FORMAT", "PTR_FORMAT"), "
2755                              "limit = "PTR_FORMAT,
2756                              task_num, curr_region, bottom, end, limit);
2757     }
2758 
2759     // Is the gap between reading the finger and doing the CAS too long?
2760     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2761     if (res == finger) {
2762       // we succeeded
2763 
2764       // notice that _finger == end cannot be guaranteed here, since
2765       // someone else might have moved the finger even further.
2766       assert(_finger >= end, "the finger should have moved forward");
2767 
2768       if (verbose_low()) {
2769         gclog_or_tty->print_cr("[%d] we were successful with region = "
2770                                PTR_FORMAT, task_num, curr_region);
2771       }
2772 
2773       if (limit > bottom) {
2774         if (verbose_low()) {
2775           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2776                                  "returning it ", task_num, curr_region);
2777         }
2778         return curr_region;
2779       } else {
2780         assert(limit == bottom,
2781                "the region limit should be at bottom");
2782         if (verbose_low()) {
2783           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2784                                  "returning NULL", task_num, curr_region);
2785         }
2786         // we return NULL and the caller should try calling
2787         // claim_region() again.
2788         return NULL;
2789       }
2790     } else {
2791       assert(_finger > finger, "the finger should have moved forward");
2792       if (verbose_low()) {
2793         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2794                                "global finger = "PTR_FORMAT", "
2795                                "our finger = "PTR_FORMAT,
2796                                task_num, _finger, finger);
2797       }
2798 
2799       // read it again
2800       finger = _finger;
2801     }
2802   }
2803 
2804   return NULL;
2805 }
2806 
2807 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2808   bool result = false;
2809   for (int i = 0; i < (int)_max_task_num; ++i) {
2810     CMTask* the_task = _tasks[i];
2811     MemRegion mr = the_task->aborted_region();
2812     if (mr.start() != NULL) {
2813       assert(mr.end() != NULL, "invariant");
2814       assert(mr.word_size() > 0, "invariant");
2815       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2816       assert(hr != NULL, "invariant");
2817       if (hr->in_collection_set()) {
2818         // The region points into the collection set
2819         the_task->set_aborted_region(MemRegion());
2820         result = true;
2821       }
2822     }
2823   }
2824   return result;
2825 }
2826 
2827 bool ConcurrentMark::has_aborted_regions() {
2828   for (int i = 0; i < (int)_max_task_num; ++i) {
2829     CMTask* the_task = _tasks[i];
2830     MemRegion mr = the_task->aborted_region();
2831     if (mr.start() != NULL) {
2832       assert(mr.end() != NULL, "invariant");
2833       assert(mr.word_size() > 0, "invariant");
2834       return true;
2835     }
2836   }
2837   return false;
2838 }
2839 
2840 void ConcurrentMark::oops_do(OopClosure* cl) {
2841   if (_markStack.size() > 0 && verbose_low()) {
2842     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2843                            "size = %d", _markStack.size());
2844   }
2845   // we first iterate over the contents of the mark stack...
2846   _markStack.oops_do(cl);
2847 
2848   for (int i = 0; i < (int)_max_task_num; ++i) {
2849     OopTaskQueue* queue = _task_queues->queue((int)i);
2850 
2851     if (queue->size() > 0 && verbose_low()) {
2852       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2853                              "size = %d", i, queue->size());
2854     }
2855 
2856     // ...then over the contents of the all the task queues.
2857     queue->oops_do(cl);
2858   }
2859 
2860   // Invalidate any entries in the region stack that point into
2861   // the collection set.
2862   if (_regionStack.invalidate_entries_into_cset()) {
2863     // otherwise, any gray objects copied during the evacuation pause
2864     // might not be visited.
2865     assert(_should_gray_objects, "invariant");
2866   }
2867 
2868   // Invalidate any aborted regions, recorded in the individual CM
2869   // tasks, that point into the collection set.
2870   if (invalidate_aborted_regions_in_cset()) {
2871     // otherwise, any gray objects copied during the evacuation pause
2872     // might not be visited.
2873     assert(_should_gray_objects, "invariant");
2874   }
2875 
2876 }
2877 
2878 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2879   _markStack.setEmpty();
2880   _markStack.clear_overflow();
2881   _regionStack.setEmpty();
2882   _regionStack.clear_overflow();
2883   if (clear_overflow) {
2884     clear_has_overflown();
2885   } else {
2886     assert(has_overflown(), "pre-condition");
2887   }
2888   _finger = _heap_start;
2889 
2890   for (int i = 0; i < (int)_max_task_num; ++i) {
2891     OopTaskQueue* queue = _task_queues->queue(i);
2892     queue->set_empty();
2893     // Clear any partial regions from the CMTasks
2894     _tasks[i]->clear_aborted_region();
2895   }
2896 }
2897 
2898 void ConcurrentMark::print_stats() {
2899   if (verbose_stats()) {
2900     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2901     for (size_t i = 0; i < _active_tasks; ++i) {
2902       _tasks[i]->print_stats();
2903       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2904     }
2905   }
2906 }
2907 
2908 class CSMarkOopClosure: public OopClosure {
2909   friend class CSMarkBitMapClosure;
2910 
2911   G1CollectedHeap* _g1h;
2912   CMBitMap*        _bm;
2913   ConcurrentMark*  _cm;
2914   oop*             _ms;
2915   jint*            _array_ind_stack;
2916   int              _ms_size;
2917   int              _ms_ind;
2918   int              _array_increment;
2919 
2920   bool push(oop obj, int arr_ind = 0) {
2921     if (_ms_ind == _ms_size) {
2922       gclog_or_tty->print_cr("Mark stack is full.");
2923       return false;
2924     }
2925     _ms[_ms_ind] = obj;
2926     if (obj->is_objArray()) {
2927       _array_ind_stack[_ms_ind] = arr_ind;
2928     }
2929     _ms_ind++;
2930     return true;
2931   }
2932 
2933   oop pop() {
2934     if (_ms_ind == 0) {
2935       return NULL;
2936     } else {
2937       _ms_ind--;
2938       return _ms[_ms_ind];
2939     }
2940   }
2941 
2942   template <class T> bool drain() {
2943     while (_ms_ind > 0) {
2944       oop obj = pop();
2945       assert(obj != NULL, "Since index was non-zero.");
2946       if (obj->is_objArray()) {
2947         jint arr_ind = _array_ind_stack[_ms_ind];
2948         objArrayOop aobj = objArrayOop(obj);
2949         jint len = aobj->length();
2950         jint next_arr_ind = arr_ind + _array_increment;
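        // If the array extends beyond this chunk, re-push it with the next
        // chunk's starting index so the remaining elements are processed on
        // a later pop; this bounds the work done per drain iteration.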
2951         if (next_arr_ind < len) {
2952           push(obj, next_arr_ind);
2953         }
2954         // Now process this portion of this one.
2955         int lim = MIN2(next_arr_ind, len);
2956         for (int j = arr_ind; j < lim; j++) {
2957           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2958         }
2959 
2960       } else {
2961         obj->oop_iterate(this);
2962       }
2963       if (abort()) return false;
2964     }
2965     return true;
2966   }
2967 
2968 public:
2969   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2970     _g1h(G1CollectedHeap::heap()),
2971     _bm(cm->nextMarkBitMap()),
2972     _cm(cm),
2973     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2974     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2975     _ms_size(ms_size), _ms_ind(0),
2976     _array_increment(MAX2(ms_size/8, 16))
2977   {}
2978 
2979   ~CSMarkOopClosure() {
2980     FREE_C_HEAP_ARRAY(oop, _ms);
2981     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2982   }
2983 
2984   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2985   virtual void do_oop(      oop* p) { do_oop_work(p); }
2986 
2987   template <class T> void do_oop_work(T* p) {
2988     T heap_oop = oopDesc::load_heap_oop(p);
2989     if (oopDesc::is_null(heap_oop)) return;
2990     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2991     if (obj->is_forwarded()) {
2992       // If the object has already been forwarded, we have to make sure
2993       // that it's marked.  So follow the forwarding pointer.  Note that
2994       // this does the right thing for self-forwarding pointers in the
2995       // evacuation failure case.
2996       obj = obj->forwardee();
2997     }
2998     HeapRegion* hr = _g1h->heap_region_containing(obj);
2999     if (hr != NULL) {
3000       if (hr->in_collection_set()) {
3001         if (_g1h->is_obj_ill(obj)) {
3002           _bm->mark((HeapWord*)obj);
3003           if (!push(obj)) {
3004             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
3005             set_abort();
3006           }
3007         }
3008       } else {
3009         // Outside the collection set; we need to gray it
3010         _cm->deal_with_reference(obj);
3011       }
3012     }
3013   }
3014 };
3015 
3016 class CSMarkBitMapClosure: public BitMapClosure {
3017   G1CollectedHeap* _g1h;
3018   CMBitMap*        _bitMap;
3019   ConcurrentMark*  _cm;
3020   CSMarkOopClosure _oop_cl;
3021 public:
3022   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
3023     _g1h(G1CollectedHeap::heap()),
3024     _bitMap(cm->nextMarkBitMap()), _cm(cm),
3025     _oop_cl(cm, ms_size)
3026   {}
3027 
3028   ~CSMarkBitMapClosure() {}
3029 
3030   bool do_bit(size_t offset) {
3031     // convert offset into a HeapWord*
3032     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
3033     assert(_bitMap->endWord() && addr < _bitMap->endWord(),
3034            "address out of range");
3035     assert(_bitMap->isMarked(addr), "tautology");
3036     oop obj = oop(addr);
3037     if (!obj->is_forwarded()) {
3038       if (!_oop_cl.push(obj)) return false;
3039       if (UseCompressedOops) {
3040         if (!_oop_cl.drain<narrowOop>()) return false;
3041       } else {
3042         if (!_oop_cl.drain<oop>()) return false;
3043       }
3044     }
3045     // Otherwise...
3046     return true;
3047   }
3048 };
3049 
3050 
3051 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
3052   CMBitMap* _bm;
3053   CSMarkBitMapClosure _bit_cl;
3054   enum SomePrivateConstants {
3055     MSSize = 1000
3056   };
3057   bool _completed;
3058 public:
3059   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
3060     _bm(cm->nextMarkBitMap()),
3061     _bit_cl(cm, MSSize),
3062     _completed(true)
3063   {}
3064 
3065   ~CompleteMarkingInCSHRClosure() {}
3066 
3067   bool doHeapRegion(HeapRegion* r) {
3068     if (!r->evacuation_failed()) {
3069       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
3070       if (!mr.is_empty()) {
3071         if (!_bm->iterate(&_bit_cl, mr)) {
3072           _completed = false;
3073           return true;
3074         }
3075       }
3076     }
3077     return false;
3078   }
3079 
3080   bool completed() { return _completed; }
3081 };
3082 
3083 class ClearMarksInHRClosure: public HeapRegionClosure {
3084   CMBitMap* _bm;
3085 public:
3086   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
3087 
3088   bool doHeapRegion(HeapRegion* r) {
3089     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
3090       MemRegion usedMR = r->used_region();
3091       _bm->clearRange(usedMR);
3092     }
3093     return false;
3094   }
3095 };
3096 
3097 void ConcurrentMark::complete_marking_in_collection_set() {
3098   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
3099 
3100   if (!g1h->mark_in_progress()) {
3101     g1h->g1_policy()->record_mark_closure_time(0.0);
3102     return;
3103   }
3104 
3105   int i = 1;
3106   double start = os::elapsedTime();
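  // CompleteMarkingInCSHRClosure aborts (and reports !completed()) when the
  // CSMarkOopClosure's fixed-size local mark stack fills up; in that case we
  // simply make another pass over the collection set until a whole pass
  // finishes without aborting.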
3107   while (true) {
3108     i++;
3109     CompleteMarkingInCSHRClosure cmplt(this);
3110     g1h->collection_set_iterate(&cmplt);
3111     if (cmplt.completed()) break;
3112   }
3113   double end_time = os::elapsedTime();
3114   double elapsed_time_ms = (end_time - start) * 1000.0;
3115   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3116 
3117   ClearMarksInHRClosure clr(nextMarkBitMap());
3118   g1h->collection_set_iterate(&clr);
3119 }
3120 
3121 // The next two methods deal with the following optimisation. Some
3122 // objects are gray by being marked and located above the finger. If
3123 // they are copied, during an evacuation pause, below the finger then
3124 // they need to be pushed on the stack. The observation is that, if
3125 // there are no regions in the collection set located above the
3126 // finger, then the above cannot happen, hence we do not need to
3127 // explicitly gray any objects when copying them below the
3128 // finger. The global stack will be scanned to ensure that, if it
3129 // points to objects being copied, it will update their
3130 // location. There is a tricky situation with the gray objects on
3131 // the region stack that are being copied, however. See the comment
3132 // in newCSet().
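//
// Informally, the rule that the two methods below implement boils down to:
//
//   _should_gray_objects =  !region_stack_empty()
//                        || has_aborted_regions()
//                        || (some collection set region ends above _min_finger);
//
// where _min_finger is the minimum of the global finger and all the tasks'
// local fingers, as computed in newCSet().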
3133 
3134 void ConcurrentMark::newCSet() {
3135   if (!concurrent_marking_in_progress()) {
3136     // nothing to do if marking is not in progress
3137     return;
3138   }
3139 
3140   // find what the lowest finger is among the global and local fingers
3141   _min_finger = _finger;
3142   for (int i = 0; i < (int)_max_task_num; ++i) {
3143     CMTask* task = _tasks[i];
3144     HeapWord* task_finger = task->finger();
3145     if (task_finger != NULL && task_finger < _min_finger) {
3146       _min_finger = task_finger;
3147     }
3148   }
3149 
3150   _should_gray_objects = false;
3151 
3152   // This fixes a very subtle and frustrating bug. It might be the case
3153   // that, during an evacuation pause, heap regions that contain
3154   // objects that are gray (by being in regions contained in the
3155   // region stack) are included in the collection set. Since such gray
3156   // objects will be moved, and because it's not easy to redirect
3157   // region stack entries to point to a new location (because objects
3158   // in one region might be scattered to multiple regions after they
3159   // are copied), one option is to ensure that all marked objects
3160   // copied during a pause are pushed on the stack. Notice, however,
3161   // that this problem can only happen when the region stack is not
3162   // empty during an evacuation pause. So, we make the fix a bit less
3163   // conservative and ensure that regions are pushed on the stack,
3164   // irrespective of whether all collection set regions are below the
3165   // finger, if the region stack is not empty. This is expected to be
3166   // a rare case, so I don't think it's necessary to be smarter about it.
3167   if (!region_stack_empty() || has_aborted_regions()) {
3168     _should_gray_objects = true;
3169   }
3170 }
3171 
3172 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3173   if (!concurrent_marking_in_progress()) return;
3174 
3175   HeapWord* region_end = hr->end();
3176   if (region_end > _min_finger) {
3177     _should_gray_objects = true;
3178   }
3179 }
3180 
3181 // Resets the region fields of active CMTasks whose values point
3182 // into the collection set.
3183 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3184   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3185   assert(parallel_marking_threads() <= _max_task_num, "sanity");
3186 
3187   for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3188     CMTask* task = _tasks[i];
3189     HeapWord* task_finger = task->finger();
3190     if (task_finger != NULL) {
3191       assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3192       HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3193       if (finger_region->in_collection_set()) {
3194         // The task's current region is in the collection set.
3195         // This region will be evacuated in the current GC and
3196         // the region fields in the task will be stale.
3197         task->giveup_current_region();
3198       }
3199     }
3200   }
3201 }
3202 
3203 // abandon current marking iteration due to a Full GC
3204 void ConcurrentMark::abort() {
3205   // Clear all marks to force marking thread to do nothing
3206   _nextMarkBitMap->clearAll();
3207   // Empty mark stack
3208   clear_marking_state();
3209   for (int i = 0; i < (int)_max_task_num; ++i) {
3210     _tasks[i]->clear_region_fields();
3211   }
3212   _has_aborted = true;
3213 
3214   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3215   satb_mq_set.abandon_partial_marking();
3216   // This can be called either during or outside marking, we'll read
3217   // the expected_active value from the SATB queue set.
3218   satb_mq_set.set_active_all_threads(
3219                                  false, /* new active value */
3220                                  satb_mq_set.is_active() /* expected_active */);
3221 }
3222 
3223 static void print_ms_time_info(const char* prefix, const char* name,
3224                                NumberSeq& ns) {
3225   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3226                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3227   if (ns.num() > 0) {
3228     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3229                            prefix, ns.sd(), ns.maximum());
3230   }
3231 }
3232 
3233 void ConcurrentMark::print_summary_info() {
3234   gclog_or_tty->print_cr(" Concurrent marking:");
3235   print_ms_time_info("  ", "init marks", _init_times);
3236   print_ms_time_info("  ", "remarks", _remark_times);
3237   {
3238     print_ms_time_info("     ", "final marks", _remark_mark_times);
3239     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3240 
3241   }
3242   print_ms_time_info("  ", "cleanups", _cleanup_times);
3243   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3244                          _total_counting_time,
3245                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3246                           (double)_cleanup_times.num()
3247                          : 0.0));
3248   if (G1ScrubRemSets) {
3249     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3250                            _total_rs_scrub_time,
3251                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3252                             (double)_cleanup_times.num()
3253                            : 0.0));
3254   }
3255   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3256                          (_init_times.sum() + _remark_times.sum() +
3257                           _cleanup_times.sum())/1000.0);
3258   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3259                 "(%8.2f s marking, %8.2f s counting).",
3260                 cmThread()->vtime_accum(),
3261                 cmThread()->vtime_mark_accum(),
3262                 cmThread()->vtime_count_accum());
3263 }
3264 
3265 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3266   _parallel_workers->print_worker_threads_on(st);
3267 }
3268 
3269 // Closures
3270 // XXX: there seems to be a lot of code duplication here;
3271 // should refactor and consolidate the shared code.
3272 
3273 // This closure is used to mark refs into the CMS generation in
3274 // the CMS bit map. Called at the first checkpoint.
3275 
3276 // We take a break if someone is trying to stop the world.
3277 bool ConcurrentMark::do_yield_check(int worker_i) {
3278   if (should_yield()) {
3279     if (worker_i == 0) {
3280       _g1h->g1_policy()->record_concurrent_pause();
3281     }
3282     cmThread()->yield();
3283     if (worker_i == 0) {
3284       _g1h->g1_policy()->record_concurrent_pause_end();
3285     }
3286     return true;
3287   } else {
3288     return false;
3289   }
3290 }
3291 
3292 bool ConcurrentMark::should_yield() {
3293   return cmThread()->should_yield();
3294 }
3295 
3296 bool ConcurrentMark::containing_card_is_marked(void* p) {
3297   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3298   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3299 }
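// For example, with the default 512-byte cards (CardTableModRefBS::card_shift
// == 9), a pointer 0x600 bytes past the start of the reserved region maps to
// bit 0x600 >> 9 == 3 in _card_bm.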
3300 
3301 bool ConcurrentMark::containing_cards_are_marked(void* start,
3302                                                  void* last) {
3303   return containing_card_is_marked(start) &&
3304          containing_card_is_marked(last);
3305 }
3306 
3307 #ifndef PRODUCT
3308 // for debugging purposes
3309 void ConcurrentMark::print_finger() {
3310   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3311                          _heap_start, _heap_end, _finger);
3312   for (int i = 0; i < (int) _max_task_num; ++i) {
3313     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3314   }
3315   gclog_or_tty->print_cr("");
3316 }
3317 #endif
3318 
3319 void CMTask::scan_object(oop obj) {
3320   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3321 
3322   if (_cm->verbose_high()) {
3323     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3324                            _task_id, (void*) obj);
3325   }
3326 
3327   size_t obj_size = obj->size();
3328   _words_scanned += obj_size;
3329 
3330   obj->oop_iterate(_cm_oop_closure);
3331   statsOnly( ++_objs_scanned );
3332   check_limits();
3333 }
3334 
3335 // Closure for iteration over bitmaps
3336 class CMBitMapClosure : public BitMapClosure {
3337 private:
3338   // the bitmap that is being iterated over
3339   CMBitMap*                   _nextMarkBitMap;
3340   ConcurrentMark*             _cm;
3341   CMTask*                     _task;
3342   // true if we're scanning a heap region claimed by the task (so that
3343   // we move the finger along), false if we're not, i.e. currently when
3344   // scanning a heap region popped from the region stack (so that we
3345   // do not move the task finger along; it'd be a mistake if we did so).
3346   bool                        _scanning_heap_region;
3347 
3348 public:
3349   CMBitMapClosure(CMTask *task,
3350                   ConcurrentMark* cm,
3351                   CMBitMap* nextMarkBitMap)
3352     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3353 
3354   void set_scanning_heap_region(bool scanning_heap_region) {
3355     _scanning_heap_region = scanning_heap_region;
3356   }
3357 
3358   bool do_bit(size_t offset) {
3359     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3360     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3361     assert( addr < _cm->finger(), "invariant");
3362 
3363     if (_scanning_heap_region) {
3364       statsOnly( _task->increase_objs_found_on_bitmap() );
3365       assert(addr >= _task->finger(), "invariant");
3366       // We move that task's local finger along.
3367       _task->move_finger_to(addr);
3368     } else {
3369       // We move the task's region finger along.
3370       _task->move_region_finger_to(addr);
3371     }
3372 
3373     _task->scan_object(oop(addr));
3374     // we only partially drain the local queue and global stack
3375     _task->drain_local_queue(true);
3376     _task->drain_global_stack(true);
3377 
3378     // if the has_aborted flag has been raised, we need to bail out of
3379     // the iteration
3380     return !_task->has_aborted();
3381   }
3382 };
3383 
3384 // Closure for iterating over objects, currently only used for
3385 // processing SATB buffers.
3386 class CMObjectClosure : public ObjectClosure {
3387 private:
3388   CMTask* _task;
3389 
3390 public:
3391   void do_object(oop obj) {
3392     _task->deal_with_reference(obj);
3393   }
3394 
3395   CMObjectClosure(CMTask* task) : _task(task) { }
3396 };
3397 
3398 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3399                                ConcurrentMark* cm,
3400                                CMTask* task)
3401   : _g1h(g1h), _cm(cm), _task(task) {
3402   assert(_ref_processor == NULL, "should be initialized to NULL");
3403 
3404   if (G1UseConcMarkReferenceProcessing) {
3405     _ref_processor = g1h->ref_processor();
3406     assert(_ref_processor != NULL, "should not be NULL");
3407   }
3408 }
3409 
3410 void CMTask::setup_for_region(HeapRegion* hr) {
3411   // Separated the asserts so that we know which one fires.
3412   assert(hr != NULL,
3413         "claim_region() should have returned a non-NULL region");
3414   assert(!hr->continuesHumongous(),
3415         "claim_region() should have filtered out continues humongous regions");
3416 
3417   if (_cm->verbose_low()) {
3418     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3419                            _task_id, hr);
3420   }
3421 
3422   _curr_region  = hr;
3423   _finger       = hr->bottom();
3424   update_region_limit();
3425 }
3426 
3427 void CMTask::update_region_limit() {
3428   HeapRegion* hr            = _curr_region;
3429   HeapWord* bottom          = hr->bottom();
3430   HeapWord* limit           = hr->next_top_at_mark_start();
3431 
3432   if (limit == bottom) {
3433     if (_cm->verbose_low()) {
3434       gclog_or_tty->print_cr("[%d] found an empty region "
3435                              "["PTR_FORMAT", "PTR_FORMAT")",
3436                              _task_id, bottom, limit);
3437     }
3438     // The region was collected underneath our feet.
3439     // We set the finger to bottom to ensure that the bitmap
3440     // iteration that will follow this will not do anything.
3441     // (this is not a condition that holds when we set the region up,
3442     // as the region is not supposed to be empty in the first place)
3443     _finger = bottom;
3444   } else if (limit >= _region_limit) {
3445     assert(limit >= _finger, "peace of mind");
3446   } else {
3447     assert(limit < _region_limit, "only way to get here");
3448     // This can happen under some pretty unusual circumstances.  An
3449     // evacuation pause empties the region underneath our feet (NTAMS
3450     // at bottom). We then do some allocation in the region (NTAMS
3451     // stays at bottom), followed by the region being used as a GC
3452     // alloc region (NTAMS will move to top() and the objects
3453     // originally below it will be grayed). All objects now marked in
3454     // the region are explicitly grayed, if below the global finger,
3455     // and in fact we do not need to scan anything else. So, we simply
3456     // set _finger to be limit to ensure that the bitmap iteration
3457     // doesn't do anything.
3458     _finger = limit;
3459   }
3460 
3461   _region_limit = limit;
3462 }
3463 
3464 void CMTask::giveup_current_region() {
3465   assert(_curr_region != NULL, "invariant");
3466   if (_cm->verbose_low()) {
3467     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3468                            _task_id, _curr_region);
3469   }
3470   clear_region_fields();
3471 }
3472 
3473 void CMTask::clear_region_fields() {
3474   // Values for these three fields that indicate that we're not
3475   // holding on to a region.
3476   _curr_region   = NULL;
3477   _finger        = NULL;
3478   _region_limit  = NULL;
3479 
3480   _region_finger = NULL;
3481 }
3482 
3483 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3484   if (cm_oop_closure == NULL) {
3485     assert(_cm_oop_closure != NULL, "invariant");
3486   } else {
3487     assert(_cm_oop_closure == NULL, "invariant");
3488   }
3489   _cm_oop_closure = cm_oop_closure;
3490 }
3491 
3492 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3493   guarantee(nextMarkBitMap != NULL, "invariant");
3494 
3495   if (_cm->verbose_low()) {
3496     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3497   }
3498 
3499   _nextMarkBitMap                = nextMarkBitMap;
3500   clear_region_fields();
3501   assert(_aborted_region.is_empty(), "should have been cleared");
3502 
3503   _calls                         = 0;
3504   _elapsed_time_ms               = 0.0;
3505   _termination_time_ms           = 0.0;
3506   _termination_start_time_ms     = 0.0;
3507 
3508 #if _MARKING_STATS_
3509   _local_pushes                  = 0;
3510   _local_pops                    = 0;
3511   _local_max_size                = 0;
3512   _objs_scanned                  = 0;
3513   _global_pushes                 = 0;
3514   _global_pops                   = 0;
3515   _global_max_size               = 0;
3516   _global_transfers_to           = 0;
3517   _global_transfers_from         = 0;
3518   _region_stack_pops             = 0;
3519   _regions_claimed               = 0;
3520   _objs_found_on_bitmap          = 0;
3521   _satb_buffers_processed        = 0;
3522   _steal_attempts                = 0;
3523   _steals                        = 0;
3524   _aborted                       = 0;
3525   _aborted_overflow              = 0;
3526   _aborted_cm_aborted            = 0;
3527   _aborted_yield                 = 0;
3528   _aborted_timed_out             = 0;
3529   _aborted_satb                  = 0;
3530   _aborted_termination           = 0;
3531 #endif // _MARKING_STATS_
3532 }
3533 
3534 bool CMTask::should_exit_termination() {
3535   regular_clock_call();
3536   // This is called when we are in the termination protocol. We should
3537   // quit if, for some reason, this task wants to abort or the global
3538   // stack is not empty (this means that we can get work from it).
3539   return !_cm->mark_stack_empty() || has_aborted();
3540 }
3541 
3542 void CMTask::reached_limit() {
3543   assert(_words_scanned >= _words_scanned_limit ||
3544          _refs_reached >= _refs_reached_limit ,
3545          "shouldn't have been called otherwise");
3546   regular_clock_call();
3547 }
3548 
3549 void CMTask::regular_clock_call() {
3550   if (has_aborted()) return;
3551 
3552   // First, we need to recalculate the words scanned and refs reached
3553   // limits for the next clock call.
3554   recalculate_limits();
3555 
3556   // During the regular clock call we do the following
3557 
3558   // (1) If an overflow has been flagged, then we abort.
3559   if (_cm->has_overflown()) {
3560     set_has_aborted();
3561     return;
3562   }
3563 
3564   // If we are not concurrent (i.e. we're doing remark) we don't need
3565   // to check anything else. The other steps are only needed during
3566   // the concurrent marking phase.
3567   if (!concurrent()) return;
3568 
3569   // (2) If marking has been aborted for Full GC, then we also abort.
3570   if (_cm->has_aborted()) {
3571     set_has_aborted();
3572     statsOnly( ++_aborted_cm_aborted );
3573     return;
3574   }
3575 
3576   double curr_time_ms = os::elapsedVTime() * 1000.0;
3577 
3578   // (3) If marking stats are enabled, then we update the step history.
3579 #if _MARKING_STATS_
3580   if (_words_scanned >= _words_scanned_limit) {
3581     ++_clock_due_to_scanning;
3582   }
3583   if (_refs_reached >= _refs_reached_limit) {
3584     ++_clock_due_to_marking;
3585   }
3586 
3587   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3588   _interval_start_time_ms = curr_time_ms;
3589   _all_clock_intervals_ms.add(last_interval_ms);
3590 
3591   if (_cm->verbose_medium()) {
3592       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3593                         "scanned = %d%s, refs reached = %d%s",
3594                         _task_id, last_interval_ms,
3595                         _words_scanned,
3596                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3597                         _refs_reached,
3598                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3599   }
3600 #endif // _MARKING_STATS_
3601 
3602   // (4) We check whether we should yield. If we have to, then we abort.
3603   if (_cm->should_yield()) {
3604     // We should yield. To do this we abort the task. The caller is
3605     // responsible for yielding.
3606     set_has_aborted();
3607     statsOnly( ++_aborted_yield );
3608     return;
3609   }
3610 
3611   // (5) We check whether we've reached our time quota. If we have,
3612   // then we abort.
3613   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3614   if (elapsed_time_ms > _time_target_ms) {
3615     set_has_aborted();
3616     _has_timed_out = true;
3617     statsOnly( ++_aborted_timed_out );
3618     return;
3619   }
3620 
3621   // (6) Finally, we check whether there are enough completed SATB
3622   // buffers available for processing. If there are, we abort.
3623   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3624   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3625     if (_cm->verbose_low()) {
3626       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3627                              _task_id);
3628     }
3629     // we do need to process SATB buffers, we'll abort and restart
3630     // the marking task to do so
3631     set_has_aborted();
3632     statsOnly( ++_aborted_satb );
3633     return;
3634   }
3635 }
3636 
3637 void CMTask::recalculate_limits() {
3638   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3639   _words_scanned_limit      = _real_words_scanned_limit;
3640 
3641   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3642   _refs_reached_limit       = _real_refs_reached_limit;
3643 }
3644 
3645 void CMTask::decrease_limits() {
3646   // This is called when we believe that we're going to do an infrequent
3647   // operation which will increase the per byte scanned cost (i.e. move
3648   // entries to/from the global stack). It basically tries to decrease the
3649   // scanning limit so that the clock is called earlier.
3650 
3651   if (_cm->verbose_medium()) {
3652     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3653   }
3654 
3655   _words_scanned_limit = _real_words_scanned_limit -
3656     3 * words_scanned_period / 4;
3657   _refs_reached_limit  = _real_refs_reached_limit -
3658     3 * refs_reached_period / 4;
3659 }
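// For example, with a scanning period of P words the limit is pulled back by
// 3P/4, i.e. to one quarter of a period past the last recalculation point, so
// the regular clock will fire much sooner than it otherwise would.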
3660 
3661 void CMTask::move_entries_to_global_stack() {
3662   // local array where we'll store the entries that will be popped
3663   // from the local queue
3664   oop buffer[global_stack_transfer_size];
3665 
3666   int n = 0;
3667   oop obj;
3668   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3669     buffer[n] = obj;
3670     ++n;
3671   }
3672 
3673   if (n > 0) {
3674     // we popped at least one entry from the local queue
3675 
3676     statsOnly( ++_global_transfers_to; _local_pops += n );
3677 
3678     if (!_cm->mark_stack_push(buffer, n)) {
3679       if (_cm->verbose_low()) {
3680         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3681                                _task_id);
3682       }
3683       set_has_aborted();
3684     } else {
3685       // the transfer was successful
3686 
3687       if (_cm->verbose_medium()) {
3688         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3689                                _task_id, n);
3690       }
3691       statsOnly( int tmp_size = _cm->mark_stack_size();
3692                  if (tmp_size > _global_max_size) {
3693                    _global_max_size = tmp_size;
3694                  }
3695                  _global_pushes += n );
3696     }
3697   }
3698 
3699   // this operation was quite expensive, so decrease the limits
3700   decrease_limits();
3701 }
3702 
3703 void CMTask::get_entries_from_global_stack() {
3704   // local array where we'll store the entries that will be popped
3705   // from the global stack.
3706   oop buffer[global_stack_transfer_size];
3707   int n;
3708   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3709   assert(n <= global_stack_transfer_size,
3710          "we should not pop more than the given limit");
3711   if (n > 0) {
3712     // yes, we did actually pop at least one entry
3713 
3714     statsOnly( ++_global_transfers_from; _global_pops += n );
3715     if (_cm->verbose_medium()) {
3716       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3717                              _task_id, n);
3718     }
3719     for (int i = 0; i < n; ++i) {
3720       bool success = _task_queue->push(buffer[i]);
3721       // We only call this when the local queue is empty or under a
3722       // given target limit. So, we do not expect this push to fail.
3723       assert(success, "invariant");
3724     }
3725 
3726     statsOnly( int tmp_size = _task_queue->size();
3727                if (tmp_size > _local_max_size) {
3728                  _local_max_size = tmp_size;
3729                }
3730                _local_pushes += n );
3731   }
3732 
3733   // this operation was quite expensive, so decrease the limits
3734   decrease_limits();
3735 }
3736 
3737 void CMTask::drain_local_queue(bool partially) {
3738   if (has_aborted()) return;
3739 
3740   // Decide what the target size is, depending on whether we're going to
3741   // drain it partially (so that other tasks can steal if they run out
3742   // of things to do) or totally (at the very end).
3743   size_t target_size;
3744   if (partially) {
3745     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3746   } else {
3747     target_size = 0;
3748   }
3749 
3750   if (_task_queue->size() > target_size) {
3751     if (_cm->verbose_high()) {
3752       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3753                              _task_id, target_size);
3754     }
3755 
3756     oop obj;
3757     bool ret = _task_queue->pop_local(obj);
3758     while (ret) {
3759       statsOnly( ++_local_pops );
3760 
3761       if (_cm->verbose_high()) {
3762         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3763                                (void*) obj);
3764       }
3765 
3766       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3767       assert(!_g1h->is_on_master_free_list(
3768                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3769 
3770       scan_object(obj);
3771 
3772       if (_task_queue->size() <= target_size || has_aborted()) {
3773         ret = false;
3774       } else {
3775         ret = _task_queue->pop_local(obj);
3776       }
3777     }
3778 
3779     if (_cm->verbose_high()) {
3780       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3781                              _task_id, _task_queue->size());
3782     }
3783   }
3784 }
3785 
3786 void CMTask::drain_global_stack(bool partially) {
3787   if (has_aborted()) return;
3788 
3789   // We have a policy to drain the local queue before we attempt to
3790   // drain the global stack.
3791   assert(partially || _task_queue->size() == 0, "invariant");
3792 
3793   // Decide what the target size is, depending on whether we're going to
3794   // drain it partially (so that other tasks can steal if they run out
3795   // of things to do) or totally (at the very end).  Notice that,
3796   // because we move entries from the global stack in chunks or
3797   // because another task might be doing the same, we might in fact
3798   // drop below the target. But, this is not a problem.
3799   size_t target_size;
3800   if (partially) {
3801     target_size = _cm->partial_mark_stack_size_target();
3802   } else {
3803     target_size = 0;
3804   }
3805 
3806   if (_cm->mark_stack_size() > target_size) {
3807     if (_cm->verbose_low()) {
3808       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3809                              _task_id, target_size);
3810     }
3811 
3812     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3813       get_entries_from_global_stack();
3814       drain_local_queue(partially);
3815     }
3816 
3817     if (_cm->verbose_low()) {
3818       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3819                              _task_id, _cm->mark_stack_size());
3820     }
3821   }
3822 }
3823 
3824 // The SATB queue set makes several assumptions about whether to call
3825 // the par or non-par versions of the methods. This is why some of the
3826 // code is replicated. We should really get rid of the single-threaded
3827 // version of the code to simplify things.
3828 void CMTask::drain_satb_buffers() {
3829   if (has_aborted()) return;
3830 
3831   // We set this so that the regular clock knows that we're in the
3832   // middle of draining buffers and doesn't set the abort flag when it
3833   // notices that SATB buffers are available for draining. It'd be
3834   // very counterproductive if it did that. :-)
3835   _draining_satb_buffers = true;
3836 
3837   CMObjectClosure oc(this);
3838   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3839   if (G1CollectedHeap::use_parallel_gc_threads()) {
3840     satb_mq_set.set_par_closure(_task_id, &oc);
3841   } else {
3842     satb_mq_set.set_closure(&oc);
3843   }
3844 
3845   // This keeps claiming and applying the closure to completed buffers
3846   // until we run out of buffers or we need to abort.
3847   if (G1CollectedHeap::use_parallel_gc_threads()) {
3848     while (!has_aborted() &&
3849            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3850       if (_cm->verbose_medium()) {
3851         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3852       }
3853       statsOnly( ++_satb_buffers_processed );
3854       regular_clock_call();
3855     }
3856   } else {
3857     while (!has_aborted() &&
3858            satb_mq_set.apply_closure_to_completed_buffer()) {
3859       if (_cm->verbose_medium()) {
3860         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3861       }
3862       statsOnly( ++_satb_buffers_processed );
3863       regular_clock_call();
3864     }
3865   }
3866 
3867   if (!concurrent() && !has_aborted()) {
3868     // We should only do this during remark.
3869     if (G1CollectedHeap::use_parallel_gc_threads()) {
3870       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3871     } else {
3872       satb_mq_set.iterate_closure_all_threads();
3873     }
3874   }
3875 
3876   _draining_satb_buffers = false;
3877 
3878   assert(has_aborted() ||
3879          concurrent() ||
3880          satb_mq_set.completed_buffers_num() == 0, "invariant");
3881 
3882   if (G1CollectedHeap::use_parallel_gc_threads()) {
3883     satb_mq_set.set_par_closure(_task_id, NULL);
3884   } else {
3885     satb_mq_set.set_closure(NULL);
3886   }
3887 
3888   // again, this was a potentially expensive operation, decrease the
3889   // limits to get the regular clock call early
3890   decrease_limits();
3891 }
3892 
3893 void CMTask::drain_region_stack(BitMapClosure* bc) {
3894   if (has_aborted()) return;
3895 
3896   assert(_region_finger == NULL,
3897          "it should be NULL when we're not scanning a region");
3898 
3899   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3900     if (_cm->verbose_low()) {
3901       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3902                              _task_id, _cm->region_stack_size());
3903     }
3904 
3905     MemRegion mr;
3906 
3907     if (!_aborted_region.is_empty()) {
3908       mr = _aborted_region;
3909       _aborted_region = MemRegion();
3910 
3911       if (_cm->verbose_low()) {
3912         gclog_or_tty->print_cr("[%d] scanning aborted region "
3913                                "[ " PTR_FORMAT ", " PTR_FORMAT " )",
3914                                _task_id, mr.start(), mr.end());
3915       }
3916     } else {
3917       mr = _cm->region_stack_pop_lock_free();
3918       // it returns MemRegion() if the pop fails
3919       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3920     }
3921 
3922     while (mr.start() != NULL) {
3923       if (_cm->verbose_medium()) {
3924         gclog_or_tty->print_cr("[%d] we are scanning region "
3925                                "["PTR_FORMAT", "PTR_FORMAT")",
3926                                _task_id, mr.start(), mr.end());
3927       }
3928 
3929       assert(mr.end() <= _cm->finger(),
3930              "otherwise the region shouldn't be on the stack");
3931       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3932       if (_nextMarkBitMap->iterate(bc, mr)) {
3933         assert(!has_aborted(),
3934                "cannot abort the task without aborting the bitmap iteration");
3935 
3936         // We finished iterating over the region without aborting.
3937         regular_clock_call();
3938         if (has_aborted()) {
3939           mr = MemRegion();
3940         } else {
3941           mr = _cm->region_stack_pop_lock_free();
3942           // it returns MemRegion() if the pop fails
3943           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3944         }
3945       } else {
3946         assert(has_aborted(), "currently the only way to do so");
3947 
3948         // The only way to abort the bitmap iteration is to return
3949         // false from the do_bit() method. However, inside the
3950         // do_bit() method we move the _region_finger to point to the
3951         // object currently being looked at. So, if we bail out, we
3952         // have definitely set _region_finger to something non-null.
3953         assert(_region_finger != NULL, "invariant");
3954 
3955         // Make sure that any previously aborted region has been
3956         // cleared.
3957         assert(_aborted_region.is_empty(), "aborted region not cleared");
3958 
3959         // The iteration was actually aborted. So now _region_finger
3960         // points to the address of the object we last scanned. If we
3961         // leave it there, when we restart this task, we will rescan
3962         // the object. It is easy to avoid this. We move the finger by
3963         // enough to point to the next possible object header (the
3964         // bitmap knows by how much we need to move it as it knows its
3965         // granularity).
3966         MemRegion newRegion =
3967           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3968 
3969         if (!newRegion.is_empty()) {
3970           if (_cm->verbose_low()) {
3971             gclog_or_tty->print_cr("[%d] recording unscanned region"
3972                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3973                                    _task_id,
3974                                    newRegion.start(), newRegion.end());
3975           }
3976           // Now record the part of the region we didn't scan to
3977           // make sure this task scans it later.
3978           _aborted_region = newRegion;
3979         }
3980         // break from while
3981         mr = MemRegion();
3982       }
3983       _region_finger = NULL;
3984     }
3985 
3986     if (_cm->verbose_low()) {
3987       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3988                              _task_id, _cm->region_stack_size());
3989     }
3990   }
3991 }
3992 
3993 void CMTask::print_stats() {
3994   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3995                          _task_id, _calls);
3996   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3997                          _elapsed_time_ms, _termination_time_ms);
3998   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3999                          _step_times_ms.num(), _step_times_ms.avg(),
4000                          _step_times_ms.sd());
4001   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
4002                          _step_times_ms.maximum(), _step_times_ms.sum());
4003 
4004 #if _MARKING_STATS_
4005   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4006                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4007                          _all_clock_intervals_ms.sd());
4008   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
4009                          _all_clock_intervals_ms.maximum(),
4010                          _all_clock_intervals_ms.sum());
4011   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
4012                          _clock_due_to_scanning, _clock_due_to_marking);
4013   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
4014                          _objs_scanned, _objs_found_on_bitmap);
4015   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
4016                          _local_pushes, _local_pops, _local_max_size);
4017   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
4018                          _global_pushes, _global_pops, _global_max_size);
4019   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
4020                          _global_transfers_to,_global_transfers_from);
4021   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
4022                          _regions_claimed, _region_stack_pops);
4023   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
4024   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
4025                          _steal_attempts, _steals);
4026   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
4027   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
4028                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4029   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
4030                          _aborted_timed_out, _aborted_satb, _aborted_termination);
4031 #endif // _MARKING_STATS_
4032 }
4033 
4034 /*****************************************************************************
4035 
4036     The do_marking_step(time_target_ms) method is the building block
4037     of the parallel marking framework. It can be called in parallel
4038     with other invocations of do_marking_step() on different tasks
4039     (but only one per task, obviously) and concurrently with the
4040     mutator threads, or during remark, hence it eliminates the need
4041     for two versions of the code. When called during remark, it will
4042     pick up from where the task left off during the concurrent marking
4043     phase. Interestingly, tasks are also claimable during evacuation
4044     pauses, since do_marking_step() ensures that it aborts before
4045     it needs to yield.
4046 
4047     The data structures that it uses to do marking work are the
4048     following:
4049 
4050       (1) Marking Bitmap. If there are gray objects that appear only
4051       on the bitmap (this happens either when dealing with an overflow
4052       or when the initial marking phase has simply marked the roots
4053       and didn't push them on the stack), then tasks claim heap
4054       regions whose bitmap they then scan to find gray objects. A
4055       global finger indicates where the end of the last claimed region
4056       is. A local finger indicates how far into the region a task has
4057       scanned. The two fingers are used to determine how to gray an
4058       object (i.e. whether simply marking it is OK, as it will be
4059       visited by a task in the future, or whether it also needs to be
4060       pushed on a stack).
4061 
4062       (2) Local Queue. The local queue of the task which is accessed
4063       reasonably efficiently by the task. Other tasks can steal from
4064       it when they run out of work. Throughout the marking phase, a
4065       task attempts to keep its local queue short but not totally
4066       empty, so that entries are available for stealing by other
4067       tasks. Only when there is no more work will a task totally
4068       drain its local queue.
4069 
4070       (3) Global Mark Stack. This handles local queue overflow. During
4071       marking only sets of entries are moved between it and the local
4072       queues, as access to it requires a mutex and finer-grained
4073       interaction with it might cause contention. If it
4074       overflows, then the marking phase should restart and iterate
4075       over the bitmap to identify gray objects. Throughout the marking
4076       phase, tasks attempt to keep the global mark stack at a small
4077       length but not totally empty, so that entries are available for
4078       popping by other tasks. Only when there is no more work will
4079       tasks totally drain the global mark stack.
4080 
4081       (4) Global Region Stack. Entries on it correspond to areas of
4082       the bitmap that need to be scanned since they contain gray
4083       objects. Pushes on the region stack only happen during
4084       evacuation pauses and typically correspond to areas covered by
4085       GCLABs. If it overflows, then the marking phase should restart
4086       and iterate over the bitmap to identify gray objects. Tasks will
4087       try to totally drain the region stack as soon as possible.
4088 
4089       (5) SATB Buffer Queue. This is where completed SATB buffers are
4090       made available. Buffers are regularly removed from this queue
4091       and scanned for roots, so that the queue doesn't get too
4092       long. During remark, all completed buffers are processed, as
4093       well as the filled in parts of any uncompleted buffers.
4094 
4095     The do_marking_step() method tries to abort when the time target
4096     has been reached. There are a few other cases when the
4097     do_marking_step() method also aborts:
4098 
4099       (1) When the marking phase has been aborted (after a Full GC).
4100 
4101       (2) When a global overflow (either on the global stack or the
4102       region stack) has been triggered. Before the task aborts, it
4103       will actually sync up with the other tasks to ensure that all
4104       the marking data structures (local queues, stacks, fingers etc.)
4105       are re-initialised so that when do_marking_step() completes,
4106       the marking phase can immediately restart.
4107 
4108       (3) When enough completed SATB buffers are available. The
4109       do_marking_step() method only tries to drain SATB buffers right
4110       at the beginning. So, if enough buffers are available, the
4111       marking step aborts and the SATB buffers are processed at
4112       the beginning of the next invocation.
4113 
4114       (4) To yield. When we have to yield, we abort and yield
4115       right at the end of do_marking_step(). This saves us from a lot
4116       of hassle as, by yielding, we might allow a Full GC. If this
4117       happens then objects will be compacted underneath our feet, the
4118       heap might shrink, etc. We save checking for this by just
4119       aborting and doing the yield right at the end.
4120 
4121     From the above it follows that the do_marking_step() method should
4122     be called in a loop (or, otherwise, regularly) until it completes.
4123 
4124     If a marking step completes without its has_aborted() flag being
4125     true, it means it has completed the current marking phase (and
4126     also all other marking tasks have done so and have all synced up).
4127 
4128     A method called regular_clock_call() is invoked "regularly" (in
4129     sub ms intervals) throughout marking. It is this clock method that
4130     checks all the abort conditions which were mentioned above and
4131     decides when the task should abort. A work-based scheme is used to
4132     trigger this clock method: when the number of object words the
4133     marking phase has scanned or the number of references the marking
4134     phase has visited reach a given limit. Additional invocations of
4135     the clock method have been planted in a few other strategic places
4136     too. The initial reason for the clock method was to avoid calling
4137     vtime too regularly, as it is quite expensive. So, once it was in
4138     place, it was natural to piggy-back all the other conditions on it
4139     too and not constantly check them throughout the code.
4140 
4141  *****************************************************************************/
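
// A minimal sketch of the calling pattern implied above (illustrative only;
// 'task' stands for a CMTask* and 'cm' for the ConcurrentMark*, and the real
// callers elsewhere in this file also handle yielding, overflow syncing and
// SATB processing between steps):
//
//   do {
//     task->do_marking_step(target_ms,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//   } while (task->has_aborted() && !cm->has_aborted());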
4142 
4143 void CMTask::do_marking_step(double time_target_ms,
4144                              bool do_stealing,
4145                              bool do_termination) {
4146   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4147   assert(concurrent() == _cm->concurrent(), "they should be the same");
4148 
4149   assert(concurrent() || _cm->region_stack_empty(),
4150          "the region stack should have been cleared before remark");
4151   assert(concurrent() || !_cm->has_aborted_regions(),
4152          "aborted regions should have been cleared before remark");
4153   assert(_region_finger == NULL,
4154          "this should be non-null only when a region is being scanned");
4155 
4156   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4157   assert(_task_queues != NULL, "invariant");
4158   assert(_task_queue != NULL, "invariant");
4159   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4160 
4161   assert(!_claimed,
4162          "only one thread should claim this task at any one time");
4163 
4164   // OK, this doesn't safeguard against all possible scenarios, as it is
4165   // possible for two threads to set the _claimed flag at the same
4166   // time. But it is only for debugging purposes anyway and it will
4167   // catch most problems.
4168   _claimed = true;
4169 
4170   _start_time_ms = os::elapsedVTime() * 1000.0;
4171   statsOnly( _interval_start_time_ms = _start_time_ms );
4172 
4173   double diff_prediction_ms =
4174     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4175   _time_target_ms = time_target_ms - diff_prediction_ms;
4176 
4177   // set up the variables that are used in the work-based scheme to
4178   // call the regular clock method
4179   _words_scanned = 0;
4180   _refs_reached  = 0;
4181   recalculate_limits();
4182 
4183   // clear all flags
4184   clear_has_aborted();
4185   _has_timed_out = false;
4186   _draining_satb_buffers = false;
4187 
4188   ++_calls;
4189 
4190   if (_cm->verbose_low()) {
4191     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4192                            "target = %1.2lfms >>>>>>>>>>",
4193                            _task_id, _calls, _time_target_ms);
4194   }
4195 
4196   // Set up the bitmap and oop closures. Anything that uses them is
4197   // eventually called from this method, so it is OK to allocate these
4198   // statically.
4199   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4200   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4201   set_cm_oop_closure(&cm_oop_closure);
4202 
4203   if (_cm->has_overflown()) {
4204     // This can happen if the region stack or the mark stack overflows
4205     // during a GC pause and this task, after a yield point,
4206     // restarts. We have to abort as we need to get into the overflow
4207     // protocol which happens right at the end of this task.
4208     set_has_aborted();
4209   }
4210 
4211   // First drain any available SATB buffers. After this, we will not
4212   // look at SATB buffers before the next invocation of this method.
4213   // If enough completed SATB buffers are queued up, the regular clock
4214   // will abort this task so that it restarts.
4215   drain_satb_buffers();
4216   // ...then partially drain the local queue and the global stack
4217   drain_local_queue(true);
4218   drain_global_stack(true);
4219 
4220   // Then totally drain the region stack.  We will not look at
4221   // it again before the next invocation of this method. Entries on
4222   // the region stack are only added during evacuation pauses, for
4223   // which we have to yield. When we do, we abort the task anyway so
4224   // it will look at the region stack again when it restarts.
4225   bitmap_closure.set_scanning_heap_region(false);
4226   drain_region_stack(&bitmap_closure);
4227   // ...then partially drain the local queue and the global stack
4228   drain_local_queue(true);
4229   drain_global_stack(true);
4230 
4231   do {
4232     if (!has_aborted() && _curr_region != NULL) {
4233       // This means that we're already holding on to a region.
4234       assert(_finger != NULL, "if region is not NULL, then the finger "
4235              "should not be NULL either");
4236 
4237       // We might have restarted this task after an evacuation pause
4238       // which might have evacuated the region we're holding on to
4239       // underneath our feet. Let's read its limit again to make sure
4240       // that we do not iterate over a region of the heap that
4241       // contains garbage (update_region_limit() will also move
4242       // _finger to the start of the region if it is found empty).
4243       update_region_limit();
4244       // We will start from _finger not from the start of the region,
4245       // as we might be restarting this task after aborting half-way
4246       // through scanning this region. In this case, _finger points to
4247       // the address where we last found a marked object. If this is a
4248       // fresh region, _finger points to start().
4249       MemRegion mr = MemRegion(_finger, _region_limit);
4250 
4251       if (_cm->verbose_low()) {
4252         gclog_or_tty->print_cr("[%d] we're scanning part "
4253                                "["PTR_FORMAT", "PTR_FORMAT") "
4254                                "of region "PTR_FORMAT,
4255                                _task_id, _finger, _region_limit, _curr_region);
4256       }
4257 
4258       // Let's iterate over the bitmap of the part of the
4259       // region that is left.
4260       bitmap_closure.set_scanning_heap_region(true);
4261       if (mr.is_empty() ||
4262           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4263         // We successfully completed iterating over the region. Now,
4264         // let's give up the region.
4265         giveup_current_region();
4266         regular_clock_call();
4267       } else {
4268         assert(has_aborted(), "currently the only way to do so");
4269         // The only way to abort the bitmap iteration is to return
4270         // false from the do_bit() method. However, inside the
4271         // do_bit() method we move the _finger to point to the
4272         // object currently being looked at. So, if we bail out, we
4273         // have definitely set _finger to something non-null.
4274         assert(_finger != NULL, "invariant");
4275 
4276         // Region iteration was actually aborted. So now _finger
4277         // points to the address of the object we last scanned. If we
4278         // leave it there, when we restart this task, we will rescan
4279         // the object. It is easy to avoid this. We move the finger by
4280         // enough to point to the next possible object header (the
4281         // bitmap knows by how much we need to move it as it knows its
4282         // granularity).
4283         assert(_finger < _region_limit, "invariant");
4284         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4285         // Check if bitmap iteration was aborted while scanning the last object
4286         if (new_finger >= _region_limit) {
4287           giveup_current_region();
4288         } else {
4289           move_finger_to(new_finger);
4290         }
4291       }
4292     }
4293     // At this point we have either completed iterating over the
4294     // region we were holding on to, or we have aborted.
4295 
4296     // We then partially drain the local queue and the global stack.
4297     // (Do we really need this?)
4298     drain_local_queue(true);
4299     drain_global_stack(true);
4300 
4301     // Read the note on the claim_region() method for why it might
4302     // return NULL with potentially more regions available for
4303     // claiming and why we have to check out_of_regions() to determine
4304     // whether we're done or not.
4305     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4306       // We are going to try to claim a new region. We should have
4307       // given up on the previous one.
4308       // Separated the asserts so that we know which one fires.
4309       assert(_curr_region  == NULL, "invariant");
4310       assert(_finger       == NULL, "invariant");
4311       assert(_region_limit == NULL, "invariant");
4312       if (_cm->verbose_low()) {
4313         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4314       }
4315       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4316       if (claimed_region != NULL) {
4317         // Yes, we managed to claim one
4318         statsOnly( ++_regions_claimed );
4319 
4320         if (_cm->verbose_low()) {
4321           gclog_or_tty->print_cr("[%d] we successfully claimed "
4322                                  "region "PTR_FORMAT,
4323                                  _task_id, claimed_region);
4324         }
4325 
4326         setup_for_region(claimed_region);
4327         assert(_curr_region == claimed_region, "invariant");
4328       }
4329       // It is important to call the regular clock here. It might take
4330       // a while to claim a region if, for example, we hit a large
4331       // block of empty regions. So we need to call the regular clock
4332       // method once round the loop to make sure it's called
4333       // frequently enough.
4334       regular_clock_call();
4335     }
4336 
4337     if (!has_aborted() && _curr_region == NULL) {
4338       assert(_cm->out_of_regions(),
4339              "at this point we should be out of regions");
4340     }
4341   } while ( _curr_region != NULL && !has_aborted());
4342 
4343   if (!has_aborted()) {
4344     // We cannot check whether the global stack is empty, since other
4345     // tasks might be pushing objects to it concurrently. We also cannot
4346     // check if the region stack is empty because if a thread is aborting
4347     // it can push a partially done region back.
4348     assert(_cm->out_of_regions(),
4349            "at this point we should be out of regions");
4350 
4351     if (_cm->verbose_low()) {
4352       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4353     }
4354 
4355     // Try to reduce the number of available SATB buffers so that
4356     // remark has less work to do.
4357     drain_satb_buffers();
4358   }
4359 
4360   // Since we've done everything else, we can now totally drain the
4361   // local queue and global stack.
4362   drain_local_queue(false);
4363   drain_global_stack(false);
4364 
4365   // Attempt to steal work from other tasks' queues.
4366   if (do_stealing && !has_aborted()) {
4367     // We have not aborted. This means that we have finished all that
4368     // we could. Let's try to do some stealing...
4369 
4370     // We cannot check whether the global stack is empty, since other
4371     // tasks might be pushing objects to it concurrently. We also cannot
4372     // check if the region stack is empty because if a thread is aborting
4373     // it can push a partially done region back.
4374     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4375            "only way to reach here");
4376 
4377     if (_cm->verbose_low()) {
4378       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4379     }
4380 
4381     while (!has_aborted()) {
4382       oop obj;
4383       statsOnly( ++_steal_attempts );
4384 
4385       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4386         if (_cm->verbose_medium()) {
4387           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4388                                  _task_id, (void*) obj);
4389         }
4390 
4391         statsOnly( ++_steals );
4392 
4393         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4394                "any stolen object should be marked");
4395         scan_object(obj);
4396 
4397         // And since we're towards the end, let's totally drain the
4398         // local queue and global stack.
4399         drain_local_queue(false);
4400         drain_global_stack(false);
4401       } else {
4402         break;
4403       }
4404     }
4405   }
4406 
4407   // If we are about to wrap up and go into termination, check if we
4408   // should raise the overflow flag.
4409   if (do_termination && !has_aborted()) {
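         // force_overflow() is a hook that can artificially raise the
         // global overflow flag, so that the (otherwise rarely taken)
         // overflow and restart path can be exercised.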
4410     if (_cm->force_overflow()->should_force()) {
4411       _cm->set_has_overflown();
4412       regular_clock_call();
4413     }
4414   }
4415 
4416   // We still haven't aborted. Now, let's try to get into the
4417   // termination protocol.
4418   if (do_termination && !has_aborted()) {
4419     // We cannot check whether the global stack is empty, since other
4420     // tasks might be concurrently pushing objects on it. We also cannot
4421     // check if the region stack is empty because if a thread is aborting
4422     // it can push a partially done region back.
4423     // Separated the asserts so that we know which one fires.
4424     assert(_cm->out_of_regions(), "only way to reach here");
4425     assert(_task_queue->size() == 0, "only way to reach here");
4426 
4427     if (_cm->verbose_low()) {
4428       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4429     }
4430 
4431     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4432     // The CMTask class also extends the TerminatorTerminator class,
4433     // hence its should_exit_termination() method will also decide
4434     // whether to exit the termination protocol or not.
4435     bool finished = _cm->terminator()->offer_termination(this);
4436     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4437     _termination_time_ms +=
4438       termination_end_time_ms - _termination_start_time_ms;
4439 
4440     if (finished) {
4441       // We're all done.
4442 
4443       if (_task_id == 0) {
4444         // let's allow task 0 to do this
4445         if (concurrent()) {
4446           assert(_cm->concurrent_marking_in_progress(), "invariant");
4447           // we need to set this to false before the next
4448           // safepoint. This way we ensure that the marking phase
4449           // doesn't observe any more heap expansions.
4450           _cm->clear_concurrent_marking_in_progress();
4451         }
4452       }
4453 
4454       // We can now guarantee that the global stack is empty, since
4455       // all other tasks have finished. We separated the guarantees so
4456       // that, if a condition is false, we can immediately find out
4457       // which one.
4458       guarantee(_cm->out_of_regions(), "only way to reach here");
4459       guarantee(_aborted_region.is_empty(), "only way to reach here");
4460       guarantee(_cm->region_stack_empty(), "only way to reach here");
4461       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4462       guarantee(_task_queue->size() == 0, "only way to reach here");
4463       guarantee(!_cm->has_overflown(), "only way to reach here");
4464       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4465       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4466 
4467       if (_cm->verbose_low()) {
4468         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4469       }
4470     } else {
4471       // Apparently there's more work to do. Let's abort this task;
4472       // it will be restarted and we can hopefully find more things to do.
4473 
4474       if (_cm->verbose_low()) {
4475         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4476                                _task_id);
4477       }
4478 
4479       set_has_aborted();
4480       statsOnly( ++_aborted_termination );
4481     }
4482   }
4483 
4484   // Mainly for debugging purposes to make sure that a pointer to the
4485   // closure which was stack-allocated in this frame doesn't escape it
4486   // by accident.
4487   set_cm_oop_closure(NULL);
4488   double end_time_ms = os::elapsedVTime() * 1000.0;
4489   double elapsed_time_ms = end_time_ms - _start_time_ms;
4490   // Update the step history.
4491   _step_times_ms.add(elapsed_time_ms);
4492 
4493   if (has_aborted()) {
4494     // The task was aborted for some reason.
4495 
4496     statsOnly( ++_aborted );
4497 
4498     if (_has_timed_out) {
4499       double diff_ms = elapsed_time_ms - _time_target_ms;
4500       // Keep statistics of how well we did with respect to hitting
4501       // our target only if we actually timed out (if we aborted for
4502       // other reasons, then the results might get skewed).
4503       _marking_step_diffs_ms.add(diff_ms);
4504     }
4505 
4506     if (_cm->has_overflown()) {
4507       // This is the interesting one. We aborted because a global
4508       // overflow was raised. This means we have to restart the
4509       // marking phase and start iterating over regions. However, in
4510       // order to do this we have to make sure that all tasks stop
4511       // what they are doing and re-initialise in a safe manner. We
4512       // will achieve this with the use of two barrier sync points.
4513 
4514       if (_cm->verbose_low()) {
4515         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4516       }
4517 
4518       _cm->enter_first_sync_barrier(_task_id);
4519       // When we exit this sync barrier we know that all tasks have
4520       // stopped doing marking work. So, it's now safe to
4521       // re-initialise our data structures. At the end of this method,
4522       // task 0 will clear the global data structures.
4523 
4524       statsOnly( ++_aborted_overflow );
4525 
4526       // We clear the local state of this task...
4527       clear_region_fields();
4528 
4529       // ...and enter the second barrier.
4530       _cm->enter_second_sync_barrier(_task_id);
4531       // At this point everything has been re-initialised and we're
4532       // ready to restart.
4533     }
4534 
4535     if (_cm->verbose_low()) {
4536       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4537                              "elapsed = %1.2lfms <<<<<<<<<<",
4538                              _task_id, _time_target_ms, elapsed_time_ms);
4539       if (_cm->has_aborted()) {
4540         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4541                                _task_id);
4542       }
4543     }
4544   } else {
4545     if (_cm->verbose_low()) {
4546       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4547                              "elapsed = %1.2lfms <<<<<<<<<<",
4548                              _task_id, _time_target_ms, elapsed_time_ms);
4549     }
4550   }
4551 
4552   _claimed = false;
4553 }
4554 
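     // The constructor only wires the task to its marking context and
     // queues and seeds the per-task statistics; the next-mark bitmap is
     // initialised to NULL here and set up separately.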
4555 CMTask::CMTask(int task_id,
4556                ConcurrentMark* cm,
4557                CMTaskQueue* task_queue,
4558                CMTaskQueueSet* task_queues)
4559   : _g1h(G1CollectedHeap::heap()),
4560     _task_id(task_id), _cm(cm),
4561     _claimed(false),
4562     _nextMarkBitMap(NULL), _hash_seed(17),
4563     _task_queue(task_queue),
4564     _task_queues(task_queues),
4565     _cm_oop_closure(NULL),
4566     _aborted_region(MemRegion()) {
4567   guarantee(task_queue != NULL, "invariant");
4568   guarantee(task_queues != NULL, "invariant");
4569 
4570   statsOnly( _clock_due_to_scanning = 0;
4571              _clock_due_to_marking  = 0 );
4572 
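       // Give the step-time diff sequence an initial sample (0.5 ms) so
       // that the first call to get_new_prediction() has something to
       // work with.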
4573   _marking_step_diffs_ms.add(0.5);
4574 }
4575 
4576 // These are formatting macros that are used below to ensure
4577 // consistent formatting. The *_H_* versions are used to format the
4578 // header for a particular value and they should be kept consistent
4579 // with the corresponding macro. Also note that most of the macros add
4580 // the necessary white space (as a prefix) which makes them a bit
4581 // easier to compose.
4582 
4583 // All the output lines are prefixed with this string to be able to
4584 // identify them easily in a large log file.
4585 #define G1PPRL_LINE_PREFIX            "###"
4586 
4587 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4588 #ifdef _LP64
4589 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4590 #else // _LP64
4591 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4592 #endif // _LP64
4593 
4594 // For per-region info
4595 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4596 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4597 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4598 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4599 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4600 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4601 
4602 // For summary info
4603 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4604 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4605 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4606 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
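     // For illustration, a per-region line built from these macros has
     // the shape:
     //   ### <type> <bottom>-<end>  <used>  <prev-live>  <next-live>  <gc-eff>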
4607 
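     // The constructor below prints the table header, each doHeapRegion()
     // call prints one per-region line, and the destructor prints the
     // summary footer.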
4608 G1PrintRegionLivenessInfoClosure::
4609 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4610   : _out(out),
4611     _total_used_bytes(0), _total_capacity_bytes(0),
4612     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4613     _hum_used_bytes(0), _hum_capacity_bytes(0),
4614     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4615   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4616   MemRegion g1_committed = g1h->g1_committed();
4617   MemRegion g1_reserved = g1h->g1_reserved();
4618   double now = os::elapsedTime();
4619 
4620   // Print the header of the output.
4621   _out->cr();
4622   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4623   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4624                  G1PPRL_SUM_ADDR_FORMAT("committed")
4625                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4626                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4627                  g1_committed.start(), g1_committed.end(),
4628                  g1_reserved.start(), g1_reserved.end(),
4629                  HeapRegion::GrainBytes);
4630   _out->print_cr(G1PPRL_LINE_PREFIX);
4631   _out->print_cr(G1PPRL_LINE_PREFIX
4632                  G1PPRL_TYPE_H_FORMAT
4633                  G1PPRL_ADDR_BASE_H_FORMAT
4634                  G1PPRL_BYTE_H_FORMAT
4635                  G1PPRL_BYTE_H_FORMAT
4636                  G1PPRL_BYTE_H_FORMAT
4637                  G1PPRL_DOUBLE_H_FORMAT,
4638                  "type", "address-range",
4639                  "used", "prev-live", "next-live", "gc-eff");
4640 }
4641 
4642 // Takes a pointer to one of the _hum_* fields, deduces the
4643 // corresponding value for a region in a humongous region series
4644 // (either the region size, or what's left if the _hum_* field
4645 // is < the region size), and updates the _hum_* field accordingly.
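     // For example, with a 1 MB region size and 2.5 MB of used bytes in
     // the humongous series, three successive calls on the used-bytes
     // field return 1 MB, 1 MB and 0.5 MB, after which the field is
     // exhausted and further calls return 0.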
4646 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4647   size_t bytes = 0;
4648   // The > 0 check is to deal with the prev and next live bytes which
4649   // could be 0.
4650   if (*hum_bytes > 0) {
4651     bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
4652     *hum_bytes -= bytes;
4653   }
4654   return bytes;
4655 }
4656 
4657 // It deduces the values for a region in a humongous region series
4658 // from the _hum_* fields and updates those accordingly. It assumes
4659 // that the _hum_* fields have already been set up from the "starts
4660 // humongous" region and that we visit the regions in address order.
4661 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4662                                                      size_t* capacity_bytes,
4663                                                      size_t* prev_live_bytes,
4664                                                      size_t* next_live_bytes) {
4665   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4666   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4667   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4668   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4669   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4670 }
4671 
4672 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4673   const char* type = "";
4674   HeapWord* bottom       = r->bottom();
4675   HeapWord* end          = r->end();
4676   size_t capacity_bytes  = r->capacity();
4677   size_t used_bytes      = r->used();
4678   size_t prev_live_bytes = r->live_bytes();
4679   size_t next_live_bytes = r->next_live_bytes();
4680   double gc_eff          = r->gc_efficiency();
4681   if (r->used() == 0) {
4682     type = "FREE";
4683   } else if (r->is_survivor()) {
4684     type = "SURV";
4685   } else if (r->is_young()) {
4686     type = "EDEN";
4687   } else if (r->startsHumongous()) {
4688     type = "HUMS";
4689 
4690     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4691            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4692            "they should have been zeroed after the last time we used them");
4693     // Set up the _hum_* fields.
4694     _hum_capacity_bytes  = capacity_bytes;
4695     _hum_used_bytes      = used_bytes;
4696     _hum_prev_live_bytes = prev_live_bytes;
4697     _hum_next_live_bytes = next_live_bytes;
4698     get_hum_bytes(&used_bytes, &capacity_bytes,
4699                   &prev_live_bytes, &next_live_bytes);
4700     end = bottom + HeapRegion::GrainWords;
4701   } else if (r->continuesHumongous()) {
4702     type = "HUMC";
4703     get_hum_bytes(&used_bytes, &capacity_bytes,
4704                   &prev_live_bytes, &next_live_bytes);
4705     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4706   } else {
4707     type = "OLD";
4708   }
4709 
4710   _total_used_bytes      += used_bytes;
4711   _total_capacity_bytes  += capacity_bytes;
4712   _total_prev_live_bytes += prev_live_bytes;
4713   _total_next_live_bytes += next_live_bytes;
4714 
4715   // Print a line for this particular region.
4716   _out->print_cr(G1PPRL_LINE_PREFIX
4717                  G1PPRL_TYPE_FORMAT
4718                  G1PPRL_ADDR_BASE_FORMAT
4719                  G1PPRL_BYTE_FORMAT
4720                  G1PPRL_BYTE_FORMAT
4721                  G1PPRL_BYTE_FORMAT
4722                  G1PPRL_DOUBLE_FORMAT,
4723                  type, bottom, end,
4724                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4725 
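       // Returning false keeps the heap region iteration going over the
       // remaining regions.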
4726   return false;
4727 }
4728 
4729 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4730   // Print the footer of the output.
4731   _out->print_cr(G1PPRL_LINE_PREFIX);
4732   _out->print_cr(G1PPRL_LINE_PREFIX
4733                  " SUMMARY"
4734                  G1PPRL_SUM_MB_FORMAT("capacity")
4735                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4736                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4737                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4738                  bytes_to_mb(_total_capacity_bytes),
4739                  bytes_to_mb(_total_used_bytes),
4740                  perc(_total_used_bytes, _total_capacity_bytes),
4741                  bytes_to_mb(_total_prev_live_bytes),
4742                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4743                  bytes_to_mb(_total_next_live_bytes),
4744                  perc(_total_next_live_bytes, _total_capacity_bytes));
4745   _out->cr();
4746 }