1 /*
   2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  33 #include "gc_implementation/g1/g1RemSet.hpp"
  34 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  35 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  36 #include "gc_implementation/shared/vmGCOperations.hpp"
  37 #include "memory/genOopClosures.inline.hpp"
  38 #include "memory/referencePolicy.hpp"
  39 #include "memory/resourceArea.hpp"
  40 #include "oops/oop.inline.hpp"
  41 #include "runtime/handles.inline.hpp"
  42 #include "runtime/java.hpp"
  43 
  44 //
  45 // Concurrent Mark (CM) Bit Map Wrapper
  46 
  47 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  48   _bm((uintptr_t*)NULL,0),
  49   _shifter(shifter) {
  50   _bmStartWord = (HeapWord*)(rs.base());
  51   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  52   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  53                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  54 
  55   guarantee(brs.is_reserved(), "couldn't allocate CM bit map");
  56   // For now we'll just commit all of the bit map up front.
  57   // Later on we'll try to be more parsimonious with swap.
  58   guarantee(_virtual_space.initialize(brs, brs.size()),
  59             "couldn't reserve backing store for CM bit map");
  60   assert(_virtual_space.committed_size() == brs.size(),
  61          "didn't reserve backing store for all of CM bit map?");
  62   _bm.set_map((uintptr_t*)_virtual_space.low());
  63   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  64          _bmWordSize, "inconsistency in bit map sizing");
  65   _bm.set_size(_bmWordSize >> _shifter);
  66 }
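// Illustrative sizing arithmetic for the bit map above (the numbers are a
// hypothetical example, not taken from any particular configuration): with
// 8-byte heap words, _shifter == 0 (one bit per heap word) and a 1 GB
// reservation,
//   _bmWordSize              = 2^30 / 8                        = 2^27 words
//   bits needed              = _bmWordSize >> _shifter          = 2^27 bits
//   bytes requested for brs  = (2^27 >> LogBitsPerByte) + 1     = 2^24 + 1
// i.e. roughly 16 MB of backing store, before the round-up performed by
// ReservedSpace::allocation_align_size_up().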
  67 
  68 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  69                                                HeapWord* limit) const {
  70   // First we must round addr *up* to a possible object boundary.
  71   addr = (HeapWord*)align_size_up((intptr_t)addr,
  72                                   HeapWordSize << _shifter);
  73   size_t addrOffset = heapWordToOffset(addr);
  74   if (limit == NULL) {
  75     limit = _bmStartWord + _bmWordSize;
  76   }
  77   size_t limitOffset = heapWordToOffset(limit);
  78   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  79   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  80   assert(nextAddr >= addr, "get_next_one postcondition");
  81   assert(nextAddr == limit || isMarked(nextAddr),
  82          "get_next_one postcondition");
  83   return nextAddr;
  84 }
  85 
  86 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  87                                                  HeapWord* limit) const {
  88   size_t addrOffset = heapWordToOffset(addr);
  89   if (limit == NULL) {
  90     limit = _bmStartWord + _bmWordSize;
  91   }
  92   size_t limitOffset = heapWordToOffset(limit);
  93   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  94   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  95   assert(nextAddr >= addr, "get_next_one postcondition");
  96   assert(nextAddr == limit || !isMarked(nextAddr),
  97          "get_next_one postcondition");
  98   return nextAddr;
  99 }
 100 
 101 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 102   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 103   return (int) (diff >> _shifter);
 104 }
 105 
 106 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 107   HeapWord* left  = MAX2(_bmStartWord, mr.start());
 108   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
 109   if (right > left) {
 110     // Right-open interval [leftOffset, rightOffset).
 111     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
 112   } else {
 113     return true;
 114   }
 115 }
 116 
 117 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
 118                                              size_t    from_start_index,
 119                                              HeapWord* to_start_word,
 120                                              size_t    word_num) {
 121   _bm.mostly_disjoint_range_union(from_bitmap,
 122                                   from_start_index,
 123                                   heapWordToOffset(to_start_word),
 124                                   word_num);
 125 }
 126 
 127 #ifndef PRODUCT
 128 bool CMBitMapRO::covers(ReservedSpace rs) const {
 129   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 130   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 131          "size inconsistency");
 132   return _bmStartWord == (HeapWord*)(rs.base()) &&
 133          _bmWordSize  == rs.size()>>LogHeapWordSize;
 134 }
 135 #endif
 136 
 137 void CMBitMap::clearAll() {
 138   _bm.clear();
 139   return;
 140 }
 141 
 142 void CMBitMap::markRange(MemRegion mr) {
 143   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 144   assert(!mr.is_empty(), "unexpected empty region");
 145   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 146           ((HeapWord *) mr.end())),
 147          "markRange memory region end is not card aligned");
 148   // convert address range into offset range
 149   _bm.at_put_range(heapWordToOffset(mr.start()),
 150                    heapWordToOffset(mr.end()), true);
 151 }
 152 
 153 void CMBitMap::clearRange(MemRegion mr) {
 154   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 155   assert(!mr.is_empty(), "unexpected empty region");
 156   // convert address range into offset range
 157   _bm.at_put_range(heapWordToOffset(mr.start()),
 158                    heapWordToOffset(mr.end()), false);
 159 }
 160 
 161 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 162                                             HeapWord* end_addr) {
 163   HeapWord* start = getNextMarkedWordAddress(addr);
 164   start = MIN2(start, end_addr);
 165   HeapWord* end   = getNextUnmarkedWordAddress(start);
 166   end = MIN2(end, end_addr);
 167   assert(start <= end, "Consistency check");
 168   MemRegion mr(start, end);
 169   if (!mr.is_empty()) {
 170     clearRange(mr);
 171   }
 172   return mr;
 173 }
 174 
 175 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 176   _base(NULL), _cm(cm)
 177 #ifdef ASSERT
 178   , _drain_in_progress(false)
 179   , _drain_in_progress_yields(false)
 180 #endif
 181 {}
 182 
 183 void CMMarkStack::allocate(size_t size) {
 184   _base = NEW_C_HEAP_ARRAY(oop, size);
 185   if (_base == NULL) {
 186     vm_exit_during_initialization("Failed to allocate "
 187                                   "CM marking stack");
 188   }
 189   _index = 0;
 190   _capacity = (jint) size;
 191   _oops_do_bound = -1;
 192   NOT_PRODUCT(_max_depth = 0);
 193 }
 194 
 195 CMMarkStack::~CMMarkStack() {
 196   if (_base != NULL) {
 197     FREE_C_HEAP_ARRAY(oop, _base);
 198   }
 199 }
 200 
 201 void CMMarkStack::par_push(oop ptr) {
 202   while (true) {
 203     if (isFull()) {
 204       _overflow = true;
 205       return;
 206     }
 207     // Otherwise...
 208     jint index = _index;
 209     jint next_index = index+1;
 210     jint res = Atomic::cmpxchg(next_index, &_index, index);
 211     if (res == index) {
 212       _base[index] = ptr;
 213       // Note that we don't maintain this atomically.  We could, but it
 214       // doesn't seem necessary.
 215       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 216       return;
 217     }
 218     // Otherwise, we need to try again.
 219   }
 220 }
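// The claim-then-write pattern above (CAS to advance _index, then fill the
// claimed slot) is easier to see in isolation. The following is a minimal,
// self-contained sketch of the same idea using std::atomic; it is not
// HotSpot code, and all names, types and the fixed capacity are
// hypothetical. Note the same caveat as par_push(): a concurrent reader
// could observe the advanced index before the slot store completes, so
// consumers must only drain at points where pushers are quiescent.
#if 0
#include <atomic>
#include <cstddef>

template <typename T, std::size_t Capacity>
class BoundedParStackSketch {
  T                        _slots[Capacity];
  std::atomic<std::size_t> _index;     // next free slot
  std::atomic<bool>        _overflow;
public:
  BoundedParStackSketch() : _index(0), _overflow(false) { }

  void par_push(const T& value) {
    while (true) {
      std::size_t index = _index.load();
      if (index >= Capacity) {
        _overflow.store(true);          // stack full: record overflow
        return;
      }
      std::size_t next_index = index + 1;
      // Try to claim slot 'index'; on failure another pusher got there
      // first, so loop around, re-read _index and retry.
      if (_index.compare_exchange_weak(index, next_index)) {
        _slots[index] = value;          // write into the claimed slot
        return;
      }
    }
  }
};
#endif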
 221 
 222 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 223   while (true) {
 224     if (isFull()) {
 225       _overflow = true;
 226       return;
 227     }
 228     // Otherwise...
 229     jint index = _index;
 230     jint next_index = index + n;
 231     if (next_index > _capacity) {
 232       _overflow = true;
 233       return;
 234     }
 235     jint res = Atomic::cmpxchg(next_index, &_index, index);
 236     if (res == index) {
 237       for (int i = 0; i < n; i++) {
 238         int ind = index + i;
 239         assert(ind < _capacity, "By overflow test above.");
 240         _base[ind] = ptr_arr[i];
 241       }
 242       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 243       return;
 244     }
 245     // Otherwise, we need to try again.
 246   }
 247 }
 248 
 249 
 250 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 251   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 252   jint start = _index;
 253   jint next_index = start + n;
 254   if (next_index > _capacity) {
 255     _overflow = true;
 256     return;
 257   }
 258   // Otherwise.
 259   _index = next_index;
 260   for (int i = 0; i < n; i++) {
 261     int ind = start + i;
 262     assert(ind < _capacity, "By overflow test above.");
 263     _base[ind] = ptr_arr[i];
 264   }
 265 }
 266 
 267 
 268 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 269   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 270   jint index = _index;
 271   if (index == 0) {
 272     *n = 0;
 273     return false;
 274   } else {
 275     int k = MIN2(max, index);
 276     jint new_ind = index - k;
 277     for (int j = 0; j < k; j++) {
 278       ptr_arr[j] = _base[new_ind + j];
 279     }
 280     _index = new_ind;
 281     *n = k;
 282     return true;
 283   }
 284 }
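// A hypothetical caller-side sketch (illustration only, not part of the
// marking code) showing how par_pop_arr() is meant to be used: keep
// popping chunks until the method reports that the stack is empty.
#if 0
static void drain_mark_stack_in_chunks_example(CMMarkStack* mark_stack) {
  oop buffer[64];
  int n;
  while (mark_stack->par_pop_arr(buffer, 64, &n)) {
    for (int i = 0; i < n; i++) {
      // ... process buffer[i] ...
    }
  }
}
#endif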
 285 
 286 
 287 CMRegionStack::CMRegionStack() : _base(NULL) {}
 288 
 289 void CMRegionStack::allocate(size_t size) {
 290   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 291   if (_base == NULL) {
 292     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 293   }
 294   _index = 0;
 295   _capacity = (jint) size;
 296 }
 297 
 298 CMRegionStack::~CMRegionStack() {
 299   if (_base != NULL) {
 300     FREE_C_HEAP_ARRAY(MemRegion, _base);
 301   }
 302 }
 303 
 304 void CMRegionStack::push_lock_free(MemRegion mr) {
 305   assert(mr.word_size() > 0, "Precondition");
 306   while (true) {
 307     jint index = _index;
 308 
 309     if (index >= _capacity) {
 310       _overflow = true;
 311       return;
 312     }
 313     // Otherwise...
 314     jint next_index = index+1;
 315     jint res = Atomic::cmpxchg(next_index, &_index, index);
 316     if (res == index) {
 317       _base[index] = mr;
 318       return;
 319     }
 320     // Otherwise, we need to try again.
 321   }
 322 }
 323 
 324 // Lock-free pop of the region stack. Called during the concurrent
 325 // marking / remark phases. Should only be called in tandem with
 326 // other lock-free pops.
 327 MemRegion CMRegionStack::pop_lock_free() {
 328   while (true) {
 329     jint index = _index;
 330 
 331     if (index == 0) {
 332       return MemRegion();
 333     }
 334     // Otherwise...
 335     jint next_index = index-1;
 336     jint res = Atomic::cmpxchg(next_index, &_index, index);
 337     if (res == index) {
 338       MemRegion mr = _base[next_index];
 339       if (mr.start() != NULL) {
 340         assert(mr.end() != NULL, "invariant");
 341         assert(mr.word_size() > 0, "invariant");
 342         return mr;
 343       } else {
 344         // that entry was invalidated... let's skip it
 345         assert(mr.end() == NULL, "invariant");
 346       }
 347     }
 348     // Otherwise, we need to try again.
 349   }
 350 }
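// A hypothetical caller-side sketch (illustration only): draining the
// region stack with lock-free pops. An empty MemRegion is the sentinel
// meaning the stack has been exhausted; invalidated entries are skipped
// inside pop_lock_free() itself.
#if 0
static void drain_region_stack_example(CMRegionStack* region_stack) {
  while (true) {
    MemRegion mr = region_stack->pop_lock_free();
    if (mr.is_empty()) {
      break;  // nothing (valid) left on the stack
    }
    // ... scan the objects spanned by [mr.start(), mr.end()) ...
  }
}
#endif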
 351 
 352 #if 0
 353 // The routines that manipulate the region stack with a lock are
 354 // not currently used. They should be retained, however, as a
 355 // diagnostic aid.
 356 
 357 void CMRegionStack::push_with_lock(MemRegion mr) {
 358   assert(mr.word_size() > 0, "Precondition");
 359   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 360 
 361   if (isFull()) {
 362     _overflow = true;
 363     return;
 364   }
 365 
 366   _base[_index] = mr;
 367   _index += 1;
 368 }
 369 
 370 MemRegion CMRegionStack::pop_with_lock() {
 371   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 372 
 373   while (true) {
 374     if (_index == 0) {
 375       return MemRegion();
 376     }
 377     _index -= 1;
 378 
 379     MemRegion mr = _base[_index];
 380     if (mr.start() != NULL) {
 381       assert(mr.end() != NULL, "invariant");
 382       assert(mr.word_size() > 0, "invariant");
 383       return mr;
 384     } else {
 385       // that entry was invalidated... let's skip it
 386       assert(mr.end() == NULL, "invariant");
 387     }
 388   }
 389 }
 390 #endif
 391 
 392 bool CMRegionStack::invalidate_entries_into_cset() {
 393   bool result = false;
 394   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 395   for (int i = 0; i < _oops_do_bound; ++i) {
 396     MemRegion mr = _base[i];
 397     if (mr.start() != NULL) {
 398       assert(mr.end() != NULL, "invariant");
 399       assert(mr.word_size() > 0, "invariant");
 400       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 401       assert(hr != NULL, "invariant");
 402       if (hr->in_collection_set()) {
 403         // The region points into the collection set
 404         _base[i] = MemRegion();
 405         result = true;
 406       }
 407     } else {
 408       // that entry was invalidated... let's skip it
 409       assert(mr.end() == NULL, "invariant");
 410     }
 411   }
 412   return result;
 413 }
 414 
 415 template<class OopClosureClass>
 416 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 417   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 418          || SafepointSynchronize::is_at_safepoint(),
 419          "Drain recursion must be yield-safe.");
 420   bool res = true;
 421   debug_only(_drain_in_progress = true);
 422   debug_only(_drain_in_progress_yields = yield_after);
 423   while (!isEmpty()) {
 424     oop newOop = pop();
 425     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 426     assert(newOop->is_oop(), "Expected an oop");
 427     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 428            "only grey objects on this stack");
 429     // iterate over the oops in this oop, marking and pushing
 430     // the ones in the G1 heap.
 431     newOop->oop_iterate(cl);
 432     if (yield_after && _cm->do_yield_check()) {
 433       res = false;
 434       break;
 435     }
 436   }
 437   debug_only(_drain_in_progress = false);
 438   return res;
 439 }
 440 
 441 void CMMarkStack::oops_do(OopClosure* f) {
 442   if (_index == 0) return;
 443   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 444          "Bound must be set.");
 445   for (int i = 0; i < _oops_do_bound; i++) {
 446     f->do_oop(&_base[i]);
 447   }
 448   _oops_do_bound = -1;
 449 }
 450 
 451 bool ConcurrentMark::not_yet_marked(oop obj) const {
 452   return (_g1h->is_obj_ill(obj)
 453           || (_g1h->is_in_permanent(obj)
 454               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 455 }
 456 
 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 459 #endif // _MSC_VER
 460 
 461 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 462                                int max_regions) :
 463   _markBitMap1(rs, MinObjAlignment - 1),
 464   _markBitMap2(rs, MinObjAlignment - 1),
 465 
 466   _parallel_marking_threads(0),
 467   _sleep_factor(0.0),
 468   _marking_task_overhead(1.0),
 469   _cleanup_sleep_factor(0.0),
 470   _cleanup_task_overhead(1.0),
 471   _cleanup_list("Cleanup List"),
 472   _region_bm(max_regions, false /* in_resource_area*/),
 473   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 474            CardTableModRefBS::card_shift,
 475            false /* in_resource_area*/),
 476 
 477   _prevMarkBitMap(&_markBitMap1),
 478   _nextMarkBitMap(&_markBitMap2),
 479   _at_least_one_mark_complete(false),
 480 
 481   _markStack(this),
 482   _regionStack(),
 483   // _finger set in set_non_marking_state
 484 
 485   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 486   // _active_tasks set in set_non_marking_state
 487   // _tasks set inside the constructor
 488   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 489   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 490 
 491   _has_overflown(false),
 492   _concurrent(false),
 493   _has_aborted(false),
 494   _restart_for_overflow(false),
 495   _concurrent_marking_in_progress(false),
 496   _should_gray_objects(false),
 497 
 498   // _verbose_level set below
 499 
 500   _init_times(),
 501   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 502   _cleanup_times(),
 503   _total_counting_time(0.0),
 504   _total_rs_scrub_time(0.0),
 505 
 506   _parallel_workers(NULL),
 507 
 508   _count_card_bitmaps(NULL),
 509   _count_marked_bytes(NULL)
 510 {
 511   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 512   if (verbose_level < no_verbose) {
 513     verbose_level = no_verbose;
 514   }
 515   if (verbose_level > high_verbose) {
 516     verbose_level = high_verbose;
 517   }
 518   _verbose_level = verbose_level;
 519 
 520   if (verbose_low()) {
 521     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 522                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 523   }
 524 
 525   _markStack.allocate(MarkStackSize);
 526   _regionStack.allocate(G1MarkRegionStackSize);
 527 
 528   // Create & start a ConcurrentMark thread.
 529   _cmThread = new ConcurrentMarkThread(this);
 530   assert(cmThread() != NULL, "CM Thread should have been created");
 531   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 532 
 533   _g1h = G1CollectedHeap::heap();
 534   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 535   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 536   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 537 
 538   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 539   satb_qs.set_buffer_size(G1SATBBufferSize);
 540 
 541   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 542   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 543 
 544   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num);
 545   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);
 546 
 547   BitMap::idx_t card_bm_size = _card_bm.size();
 548 
 549   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 550   _active_tasks = _max_task_num;
 551   for (int i = 0; i < (int) _max_task_num; ++i) {
 552     CMTaskQueue* task_queue = new CMTaskQueue();
 553     task_queue->initialize();
 554     _task_queues->register_queue(i, task_queue);
 555 
 556     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 557     _accum_task_vtime[i] = 0.0;
 558 
 559     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 560     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);
 561   }
 562 
 563   if (ConcGCThreads > ParallelGCThreads) {
 564     vm_exit_during_initialization("Can't have more ConcGCThreads "
 565                                   "than ParallelGCThreads.");
 566   }
 567   if (ParallelGCThreads == 0) {
 568     // if we are not running with any parallel GC threads we will not
 569     // spawn any marking threads either
 570     _parallel_marking_threads = 0;
 571     _sleep_factor             = 0.0;
 572     _marking_task_overhead    = 1.0;
 573   } else {
 574     if (ConcGCThreads > 0) {
 575       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
 576       // if both are set
 577 
 578       _parallel_marking_threads = ConcGCThreads;
 579       _sleep_factor             = 0.0;
 580       _marking_task_overhead    = 1.0;
 581     } else if (G1MarkingOverheadPercent > 0) {
 582       // we will calculate the number of parallel marking threads
 583       // based on a target overhead with respect to the soft real-time
 584       // goal
 585 
 586       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 587       double overall_cm_overhead =
 588         (double) MaxGCPauseMillis * marking_overhead /
 589         (double) GCPauseIntervalMillis;
 590       double cpu_ratio = 1.0 / (double) os::processor_count();
 591       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 592       double marking_task_overhead =
 593         overall_cm_overhead / marking_thread_num *
 594                                                 (double) os::processor_count();
 595       double sleep_factor =
 596                          (1.0 - marking_task_overhead) / marking_task_overhead;
 597 
 598       _parallel_marking_threads = (size_t) marking_thread_num;
 599       _sleep_factor             = sleep_factor;
 600       _marking_task_overhead    = marking_task_overhead;
 601     } else {
 602       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
 603       _sleep_factor             = 0.0;
 604       _marking_task_overhead    = 1.0;
 605     }
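    // Worked example of the G1MarkingOverheadPercent branch above, with
    // purely illustrative flag values (8 CPUs, MaxGCPauseMillis = 200,
    // GCPauseIntervalMillis = 1000, G1MarkingOverheadPercent = 50):
    //   marking_overhead      = 50 / 100           = 0.5
    //   overall_cm_overhead   = 200 * 0.5 / 1000    = 0.1
    //   cpu_ratio             = 1 / 8               = 0.125
    //   marking_thread_num    = ceil(0.1 / 0.125)   = 1
    //   marking_task_overhead = 0.1 / 1 * 8         = 0.8
    //   sleep_factor          = (1 - 0.8) / 0.8     = 0.25
    // i.e. a single marking thread that works roughly 80% of the time and
    // sleeps for a quarter of each work interval.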
 606 
 607     if (parallel_marking_threads() > 1) {
 608       _cleanup_task_overhead = 1.0;
 609     } else {
 610       _cleanup_task_overhead = marking_task_overhead();
 611     }
 612     _cleanup_sleep_factor =
 613                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 614 
 615 #if 0
 616     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 617     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 618     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 619     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 620     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 621 #endif
 622 
 623     guarantee(parallel_marking_threads() > 0, "peace of mind");
 624     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 625          (int) _parallel_marking_threads, false, true);
 626     if (_parallel_workers == NULL) {
 627       vm_exit_during_initialization("Failed necessary allocation.");
 628     } else {
 629       _parallel_workers->initialize_workers();
 630     }
 631   }
 632 
 633   // so that the call below can read a sensible value
 634   _heap_start = (HeapWord*) rs.base();
 635   set_non_marking_state();
 636 }
 637 
 638 void ConcurrentMark::update_g1_committed(bool force) {
 639   // If concurrent marking is not in progress, then we do not need to
 640   // update _heap_end. This has a subtle and important
 641   // side-effect. Imagine that two evacuation pauses happen between
 642   // marking completion and remark. The first one can grow the
 643   // heap (hence now the finger is below the heap end). Then, the
 644   // second one could unnecessarily push regions on the region
 645   // stack. This causes the invariant that the region stack is empty
 646   // at the beginning of remark to be false. By ensuring that we do
 647   // not observe heap expansions after marking is complete, we avoid
 648   // this problem.
 649   if (!concurrent_marking_in_progress() && !force) return;
 650 
 651   MemRegion committed = _g1h->g1_committed();
 652   assert(committed.start() == _heap_start, "start shouldn't change");
 653   HeapWord* new_end = committed.end();
 654   if (new_end > _heap_end) {
 655     // The heap has been expanded.
 656 
 657     _heap_end = new_end;
 658   }
 659   // Notice that the heap can also shrink. However, this only happens
 660   // during a Full GC (at least currently) and the entire marking
 661   // phase will bail out and the task will not be restarted. So, let's
 662   // do nothing.
 663 }
 664 
 665 void ConcurrentMark::reset() {
 666   // Starting values for these two. This should be called in a STW
 667   // phase. CM will be notified of any future g1_committed expansions
 668   // at the end of evacuation pauses, when tasks are
 669   // inactive.
 670   MemRegion committed = _g1h->g1_committed();
 671   _heap_start = committed.start();
 672   _heap_end   = committed.end();
 673 
 674   // Separated the asserts so that we know which one fires.
 675   assert(_heap_start != NULL, "heap bounds should look ok");
 676   assert(_heap_end != NULL, "heap bounds should look ok");
 677   assert(_heap_start < _heap_end, "heap bounds should look ok");
 678 
 679   // reset all the marking data structures and any necessary flags
 680   clear_marking_state();
 681 
 682   clear_all_count_data();
 683 
 684   if (verbose_low()) {
 685     gclog_or_tty->print_cr("[global] resetting");
 686   }
 687 
 688   // We do reset all of them, since different phases will use
 689   // a different number of active threads. So, it's easiest to have all
 690   // of them ready.
 691   for (int i = 0; i < (int) _max_task_num; ++i) {
 692     _tasks[i]->reset(_nextMarkBitMap);
 693   }
 694 
 695   // we need this to make sure that the flag is on during the evac
 696   // pause with initial mark piggy-backed
 697   set_concurrent_marking_in_progress();
 698 }
 699 
 700 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 701   assert(active_tasks <= _max_task_num, "we should not have more");
 702 
 703   _active_tasks = active_tasks;
 704   // Need to update the three data structures below according to the
 705   // number of active threads for this phase.
 706   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 707   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 708   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 709 
 710   _concurrent = concurrent;
 711   // We propagate this to all tasks, not just the active ones.
 712   for (int i = 0; i < (int) _max_task_num; ++i)
 713     _tasks[i]->set_concurrent(concurrent);
 714 
 715   if (concurrent) {
 716     set_concurrent_marking_in_progress();
 717   } else {
 718     // We currently assume that the concurrent flag has been set to
 719     // false before we start remark. At this point we should also be
 720     // in a STW phase.
 721     assert(!concurrent_marking_in_progress(), "invariant");
 722     assert(_finger == _heap_end, "only way to get here");
 723     update_g1_committed(true);
 724   }
 725 }
 726 
 727 void ConcurrentMark::set_non_marking_state() {
 728   // We set the global marking state to some default values when we're
 729   // not doing marking.
 730   clear_marking_state();
 731   _active_tasks = 0;
 732   clear_concurrent_marking_in_progress();
 733 }
 734 
 735 ConcurrentMark::~ConcurrentMark() {
 736   for (int i = 0; i < (int) _max_task_num; ++i) {
 737     delete _task_queues->queue(i);
 738     delete _tasks[i];
 739 
 740     _count_card_bitmaps[i].resize(0, false);
 741     FREE_C_HEAP_ARRAY(size_t, _count_marked_bytes[i]);
 742   }
 743 
 744   delete _task_queues;
 745   FREE_C_HEAP_ARRAY(CMTask*, _tasks);
 746   FREE_C_HEAP_ARRAY(double, _accum_task_vtime);
 747 
 748   FREE_C_HEAP_ARRAY(BitMap*, _count_card_bitmaps);
 749   FREE_C_HEAP_ARRAY(size_t*, _count_marked_bytes);
 750 }
 751 
 752 // This closure is used to mark refs into the g1 generation
 753 // from external roots in the CMS bit map.
 754 // Called at the first checkpoint.
 755 //
 756 
 757 void ConcurrentMark::clearNextBitmap() {
 758   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 759   G1CollectorPolicy* g1p = g1h->g1_policy();
 760 
 761   // Make sure that the concurrent mark thread looks to still be in
 762   // the current cycle.
 763   guarantee(cmThread()->during_cycle(), "invariant");
 764 
 765   // We are finishing up the current cycle by clearing the next
 766   // marking bitmap and getting it ready for the next cycle. During
 767   // this time no other cycle can start. So, let's make sure that this
 768   // is the case.
 769   guarantee(!g1h->mark_in_progress(), "invariant");
 770 
 771   // clear the mark bitmap (no grey objects to start with).
 772   // We need to do this in chunks and offer to yield in between
 773   // each chunk.
 774   HeapWord* start  = _nextMarkBitMap->startWord();
 775   HeapWord* end    = _nextMarkBitMap->endWord();
 776   HeapWord* cur    = start;
 777   size_t chunkSize = M;
 778   while (cur < end) {
 779     HeapWord* next = cur + chunkSize;
 780     if (next > end) {
 781       next = end;
 782     }
 783     MemRegion mr(cur,next);
 784     _nextMarkBitMap->clearRange(mr);
 785     cur = next;
 786     do_yield_check();
 787 
 788     // Repeat the asserts from above. We'll do them as asserts here to
 789     // minimize their overhead on the product. However, we'll have
 790     // them as guarantees at the beginning / end of the bitmap
 791     // clearing to get some checking in the product.
 792     assert(cmThread()->during_cycle(), "invariant");
 793     assert(!g1h->mark_in_progress(), "invariant");
 794   }
 795 
 796   // Repeat the asserts from above.
 797   guarantee(cmThread()->during_cycle(), "invariant");
 798   guarantee(!g1h->mark_in_progress(), "invariant");
 799 }
 800 
 801 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 802 public:
 803   bool doHeapRegion(HeapRegion* r) {
 804     if (!r->continuesHumongous()) {
 805       r->note_start_of_marking(true);
 806     }
 807     return false;
 808   }
 809 };
 810 
 811 void ConcurrentMark::checkpointRootsInitialPre() {
 812   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 813   G1CollectorPolicy* g1p = g1h->g1_policy();
 814 
 815   _has_aborted = false;
 816 
 817 #ifndef PRODUCT
 818   if (G1PrintReachableAtInitialMark) {
 819     print_reachable("at-cycle-start",
 820                     VerifyOption_G1UsePrevMarking, true /* all */);
 821   }
 822 #endif
 823 
 824   // Initialise marking structures. This has to be done in a STW phase.
 825   reset();
 826 }
 827 
 828 
 829 void ConcurrentMark::checkpointRootsInitialPost() {
 830   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 831 
 832   // If we force an overflow during remark, the remark operation will
 833   // actually abort and we'll restart concurrent marking. If we always
 834   // force an overflow during remark we'll never actually complete the
 835   // marking phase. So, we initialize this here, at the start of the
 836   // cycle, so that the remaining overflow count will decrease at
 837   // every remark and we'll eventually not need to cause one.
 838   force_overflow_stw()->init();
 839 
 840   // For each region note start of marking.
 841   NoteStartOfMarkHRClosure startcl;
 842   g1h->heap_region_iterate(&startcl);
 843 
 844   // Start Concurrent Marking weak-reference discovery.
 845   ReferenceProcessor* rp = g1h->ref_processor_cm();
 846   // enable ("weak") refs discovery
 847   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 848   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 849 
 850   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 851   // This is the start of the marking cycle; we expect all
 852   // threads to have SATB queues with active set to false.
 853   satb_mq_set.set_active_all_threads(true, /* new active value */
 854                                      false /* expected_active */);
 855 
 856   // update_g1_committed() will be called at the end of an evac pause
 857   // when marking is on. So, it's also called at the end of the
 858   // initial-mark pause to update the heap end, if the heap expands
 859   // during it. No need to call it here.
 860 }
 861 
 862 /*
 863  * Notice that in the next two methods, we actually leave the STS
 864  * during the barrier sync and join it immediately afterwards. If we
 865  * do not do this, the following deadlock can occur: one thread could
 866  * be in the barrier sync code, waiting for the other thread to also
 867  * sync up, whereas another one could be trying to yield, while also
 868  * waiting for the other threads to sync up too.
 869  *
 870  * Note, however, that this code is also used during remark and in
 871  * this case we should not attempt to leave / enter the STS, otherwise
 872  * we'll either hit an assert (debug / fastdebug) or deadlock
 873  * (product). So we should only leave / enter the STS if we are
 874  * operating concurrently.
 875  *
 876  * Because the thread that does the sync barrier has left the STS, it
 877  * is possible for a Full GC or an evacuation pause to occur while it
 878  * is suspended. This is actually safe, since entering the sync
 879  * barrier is one of the last things do_marking_step() does, and it
 880  * doesn't manipulate any data structures afterwards.
 881  */
 882 
 883 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 884   if (verbose_low()) {
 885     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 886   }
 887 
 888   if (concurrent()) {
 889     ConcurrentGCThread::stsLeave();
 890   }
 891   _first_overflow_barrier_sync.enter();
 892   if (concurrent()) {
 893     ConcurrentGCThread::stsJoin();
 894   }
 895   // at this point everyone should have synced up and not be doing any
 896   // more work
 897 
 898   if (verbose_low()) {
 899     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 900   }
 901 
 902   // let task 0 do this
 903   if (task_num == 0) {
 904     // task 0 is responsible for clearing the global data structures
 905     // We should be here because of an overflow. During STW we should
 906     // not clear the overflow flag since we rely on it being true when
 907     // we exit this method to abort the pause and restart concurrent
 908     // marking.
 909     clear_marking_state(concurrent() /* clear_overflow */);
 910     force_overflow()->update();
 911 
 912     if (PrintGC) {
 913       gclog_or_tty->date_stamp(PrintGCDateStamps);
 914       gclog_or_tty->stamp(PrintGCTimeStamps);
 915       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 916     }
 917   }
 918 
 919   // after this, each task should reset its own data structures and
 920   // then go into the second barrier
 921 }
 922 
 923 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 924   if (verbose_low()) {
 925     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 926   }
 927 
 928   if (concurrent()) {
 929     ConcurrentGCThread::stsLeave();
 930   }
 931   _second_overflow_barrier_sync.enter();
 932   if (concurrent()) {
 933     ConcurrentGCThread::stsJoin();
 934   }
 935   // at this point everything should be re-initialised and ready to go
 936 
 937   if (verbose_low()) {
 938     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 939   }
 940 }
 941 
 942 #ifndef PRODUCT
 943 void ForceOverflowSettings::init() {
 944   _num_remaining = G1ConcMarkForceOverflow;
 945   _force = false;
 946   update();
 947 }
 948 
 949 void ForceOverflowSettings::update() {
 950   if (_num_remaining > 0) {
 951     _num_remaining -= 1;
 952     _force = true;
 953   } else {
 954     _force = false;
 955   }
 956 }
 957 
 958 bool ForceOverflowSettings::should_force() {
 959   if (_force) {
 960     _force = false;
 961     return true;
 962   } else {
 963     return false;
 964   }
 965 }
 966 #endif // !PRODUCT
 967 
 968 void ConcurrentMark::grayRoot(oop p, int worker_i) {
 969   HeapWord* addr = (HeapWord*) p;
 970   // We can't really check against _heap_start and _heap_end, since it
 971   // is possible during an evacuation pause with piggy-backed
 972   // initial-mark that the committed space is expanded during the
 973   // pause without CM observing this change. So the assertion below
 974   // is a bit conservative, but better than nothing.
 975   assert(_g1h->g1_committed().contains(addr),
 976          "address should be within the heap bounds");
 977 
 978   if (!_nextMarkBitMap->isMarked(addr)) {
 979     if (_nextMarkBitMap->parMark(addr)) {
 980       // Update the task specific count data for object p.
 981       add_to_count_data_for(p, worker_i);
 982     }
 983   }
 984 }
 985 
 986 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 987   // The objects on the region have already been marked "in bulk" by
 988   // the caller. We only need to decide whether to push the region on
 989   // the region stack or not.
 990 
 991   if (!concurrent_marking_in_progress() || !_should_gray_objects) {
 992     // We're done with marking and waiting for remark. We do not need to
 993     // push anything else on the region stack.
 994     return;
 995   }
 996 
 997   HeapWord* finger = _finger;
 998 
 999   if (verbose_low()) {
1000     gclog_or_tty->print_cr("[global] attempting to push "
1001                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
1002                            PTR_FORMAT, mr.start(), mr.end(), finger);
1003   }
1004 
1005   if (mr.start() < finger) {
1006     // The finger is always heap region aligned and it is not possible
1007     // for mr to span heap regions.
1008     assert(mr.end() <= finger, "invariant");
1009 
1010     // Separated the asserts so that we know which one fires.
1011     assert(mr.start() <= mr.end(),
1012            "region boundaries should fall within the committed space");
1013     assert(_heap_start <= mr.start(),
1014            "region boundaries should fall within the committed space");
1015     assert(mr.end() <= _heap_end,
1016            "region boundaries should fall within the committed space");
1017     if (verbose_low()) {
1018       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1019                              "below the finger, pushing it",
1020                              mr.start(), mr.end());
1021     }
1022 
1023     if (!region_stack_push_lock_free(mr)) {
1024       if (verbose_low()) {
1025         gclog_or_tty->print_cr("[global] region stack has overflown.");
1026       }
1027     }
1028   }
1029 }
1030 
1031 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p, int worker_i) {
1032   // The object is not marked by the caller. We need to at least mark
1033   // it and maybe push it on the stack.
1034 
1035   HeapWord* addr = (HeapWord*)p;
1036   if (!_nextMarkBitMap->isMarked(addr)) {
1037     // We definitely need to mark it, irrespective of whether we bail out
1038     // because we're done with marking.
1039     if (_nextMarkBitMap->parMark(addr)) {
1040       // Update the task specific count data for object p
1041       add_to_count_data_for(p, worker_i);
1042       
1043       if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1044         // If we're done with concurrent marking and we're waiting for
1045         // remark, then we're not pushing anything on the stack.
1046         return;
1047       }
1048 
1049       // No OrderAccess::store_load() is needed. It is implicit in the
1050       // CAS done in parMark(addr) above
1051       HeapWord* finger = _finger;
1052 
1053       if (addr < finger) {
1054         if (!mark_stack_push(oop(addr))) {
1055           if (verbose_low()) {
1056             gclog_or_tty->print_cr("[global] global stack overflow "
1057                                    "during parMark");
1058           }
1059         }
1060       }
1061     }
1062   }
1063 }
1064 
1065 class CMConcurrentMarkingTask: public AbstractGangTask {
1066 private:
1067   ConcurrentMark*       _cm;
1068   ConcurrentMarkThread* _cmt;
1069 
1070 public:
1071   void work(int worker_i) {
1072     assert(Thread::current()->is_ConcurrentGC_thread(),
1073            "this should only be done by a conc GC thread");
1074     ResourceMark rm;
1075 
1076     double start_vtime = os::elapsedVTime();
1077 
1078     ConcurrentGCThread::stsJoin();
1079 
1080     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1081     CMTask* the_task = _cm->task(worker_i);
1082     the_task->record_start_time();
1083     if (!_cm->has_aborted()) {
1084       do {
1085         double start_vtime_sec = os::elapsedVTime();
1086         double start_time_sec = os::elapsedTime();
1087         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1088 
1089         the_task->do_marking_step(mark_step_duration_ms,
1090                                   true /* do_stealing    */,
1091                                   true /* do_termination */);
1092 
1093         double end_time_sec = os::elapsedTime();
1094         double end_vtime_sec = os::elapsedVTime();
1095         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1096         double elapsed_time_sec = end_time_sec - start_time_sec;
1097         _cm->clear_has_overflown();
1098 
1099         bool ret = _cm->do_yield_check(worker_i);
1100 
1101         jlong sleep_time_ms;
1102         if (!_cm->has_aborted() && the_task->has_aborted()) {
1103           sleep_time_ms =
1104             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1105           ConcurrentGCThread::stsLeave();
1106           os::sleep(Thread::current(), sleep_time_ms, false);
1107           ConcurrentGCThread::stsJoin();
1108         }
1109         double end_time2_sec = os::elapsedTime();
1110         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1111 
1112 #if 0
1113           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1114                                  "overhead %1.4lf",
1115                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1116                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1117           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1118                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1119 #endif
1120       } while (!_cm->has_aborted() && the_task->has_aborted());
1121     }
1122     the_task->record_end_time();
1123     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1124 
1125     ConcurrentGCThread::stsLeave();
1126 
1127     double end_vtime = os::elapsedVTime();
1128     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1129   }
1130 
1131   CMConcurrentMarkingTask(ConcurrentMark* cm,
1132                           ConcurrentMarkThread* cmt) :
1133       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1134 
1135   ~CMConcurrentMarkingTask() { }
1136 };
1137 
1138 void ConcurrentMark::markFromRoots() {
1139   // we might be tempted to assert that:
1140   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1141   //        "inconsistent argument?");
1142   // However that wouldn't be right, because it's possible that
1143   // a safepoint is indeed in progress as a younger generation
1144   // stop-the-world GC happens even as we mark in this generation.
1145 
1146   _restart_for_overflow = false;
1147 
1148   size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1149   force_overflow_conc()->init();
1150   set_phase(active_workers, true /* concurrent */);
1151 
1152   CMConcurrentMarkingTask markingTask(this, cmThread());
1153   if (parallel_marking_threads() > 0) {
1154     _parallel_workers->run_task(&markingTask);
1155   } else {
1156     markingTask.work(0);
1157   }
1158   print_stats();
1159 }
1160 
1161 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1162   // world is stopped at this checkpoint
1163   assert(SafepointSynchronize::is_at_safepoint(),
1164          "world should be stopped");
1165 
1166   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1167 
1168   // If a full collection has happened, we shouldn't do this.
1169   if (has_aborted()) {
1170     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1171     return;
1172   }
1173 
1174   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1175 
1176   if (VerifyDuringGC) {
1177     HandleMark hm;  // handle scope
1178     gclog_or_tty->print(" VerifyDuringGC:(before)");
1179     Universe::heap()->prepare_for_verify();
1180     Universe::verify(/* allow dirty */ true,
1181                      /* silent      */ false,
1182                      /* option      */ VerifyOption_G1UsePrevMarking);
1183   }
1184 
1185   G1CollectorPolicy* g1p = g1h->g1_policy();
1186   g1p->record_concurrent_mark_remark_start();
1187 
1188   double start = os::elapsedTime();
1189 
1190   checkpointRootsFinalWork();
1191 
1192   double mark_work_end = os::elapsedTime();
1193 
1194   weakRefsWork(clear_all_soft_refs);
1195 
1196   if (has_overflown()) {
1197     // Oops.  We overflowed.  Restart concurrent marking.
1198     _restart_for_overflow = true;
1199     // Clear the flag. We do not need it any more.
1200     clear_has_overflown();
1201     if (G1TraceMarkStackOverflow) {
1202       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1203     }
1204   } else {
1205     // Aggregate the per-task counting data that we have accumulated
1206     // while marking.
1207     aggregate_all_count_data();
1208 
1209     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1210     // We're done with marking.
1211     // This is the end of the marking cycle; we expect all
1212     // threads to have SATB queues with active set to true.
1213     satb_mq_set.set_active_all_threads(false, /* new active value */
1214                                        true /* expected_active */);
1215 
1216     if (VerifyDuringGC) {
1217       HandleMark hm;  // handle scope
1218       gclog_or_tty->print(" VerifyDuringGC:(after)");
1219       Universe::heap()->prepare_for_verify();
1220       Universe::verify(/* allow dirty */ true,
1221                        /* silent      */ false,
1222                        /* option      */ VerifyOption_G1UseNextMarking);
1223     }
1224     assert(!restart_for_overflow(), "sanity");
1225   }
1226 
1227   // Reset the marking state if marking completed
1228   if (!restart_for_overflow()) {
1229     set_non_marking_state();
1230   }
1231 
1232 #if VERIFY_OBJS_PROCESSED
1233   _scan_obj_cl.objs_processed = 0;
1234   ThreadLocalObjQueue::objs_enqueued = 0;
1235 #endif
1236 
1237   // Statistics
1238   double now = os::elapsedTime();
1239   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1240   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1241   _remark_times.add((now - start) * 1000.0);
1242 
1243   g1p->record_concurrent_mark_remark_end();
1244 }
1245 
1246 #define CARD_BM_TEST_MODE 0
1247 
1248 // Used to calculate the # live objects per region
1249 // for verification purposes
1250 class CalcLiveObjectsClosure: public HeapRegionClosure {
1251 
1252   CMBitMapRO* _bm;
1253   ConcurrentMark* _cm;
1254   BitMap* _region_bm;
1255   BitMap* _card_bm;
1256 
1257   size_t _tot_words_done;
1258   size_t _tot_live;
1259   size_t _tot_used;
1260 
1261   size_t _region_marked_bytes;
1262 
1263   intptr_t _bottom_card_num;
1264 
1265   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1266     BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1267     BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
1268     
1269     for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1270 #if CARD_BM_TEST_MODE
1271       guarantee(_card_bm->at(i), "Should already be set.");
1272 #else
1273       _card_bm->par_at_put(i, 1);
1274 #endif
1275     }
1276   }
1277 
1278 public:
1279   CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
1280                          BitMap* region_bm, BitMap* card_bm) :
1281     _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1282     _region_marked_bytes(0), _tot_words_done(0),
1283     _tot_live(0), _tot_used(0)
1284   {
1285     _bottom_card_num =
1286       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1287                CardTableModRefBS::card_shift);
1288   }
1289 
1290   // It takes a region that's not empty (i.e., it has at least one
1291   // live object in it) and sets its corresponding bit on the region
1292   // bitmap to 1. If the region is "starts humongous" it will also set
1293   // to 1 the bits on the region bitmap that correspond to its
1294   // associated "continues humongous" regions.
1295   void set_bit_for_region(HeapRegion* hr) {
1296     assert(!hr->continuesHumongous(), "should have filtered those out");
1297 
1298     size_t index = hr->hrs_index();
1299     if (!hr->startsHumongous()) {
1300       // Normal (non-humongous) case: just set the bit.
1301       _region_bm->par_at_put((BitMap::idx_t) index, true);
1302     } else {
1303       // Starts humongous case: calculate how many regions are part of
1304       // this humongous region and then set the bit range. It might
1305       // have been a bit more efficient to look at the object that
1306       // spans these humongous regions to calculate their number from
1307       // the object's size. However, it's a good idea to calculate
1308       // this based on the metadata itself, and not the region
1309       // contents, so that this code is not aware of what goes into
1310       // the humongous regions (in case this changes in the future).
1311       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1312       size_t end_index = index + 1;
1313       while (end_index < g1h->n_regions()) {
1314         HeapRegion* chr = g1h->region_at(end_index);
1315         if (!chr->continuesHumongous()) break;
1316         end_index += 1;
1317       }
1318       _region_bm->par_at_put_range((BitMap::idx_t) index,
1319                                    (BitMap::idx_t) end_index, true);
1320     }
1321   }
1322 
1323   bool doHeapRegion(HeapRegion* hr) {
1324 
1325     if (hr->continuesHumongous()) {
1326       // We will ignore these here and process them when their
1327       // associated "starts humongous" region is processed (see
1328       // set_bit_for_heap_region()). Note that we cannot rely on their
1329       // associated "starts humongous" region to have their bit set to
1330       // 1 since, due to the region chunking in the parallel region
1331       // iteration, a "continues humongous" region might be visited
1332       // before its associated "starts humongous".
1333       return false;
1334     }
1335 
1336     HeapWord* nextTop = hr->next_top_at_mark_start();
1337     HeapWord* start   = hr->bottom();
1338 
1339     assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
1340                 "Preconditions.");
1341 
1342     // Record the number of words we'll examine.
1343     size_t words_done = (nextTop - start);
1344 
1345     // Find the first marked object at or after "start".
1346     start = _bm->getNextMarkedWordAddress(start, nextTop);
1347 
1348     size_t marked_bytes = 0;
1349     _region_marked_bytes = 0;
1350 
1351     // Below, the term "card num" means the result of shifting an address
1352     // by the card shift -- address 0 corresponds to card number 0.  One
1353     // must subtract the card num of the bottom of the heap to obtain a
1354     // card table index.
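    // A hypothetical example of the card-num arithmetic (with the usual
    // 512-byte cards, i.e. card_shift == 9): if the heap bottom is at
    // 0x80000000, the bottom card num is 0x80000000 >> 9 = 0x400000; an
    // object starting at 0x80001200 has card num 0x80001200 >> 9 =
    // 0x400009, so its card bitmap index is 0x400009 - 0x400000 = 9.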
1355 
1356     // The first card num of the sequence of live cards currently being
1357     // constructed.  -1 ==> no sequence.
1358     intptr_t start_card_num = -1;
1359 
1360     // The last card num of the sequence of live cards currently being
1361     // constructed.  -1 ==> no sequence.
1362     intptr_t last_card_num = -1;
1363 
1364     while (start < nextTop) {
1365       oop obj = oop(start);
1366       int obj_sz = obj->size();
1367 
1368       // The card num of the start of the current object.
1369       intptr_t obj_card_num =
1370         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1371       HeapWord* obj_last = start + obj_sz - 1;
1372       intptr_t obj_last_card_num =
1373         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1374 
1375       if (obj_card_num != last_card_num) {
1376         if (start_card_num == -1) {
1377           assert(last_card_num == -1, "Both or neither.");
1378           start_card_num = obj_card_num;
1379         } else {
1380           assert(last_card_num != -1, "Both or neither.");
1381           assert(obj_card_num >= last_card_num, "Inv");
1382           if ((obj_card_num - last_card_num) > 1) {
1383             // Mark the last run, and start a new one.
1384             mark_card_num_range(start_card_num, last_card_num);
1385             start_card_num = obj_card_num;
1386           }
1387         }
1388 #if CARD_BM_TEST_MODE
1389         /*
1390         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1391                                obj_card_num - _bottom_card_num,
1392                                obj_last_card_num - _bottom_card_num);
1393         */
1394         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1395           _card_bm->par_at_put(j - _bottom_card_num, 1);
1396         }
1397 #endif
1398       }
1399       // In any case, we set the last card num.
1400       last_card_num = obj_last_card_num;
1401 
1402       marked_bytes += (size_t)obj_sz * HeapWordSize;
1403 
1404       // Find the next marked object after this one.
1405       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1406     }
1407 
1408     // Handle the last range, if any.
1409     if (start_card_num != -1) {
1410       mark_card_num_range(start_card_num, last_card_num);
1411     }
1412 
1413     // Mark the allocated-since-marking portion...
1414     HeapWord* top = hr->top();
1415     if (nextTop < top) {
1416       start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1417       last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
1418 
1419       mark_card_num_range(start_card_num, last_card_num);
1420       
1421       // This definitely means the region has live objects.
1422       set_bit_for_region(hr);
1423     }
1424 
1425     // Update the live region bitmap.
1426     if (marked_bytes > 0) {
1427       set_bit_for_region(hr);
1428     }
1429 
1430     // Set the marked bytes for the current region so that
1431     // it can be queried by a calling verification routine
1432     _region_marked_bytes = marked_bytes;
1433 
1434     _tot_live += hr->next_live_bytes();
1435     _tot_used += hr->used();
1436     _tot_words_done = words_done;
1437 
1438     return false;
1439   }
1440 
1441   size_t region_marked_bytes() const { return _region_marked_bytes; }
1442   size_t tot_words_done() const      { return _tot_words_done; }
1443   size_t tot_live() const            { return _tot_live; }
1444   size_t tot_used() const            { return _tot_used; }
1445 };
1446 
1447 // Aggregate the counting data that was constructed concurrently
1448 // with marking.
1449 class AddToMarkedBytesClosure: public HeapRegionClosure {
1450   ConcurrentMark* _cm;
1451   size_t _task_num;
1452   size_t _max_task_num;
1453 
1454   bool _final;
1455 
1456 public:
1457   AddToMarkedBytesClosure(ConcurrentMark *cm,
1458                           size_t task_num,
1459                           size_t max_task_num) :
1460     _cm(cm),
1461     _task_num(task_num),
1462     _max_task_num(max_task_num),
1463     _final(false)
1464   {
1465     assert(0 <= _task_num && _task_num < _max_task_num, "sanity");
1466     if ((_max_task_num - _task_num) == 1) {
1467       // Last task
1468       _final = true;
1469     }
1470   }
1471 
1472   bool doHeapRegion(HeapRegion* hr) {
1473     // Adds the value in the counted marked bytes array for
1474     // _task_num for region hr to the value cached in heap
1475     // region itself.
1476     // For the final task we also set the top at conc count
1477     // for the region.
1478     // The bits in the live region bitmap are set for regions
1479     // that contain live data during the cleanup pause.
1480 
1481     if (hr->continuesHumongous()) {
1482       // We will ignore these here and process them when their
1483       // associated "starts humongous" region is processed.
1484       // Note that we cannot rely on their associated
1485       // "starts humongous" region to have their bit set to 1
1486       // since, due to the region chunking in the parallel region
1487       // iteration, a "continues humongous" region might be visited
1488       // before its associated "starts humongous".
1489       return false;
1490     }
1491 
1492     int hrs_index = hr->hrs_index();
1493     size_t* marked_bytes_array = _cm->count_marked_bytes_for(_task_num);
1494     size_t marked_bytes = marked_bytes_array[hrs_index];
1495     hr->add_to_marked_bytes(marked_bytes);
1496 
1497     if (_final) {
1498       HeapWord* ntams = hr->next_top_at_mark_start();
1499       HeapWord* start = hr->bottom();
1500       
1501       assert(start <= ntams && ntams <= hr->top() && hr->top() <= hr->end(),
1502              "Preconditions.");
1503 
1504       hr->set_top_at_conc_mark_count(ntams);
1505     }
1506 
1507     return false;
1508   }
1509 };
1510 
1511 void ConcurrentMark::aggregate_all_count_data() {
1512   _card_bm.clear();
1513 
1514   // Unions the per task card bitmaps into the global card bitmap,
1515   // and aggregates the per task marked bytes for each region into
1516   // the heap region itself.
1517 
1518   for (int i = 0; i < _max_task_num; i += 1) {
1519     BitMap& task_card_bm = count_card_bitmap_for(i);
1520     _card_bm.set_union(task_card_bm);
1521 
1522     // Update the marked bytes for each region
1523     AddToMarkedBytesClosure cl(this, i, _max_task_num);
1524     _g1h->heap_region_iterate(&cl);
1525   }
1526 
1527   // We're done with the accumulated per-task concurrent
1528   // counting data so let's clear it for the next marking.
1529   clear_all_count_data();
1530 }
1531 
1532 // Final update of count data (during cleanup).
1533 // Adds [top_at_count, NTAMS) to the marked bytes for each
1534 // region. Sets the bits in the card bitmap corresponding
1535 // to the interval [top_at_count, top], and sets the
1536 // liveness bit for each region containing live data
1537 // in the region bitmap.
1538 
1539 class FinalCountDataUpdateClosure: public HeapRegionClosure {
1540   ConcurrentMark* _cm;
1541   BitMap* _region_bm;
1542   BitMap* _card_bm;
1543   intptr_t _bottom_card_num;
1544 
1545   size_t _total_live_bytes;
1546   size_t _total_used_bytes;
1547   size_t _total_words_done;
1548 
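       // Sets the bits in the card bitmap that correspond to the
       // inclusive card range [start_card_num, last_card_num].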
1549   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1550     BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1551     BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
1552     
1553     // Inclusive bit range [start_idx, last_idx]. par_at_put_range
1554     // is exclusive of the end index so we also have to set the bit
1555     // for last_idx. Passing last_idx+1 to par_at_put_range would
1556     // work in most cases but could trip an OOB assertion.
1557 
1558     if ((last_idx - start_idx) > 0) {
1559       _card_bm->par_at_put_range(start_idx, last_idx, true);
1560     }
1561     _card_bm->par_set_bit(last_idx);
1562   }
1563 
1564   // It takes a region that's not empty (i.e., it has at least one
1565   // live object in it) and sets its corresponding bit on the region
1566   // bitmap to 1. If the region is "starts humongous" it will also set
1567   // to 1 the bits on the region bitmap that correspond to its
1568   // associated "continues humongous" regions.
1569   void set_bit_for_region(HeapRegion* hr) {
1570     assert(!hr->continuesHumongous(), "should have filtered those out");
1571 
1572     size_t index = hr->hrs_index();
1573     if (!hr->startsHumongous()) {
1574       // Normal (non-humongous) case: just set the bit.
1575       _region_bm->par_set_bit((BitMap::idx_t) index);
1576     } else {
1577       // Starts humongous case: calculate how many regions are part of
1578       // this humongous region and then set the bit range. It might
1579       // have been a bit more efficient to look at the object that
1580       // spans these humongous regions to calculate their number from
1581       // the object's size. However, it's a good idea to calculate
1582       // this based on the metadata itself, and not the region
1583       // contents, so that this code is not aware of what goes into
1584       // the humongous regions (in case this changes in the future).
1585       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1586       size_t end_index = index + 1;
1587       while (end_index < g1h->n_regions()) {
1588         HeapRegion* chr = g1h->region_at(end_index);
1589         if (!chr->continuesHumongous()) break;
1590         end_index += 1;
1591       }
1592       _region_bm->par_at_put_range((BitMap::idx_t) index,
1593                                    (BitMap::idx_t) end_index, true);
1594     }
1595   }
1596 
1597  public:
1598   FinalCountDataUpdateClosure(ConcurrentMark* cm,
1599                               BitMap* region_bm,
1600                               BitMap* card_bm) :
1601     _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1602     _total_live_bytes(0), _total_used_bytes(0), _total_words_done(0)
1603   {
1604     _bottom_card_num =
1605       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1606                CardTableModRefBS::card_shift);
1607   }
1608 
1609   bool doHeapRegion(HeapRegion* hr) {
1610 
1611     if (hr->continuesHumongous()) {
1612       // We will ignore these here and process them when their
1613       // associated "starts humongous" region is processed (see
1614       // set_bit_for_heap_region()). Note that we cannot rely on their
1615       // associated "starts humongous" region to have their bit set to
1616       // 1 since, due to the region chunking in the parallel region
1617       // iteration, a "continues humongous" region might be visited
1618       // before its associated "starts humongous".
1619       return false;
1620     }
1621 
1622     HeapWord* start = hr->top_at_conc_mark_count();
1623     HeapWord* ntams = hr->next_top_at_mark_start();
1624     HeapWord* top   = hr->top();
1625     
1626     assert(hr->bottom() <= start && start <= hr->end() &&
1627            hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1628     
1629     size_t words_done = ntams - hr->bottom();
1630 
1631     intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1632     intptr_t last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
1633 
1634 
1635     if (start < ntams) {
1636       // Region was changed between remark and cleanup pauses
1637       // We need to add (ntams - start) to the marked bytes
1638       // for this region, and set bits for the range
1639       // [ card_num(start), card_num(ntams) ) in the
1640       // card bitmap.
1641       size_t live_bytes = (ntams - start) * HeapWordSize;
1642       hr->add_to_marked_bytes(live_bytes);
1643       
1644       // Record the new top at conc count
1645       hr->set_top_at_conc_mark_count(ntams);
1646 
1647       // The setting of the bits card bitmap takes place below
1648     }
1649 
1650     // Mark the allocated-since-marking portion...
1651     if (ntams < top) {
1652       // This definitely means the region has live objects.
1653       set_bit_for_region(hr);
1654     }
1655 
1656     // Now set the bits for [start, top]
1657     mark_card_num_range(start_card_num, last_card_num);
1658 
1659     // Set the bit for the region if it contains live data
1660     if (hr->next_marked_bytes() > 0) {
1661       set_bit_for_region(hr);
1662     }
1663 
1664     _total_words_done += words_done;
1665     _total_used_bytes += hr->used();
1666     _total_live_bytes += hr->next_marked_bytes();
1667 
1668     return false;
1669   }
1670 
1671   size_t total_words_done() const { return _total_words_done; }
1672   size_t total_live_bytes() const { return _total_live_bytes; }
1673   size_t total_used_bytes() const { return _total_used_bytes; }
1674 };
1675 
1676 // Heap region closure used for verifying the counting data
1677 // that was accumulated concurrently and aggregated during
1678 // the remark pause. This closure is applied to the heap
1679 // regions during the STW cleanup pause.
1680 
1681 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1682   ConcurrentMark* _cm;
1683   CalcLiveObjectsClosure _calc_cl;
1684   BitMap* _region_bm;   // Region BM to be verified
1685   BitMap* _card_bm;     // Card BM to be verified
1686   bool _verbose;        // verbose output?
1687 
1688   BitMap* _exp_region_bm; // Expected Region BM values
1689   BitMap* _exp_card_bm;   // Expected card BM values
1690 
1691   intptr_t _bottom_card_num; // Used for calculating bitmap indices
1692 
1693   int _failures;
1694 
1695 public:
1696   VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
1697                                 BitMap* region_bm,
1698                                 BitMap* card_bm,
1699                                 BitMap* exp_region_bm,
1700                                 BitMap* exp_card_bm,
1701                                 bool verbose) :
1702     _cm(cm),
1703     _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
1704     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1705     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1706     _failures(0)
1707   { 
1708     _bottom_card_num =
1709       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1710                CardTableModRefBS::card_shift);
1711   }
1712 
1713   int failures() const { return _failures; }
1714 
1715   bool doHeapRegion(HeapRegion* hr) {
1716     if (hr->continuesHumongous()) {
1717       // We will ignore these here and process them when their
1718       // associated "starts humongous" region is processed (see
1719       // set_bit_for_heap_region()). Note that we cannot rely on their
1720       // associated "starts humongous" region to have their bit set to
1721       // 1 since, due to the region chunking in the parallel region
1722       // iteration, a "continues humongous" region might be visited
1723       // before its associated "starts humongous".
1724       return false;
1725     }
1726 
1727     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1728     // this region and set the corresponding bits in the expected region
1729     // and card bitmaps.
1730     bool res = _calc_cl.doHeapRegion(hr);
1731     assert(res == false, "should be continuing");
1732 
1733     // Note that the calculated count data could be a subset of the
1734     // count data that was accumulated during marking. See the comment
1735     // in G1ParCopyHelper::copy_to_survivor_space for an explanation
1736     // of why.
1737 
1738     if (_verbose) {
1739       gclog_or_tty->print("Region %d: bottom: "PTR_FORMAT", ntams: "
1740                           PTR_FORMAT", top: "PTR_FORMAT", end: "PTR_FORMAT,
1741                           hr->hrs_index(), hr->bottom(), hr->next_top_at_mark_start(),
1742                           hr->top(), hr->end());
1743       gclog_or_tty->print_cr(", marked_bytes: calc/actual "SIZE_FORMAT"/"SIZE_FORMAT,
1744                              _calc_cl.region_marked_bytes(),
1745                              hr->next_marked_bytes());
1746     }
1747 
1748     // Verify that _top_at_conc_count == ntams
1749     if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
1750       if (_verbose) {
1751         gclog_or_tty->print_cr("Region %d: top at conc count incorrect: expected "
1752                                PTR_FORMAT", actual: "PTR_FORMAT,
1753                                hr->hrs_index(), hr->next_top_at_mark_start(),
1754                                hr->top_at_conc_mark_count());
1755       }
1756       _failures += 1;
1757     }
1758 
1759     // Verify the marked bytes for this region. 
1760     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1761     size_t act_marked_bytes = hr->next_marked_bytes();
1762 
1763     // We're OK if actual marked bytes >= expected.
1764     if (exp_marked_bytes > act_marked_bytes) {
1765       if (_verbose) {
1766         gclog_or_tty->print_cr("Region %d: marked bytes mismatch: expected: "
1767                                SIZE_FORMAT", actual: "SIZE_FORMAT,
1768                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1769       }
1770       _failures += 1;
1771     }
1772 
1773     // Verify the bit, for this region, in the actual and expected
1774     // (which was just calculated) region bit maps.
1775     // We're not OK if the expected bit is set and the actual is not set.
1776     BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();
1777     
1778     bool expected = _exp_region_bm->at(index);
1779     bool actual = _region_bm->at(index);
1780     if (expected && !actual) {
1781       if (_verbose) {
1782         gclog_or_tty->print_cr("Region %d: region bitmap mismatch: expected: %d, actual: %d",
1783                                hr->hrs_index(), expected, actual);
1784       }
1785       _failures += 1;
1786     }
1787 
1788     // Verify that the card bit maps for the cards spanned by the current
1789     // region match. The set of offsets that have set bits in the expected
1790     // bitmap should be a subset of the offsets with set bits in the
1791     // actual card bitmap.
1792     // Again it's more important that if the expected bit is set then the
1793     // actual bit be set.
1794     intptr_t start_card_num =
1795         intptr_t(uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift);
1796     intptr_t top_card_num =
1797         intptr_t(uintptr_t(hr->top()) >> CardTableModRefBS::card_shift);
1798 
1799     BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1800     BitMap::idx_t end_idx = top_card_num - _bottom_card_num;
1801 
1802     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1803       expected = _exp_card_bm->at(i);
1804       actual = _card_bm->at(i);
1805       
1806       if (expected && !actual) {
1807         if (_verbose) {
1808           gclog_or_tty->print_cr("Region %d: card bitmap mismatch at idx %d: expected: %d, actual: %d",
1809                                  hr->hrs_index(), i, expected, actual);
1810         }
1811         _failures += 1;
1812       }
1813     }
1814     if (_failures) {
1815       // Stop iteration?
1816       return true;
1817     }
1818 
1819     return false;
1820   }
1821 };
1822 
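     // Applies two heap region closures, in order, to each heap region
     // and requests that the iteration stop if either of them does.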
1823 class Mux2HRClosure: public HeapRegionClosure {
1824   HeapRegionClosure* _cl1;
1825   HeapRegionClosure* _cl2;
1826 
1827 public:
1828   Mux2HRClosure(HeapRegionClosure *c1, HeapRegionClosure *c2) : _cl1(c1), _cl2(c2) { }
1829   bool doHeapRegion(HeapRegion* hr) {
1830     bool res1 = _cl1->doHeapRegion(hr);
1831     bool res2 = _cl2->doHeapRegion(hr);
1832 
1833     // Only continue the iteration if both closures return false.
1834     return res1 || res2;
1835   }
1836 };
1837 
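     // Parallel task that performs the final counting of live data during
     // the cleanup pause. Each worker applies FinalCountDataUpdateClosure
     // to its claimed regions and, if VerifyDuringGC, also verifies the
     // actual region and card bitmaps against freshly calculated values.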
1838 class G1ParFinalCountTask: public AbstractGangTask {
1839 protected:
1840   G1CollectedHeap* _g1h;
1841   CMBitMap* _bm;
1842   size_t _n_workers;
1843   size_t *_live_bytes;
1844   size_t *_used_bytes;
1845 
1846   BitMap* _actual_region_bm;
1847   BitMap* _actual_card_bm;
1848 
1849   BitMap _expected_region_bm;
1850   BitMap _expected_card_bm;
1851 
1852   int _failures;
1853 
1854 public:
1855   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1856                       BitMap* region_bm, BitMap* card_bm)
1857     : AbstractGangTask("G1 final counting"),
1858       _g1h(g1h), _bm(bm),
1859       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1860       _expected_region_bm(0, false), _expected_card_bm(0, false),
1861       _failures(0)
1862   {
1863     if (ParallelGCThreads > 0) {
1864       _n_workers = _g1h->workers()->total_workers();
1865     } else {
1866       _n_workers = 1;
1867     }
1868 
1869     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1870     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1871 
1872     if (VerifyDuringGC) {
1873       _expected_card_bm.resize(_actual_card_bm->size(), false);
1874       _expected_region_bm.resize(_actual_region_bm->size(), false);
1875     }
1876   }
1877 
1878   ~G1ParFinalCountTask() {
1879     if (VerifyDuringGC) {
1880       _expected_region_bm.resize(0);
1881       _expected_card_bm.resize(0);
1882     }
1883     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1884     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1885   }
1886 
1887   void work(int i) {
1888 
1889     FinalCountDataUpdateClosure final_update_cl(_g1h->concurrent_mark(),
1890                                                 _actual_region_bm, _actual_card_bm);
1891 
1892     VerifyLiveObjectDataHRClosure verify_cl(_g1h->concurrent_mark(),
1893                                             _actual_region_bm, _actual_card_bm,
1894                                             &_expected_region_bm,
1895                                             &_expected_card_bm,
1896                                             true /* verbose */);
1897 
1898     Mux2HRClosure update_and_verify_cl(&final_update_cl, &verify_cl);
1899 
1900     HeapRegionClosure* hr_cl = &final_update_cl;
1901     if (VerifyDuringGC) {
1902       hr_cl = &update_and_verify_cl;
1903     }
1904 
1905     if (G1CollectedHeap::use_parallel_gc_threads()) {
1906       _g1h->heap_region_par_iterate_chunked(hr_cl, i,
1907                                             HeapRegion::FinalCountClaimValue);
1908     } else {
1909       _g1h->heap_region_iterate(hr_cl);
1910     }
1911 
1912     assert((size_t) i < _n_workers, "invariant");
1913     _live_bytes[i] = final_update_cl.total_live_bytes();
1914     _used_bytes[i] = final_update_cl.total_used_bytes();
1915 
1916     if (VerifyDuringGC) {
1917       _failures += verify_cl.failures();
1918     }
1919   }
1920 
1921   size_t live_bytes()  {
1922     size_t live_bytes = 0;
1923     for (size_t i = 0; i < _n_workers; ++i)
1924       live_bytes += _live_bytes[i];
1925     return live_bytes;
1926   }
1927 
1928   size_t used_bytes()  {
1929     size_t used_bytes = 0;
1930     for (size_t i = 0; i < _n_workers; ++i)
1931       used_bytes += _used_bytes[i];
1932     return used_bytes;
1933   }
1934 
1935   int failures() const { return _failures; }
1936 };
1937 
1938 class G1ParNoteEndTask;
1939 
1940 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1941   G1CollectedHeap* _g1;
1942   int _worker_num;
1943   size_t _max_live_bytes;
1944   size_t _regions_claimed;
1945   size_t _freed_bytes;
1946   FreeRegionList* _local_cleanup_list;
1947   HumongousRegionSet* _humongous_proxy_set;
1948   HRRSCleanupTask* _hrrs_cleanup_task;
1949   double _claimed_region_time;
1950   double _max_region_time;
1951 
1952 public:
1953   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1954                              int worker_num,
1955                              FreeRegionList* local_cleanup_list,
1956                              HumongousRegionSet* humongous_proxy_set,
1957                              HRRSCleanupTask* hrrs_cleanup_task);
1958   size_t freed_bytes() { return _freed_bytes; }
1959 
1960   bool doHeapRegion(HeapRegion *r);
1961 
1962   size_t max_live_bytes() { return _max_live_bytes; }
1963   size_t regions_claimed() { return _regions_claimed; }
1964   double claimed_region_time_sec() { return _claimed_region_time; }
1965   double max_region_time_sec() { return _max_region_time; }
1966 };
1967 
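     // Parallel task that notes the end of marking in each heap region,
     // frees regions found to be completely empty, and accumulates the
     // freed bytes and maximum live bytes totals.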
1968 class G1ParNoteEndTask: public AbstractGangTask {
1969   friend class G1NoteEndOfConcMarkClosure;
1970 
1971 protected:
1972   G1CollectedHeap* _g1h;
1973   size_t _max_live_bytes;
1974   size_t _freed_bytes;
1975   FreeRegionList* _cleanup_list;
1976 
1977 public:
1978   G1ParNoteEndTask(G1CollectedHeap* g1h,
1979                    FreeRegionList* cleanup_list) :
1980     AbstractGangTask("G1 note end"), _g1h(g1h),
1981     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1982 
1983   void work(int i) {
1984     double start = os::elapsedTime();
1985     FreeRegionList local_cleanup_list("Local Cleanup List");
1986     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1987     HRRSCleanupTask hrrs_cleanup_task;
1988     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1989                                            &humongous_proxy_set,
1990                                            &hrrs_cleanup_task);
1991     if (G1CollectedHeap::use_parallel_gc_threads()) {
1992       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1993                                             HeapRegion::NoteEndClaimValue);
1994     } else {
1995       _g1h->heap_region_iterate(&g1_note_end);
1996     }
1997     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1998 
1999     // Now update the lists
2000     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
2001                                             NULL /* free_list */,
2002                                             &humongous_proxy_set,
2003                                             true /* par */);
2004     {
2005       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
2006       _max_live_bytes += g1_note_end.max_live_bytes();
2007       _freed_bytes += g1_note_end.freed_bytes();
2008 
2009       // If we iterate over the global cleanup list at the end of
2010       // cleanup to do this printing we will not guarantee to only
2011       // generate output for the newly-reclaimed regions (the list
2012       // might not be empty at the beginning of cleanup; we might
2013       // still be working on its previous contents). So we do the
2014       // printing here, before we append the new regions to the global
2015       // cleanup list.
2016 
2017       G1HRPrinter* hr_printer = _g1h->hr_printer();
2018       if (hr_printer->is_active()) {
2019         HeapRegionLinkedListIterator iter(&local_cleanup_list);
2020         while (iter.more_available()) {
2021           HeapRegion* hr = iter.get_next();
2022           hr_printer->cleanup(hr);
2023         }
2024       }
2025 
2026       _cleanup_list->add_as_tail(&local_cleanup_list);
2027       assert(local_cleanup_list.is_empty(), "post-condition");
2028 
2029       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
2030     }
2031     double end = os::elapsedTime();
2032     if (G1PrintParCleanupStats) {
2033       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
2034                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
2035                           i, start, end, (end-start)*1000.0,
2036                           g1_note_end.regions_claimed(),
2037                           g1_note_end.claimed_region_time_sec()*1000.0,
2038                           g1_note_end.max_region_time_sec()*1000.0);
2039     }
2040   }
2041   size_t max_live_bytes() { return _max_live_bytes; }
2042   size_t freed_bytes() { return _freed_bytes; }
2043 };
2044 
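     // Parallel task that scrubs the remembered sets using the liveness
     // information recorded in the region and card bitmaps.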
2045 class G1ParScrubRemSetTask: public AbstractGangTask {
2046 protected:
2047   G1RemSet* _g1rs;
2048   BitMap* _region_bm;
2049   BitMap* _card_bm;
2050 public:
2051   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
2052                        BitMap* region_bm, BitMap* card_bm) :
2053     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
2054     _region_bm(region_bm), _card_bm(card_bm)
2055   {}
2056 
2057   void work(int i) {
2058     if (G1CollectedHeap::use_parallel_gc_threads()) {
2059       _g1rs->scrub_par(_region_bm, _card_bm, i,
2060                        HeapRegion::ScrubRemSetClaimValue);
2061     } else {
2062       _g1rs->scrub(_region_bm, _card_bm);
2063     }
2064   }
2065 
2066 };
2067 
2068 G1NoteEndOfConcMarkClosure::
2069 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
2070                            int worker_num,
2071                            FreeRegionList* local_cleanup_list,
2072                            HumongousRegionSet* humongous_proxy_set,
2073                            HRRSCleanupTask* hrrs_cleanup_task)
2074   : _g1(g1), _worker_num(worker_num),
2075     _max_live_bytes(0), _regions_claimed(0),
2076     _freed_bytes(0),
2077     _claimed_region_time(0.0), _max_region_time(0.0),
2078     _local_cleanup_list(local_cleanup_list),
2079     _humongous_proxy_set(humongous_proxy_set),
2080     _hrrs_cleanup_task(hrrs_cleanup_task) { }
2081 
2082 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
2083   // We use a claim value of zero here because all regions
2084   // were claimed with value 1 in the FinalCount task.
2085   hr->reset_gc_time_stamp();
2086   if (!hr->continuesHumongous()) {
2087     double start = os::elapsedTime();
2088     _regions_claimed++;
2089     hr->note_end_of_marking();
2090     _max_live_bytes += hr->max_live_bytes();
2091     _g1->free_region_if_empty(hr,
2092                               &_freed_bytes,
2093                               _local_cleanup_list,
2094                               _humongous_proxy_set,
2095                               _hrrs_cleanup_task,
2096                               true /* par */);
2097     double region_time = (os::elapsedTime() - start);
2098     _claimed_region_time += region_time;
2099     if (region_time > _max_region_time) {
2100       _max_region_time = region_time;
2101     }
2102   }
2103   return false;
2104 }
2105 
2106 void ConcurrentMark::cleanup() {
2107   // world is stopped at this checkpoint
2108   assert(SafepointSynchronize::is_at_safepoint(),
2109          "world should be stopped");
2110   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2111 
2112   // If a full collection has happened, we shouldn't do this.
2113   if (has_aborted()) {
2114     g1h->set_marking_complete(); // So bitmap clearing isn't confused
2115     return;
2116   }
2117 
2118   g1h->verify_region_sets_optional();
2119 
2120   if (VerifyDuringGC) {
2121     HandleMark hm;  // handle scope
2122     gclog_or_tty->print(" VerifyDuringGC:(before)");
2123     Universe::heap()->prepare_for_verify();
2124     Universe::verify(/* allow dirty */ true,
2125                      /* silent      */ false,
2126                      /* option      */ VerifyOption_G1UsePrevMarking);
2127   }
2128 
2129   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2130   g1p->record_concurrent_mark_cleanup_start();
2131 
2132   double start = os::elapsedTime();
2133 
2134   HeapRegionRemSet::reset_for_cleanup_tasks();
2135 
2136   // Clear the global region bitmap - it will be filled as part
2137   // of the final counting task.
2138   _region_bm.clear();
2139 
2140   // Do counting once more with the world stopped for good measure.
2141   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
2142                                         &_region_bm, &_card_bm);
2143 
2144   if (G1CollectedHeap::use_parallel_gc_threads()) {
2145     assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2146            "sanity check");
2147 
2148     int n_workers = g1h->workers()->total_workers();
2149     g1h->set_par_threads(n_workers);
2150     g1h->workers()->run_task(&g1_par_count_task);
2151     g1h->set_par_threads(0);
2152 
2153     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2154            "sanity check");
2155   } else {
2156     g1_par_count_task.work(0);
2157   }
2158 
2159   // Verify that there were no verification failures of
2160   // the live counting data.
2161   if (VerifyDuringGC) {
2162     assert(g1_par_count_task.failures() == 0, "Unexpected failures");
2163   }
2164 
2165   size_t known_garbage_bytes =
2166     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
2167   g1p->set_known_garbage_bytes(known_garbage_bytes);
2168 
2169   size_t start_used_bytes = g1h->used();
2170   _at_least_one_mark_complete = true;
2171   g1h->set_marking_complete();
2172 
2173   ergo_verbose4(ErgoConcCycles,
2174            "finish cleanup",
2175            ergo_format_byte("occupancy")
2176            ergo_format_byte("capacity")
2177            ergo_format_byte_perc("known garbage"),
2178            start_used_bytes, g1h->capacity(),
2179            known_garbage_bytes,
2180            ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
2181 
2182   double count_end = os::elapsedTime();
2183   double this_final_counting_time = (count_end - start);
2184   if (G1PrintParCleanupStats) {
2185     gclog_or_tty->print_cr("Cleanup:");
2186     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
2187                            this_final_counting_time*1000.0);
2188   }
2189   _total_counting_time += this_final_counting_time;
2190 
2191   if (G1PrintRegionLivenessInfo) {
2192     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2193     _g1h->heap_region_iterate(&cl);
2194   }
2195 
2196   // Install newly created mark bitMap as "prev".
2197   swapMarkBitMaps();
2198 
2199   g1h->reset_gc_time_stamp();
2200 
2201   // Note end of marking in all heap regions.
2202   double note_end_start = os::elapsedTime();
2203   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2204   if (G1CollectedHeap::use_parallel_gc_threads()) {
2205     int n_workers = g1h->workers()->total_workers();
2206     g1h->set_par_threads(n_workers);
2207     g1h->workers()->run_task(&g1_par_note_end_task);
2208     g1h->set_par_threads(0);
2209 
2210     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2211            "sanity check");
2212   } else {
2213     g1_par_note_end_task.work(0);
2214   }
2215 
2216   if (!cleanup_list_is_empty()) {
2217     // The cleanup list is not empty, so we'll have to process it
2218     // concurrently. Notify anyone else that might be wanting free
2219     // regions that there will be more free regions coming soon.
2220     g1h->set_free_regions_coming();
2221   }
2222   double note_end_end = os::elapsedTime();
2223   if (G1PrintParCleanupStats) {
2224     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
2225                            (note_end_end - note_end_start)*1000.0);
2226   }
2227 
2228 
2229   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
2230   // call below, since it affects the metric by which we sort the heap regions.
2231   if (G1ScrubRemSets) {
2232     double rs_scrub_start = os::elapsedTime();
2233     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2234     if (G1CollectedHeap::use_parallel_gc_threads()) {
2235       int n_workers = g1h->workers()->total_workers();
2236       g1h->set_par_threads(n_workers);
2237       g1h->workers()->run_task(&g1_par_scrub_rs_task);
2238       g1h->set_par_threads(0);
2239 
2240       assert(g1h->check_heap_region_claim_values(
2241                                             HeapRegion::ScrubRemSetClaimValue),
2242              "sanity check");
2243     } else {
2244       g1_par_scrub_rs_task.work(0);
2245     }
2246 
2247     double rs_scrub_end = os::elapsedTime();
2248     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2249     _total_rs_scrub_time += this_rs_scrub_time;
2250   }
2251 
2252   // this will also free any regions totally full of garbage objects,
2253   // and sort the regions.
2254   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
2255                         g1_par_note_end_task.freed_bytes(),
2256                         g1_par_note_end_task.max_live_bytes());
2257 
2258   // Statistics.
2259   double end = os::elapsedTime();
2260   _cleanup_times.add((end - start) * 1000.0);
2261 
2262   // G1CollectedHeap::heap()->print();
2263   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
2264   // G1CollectedHeap::heap()->get_gc_time_stamp());
2265 
2266   if (PrintGC || PrintGCDetails) {
2267     g1h->print_size_transition(gclog_or_tty,
2268                                start_used_bytes,
2269                                g1h->used(),
2270                                g1h->capacity());
2271   }
2272 
2273   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
2274   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
2275 
2276   // Clean up will have freed any regions completely full of garbage.
2277   // Update the soft reference policy with the new heap occupancy.
2278   Universe::update_heap_info_at_gc();
2279 
2280   // We need to make this be a "collection" so any collection pause that
2281   // races with it goes around and waits for completeCleanup to finish.
2282   g1h->increment_total_collections();
2283 
2284   if (VerifyDuringGC) {
2285     HandleMark hm;  // handle scope
2286     gclog_or_tty->print(" VerifyDuringGC:(after)");
2287     Universe::heap()->prepare_for_verify();
2288     Universe::verify(/* allow dirty */ true,
2289                      /* silent      */ false,
2290                      /* option      */ VerifyOption_G1UsePrevMarking);
2291   }
2292 
2293   g1h->verify_region_sets_optional();
2294 }
2295 
2296 void ConcurrentMark::completeCleanup() {
2297   if (has_aborted()) return;
2298 
2299   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2300 
2301   _cleanup_list.verify_optional();
2302   FreeRegionList tmp_free_list("Tmp Free List");
2303 
2304   if (G1ConcRegionFreeingVerbose) {
2305     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2306                            "cleanup list has "SIZE_FORMAT" entries",
2307                            _cleanup_list.length());
2308   }
2309 
2310   // No one else should be accessing the _cleanup_list at this point,
2311   // so it's not necessary to take any locks.
2312   while (!_cleanup_list.is_empty()) {
2313     HeapRegion* hr = _cleanup_list.remove_head();
2314     assert(hr != NULL, "the list was not empty");
2315     hr->par_clear();
2316     tmp_free_list.add_as_tail(hr);
2317 
2318     // Instead of adding one region at a time to the secondary_free_list,
2319     // we accumulate them in the local list and move them a few at a
2320     // time. This also cuts down on the number of notify_all() calls
2321     // we do during this process. We'll also append the local list when
2322     // _cleanup_list is empty (which means we just removed the last
2323     // region from the _cleanup_list).
2324     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2325         _cleanup_list.is_empty()) {
2326       if (G1ConcRegionFreeingVerbose) {
2327         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2328                                "appending "SIZE_FORMAT" entries to the "
2329                                "secondary_free_list, clean list still has "
2330                                SIZE_FORMAT" entries",
2331                                tmp_free_list.length(),
2332                                _cleanup_list.length());
2333       }
2334 
2335       {
2336         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2337         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2338         SecondaryFreeList_lock->notify_all();
2339       }
2340 
2341       if (G1StressConcRegionFreeing) {
2342         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2343           os::sleep(Thread::current(), (jlong) 1, false);
2344         }
2345       }
2346     }
2347   }
2348   assert(tmp_free_list.is_empty(), "post-condition");
2349 }
2350 
2351 // Support closures for reference processing in G1
2352 
2353 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2354   HeapWord* addr = (HeapWord*)obj;
2355   return addr != NULL &&
2356          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2357 }
2358 
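     // 'Keep Alive' closure used by serial reference processing. It marks
     // unmarked objects in the G1 reserved heap, updates the counting data
     // for worker 0, and pushes the objects onto the global mark stack.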
2359 class G1CMKeepAliveClosure: public OopClosure {
2360   G1CollectedHeap* _g1;
2361   ConcurrentMark*  _cm;
2362   CMBitMap*        _bitMap;
2363  public:
2364   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
2365                        CMBitMap* bitMap) :
2366     _g1(g1), _cm(cm),
2367     _bitMap(bitMap)
2368   {
2369     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2370   }
2371 
2372   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2373   virtual void do_oop(      oop* p) { do_oop_work(p); }
2374 
2375   template <class T> void do_oop_work(T* p) {
2376     oop obj = oopDesc::load_decode_heap_oop(p);
2377     HeapWord* addr = (HeapWord*)obj;
2378 
2379     if (_cm->verbose_high()) {
2380       gclog_or_tty->print_cr("\t[0] we're looking at location "
2381                              "*"PTR_FORMAT" = "PTR_FORMAT,
2382                              p, (void*) obj);
2383     }
2384 
2385     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2386       _bitMap->mark(addr);
2387       // Update the task specific count data for obj
2388       _cm->add_to_count_data_for(obj, 0 /* worker_i */);
2389 
2390       _cm->mark_stack_push(obj);
2391     }
2392   }
2393 };
2394 
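     // 'Drain' closure used by serial reference processing. It drains the
     // global mark stack, applying the keep alive closure to the entries.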
2395 class G1CMDrainMarkingStackClosure: public VoidClosure {
2396   CMMarkStack*                  _markStack;
2397   CMBitMap*                     _bitMap;
2398   G1CMKeepAliveClosure*         _oopClosure;
2399  public:
2400   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
2401                                G1CMKeepAliveClosure* oopClosure) :
2402     _bitMap(bitMap),
2403     _markStack(markStack),
2404     _oopClosure(oopClosure)
2405   {}
2406 
2407   void do_void() {
2408     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
2409   }
2410 };
2411 
2412 // 'Keep Alive' closure used by parallel reference processing.
2413 // An instance of this closure is used in the parallel reference processing
2414 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2415 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
2416 // placed onto discovered ref lists only once, so we can mark and push with
2417 // no need to check whether the object has already been marked. Using the
2418 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2419 // operating on the global mark stack. This means that an individual
2420 // worker would be doing lock-free pushes while it processes its own
2421 // discovered ref list, followed by a drain call. If the discovered ref lists
2422 // are unbalanced then this could cause interference with the other
2423 // workers. Using a CMTask (and its embedded local data structures)
2424 // avoids that potential interference.
2425 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2426   ConcurrentMark*  _cm;
2427   CMTask*          _task;
2428   CMBitMap*        _bitMap;
2429   int              _ref_counter_limit;
2430   int              _ref_counter;
2431  public:
2432   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
2433                                   CMTask* task,
2434                                   CMBitMap* bitMap) :
2435     _cm(cm), _task(task), _bitMap(bitMap),
2436     _ref_counter_limit(G1RefProcDrainInterval)
2437   {
2438     assert(_ref_counter_limit > 0, "sanity");
2439     _ref_counter = _ref_counter_limit;
2440   }
2441 
2442   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2443   virtual void do_oop(      oop* p) { do_oop_work(p); }
2444 
2445   template <class T> void do_oop_work(T* p) {
2446     if (!_cm->has_overflown()) {
2447       oop obj = oopDesc::load_decode_heap_oop(p);
2448       if (_cm->verbose_high()) {
2449         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2450                                "*"PTR_FORMAT" = "PTR_FORMAT,
2451                                _task->task_id(), p, (void*) obj);
2452       }
2453 
2454       _task->deal_with_reference(obj);
2455       _ref_counter--;
2456 
2457       if (_ref_counter == 0) {
2458         // We have dealt with _ref_counter_limit references, pushing them and objects
2459         // reachable from them on to the local stack (and possibly the global stack).
2460         // Call do_marking_step() to process these entries. We call the routine in a
2461         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2462         // with the entries that we've pushed as a result of the deal_with_reference
2463         // calls above) or we overflow.
2464         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2465         // while there may still be some work to do. (See the comment at the
2466         // beginning of CMTask::do_marking_step() for those conditions - one of which
2467         // is reaching the specified time target.) It is only when
2468         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2469         // that the marking has completed.
2470         do {
2471           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2472           _task->do_marking_step(mark_step_duration_ms,
2473                                  false /* do_stealing    */,
2474                                  false /* do_termination */);
2475         } while (_task->has_aborted() && !_cm->has_overflown());
2476         _ref_counter = _ref_counter_limit;
2477       }
2478     } else {
2479       if (_cm->verbose_high()) {
2480          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2481       }
2482     }
2483   }
2484 };
2485 
2486 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2487   ConcurrentMark* _cm;
2488   CMTask* _task;
2489  public:
2490   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2491     _cm(cm), _task(task)
2492   {}
2493 
2494   void do_void() {
2495     do {
2496       if (_cm->verbose_high()) {
2497         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2498                                _task->task_id());
2499       }
2500 
2501       // We call CMTask::do_marking_step() to completely drain the local and
2502       // global marking stacks. The routine is called in a loop, which we'll
2503       // exit if there's nothing more to do (i.e. we've completely drained the
2504       // entries that were pushed as a result of applying the
2505       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2506       // lists above) or we overflow the global marking stack.
2507       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2508       // while there may still be some work to do. (See the comment at the
2509       // beginning of CMTask::do_marking_step() for those conditions - one of which
2510       // is reaching the specified time target.) It is only when
2511       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2512       // that the marking has completed.
2513 
2514       _task->do_marking_step(1000000000.0 /* something very large */,
2515                              true /* do_stealing    */,
2516                              true /* do_termination */);
2517     } while (_task->has_aborted() && !_cm->has_overflown());
2518   }
2519 };
2520 
2521 // Implementation of AbstractRefProcTaskExecutor for parallel
2522 // reference processing at the end of G1 concurrent marking
2523 
2524 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2525 private:
2526   G1CollectedHeap* _g1h;
2527   ConcurrentMark*  _cm;
2528   CMBitMap*        _bitmap;
2529   WorkGang*        _workers;
2530   int              _active_workers;
2531 
2532 public:
2533   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2534                         ConcurrentMark* cm,
2535                         CMBitMap* bitmap,
2536                         WorkGang* workers,
2537                         int n_workers) :
2538     _g1h(g1h), _cm(cm), _bitmap(bitmap),
2539     _workers(workers), _active_workers(n_workers)
2540   { }
2541 
2542   // Executes the given task using concurrent marking worker threads.
2543   virtual void execute(ProcessTask& task);
2544   virtual void execute(EnqueueTask& task);
2545 };
2546 
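     // Gang task used to run a reference ProcessTask in parallel. Each
     // worker uses the keep alive and drain closures built around its
     // own CMTask.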
2547 class G1CMRefProcTaskProxy: public AbstractGangTask {
2548   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2549   ProcessTask&     _proc_task;
2550   G1CollectedHeap* _g1h;
2551   ConcurrentMark*  _cm;
2552   CMBitMap*        _bitmap;
2553 
2554 public:
2555   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2556                      G1CollectedHeap* g1h,
2557                      ConcurrentMark* cm,
2558                      CMBitMap* bitmap) :
2559     AbstractGangTask("Process reference objects in parallel"),
2560     _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
2561   {}
2562 
2563   virtual void work(int i) {
2564     CMTask* marking_task = _cm->task(i);
2565     G1CMIsAliveClosure g1_is_alive(_g1h);
2566     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
2567     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2568 
2569     _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2570   }
2571 };
2572 
2573 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2574   assert(_workers != NULL, "Need parallel worker threads.");
2575 
2576   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2577 
2578   // We need to reset the phase for each task execution so that
2579   // the termination protocol of CMTask::do_marking_step works.
2580   _cm->set_phase(_active_workers, false /* concurrent */);
2581   _g1h->set_par_threads(_active_workers);
2582   _workers->run_task(&proc_task_proxy);
2583   _g1h->set_par_threads(0);
2584 }
2585 
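     // Gang task used to run a reference EnqueueTask in parallel.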
2586 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2587   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2588   EnqueueTask& _enq_task;
2589 
2590 public:
2591   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2592     AbstractGangTask("Enqueue reference objects in parallel"),
2593     _enq_task(enq_task)
2594   { }
2595 
2596   virtual void work(int i) {
2597     _enq_task.work(i);
2598   }
2599 };
2600 
2601 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2602   assert(_workers != NULL, "Need parallel worker threads.");
2603 
2604   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2605 
2606   _g1h->set_par_threads(_active_workers);
2607   _workers->run_task(&enq_task_proxy);
2608   _g1h->set_par_threads(0);
2609 }
2610 
2611 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2612   ResourceMark rm;
2613   HandleMark   hm;
2614 
2615   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2616 
2617   // Is alive closure.
2618   G1CMIsAliveClosure g1_is_alive(g1h);
2619 
2620   // Inner scope to exclude the cleaning of the string and symbol
2621   // tables from the displayed time.
2622   {
2623     bool verbose = PrintGC && PrintGCDetails;
2624     if (verbose) {
2625       gclog_or_tty->put(' ');
2626     }
2627     TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);
2628 
2629     ReferenceProcessor* rp = g1h->ref_processor_cm();
2630 
2631     // See the comment in G1CollectedHeap::ref_processing_init()
2632     // about how reference processing currently works in G1.
2633 
2634     // Process weak references.
2635     rp->setup_policy(clear_all_soft_refs);
2636     assert(_markStack.isEmpty(), "mark stack should be empty");
2637 
2638     G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2639     G1CMDrainMarkingStackClosure
2640       g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2641 
2642     // We use the work gang from the G1CollectedHeap and we utilize all
2643     // the worker threads.
2644     int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
2645     active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2646 
2647     G1CMRefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2648                                               g1h->workers(), active_workers);
2649 
2650     if (rp->processing_is_mt()) {
2651       // Set the degree of MT here.  If the discovery is done MT, there
2652       // may have been a different number of threads doing the discovery
2653       // and a different number of discovered lists may have Ref objects.
2654       // That is OK as long as the Reference lists are balanced (see
2655       // balance_all_queues() and balance_queues()).
2656       rp->set_active_mt_degree(active_workers);
2657 
2658       rp->process_discovered_references(&g1_is_alive,
2659                                       &g1_keep_alive,
2660                                       &g1_drain_mark_stack,
2661                                       &par_task_executor);
2662 
2663       // The work routines of the parallel keep_alive and drain_marking_stack
2664       // will set the has_overflown flag if we overflow the global marking
2665       // stack.
2666     } else {
2667       rp->process_discovered_references(&g1_is_alive,
2668                                         &g1_keep_alive,
2669                                         &g1_drain_mark_stack,
2670                                         NULL);
2671     }
2672 
2673     assert(_markStack.overflow() || _markStack.isEmpty(),
2674             "mark stack should be empty (unless it overflowed)");
2675     if (_markStack.overflow()) {
2676       // Should have been done already when we tried to push an
2677       // entry on to the global mark stack. But let's do it again.
2678       set_has_overflown();
2679     }
2680 
2681     if (rp->processing_is_mt()) {
2682       assert(rp->num_q() == active_workers, "why not");
2683       rp->enqueue_discovered_references(&par_task_executor);
2684     } else {
2685       rp->enqueue_discovered_references();
2686     }
2687 
2688     rp->verify_no_references_recorded();
2689     assert(!rp->discovery_enabled(), "Post condition");
2690   }
2691 
2692   // Now clean up stale oops in StringTable
2693   StringTable::unlink(&g1_is_alive);
2694   // Clean up unreferenced symbols in symbol table.
2695   SymbolTable::unlink();
2696 }
2697 
2698 void ConcurrentMark::swapMarkBitMaps() {
2699   CMBitMapRO* temp = _prevMarkBitMap;
2700   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2701   _nextMarkBitMap  = (CMBitMap*)  temp;
2702 }
2703 
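     // Gang task used to complete the remaining marking work during the
     // remark pause. Each worker runs its CMTask to completion (or until
     // the global mark stack overflows).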
2704 class CMRemarkTask: public AbstractGangTask {
2705 private:
2706   ConcurrentMark *_cm;
2707 
2708 public:
2709   void work(int worker_i) {
2710     // Since all available tasks are actually started, we should
2711     // only proceed if we're supposed to be active.
2712     if ((size_t)worker_i < _cm->active_tasks()) {
2713       CMTask* task = _cm->task(worker_i);
2714       task->record_start_time();
2715       do {
2716         task->do_marking_step(1000000000.0 /* something very large */,
2717                               true /* do_stealing    */,
2718                               true /* do_termination */);
2719       } while (task->has_aborted() && !_cm->has_overflown());
2720       // If we overflow, then we do not want to restart. We instead
2721       // want to abort remark and do concurrent marking again.
2722       task->record_end_time();
2723     }
2724   }
2725 
2726   CMRemarkTask(ConcurrentMark* cm) :
2727     AbstractGangTask("Par Remark"), _cm(cm) { }
2728 };
2729 
2730 void ConcurrentMark::checkpointRootsFinalWork() {
2731   ResourceMark rm;
2732   HandleMark   hm;
2733   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2734 
2735   g1h->ensure_parsability(false);
2736 
2737   if (G1CollectedHeap::use_parallel_gc_threads()) {
2738     G1CollectedHeap::StrongRootsScope srs(g1h);
2739     // this is remark, so we'll use up all available threads
2740     int active_workers = ParallelGCThreads;
2741     set_phase(active_workers, false /* concurrent */);
2742 
2743     CMRemarkTask remarkTask(this);
2744     // We will start all available threads, even if we decide that the
2745     // active_workers will be fewer. The extra ones will just bail out
2746     // immediately.
2747     int n_workers = g1h->workers()->total_workers();
2748     g1h->set_par_threads(n_workers);
2749     g1h->workers()->run_task(&remarkTask);
2750     g1h->set_par_threads(0);
2751   } else {
2752     G1CollectedHeap::StrongRootsScope srs(g1h);
2753     // this is remark, so we'll use up all available threads
2754     int active_workers = 1;
2755     set_phase(active_workers, false /* concurrent */);
2756 
2757     CMRemarkTask remarkTask(this);
2758     // We will start all available threads, even if we decide that the
2759     // active_workers will be fewer. The extra ones will just bail out
2760     // immediately.
2761     remarkTask.work(0);
2762   }
2763   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2764   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2765 
2766   print_stats();
2767 
2768 #if VERIFY_OBJS_PROCESSED
2769   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2770     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2771                            _scan_obj_cl.objs_processed,
2772                            ThreadLocalObjQueue::objs_enqueued);
2773     guarantee(_scan_obj_cl.objs_processed ==
2774               ThreadLocalObjQueue::objs_enqueued,
2775               "Different number of objs processed and enqueued.");
2776   }
2777 #endif
2778 }
2779 
2780 #ifndef PRODUCT
2781 
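     // Prints each reference field of an object, annotating whether the
     // referenced object is outside the G1 reserved heap, above TAMS,
     // and/or marked, according to the given verification option.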
2782 class PrintReachableOopClosure: public OopClosure {
2783 private:
2784   G1CollectedHeap* _g1h;
2785   outputStream*    _out;
2786   VerifyOption     _vo;
2787   bool             _all;
2788 
2789 public:
2790   PrintReachableOopClosure(outputStream* out,
2791                            VerifyOption  vo,
2792                            bool          all) :
2793     _g1h(G1CollectedHeap::heap()),
2794     _out(out), _vo(vo), _all(all) { }
2795 
2796   void do_oop(narrowOop* p) { do_oop_work(p); }
2797   void do_oop(      oop* p) { do_oop_work(p); }
2798 
2799   template <class T> void do_oop_work(T* p) {
2800     oop         obj = oopDesc::load_decode_heap_oop(p);
2801     const char* str = NULL;
2802     const char* str2 = "";
2803 
2804     if (obj == NULL) {
2805       str = "";
2806     } else if (!_g1h->is_in_g1_reserved(obj)) {
2807       str = " O";
2808     } else {
2809       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2810       guarantee(hr != NULL, "invariant");
2811       bool over_tams = false;
2812       bool marked = false;
2813 
2814       switch (_vo) {
2815         case VerifyOption_G1UsePrevMarking:
2816           over_tams = hr->obj_allocated_since_prev_marking(obj);
2817           marked = _g1h->isMarkedPrev(obj);
2818           break;
2819         case VerifyOption_G1UseNextMarking:
2820           over_tams = hr->obj_allocated_since_next_marking(obj);
2821           marked = _g1h->isMarkedNext(obj);
2822           break;
2823         case VerifyOption_G1UseMarkWord:
2824           marked = obj->is_gc_marked();
2825           break;
2826         default:
2827           ShouldNotReachHere();
2828       }
2829 
2830       if (over_tams) {
2831         str = " >";
2832         if (marked) {
2833           str2 = " AND MARKED";
2834         }
2835       } else if (marked) {
2836         str = " M";
2837       } else {
2838         str = " NOT";
2839       }
2840     }
2841 
2842     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2843                    p, (void*) obj, str, str2);
2844   }
2845 };
2846 
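     // Prints an object (and, via PrintReachableOopClosure, its reference
     // fields) if it is above TAMS, marked, or if all objects are to be
     // printed.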
2847 class PrintReachableObjectClosure : public ObjectClosure {
2848 private:
2849   G1CollectedHeap* _g1h;
2850   outputStream*    _out;
2851   VerifyOption     _vo;
2852   bool             _all;
2853   HeapRegion*      _hr;
2854 
2855 public:
2856   PrintReachableObjectClosure(outputStream* out,
2857                               VerifyOption  vo,
2858                               bool          all,
2859                               HeapRegion*   hr) :
2860     _g1h(G1CollectedHeap::heap()),
2861     _out(out), _vo(vo), _all(all), _hr(hr) { }
2862 
2863   void do_object(oop o) {
2864     bool over_tams = false;
2865     bool marked = false;
2866 
2867     switch (_vo) {
2868       case VerifyOption_G1UsePrevMarking:
2869         over_tams = _hr->obj_allocated_since_prev_marking(o);
2870         marked = _g1h->isMarkedPrev(o);
2871         break;
2872       case VerifyOption_G1UseNextMarking:
2873         over_tams = _hr->obj_allocated_since_next_marking(o);
2874         marked = _g1h->isMarkedNext(o);
2875         break;
2876       case VerifyOption_G1UseMarkWord:
2877         marked = o->is_gc_marked();
2878         break;
2879       default:
2880         ShouldNotReachHere();
2881     }
2882     bool print_it = _all || over_tams || marked;
2883 
2884     if (print_it) {
2885       _out->print_cr(" "PTR_FORMAT"%s",
2886                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2887       PrintReachableOopClosure oopCl(_out, _vo, _all);
2888       o->oop_iterate(&oopCl);
2889     }
2890   }
2891 };
2892 
2893 class PrintReachableRegionClosure : public HeapRegionClosure {
2894 private:
2895   outputStream* _out;
2896   VerifyOption  _vo;
2897   bool          _all;
2898 
2899 public:
2900   bool doHeapRegion(HeapRegion* hr) {
2901     HeapWord* b = hr->bottom();
2902     HeapWord* e = hr->end();
2903     HeapWord* t = hr->top();
2904     HeapWord* p = NULL;
2905 
2906     switch (_vo) {
2907       case VerifyOption_G1UsePrevMarking:
2908         p = hr->prev_top_at_mark_start();
2909         break;
2910       case VerifyOption_G1UseNextMarking:
2911         p = hr->next_top_at_mark_start();
2912         break;
2913       case VerifyOption_G1UseMarkWord:
2914         // When we are verifying marking using the mark word,
2915         // TAMS has no relevance.
2916         assert(p == NULL, "post-condition");
2917         break;
2918       default:
2919         ShouldNotReachHere();
2920     }
2921     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2922                    "TAMS: "PTR_FORMAT, b, e, t, p);
2923     _out->cr();
2924 
2925     HeapWord* from = b;
2926     HeapWord* to   = t;
2927 
2928     if (to > from) {
2929       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2930       _out->cr();
2931       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2932       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2933       _out->cr();
2934     }
2935 
2936     return false;
2937   }
2938 
2939   PrintReachableRegionClosure(outputStream* out,
2940                               VerifyOption  vo,
2941                               bool          all) :
2942     _out(out), _vo(vo), _all(all) { }
2943 };
2944 
2945 static const char* verify_option_to_tams(VerifyOption vo) {
2946   switch (vo) {
2947     case VerifyOption_G1UsePrevMarking:
2948       return "PTAMS";
2949     case VerifyOption_G1UseNextMarking:
2950       return "NTAMS";
2951     default:
2952       return "NONE";
2953   }
2954 }
2955 
2956 void ConcurrentMark::print_reachable(const char* str,
2957                                      VerifyOption vo,
2958                                      bool all) {
2959   gclog_or_tty->cr();
2960   gclog_or_tty->print_cr("== Doing heap dump... ");
2961 
2962   if (G1PrintReachableBaseFile == NULL) {
2963     gclog_or_tty->print_cr("  #### error: no base file defined");
2964     return;
2965   }
2966 
2967   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2968       (JVM_MAXPATHLEN - 1)) {
2969     gclog_or_tty->print_cr("  #### error: file name too long");
2970     return;
2971   }
2972 
2973   char file_name[JVM_MAXPATHLEN];
2974   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2975   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2976 
2977   fileStream fout(file_name);
2978   if (!fout.is_open()) {
2979     gclog_or_tty->print_cr("  #### error: could not open file");
2980     return;
2981   }
2982 
2983   outputStream* out = &fout;
2984   out->print_cr("-- USING %s", verify_option_to_tams(vo));
2985   out->cr();
2986 
2987   out->print_cr("--- ITERATING OVER REGIONS");
2988   out->cr();
2989   PrintReachableRegionClosure rcl(out, vo, all);
2990   _g1h->heap_region_iterate(&rcl);
2991   out->cr();
2992 
2993   gclog_or_tty->print_cr("  done");
2994   gclog_or_tty->flush();
2995 }
2996 
2997 #endif // PRODUCT
2998 
2999 // This note applies to drainAllSATBBuffers() and the code between
3000 // here and that method. In the future we could reuse a task to do this
3001 // work during an evacuation pause (since tasks are not active then and
3002 // can be claimed during an evacuation pause). This was a late change to
3003 // the code and is currently not being taken advantage of.
3004 
3005 class CMGlobalObjectClosure : public ObjectClosure {
3006 private:
3007   ConcurrentMark* _cm;
3008 
3009 public:
3010   void do_object(oop obj) {
3011     _cm->deal_with_reference(obj);
3012   }
3013 
3014   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
3015 };
3016 
3017 void ConcurrentMark::deal_with_reference(oop obj) {
3018   if (verbose_high()) {
3019     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
3020                            (void*) obj);
3021   }
3022 
3023   HeapWord* objAddr = (HeapWord*) obj;
3024   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
3025   if (_g1h->is_in_g1_reserved(objAddr)) {
3026     assert(obj != NULL, "null check is implicit");
3027     if (!_nextMarkBitMap->isMarked(objAddr)) {
3028       // Only get the containing region if the object is not marked on the
3029       // bitmap (otherwise, it's a waste of time since we won't do
3030       // anything with it).
3031       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
3032       if (!hr->obj_allocated_since_next_marking(obj)) {
3033         if (verbose_high()) {
3034           gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
3035                                  "marked", (void*) obj);
3036         }
3037 
3038         // we need to mark it first
3039         if (_nextMarkBitMap->parMark(objAddr)) {
3040           // Update the task specific count data for obj
3041           add_to_count_data_for(obj, hr, 0 /* worker_i */);
3042 
3043           // No OrderAccess::store_load() is needed. It is implicit in the
3044           // CAS done in parMark(objAddr) above.
3045           HeapWord* finger = _finger;
3046           if (objAddr < finger) {
3047             if (verbose_high()) {
3048               gclog_or_tty->print_cr("[global] below the global finger "
3049                                      "("PTR_FORMAT"), pushing it", finger);
3050             }
3051             if (!mark_stack_push(obj)) {
3052               if (verbose_low()) {
3053                 gclog_or_tty->print_cr("[global] global stack overflow during "
3054                                        "deal_with_reference");
3055               }
3056             }
3057           }
3058         }
3059       }
3060     }
3061   }
3062 }
3063 
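     // Processes all completed SATB buffers and then the active,
     // partially-filled per-thread buffers, applying deal_with_reference()
     // to each recorded object via CMGlobalObjectClosure. Only called
     // during an evacuation pause (see the comment in the body).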
3064 void ConcurrentMark::drainAllSATBBuffers() {
3065   CMGlobalObjectClosure oc(this);
3066   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3067   satb_mq_set.set_closure(&oc);
3068 
3069   while (satb_mq_set.apply_closure_to_completed_buffer()) {
3070     if (verbose_medium()) {
3071       gclog_or_tty->print_cr("[global] processed an SATB buffer");
3072     }
3073   }
3074 
3075   // no need to check whether we should do this, as this is only
3076   // called during an evacuation pause
3077   satb_mq_set.iterate_closure_all_threads();
3078 
3079   satb_mq_set.set_closure(NULL);
3080   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
3081 }
3082 
3083 void ConcurrentMark::markPrev(oop p) {
3084   // Note we are overriding the read-only view of the prev map here, via
3085   // the cast.
3086   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
3087 }
3088 
3089 void ConcurrentMark::clear(oop p) {
3090   assert(p != NULL && p->is_oop(), "expected an oop");
3091   HeapWord* addr = (HeapWord*)p;
3092   assert(addr >= _nextMarkBitMap->startWord() &&
3093          addr < _nextMarkBitMap->endWord(), "in a region");
3094 
3095   _nextMarkBitMap->clear(addr);
3096 }
3097 
3098 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
3099   // Note we are overriding the read-only view of the prev map here, via
3100   // the cast.
3101   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
3102   _nextMarkBitMap->clearRange(mr);
3103 }
3104 
3105 HeapRegion*
3106 ConcurrentMark::claim_region(int task_num) {
3107   // "checkpoint" the finger
3108   HeapWord* finger = _finger;
3109 
3110   // _heap_end will not change underneath our feet; it only changes at
3111   // yield points.
3112   while (finger < _heap_end) {
3113     assert(_g1h->is_in_g1_reserved(finger), "invariant");
3114 
3115     // Note on how this code handles humongous regions. In the
3116     // normal case the finger will reach the start of a "starts
3117     // humongous" (SH) region. Its end will either be the end of the
3118     // last "continues humongous" (CH) region in the sequence, or the
3119     // standard end of the SH region (if the SH is the only region in
3120     // the sequence). That way claim_region() will skip over the CH
3121     // regions. However, there is a subtle race between a CM thread
3122     // executing this method and a mutator thread doing a humongous
3123     // object allocation. The two are not mutually exclusive as the CM
3124     // thread does not need to hold the Heap_lock when it gets
3125     // here. So there is a chance that claim_region() will come across
3126     // a free region that's in the process of becoming a SH or a CH
3127     // region. In the former case, it will either
3128     //   a) Miss the update to the region's end, in which case it will
3129     //      visit every subsequent CH region, will find their bitmaps
3130     //      empty, and do nothing, or
3131     //   b) Will observe the update of the region's end (in which case
3132     //      it will skip the subsequent CH regions).
3133     // If it comes across a region that suddenly becomes CH, the
3134     // scenario will be similar to b). So, the race between
3135     // claim_region() and a humongous object allocation might force us
3136     // to do a bit of unnecessary work (due to some unnecessary bitmap
3137     // iterations) but it should not introduce any correctness issues.
3138     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
3139     HeapWord*   bottom        = curr_region->bottom();
3140     HeapWord*   end           = curr_region->end();
3141     HeapWord*   limit         = curr_region->next_top_at_mark_start();
3142 
3143     if (verbose_low()) {
3144       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
3145                              "["PTR_FORMAT", "PTR_FORMAT"), "
3146                              "limit = "PTR_FORMAT,
3147                              task_num, curr_region, bottom, end, limit);
3148     }
3149 
3150     // Is the gap between reading the finger and doing the CAS too long?
3151     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
3152     if (res == finger) {
3153       // we succeeded
3154 
3155       // notice that _finger == end cannot be guaranteed here since
3156       // someone else might have moved the finger even further
3157       assert(_finger >= end, "the finger should have moved forward");
3158 
3159       if (verbose_low()) {
3160         gclog_or_tty->print_cr("[%d] we were successful with region = "
3161                                PTR_FORMAT, task_num, curr_region);
3162       }
3163 
3164       if (limit > bottom) {
3165         if (verbose_low()) {
3166           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
3167                                  "returning it ", task_num, curr_region);
3168         }
3169         return curr_region;
3170       } else {
3171         assert(limit == bottom,
3172                "the region limit should be at bottom");
3173         if (verbose_low()) {
3174           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
3175                                  "returning NULL", task_num, curr_region);
3176         }
3177         // we return NULL and the caller should try calling
3178         // claim_region() again.
3179         return NULL;
3180       }
3181     } else {
3182       assert(_finger > finger, "the finger should have moved forward");
3183       if (verbose_low()) {
3184         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
3185                                "global finger = "PTR_FORMAT", "
3186                                "our finger = "PTR_FORMAT,
3187                                task_num, _finger, finger);
3188       }
3189 
3190       // read it again
3191       finger = _finger;
3192     }
3193   }
3194 
3195   return NULL;
3196 }
3197 
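     // Drops any task-local aborted region that lies in the collection
     // set (such a region is about to be evacuated, so the saved range
     // would become stale). Returns true if at least one was dropped.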
3198 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
3199   bool result = false;
3200   for (int i = 0; i < (int)_max_task_num; ++i) {
3201     CMTask* the_task = _tasks[i];
3202     MemRegion mr = the_task->aborted_region();
3203     if (mr.start() != NULL) {
3204       assert(mr.end() != NULL, "invariant");
3205       assert(mr.word_size() > 0, "invariant");
3206       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
3207       assert(hr != NULL, "invariant");
3208       if (hr->in_collection_set()) {
3209         // The region points into the collection set
3210         the_task->set_aborted_region(MemRegion());
3211         result = true;
3212       }
3213     }
3214   }
3215   return result;
3216 }
3217 
3218 bool ConcurrentMark::has_aborted_regions() {
3219   for (int i = 0; i < (int)_max_task_num; ++i) {
3220     CMTask* the_task = _tasks[i];
3221     MemRegion mr = the_task->aborted_region();
3222     if (mr.start() != NULL) {
3223       assert(mr.end() != NULL, "invariant");
3224       assert(mr.word_size() > 0, "invariant");
3225       return true;
3226     }
3227   }
3228   return false;
3229 }
3230 
3231 void ConcurrentMark::oops_do(OopClosure* cl) {
3232   if (_markStack.size() > 0 && verbose_low()) {
3233     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
3234                            "size = %d", _markStack.size());
3235   }
3236   // we first iterate over the contents of the mark stack...
3237   _markStack.oops_do(cl);
3238 
3239   for (int i = 0; i < (int)_max_task_num; ++i) {
3240     OopTaskQueue* queue = _task_queues->queue((int)i);
3241 
3242     if (queue->size() > 0 && verbose_low()) {
3243       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
3244                              "size = %d", i, queue->size());
3245     }
3246 
3247     // ...then over the contents of the all the task queues.
3248     queue->oops_do(cl);
3249   }
3250 
3251   // Invalidate any entries in the region stack that point into
3252   // the collection set.
3253   if (_regionStack.invalidate_entries_into_cset()) {
3254     // otherwise, any gray objects copied during the evacuation pause
3255     // might not be visited.
3256     assert(_should_gray_objects, "invariant");
3257   }
3258 
3259   // Invalidate any aborted regions, recorded in the individual CM
3260   // tasks, that point into the collection set.
3261   if (invalidate_aborted_regions_in_cset()) {
3262     // otherwise, any gray objects copied during the evacuation pause
3263     // might not be visited.
3264     assert(_should_gray_objects, "invariant");
3265   }
3266 
3267 }
3268 
3269 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
3270   _markStack.setEmpty();
3271   _markStack.clear_overflow();
3272   _regionStack.setEmpty();
3273   _regionStack.clear_overflow();
3274   if (clear_overflow) {
3275     clear_has_overflown();
3276   } else {
3277     assert(has_overflown(), "pre-condition");
3278   }
3279   _finger = _heap_start;
3280 
3281   for (int i = 0; i < (int)_max_task_num; ++i) {
3282     OopTaskQueue* queue = _task_queues->queue(i);
3283     queue->set_empty();
3284     // Clear any partial regions from the CMTasks
3285     _tasks[i]->clear_aborted_region();
3286   }
3287 }
3288 
3289 // Clear the per-worker arrays used to store the per-region counting data
3290 void ConcurrentMark::clear_all_count_data() {
3291   assert(SafepointSynchronize::is_at_safepoint() ||
3292          !Universe::is_fully_initialized(), "must be");
3293 
3294   int max_regions = _g1h->max_regions();
3295   
3296   assert(_max_task_num != 0, "uninitialized");
3297   assert(_count_card_bitmaps != NULL, "uninitialized");
3298   assert(_count_marked_bytes != NULL, "uninitialized");
3299 
3300   for (int i = 0; i < _max_task_num; i += 1) {
3301     BitMap& task_card_bm = count_card_bitmap_for(i);
3302     size_t* marked_bytes_array = count_marked_bytes_for(i);
3303 
3304     assert(task_card_bm.size() == _card_bm.size(), "size mismatch");
3305     assert(marked_bytes_array != NULL, "uninitialized");
3306 
3307     for (int j = 0; j < max_regions; j++) {
3308       marked_bytes_array[j] = 0;
3309     }
3310     task_card_bm.clear();
3311   }
3312 }
3313 
3314 // Adds the given region to the counting data structures
3315 // for the given task id.
3316 void ConcurrentMark::add_to_count_data_for(MemRegion mr,
3317                                            HeapRegion* hr,
3318                                            int worker_i) {
3319   G1CollectedHeap* g1h = _g1h;
3320   HeapWord* start = mr.start();
3321   HeapWord* last = mr.last();
3322   size_t index = hr->hrs_index();
3323 
3324   assert(!hr->continuesHumongous(), "should not be HC region");
3325   assert(hr == g1h->heap_region_containing(start), "sanity");
3326   assert(hr == g1h->heap_region_containing(mr.last()), "sanity");
3327   assert(0 <= worker_i && worker_i < _max_task_num, "oob");
3328 
3329   BitMap& task_card_bm = count_card_bitmap_for(worker_i);
3330   size_t* marked_bytes_array = count_marked_bytes_for(worker_i);
3331 
3332   // Below, the term "card num" means the result of shifting an address
3333   // by the card shift -- address 0 corresponds to card number 0.  One
3334   // must subtract the card num of the bottom of the heap to obtain a
3335   // card table index.
3336 
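       // As a purely illustrative example, with the default 512-byte cards
       // (card_shift == 9) and a heap reserved at 0x40000000, an address of
       // 0x40000a00 has card num 0x200005 and card table index 5.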
3337   intptr_t start_card_num = 
3338     intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
3339   intptr_t last_card_num =
3340     intptr_t(uintptr_t(last) >> CardTableModRefBS::card_shift);
3341 
3342   intptr_t bottom_card_num = 
3343     intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> 
3344         CardTableModRefBS::card_shift);
3345 
3346   BitMap::idx_t start_idx = start_card_num - bottom_card_num;
3347   BitMap::idx_t last_idx = last_card_num - bottom_card_num;
3348   
3349   // The card bitmap is task/worker specific => no need to use 'par' routines.
3350   // Inclusive bit range [start_idx, last_idx]. set_range treats its end
3351   // index as exclusive, so we also explicitly set the bit for last_idx.
3352   // Passing last_idx+1 to set_range would work in most cases but could
3353   // trip an out-of-bounds (OOB) assertion.
3354 
3355   if ((last_idx - start_idx) > 0) {
3356     task_card_bm.set_range(start_idx, last_idx);
3357   }
3358   task_card_bm.set_bit(last_idx);
3359   
3360   // Add to the task local marked bytes for this region.
3361   marked_bytes_array[index] += mr.byte_size();
3362 }
3363 
3364 void ConcurrentMark::add_to_count_data_for(oop obj, HeapRegion* hr, int worker_i) {
3365   MemRegion mr((HeapWord*)obj, obj->size());
3366   add_to_count_data_for(mr, hr, worker_i);
3367 }
3368 
3369 void ConcurrentMark::add_to_count_data_for(MemRegion mr, int worker_i) {
3370   HeapRegion* hr = _g1h->heap_region_containing(mr.start());
3371   add_to_count_data_for(mr, hr, worker_i);
3372 }
3373 
3374 void ConcurrentMark::add_to_count_data_for(oop obj, int worker_i) {
3375   MemRegion mr((HeapWord*)obj, obj->size());
3376   add_to_count_data_for(mr, worker_i);
3377 }
3378 
3379 // Updates the counting data with liveness info recorded for a
3380 // region (typically a GCLab).
3381 void ConcurrentMark::add_to_count_data_for_region(MemRegion lab_mr,
3382                                                   BitMap* lab_card_bm,
3383                                                   intptr_t lab_bottom_card_num,
3384                                                   size_t lab_marked_bytes,
3385                                                   int worker_i) {
3386   HeapRegion* hr = _g1h->heap_region_containing(lab_mr.start());
3387 
3388   BitMap& task_card_bm = count_card_bitmap_for(worker_i);
3389   size_t* marked_bytes_array = count_marked_bytes_for(worker_i);
3390 
3391   // Below, the term "card num" means the result of shifting an address
3392   // by the card shift -- address 0 corresponds to card number 0.  One
3393   // must subtract the card num of the bottom of the heap to obtain a
3394   // card table index.
3395   
3396   intptr_t heap_bottom_card_num = 
3397     intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> 
3398         CardTableModRefBS::card_shift);
3399 
3400   assert(intptr_t(uintptr_t(lab_mr.start()) >> CardTableModRefBS::card_shift) == lab_bottom_card_num,
3401          "sanity");
3402 
3403   // We have to map the indices of set bits in lab_card_bm, using
3404   // lab_bottom_card_num, to indices the card bitmap for the given task.
3405 
3406   BitMap::idx_t end_idx = lab_card_bm->size();
3407   BitMap::idx_t start_idx = lab_card_bm->get_next_one_offset(0, end_idx);
3408   while (start_idx < end_idx) {
3409     assert(lab_card_bm->at(start_idx), "should be set");
3410 
3411     intptr_t lab_card_num = lab_bottom_card_num + start_idx;
3412     BitMap::idx_t card_bm_idx = lab_card_num - heap_bottom_card_num;
3413 
3414     task_card_bm.set_bit(card_bm_idx);
3415 
3416     // Get the offset of the next set bit
3417     start_idx = lab_card_bm->get_next_one_offset(start_idx+1, end_idx);
3418   }
3419 
3420   // Now add to the marked bytes
3421   marked_bytes_array[hr->hrs_index()] += lab_marked_bytes;
3422 }
3423 
3424 void ConcurrentMark::clear_count_data_for_heap_region(HeapRegion* hr) {
3425   // Clears the count data for the given region from _all_ of
3426   // the per-task counting data structures.
3427 
3428   MemRegion used_region = hr->used_region();
3429   HeapWord* start = used_region.start();
3430   HeapWord* last = used_region.last();
3431   size_t hr_index = hr->hrs_index();
3432 
3433   intptr_t bottom_card_num =
3434       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
3435                CardTableModRefBS::card_shift);
3436   
3437   intptr_t start_card_num =
3438     intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
3439   intptr_t last_card_num =
3440     intptr_t(uintptr_t(last) >> CardTableModRefBS::card_shift);
3441   
3442   BitMap::idx_t start_idx = start_card_num - bottom_card_num;
3443   BitMap::idx_t last_idx = last_card_num - bottom_card_num;
3444 
3445   size_t used_region_bytes = used_region.byte_size();
3446   size_t marked_bytes = 0;
3447 
3448   for (int i=0; i < _max_task_num; i += 1) {
3449     BitMap& task_card_bm = count_card_bitmap_for(i);
3450     size_t* marked_bytes_array = count_marked_bytes_for(i);
3451 
3452     marked_bytes += marked_bytes_array[hr_index];
3453     // clear the amount of marked bytes in the task array for this
3454     // region
3455     marked_bytes_array[hr_index] = 0;
3456     
3457     // Clear the inclusive range [start_idx, last_idx] from the
3458     // card bitmap. The clear_range routine is exclusive so we
3459     // need to also explicitly clear the bit at last_idx.
3460     // Passing last_idx+1 to the clear_range would work in
3461     // most cases but could trip an OOB assertion.
3462 
3463     if ((last_idx - start_idx) > 0) {
3464       task_card_bm.clear_range(start_idx, last_idx);
3465     }
3466     task_card_bm.clear_bit(last_idx);
3467   }
3468   // We could assert here that marked_bytes == used_region_bytes
3469 }
3470 
3471 void ConcurrentMark::print_stats() {
3472   if (verbose_stats()) {
3473     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3474     for (size_t i = 0; i < _active_tasks; ++i) {
3475       _tasks[i]->print_stats();
3476       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3477     }
3478   }
3479 }
3480 
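     // Used when completing marking over the collection set: unmarked objects
     // inside the collection set are marked in the next mark bitmap (and the
     // per-task counting data updated) and pushed on a small, fixed-size local
     // mark stack (_ms); references outside the collection set are handed to
     // ConcurrentMark::deal_with_reference().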
3481 class CSMarkOopClosure: public OopClosure {
3482   friend class CSMarkBitMapClosure;
3483 
3484   G1CollectedHeap* _g1h;
3485   CMBitMap*        _bm;
3486   ConcurrentMark*  _cm;
3487   oop*             _ms;
3488   jint*            _array_ind_stack;
3489   int              _ms_size;
3490   int              _ms_ind;
3491   int              _array_increment;
3492 
3493   bool push(oop obj, int arr_ind = 0) {
3494     if (_ms_ind == _ms_size) {
3495       gclog_or_tty->print_cr("Mark stack is full.");
3496       return false;
3497     }
3498     _ms[_ms_ind] = obj;
3499     if (obj->is_objArray()) {
3500       _array_ind_stack[_ms_ind] = arr_ind;
3501     }
3502     _ms_ind++;
3503     return true;
3504   }
3505 
3506   oop pop() {
3507     if (_ms_ind == 0) {
3508       return NULL;
3509     } else {
3510       _ms_ind--;
3511       return _ms[_ms_ind];
3512     }
3513   }
3514 
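       // Drains the local mark stack. For an object array entry, arr_ind is
       // the index of the next element to scan; if more than _array_increment
       // elements remain, the array is re-pushed with the remainder before the
       // current chunk of up to _array_increment elements is scanned.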
3515   template <class T> bool drain() {
3516     while (_ms_ind > 0) {
3517       oop obj = pop();
3518       assert(obj != NULL, "Since index was non-zero.");
3519       if (obj->is_objArray()) {
3520         jint arr_ind = _array_ind_stack[_ms_ind];
3521         objArrayOop aobj = objArrayOop(obj);
3522         jint len = aobj->length();
3523         jint next_arr_ind = arr_ind + _array_increment;
3524         if (next_arr_ind < len) {
3525           push(obj, next_arr_ind);
3526         }
3527         // Now process this portion of this one.
3528         int lim = MIN2(next_arr_ind, len);
3529         for (int j = arr_ind; j < lim; j++) {
3530           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
3531         }
3532 
3533       } else {
3534         obj->oop_iterate(this);
3535       }
3536       if (abort()) return false;
3537     }
3538     return true;
3539   }
3540 
3541 public:
3542   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
3543     _g1h(G1CollectedHeap::heap()),
3544     _cm(cm),
3545     _bm(cm->nextMarkBitMap()),
3546     _ms_size(ms_size), _ms_ind(0),
3547     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
3548     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
3549     _array_increment(MAX2(ms_size/8, 16))
3550   {}
3551 
3552   ~CSMarkOopClosure() {
3553     FREE_C_HEAP_ARRAY(oop, _ms);
3554     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
3555   }
3556 
3557   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
3558   virtual void do_oop(      oop* p) { do_oop_work(p); }
3559 
3560   template <class T> void do_oop_work(T* p) {
3561     T heap_oop = oopDesc::load_heap_oop(p);
3562     if (oopDesc::is_null(heap_oop)) return;
3563     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
3564     if (obj->is_forwarded()) {
3565       // If the object has already been forwarded, we have to make sure
3566       // that it's marked.  So follow the forwarding pointer.  Note that
3567       // this does the right thing for self-forwarding pointers in the
3568       // evacuation failure case.
3569       obj = obj->forwardee();
3570     }
3571     HeapRegion* hr = _g1h->heap_region_containing(obj);
3572     if (hr != NULL) {
3573       if (hr->in_collection_set()) {
3574         if (_g1h->is_obj_ill(obj)) {
3575           _bm->mark((HeapWord*)obj);
3576           // Update the task specific count data for object
3577           _cm->add_to_count_data_for(obj, hr, 0 /* worker_i */);
3578 
3579           if (!push(obj)) {
3580             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
3581             set_abort();
3582           }
3583         }
3584       } else {
3585         // Outside the collection set; we need to gray it
3586         _cm->deal_with_reference(obj);
3587       }
3588     }
3589   }
3590 };
3591 
3592 class CSMarkBitMapClosure: public BitMapClosure {
3593   G1CollectedHeap* _g1h;
3594   CMBitMap*        _bitMap;
3595   ConcurrentMark*  _cm;
3596   CSMarkOopClosure _oop_cl;
3597 public:
3598   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
3599     _g1h(G1CollectedHeap::heap()),
3600     _bitMap(cm->nextMarkBitMap()),
3601     _oop_cl(cm, ms_size)
3602   {}
3603 
3604   ~CSMarkBitMapClosure() {}
3605 
3606   bool do_bit(size_t offset) {
3607     // convert offset into a HeapWord*
3608     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
3609     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
3610            "address out of range");
3611     assert(_bitMap->isMarked(addr), "tautology");
3612     oop obj = oop(addr);
3613     if (!obj->is_forwarded()) {
3614       if (!_oop_cl.push(obj)) return false;
3615       if (UseCompressedOops) {
3616         if (!_oop_cl.drain<narrowOop>()) return false;
3617       } else {
3618         if (!_oop_cl.drain<oop>()) return false;
3619       }
3620     }
3621     // Otherwise...
3622     return true;
3623   }
3624 };
3625 
3626 
3627 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
3628   CMBitMap* _bm;
3629   CSMarkBitMapClosure _bit_cl;
3630   enum SomePrivateConstants {
3631     MSSize = 1000
3632   };
3633   bool _completed;
3634 public:
3635   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
3636     _bm(cm->nextMarkBitMap()),
3637     _bit_cl(cm, MSSize),
3638     _completed(true)
3639   {}
3640 
3641   ~CompleteMarkingInCSHRClosure() {}
3642 
3643   bool doHeapRegion(HeapRegion* r) {
3644     if (!r->evacuation_failed()) {
3645       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
3646       if (!mr.is_empty()) {
3647         if (!_bm->iterate(&_bit_cl, mr)) {
3648           _completed = false;
3649           return true;
3650         }
3651       }
3652     }
3653     return false;
3654   }
3655 
3656   bool completed() { return _completed; }
3657 };
3658 
3659 class ClearMarksInHRClosure: public HeapRegionClosure {
3660   ConcurrentMark* _cm;
3661   CMBitMap* _bm;
3662 public:
3663   ClearMarksInHRClosure(ConcurrentMark* cm, CMBitMap* bm):
3664     _cm(cm), _bm(bm)
3665   { }
3666 
3667   bool doHeapRegion(HeapRegion* r) {
3668     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
3669       _bm->clearRange(r->used_region());
3670       // Need to remove values from the count info
3671       _cm->clear_count_data_for_heap_region(r);
3672     }
3673     return false;
3674   }
3675 };
3676 
3677 void ConcurrentMark::complete_marking_in_collection_set() {
3678   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
3679 
3680   if (!g1h->mark_in_progress()) {
3681     g1h->g1_policy()->record_mark_closure_time(0.0);
3682     return;
3683   }
3684 
3685   int i = 1;
3686   double start = os::elapsedTime();
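       // Keep iterating over the collection set until a complete pass is
       // made: the bitmap iteration is abandoned (completed() == false) if
       // the closure's local mark stack fills up, in which case we retry.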
3687   while (true) {
3688     i++;
3689     CompleteMarkingInCSHRClosure cmplt(this);
3690     g1h->collection_set_iterate(&cmplt);
3691     if (cmplt.completed()) break;
3692   }
3693   double end_time = os::elapsedTime();
3694   double elapsed_time_ms = (end_time - start) * 1000.0;
3695   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3696 
3697   ClearMarksInHRClosure clr(this, nextMarkBitMap());
3698   g1h->collection_set_iterate(&clr);
3699 }
3700 
3701 // The next two methods deal with the following optimisation. Some
3702 // objects are gray by being marked and located above the finger. If
3703 // they are copied below the finger during an evacuation pause, then
3704 // they need to be pushed on the stack. The observation is that, if
3705 // there are no regions in the collection set located above the
3706 // finger, then the above cannot happen, hence we do not need to
3707 // explicitly gray any objects when copying them to below the
3708 // finger. The global stack will be scanned to ensure that, if it
3709 // points to objects being copied, it will update their
3710 // location. There is a tricky situation with the gray objects in the
3711 // region stack that are being copied, however. See the comment in
3712 // newCSet().
3713 
3714 void ConcurrentMark::newCSet() {
3715   if (!concurrent_marking_in_progress()) {
3716     // nothing to do if marking is not in progress
3717     return;
3718   }
3719 
3720   // find what the lowest finger is among the global and local fingers
3721   _min_finger = _finger;
3722   for (int i = 0; i < (int)_max_task_num; ++i) {
3723     CMTask* task = _tasks[i];
3724     HeapWord* task_finger = task->finger();
3725     if (task_finger != NULL && task_finger < _min_finger) {
3726       _min_finger = task_finger;
3727     }
3728   }
3729 
3730   _should_gray_objects = false;
3731 
3732   // This fixes a very subtle and frustrating bug. It might be the case
3733   // that, during an evacuation pause, heap regions that contain
3734   // objects that are gray (by being in regions contained in the
3735   // region stack) are included in the collection set. Since such gray
3736   // objects will be moved, and because it's not easy to redirect
3737   // region stack entries to point to a new location (because objects
3738   // in one region might be scattered to multiple regions after they
3739   // are copied), one option is to ensure that all marked objects
3740   // copied during a pause are pushed on the stack. Notice, however,
3741   // that this problem can only happen when the region stack is not
3742   // empty during an evacuation pause. So, we make the fix a bit less
3743   // conservative and ensure that regions are pushed on the stack,
3744   // irrespective whether all collection set regions are below the
3745   // finger, if the region stack is not empty. This is expected to be
3746   // a rare case, so I don't think it's necessary to be smarter about it.
3747   if (!region_stack_empty() || has_aborted_regions()) {
3748     _should_gray_objects = true;
3749   }
3750 }
3751 
3752 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3753   if (!concurrent_marking_in_progress()) return;
3754 
3755   HeapWord* region_end = hr->end();
3756   if (region_end > _min_finger) {
3757     _should_gray_objects = true;
3758   }
3759 }
3760 
3761 // Resets the region fields of active CMTasks whose values point
3762 // into the collection set.
3763 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3764   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3765   assert(parallel_marking_threads() <= _max_task_num, "sanity");
3766 
3767   for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3768     CMTask* task = _tasks[i];
3769     HeapWord* task_finger = task->finger();
3770     if (task_finger != NULL) {
3771       assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3772       HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3773       if (finger_region->in_collection_set()) {
3774         // The task's current region is in the collection set.
3775         // This region will be evacuated in the current GC and
3776         // the region fields in the task will be stale.
3777         task->giveup_current_region();
3778       }
3779     }
3780   }
3781 }
3782 
3783 // abandon current marking iteration due to a Full GC
3784 void ConcurrentMark::abort() {
3785   // Clear all marks to force marking thread to do nothing
3786   _nextMarkBitMap->clearAll();
3787   // Empty mark stack
3788   clear_marking_state();
3789   for (int i = 0; i < (int)_max_task_num; ++i) {
3790     _tasks[i]->clear_region_fields();
3791   }
3792   _has_aborted = true;
3793 
3794   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3795   satb_mq_set.abandon_partial_marking();
3796   // This can be called either during or outside marking, we'll read
3797   // the expected_active value from the SATB queue set.
3798   satb_mq_set.set_active_all_threads(
3799                                  false, /* new active value */
3800                                  satb_mq_set.is_active() /* expected_active */);
3801 }
3802 
3803 static void print_ms_time_info(const char* prefix, const char* name,
3804                                NumberSeq& ns) {
3805   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3806                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3807   if (ns.num() > 0) {
3808     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3809                            prefix, ns.sd(), ns.maximum());
3810   }
3811 }
3812 
3813 void ConcurrentMark::print_summary_info() {
3814   gclog_or_tty->print_cr(" Concurrent marking:");
3815   print_ms_time_info("  ", "init marks", _init_times);
3816   print_ms_time_info("  ", "remarks", _remark_times);
3817   {
3818     print_ms_time_info("     ", "final marks", _remark_mark_times);
3819     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3820 
3821   }
3822   print_ms_time_info("  ", "cleanups", _cleanup_times);
3823   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3824                          _total_counting_time,
3825                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3826                           (double)_cleanup_times.num()
3827                          : 0.0));
3828   if (G1ScrubRemSets) {
3829     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3830                            _total_rs_scrub_time,
3831                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3832                             (double)_cleanup_times.num()
3833                            : 0.0));
3834   }
3835   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3836                          (_init_times.sum() + _remark_times.sum() +
3837                           _cleanup_times.sum())/1000.0);
3838   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3839                 "(%8.2f s marking).",
3840                 cmThread()->vtime_accum(),
3841                 cmThread()->vtime_mark_accum());
3842 }
3843 
3844 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3845   _parallel_workers->print_worker_threads_on(st);
3846 }
3847 
3848 // Closures
3849 // XXX: there seems to be a lot of code duplication here;
3850 // should refactor and consolidate the shared code.
3851 
3855 // We take a break if someone is trying to stop the world.
3856 bool ConcurrentMark::do_yield_check(int worker_i) {
3857   if (should_yield()) {
3858     if (worker_i == 0) {
3859       _g1h->g1_policy()->record_concurrent_pause();
3860     }
3861     cmThread()->yield();
3862     if (worker_i == 0) {
3863       _g1h->g1_policy()->record_concurrent_pause_end();
3864     }
3865     return true;
3866   } else {
3867     return false;
3868   }
3869 }
3870 
3871 bool ConcurrentMark::should_yield() {
3872   return cmThread()->should_yield();
3873 }
3874 
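     // Returns whether the card spanning address p is set in the global
     // card bitmap: the byte offset of p from the bottom of the reserved
     // heap, shifted by the card size, is the index into _card_bm.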
3875 bool ConcurrentMark::containing_card_is_marked(void* p) {
3876   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3877   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3878 }
3879 
3880 bool ConcurrentMark::containing_cards_are_marked(void* start,
3881                                                  void* last) {
3882   return containing_card_is_marked(start) &&
3883          containing_card_is_marked(last);
3884 }
3885 
3886 #ifndef PRODUCT
3887 // for debugging purposes
3888 void ConcurrentMark::print_finger() {
3889   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3890                          _heap_start, _heap_end, _finger);
3891   for (int i = 0; i < (int) _max_task_num; ++i) {
3892     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3893   }
3894   gclog_or_tty->print_cr("");
3895 }
3896 #endif
3897 
3898 void CMTask::scan_object(oop obj) {
3899   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3900 
3901   if (_cm->verbose_high()) {
3902     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3903                            _task_id, (void*) obj);
3904   }
3905 
3906   size_t obj_size = obj->size();
3907   _words_scanned += obj_size;
3908 
3909   obj->oop_iterate(_cm_oop_closure);
3910   statsOnly( ++_objs_scanned );
3911   check_limits();
3912 }
3913 
3914 // Closure for iteration over bitmaps
3915 class CMBitMapClosure : public BitMapClosure {
3916 private:
3917   // the bitmap that is being iterated over
3918   CMBitMap*                   _nextMarkBitMap;
3919   ConcurrentMark*             _cm;
3920   CMTask*                     _task;
3921   // true if we're scanning a heap region claimed by the task (so that
3922   // we move the finger along), false if we're not, i.e. currently when
3923   // scanning a heap region popped from the region stack (so that we
3924   // do not move the task finger along; it'd be a mistake if we did so).
3925   bool                        _scanning_heap_region;
3926 
3927 public:
3928   CMBitMapClosure(CMTask *task,
3929                   ConcurrentMark* cm,
3930                   CMBitMap* nextMarkBitMap)
3931     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3932 
3933   void set_scanning_heap_region(bool scanning_heap_region) {
3934     _scanning_heap_region = scanning_heap_region;
3935   }
3936 
3937   bool do_bit(size_t offset) {
3938     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3939     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3940     assert( addr < _cm->finger(), "invariant");
3941 
3942     if (_scanning_heap_region) {
3943       statsOnly( _task->increase_objs_found_on_bitmap() );
3944       assert(addr >= _task->finger(), "invariant");
3945       // We move that task's local finger along.
3946       _task->move_finger_to(addr);
3947     } else {
3948       // We move the task's region finger along.
3949       _task->move_region_finger_to(addr);
3950     }
3951 
3952     _task->scan_object(oop(addr));
3953     // we only partially drain the local queue and global stack
3954     _task->drain_local_queue(true);
3955     _task->drain_global_stack(true);
3956 
3957     // if the has_aborted flag has been raised, we need to bail out of
3958     // the iteration
3959     return !_task->has_aborted();
3960   }
3961 };
3962 
3963 // Closure for iterating over objects, currently only used for
3964 // processing SATB buffers.
3965 class CMObjectClosure : public ObjectClosure {
3966 private:
3967   CMTask* _task;
3968 
3969 public:
3970   void do_object(oop obj) {
3971     _task->deal_with_reference(obj);
3972   }
3973 
3974   CMObjectClosure(CMTask* task) : _task(task) { }
3975 };
3976 
3977 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3978                                ConcurrentMark* cm,
3979                                CMTask* task)
3980   : _g1h(g1h), _cm(cm), _task(task) {
3981   assert(_ref_processor == NULL, "should be initialized to NULL");
3982 
3983   if (G1UseConcMarkReferenceProcessing) {
3984     _ref_processor = g1h->ref_processor_cm();
3985     assert(_ref_processor != NULL, "should not be NULL");
3986   }
3987 }
3988 
3989 void CMTask::setup_for_region(HeapRegion* hr) {
3990   // Separated the asserts so that we know which one fires.
3991   assert(hr != NULL,
3992         "claim_region() should have filtered out continues humongous regions");
3993   assert(!hr->continuesHumongous(),
3994         "claim_region() should have filtered out continues humongous regions");
3995 
3996   if (_cm->verbose_low()) {
3997     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3998                            _task_id, hr);
3999   }
4000 
4001   _curr_region  = hr;
4002   _finger       = hr->bottom();
4003   update_region_limit();
4004 }
4005 
4006 void CMTask::update_region_limit() {
4007   HeapRegion* hr            = _curr_region;
4008   HeapWord* bottom          = hr->bottom();
4009   HeapWord* limit           = hr->next_top_at_mark_start();
4010 
4011   if (limit == bottom) {
4012     if (_cm->verbose_low()) {
4013       gclog_or_tty->print_cr("[%d] found an empty region "
4014                              "["PTR_FORMAT", "PTR_FORMAT")",
4015                              _task_id, bottom, limit);
4016     }
4017     // The region was collected underneath our feet.
4018     // We set the finger to bottom to ensure that the bitmap
4019     // iteration that will follow this will not do anything.
4020     // (this is not a condition that holds when we set the region up,
4021     // as the region is not supposed to be empty in the first place)
4022     _finger = bottom;
4023   } else if (limit >= _region_limit) {
4024     assert(limit >= _finger, "peace of mind");
4025   } else {
4026     assert(limit < _region_limit, "only way to get here");
4027     // This can happen under some pretty unusual circumstances.  An
4028     // evacuation pause empties the region underneath our feet (NTAMS
4029     // at bottom). We then do some allocation in the region (NTAMS
4030     // stays at bottom), followed by the region being used as a GC
4031     // alloc region (NTAMS will move to top() and the objects
4032     // originally below it will be grayed). All objects now marked in
4033     // the region are explicitly grayed, if below the global finger,
4034     // and we do not need in fact to scan anything else. So, we simply
4035     // set _finger to be limit to ensure that the bitmap iteration
4036     // doesn't do anything.
4037     _finger = limit;
4038   }
4039 
4040   _region_limit = limit;
4041 }
4042 
4043 void CMTask::giveup_current_region() {
4044   assert(_curr_region != NULL, "invariant");
4045   if (_cm->verbose_low()) {
4046     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
4047                            _task_id, _curr_region);
4048   }
4049   clear_region_fields();
4050 }
4051 
4052 void CMTask::clear_region_fields() {
4053   // Values for these three fields that indicate that we're not
4054   // holding on to a region.
4055   _curr_region   = NULL;
4056   _finger        = NULL;
4057   _region_limit  = NULL;
4058 
4059   _region_finger = NULL;
4060 }
4061 
4062 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
4063   if (cm_oop_closure == NULL) {
4064     assert(_cm_oop_closure != NULL, "invariant");
4065   } else {
4066     assert(_cm_oop_closure == NULL, "invariant");
4067   }
4068   _cm_oop_closure = cm_oop_closure;
4069 }
4070 
4071 void CMTask::reset(CMBitMap* nextMarkBitMap) {
4072   guarantee(nextMarkBitMap != NULL, "invariant");
4073 
4074   if (_cm->verbose_low()) {
4075     gclog_or_tty->print_cr("[%d] resetting", _task_id);
4076   }
4077 
4078   _nextMarkBitMap                = nextMarkBitMap;
4079   clear_region_fields();
4080   assert(_aborted_region.is_empty(), "should have been cleared");
4081 
4082   _calls                         = 0;
4083   _elapsed_time_ms               = 0.0;
4084   _termination_time_ms           = 0.0;
4085   _termination_start_time_ms     = 0.0;
4086 
4087 #if _MARKING_STATS_
4088   _local_pushes                  = 0;
4089   _local_pops                    = 0;
4090   _local_max_size                = 0;
4091   _objs_scanned                  = 0;
4092   _global_pushes                 = 0;
4093   _global_pops                   = 0;
4094   _global_max_size               = 0;
4095   _global_transfers_to           = 0;
4096   _global_transfers_from         = 0;
4097   _region_stack_pops             = 0;
4098   _regions_claimed               = 0;
4099   _objs_found_on_bitmap          = 0;
4100   _satb_buffers_processed        = 0;
4101   _steal_attempts                = 0;
4102   _steals                        = 0;
4103   _aborted                       = 0;
4104   _aborted_overflow              = 0;
4105   _aborted_cm_aborted            = 0;
4106   _aborted_yield                 = 0;
4107   _aborted_timed_out             = 0;
4108   _aborted_satb                  = 0;
4109   _aborted_termination           = 0;
4110 #endif // _MARKING_STATS_
4111 }
4112 
4113 bool CMTask::should_exit_termination() {
4114   regular_clock_call();
4115   // This is called when we are in the termination protocol. We should
4116   // quit if, for some reason, this task wants to abort or the global
4117   // stack is not empty (this means that we can get work from it).
4118   return !_cm->mark_stack_empty() || has_aborted();
4119 }
4120 
4121 void CMTask::reached_limit() {
4122   assert(_words_scanned >= _words_scanned_limit ||
4123          _refs_reached >= _refs_reached_limit,
4124          "shouldn't have been called otherwise");
4125   regular_clock_call();
4126 }
4127 
4128 void CMTask::regular_clock_call() {
4129   if (has_aborted()) return;
4130 
4131   // First, we need to recalculate the words scanned and refs reached
4132   // limits for the next clock call.
4133   recalculate_limits();
4134 
4135   // During the regular clock call we do the following
4136 
4137   // (1) If an overflow has been flagged, then we abort.
4138   if (_cm->has_overflown()) {
4139     set_has_aborted();
4140     return;
4141   }
4142 
4143   // If we are not concurrent (i.e. we're doing remark) we don't need
4144   // to check anything else. The other steps are only needed during
4145   // the concurrent marking phase.
4146   if (!concurrent()) return;
4147 
4148   // (2) If marking has been aborted for Full GC, then we also abort.
4149   if (_cm->has_aborted()) {
4150     set_has_aborted();
4151     statsOnly( ++_aborted_cm_aborted );
4152     return;
4153   }
4154 
4155   double curr_time_ms = os::elapsedVTime() * 1000.0;
4156 
4157   // (3) If marking stats are enabled, then we update the step history.
4158 #if _MARKING_STATS_
4159   if (_words_scanned >= _words_scanned_limit) {
4160     ++_clock_due_to_scanning;
4161   }
4162   if (_refs_reached >= _refs_reached_limit) {
4163     ++_clock_due_to_marking;
4164   }
4165 
4166   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
4167   _interval_start_time_ms = curr_time_ms;
4168   _all_clock_intervals_ms.add(last_interval_ms);
4169 
4170   if (_cm->verbose_medium()) {
4171       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
4172                         "scanned = %d%s, refs reached = %d%s",
4173                         _task_id, last_interval_ms,
4174                         _words_scanned,
4175                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
4176                         _refs_reached,
4177                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
4178   }
4179 #endif // _MARKING_STATS_
4180 
4181   // (4) We check whether we should yield. If we have to, then we abort.
4182   if (_cm->should_yield()) {
4183     // We should yield. To do this we abort the task. The caller is
4184     // responsible for yielding.
4185     set_has_aborted();
4186     statsOnly( ++_aborted_yield );
4187     return;
4188   }
4189 
4190   // (5) We check whether we've reached our time quota. If we have,
4191   // then we abort.
4192   double elapsed_time_ms = curr_time_ms - _start_time_ms;
4193   if (elapsed_time_ms > _time_target_ms) {
4194     set_has_aborted();
4195     _has_timed_out = true;
4196     statsOnly( ++_aborted_timed_out );
4197     return;
4198   }
4199 
4200   // (6) Finally, we check whether there are enough completed SATB
4201   // buffers available for processing. If there are, we abort.
4202   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
4203   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
4204     if (_cm->verbose_low()) {
4205       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
4206                              _task_id);
4207     }
4208     // we do need to process SATB buffers, we'll abort and restart
4209     // the marking task to do so
4210     set_has_aborted();
4211     statsOnly( ++_aborted_satb );
4212     return;
4213   }
4214 }
4215 
4216 void CMTask::recalculate_limits() {
4217   _real_words_scanned_limit = _words_scanned + words_scanned_period;
4218   _words_scanned_limit      = _real_words_scanned_limit;
4219 
4220   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
4221   _refs_reached_limit       = _real_refs_reached_limit;
4222 }
4223 
4224 void CMTask::decrease_limits() {
4225   // This is called when we believe that we're going to do an infrequent
4226   // operation which will increase the per byte scanned cost (i.e. move
4227   // entries to/from the global stack). It basically tries to decrease the
4228   // scanning limit so that the clock is called earlier.
4229 
4230   if (_cm->verbose_medium()) {
4231     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
4232   }
4233 
4234   _words_scanned_limit = _real_words_scanned_limit -
4235     3 * words_scanned_period / 4;
4236   _refs_reached_limit  = _real_refs_reached_limit -
4237     3 * refs_reached_period / 4;
4238 }
4239 
4240 void CMTask::move_entries_to_global_stack() {
4241   // local array where we'll store the entries that will be popped
4242   // from the local queue
4243   oop buffer[global_stack_transfer_size];
4244 
4245   int n = 0;
4246   oop obj;
4247   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
4248     buffer[n] = obj;
4249     ++n;
4250   }
4251 
4252   if (n > 0) {
4253     // we popped at least one entry from the local queue
4254 
4255     statsOnly( ++_global_transfers_to; _local_pops += n );
4256 
4257     if (!_cm->mark_stack_push(buffer, n)) {
4258       if (_cm->verbose_low()) {
4259         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
4260                                _task_id);
4261       }
4262       set_has_aborted();
4263     } else {
4264       // the transfer was successful
4265 
4266       if (_cm->verbose_medium()) {
4267         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
4268                                _task_id, n);
4269       }
4270       statsOnly( int tmp_size = _cm->mark_stack_size();
4271                  if (tmp_size > _global_max_size) {
4272                    _global_max_size = tmp_size;
4273                  }
4274                  _global_pushes += n );
4275     }
4276   }
4277 
4278   // this operation was quite expensive, so decrease the limits
4279   decrease_limits();
4280 }
4281 
4282 void CMTask::get_entries_from_global_stack() {
4283   // local array where we'll store the entries that will be popped
4284   // from the global stack.
4285   oop buffer[global_stack_transfer_size];
4286   int n;
4287   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
4288   assert(n <= global_stack_transfer_size,
4289          "we should not pop more than the given limit");
4290   if (n > 0) {
4291     // yes, we did actually pop at least one entry
4292 
4293     statsOnly( ++_global_transfers_from; _global_pops += n );
4294     if (_cm->verbose_medium()) {
4295       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
4296                              _task_id, n);
4297     }
4298     for (int i = 0; i < n; ++i) {
4299       bool success = _task_queue->push(buffer[i]);
4300       // We only call this when the local queue is empty or under a
4301       // given target limit. So, we do not expect this push to fail.
4302       assert(success, "invariant");
4303     }
4304 
4305     statsOnly( int tmp_size = _task_queue->size();
4306                if (tmp_size > _local_max_size) {
4307                  _local_max_size = tmp_size;
4308                }
4309                _local_pushes += n );
4310   }
4311 
4312   // this operation was quite expensive, so decrease the limits
4313   decrease_limits();
4314 }
4315 
4316 void CMTask::drain_local_queue(bool partially) {
4317   if (has_aborted()) return;
4318 
4319   // Decide what the target size is, depending whether we're going to
4320   // drain it partially (so that other tasks can steal if they run out
4321   // of things to do) or totally (at the very end).
4322   size_t target_size;
4323   if (partially) {
4324     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
4325   } else {
4326     target_size = 0;
4327   }
4328 
4329   if (_task_queue->size() > target_size) {
4330     if (_cm->verbose_high()) {
4331       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
4332                              _task_id, target_size);
4333     }
4334 
4335     oop obj;
4336     bool ret = _task_queue->pop_local(obj);
4337     while (ret) {
4338       statsOnly( ++_local_pops );
4339 
4340       if (_cm->verbose_high()) {
4341         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
4342                                (void*) obj);
4343       }
4344 
4345       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
4346       assert(!_g1h->is_on_master_free_list(
4347                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
4348 
4349       scan_object(obj);
4350 
4351       if (_task_queue->size() <= target_size || has_aborted()) {
4352         ret = false;
4353       } else {
4354         ret = _task_queue->pop_local(obj);
4355       }
4356     }
4357 
4358     if (_cm->verbose_high()) {
4359       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
4360                              _task_id, _task_queue->size());
4361     }
4362   }
4363 }
4364 
4365 void CMTask::drain_global_stack(bool partially) {
4366   if (has_aborted()) return;
4367 
4368   // We have a policy to drain the local queue before we attempt to
4369   // drain the global stack.
4370   assert(partially || _task_queue->size() == 0, "invariant");
4371 
4372   // Decide what the target size is, depending on whether we're going
4373   // to drain it partially (so that other tasks can steal if they run
4374   // out of things to do) or totally (at the very end). Notice that,
4375   // because we move entries from the global stack in chunks, or
4376   // because another task might be doing the same, we might in fact
4377   // drop below the target. But this is not a problem.
4378   size_t target_size;
4379   if (partially) {
4380     target_size = _cm->partial_mark_stack_size_target();
4381   } else {
4382     target_size = 0;
4383   }
4384 
4385   if (_cm->mark_stack_size() > target_size) {
4386     if (_cm->verbose_low()) {
4387       gclog_or_tty->print_cr("[%d] draining global stack, target size "SIZE_FORMAT,
4388                              _task_id, target_size);
4389     }
4390 
4391     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
4392       get_entries_from_global_stack();
4393       drain_local_queue(partially);
4394     }
4395 
4396     if (_cm->verbose_low()) {
4397       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
4398                              _task_id, _cm->mark_stack_size());
4399     }
4400   }
4401 }
4402 
4403 // The SATB queue set makes several assumptions about whether to call
4404 // the par or non-par versions of its methods. This is why some of the
4405 // code is replicated. We should really get rid of the single-threaded
4406 // version of the code to simplify things.
4407 void CMTask::drain_satb_buffers() {
4408   if (has_aborted()) return;
4409 
4410   // We set this so that the regular clock knows that we're in the
4411   // middle of draining buffers and doesn't set the abort flag when it
4412   // notices that SATB buffers are available for draining. It'd be
4413   // very counterproductive if it did that. :-)
4414   _draining_satb_buffers = true;
4415 
4416   CMObjectClosure oc(this);
4417   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
4418   if (G1CollectedHeap::use_parallel_gc_threads()) {
4419     satb_mq_set.set_par_closure(_task_id, &oc);
4420   } else {
4421     satb_mq_set.set_closure(&oc);
4422   }
4423 
4424   // This keeps claiming and applying the closure to completed buffers
4425   // until we run out of buffers or we need to abort.
4426   if (G1CollectedHeap::use_parallel_gc_threads()) {
4427     while (!has_aborted() &&
4428            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
4429       if (_cm->verbose_medium()) {
4430         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
4431       }
4432       statsOnly( ++_satb_buffers_processed );
4433       regular_clock_call();
4434     }
4435   } else {
4436     while (!has_aborted() &&
4437            satb_mq_set.apply_closure_to_completed_buffer()) {
4438       if (_cm->verbose_medium()) {
4439         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
4440       }
4441       statsOnly( ++_satb_buffers_processed );
4442       regular_clock_call();
4443     }
4444   }
4445 
4446   if (!concurrent() && !has_aborted()) {
4447     // We should only do this during remark.
4448     if (G1CollectedHeap::use_parallel_gc_threads()) {
4449       satb_mq_set.par_iterate_closure_all_threads(_task_id);
4450     } else {
4451       satb_mq_set.iterate_closure_all_threads();
4452     }
4453   }
4454 
4455   _draining_satb_buffers = false;
4456 
4457   assert(has_aborted() ||
4458          concurrent() ||
4459          satb_mq_set.completed_buffers_num() == 0, "invariant");
4460 
4461   if (G1CollectedHeap::use_parallel_gc_threads()) {
4462     satb_mq_set.set_par_closure(_task_id, NULL);
4463   } else {
4464     satb_mq_set.set_closure(NULL);
4465   }
4466 
4467   // Again, this was a potentially expensive operation, so decrease
4468   // the limits to trigger the regular clock call early.
4469   decrease_limits();
4470 }
4471 
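// Scans the parts of the marking bitmap that correspond to memory
// regions recorded on the global region stack, starting with any
// region whose scan this task previously had to abort.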
4472 void CMTask::drain_region_stack(BitMapClosure* bc) {
4473   if (has_aborted()) return;
4474 
4475   assert(_region_finger == NULL,
4476          "it should be NULL when we're not scanning a region");
4477 
4478   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
4479     if (_cm->verbose_low()) {
4480       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
4481                              _task_id, _cm->region_stack_size());
4482     }
4483 
4484     MemRegion mr;
4485 
4486     if (!_aborted_region.is_empty()) {
4487       mr = _aborted_region;
4488       _aborted_region = MemRegion();
4489 
4490       if (_cm->verbose_low()) {
4491         gclog_or_tty->print_cr("[%d] scanning aborted region "
4492                                "[ " PTR_FORMAT ", " PTR_FORMAT " )",
4493                                _task_id, mr.start(), mr.end());
4494       }
4495     } else {
4496       mr = _cm->region_stack_pop_lock_free();
4497       // it returns MemRegion() if the pop fails
4498       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
4499     }
4500 
4501     while (mr.start() != NULL) {
4502       if (_cm->verbose_medium()) {
4503         gclog_or_tty->print_cr("[%d] we are scanning region "
4504                                "["PTR_FORMAT", "PTR_FORMAT")",
4505                                _task_id, mr.start(), mr.end());
4506       }
4507 
4508       assert(mr.end() <= _cm->finger(),
4509              "otherwise the region shouldn't be on the stack");
4510       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
4511       if (_nextMarkBitMap->iterate(bc, mr)) {
4512         assert(!has_aborted(),
4513                "cannot abort the task without aborting the bitmap iteration");
4514 
4515         // We finished iterating over the region without aborting.
4516         regular_clock_call();
4517         if (has_aborted()) {
4518           mr = MemRegion();
4519         } else {
4520           mr = _cm->region_stack_pop_lock_free();
4521           // it returns MemRegion() if the pop fails
4522           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
4523         }
4524       } else {
4525         assert(has_aborted(), "currently the only way to do so");
4526 
4527         // The only way to abort the bitmap iteration is to return
4528         // false from the do_bit() method. However, inside the
4529         // do_bit() method we move the _region_finger to point to the
4530         // object currently being looked at. So, if we bail out, we
4531         // have definitely set _region_finger to something non-null.
4532         assert(_region_finger != NULL, "invariant");
4533 
4534         // Make sure that any previously aborted region has been
4535         // cleared.
4536         assert(_aborted_region.is_empty(), "aborted region not cleared");
4537 
4538         // The iteration was actually aborted. So now _region_finger
4539         // points to the address of the object we last scanned. If we
4540         // leave it there, when we restart this task, we will rescan
4541         // the object. It is easy to avoid this. We move the finger by
4542         // enough to point to the next possible object header (the
4543         // bitmap knows by how much we need to move it as it knows its
4544         // granularity).
4545         MemRegion newRegion =
4546           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
4547 
4548         if (!newRegion.is_empty()) {
4549           if (_cm->verbose_low()) {
4550             gclog_or_tty->print_cr("[%d] recording unscanned region"
4551                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
4552                                    _task_id,
4553                                    newRegion.start(), newRegion.end());
4554           }
4555           // Now record the part of the region we didn't scan to
4556           // make sure this task scans it later.
4557           _aborted_region = newRegion;
4558         }
4559         // break from while
4560         mr = MemRegion();
4561       }
4562       _region_finger = NULL;
4563     }
4564 
4565     if (_cm->verbose_low()) {
4566       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
4567                              _task_id, _cm->region_stack_size());
4568     }
4569   }
4570 }
4571 
4572 void CMTask::print_stats() {
4573   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
4574                          _task_id, _calls);
4575   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4576                          _elapsed_time_ms, _termination_time_ms);
4577   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4578                          _step_times_ms.num(), _step_times_ms.avg(),
4579                          _step_times_ms.sd());
4580   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
4581                          _step_times_ms.maximum(), _step_times_ms.sum());
4582 
4583 #if _MARKING_STATS_
4584   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4585                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4586                          _all_clock_intervals_ms.sd());
4587   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
4588                          _all_clock_intervals_ms.maximum(),
4589                          _all_clock_intervals_ms.sum());
4590   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
4591                          _clock_due_to_scanning, _clock_due_to_marking);
4592   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
4593                          _objs_scanned, _objs_found_on_bitmap);
4594   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
4595                          _local_pushes, _local_pops, _local_max_size);
4596   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
4597                          _global_pushes, _global_pops, _global_max_size);
4598   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
4599                          _global_transfers_to,_global_transfers_from);
4600   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
4601                          _regions_claimed, _region_stack_pops);
4602   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
4603   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
4604                          _steal_attempts, _steals);
4605   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
4606   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
4607                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4608   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
4609                          _aborted_timed_out, _aborted_satb, _aborted_termination);
4610 #endif // _MARKING_STATS_
4611 }
4612 
4613 /*****************************************************************************
4614 
4615     The do_marking_step(time_target_ms) method is the building block
4616     of the parallel marking framework. It can be called in parallel
4617     with other invocations of do_marking_step() on different tasks
4618     (but only one per task, obviously) and concurrently with the
4619     mutator threads, or during remark, hence it eliminates the need
4620     for two versions of the code. When called during remark, it will
4621     pick up from where the task left off during the concurrent marking
4622     phase. Interestingly, tasks are also claimable during evacuation
4623     pauses, since do_marking_step() ensures that it aborts before
4624     it needs to yield.
4625 
4626     The data structures that it uses to do marking work are the
4627     following:
4628 
4629       (1) Marking Bitmap. If there are gray objects that appear only
4630       on the bitmap (this happens either when dealing with an overflow
4631       or when the initial marking phase has simply marked the roots
4632       and didn't push them on the stack), then tasks claim heap
4633       regions whose bitmap they then scan to find gray objects. A
4634       global finger indicates where the end of the last claimed region
4635       is. A local finger indicates how far into the region a task has
4636       scanned. The two fingers are used to determine how to gray an
4637       object (i.e. whether simply marking it is OK, as it will be
4638       visited by a task in the future, or whether it also needs to be
4639       pushed on a stack; see the illustrative sketch after this list).
4640 
4641       (2) Local Queue. The local queue of the task, which the task
4642       can access reasonably efficiently. Other tasks can steal from
4643       it when they run out of work. Throughout the marking phase, a
4644       task attempts to keep its local queue short but not totally
4645       empty, so that entries are available for stealing by other
4646       tasks. Only when there is no more work will a task totally
4647       drain its local queue.
4648 
4649       (3) Global Mark Stack. This handles local queue overflow. During
4650       marking, only batches of entries are moved between it and the
4651       local queues, as access to it requires a mutex and finer-grained
4652       interaction with it might cause contention. If it
4653       overflows, then the marking phase should restart and iterate
4654       over the bitmap to identify gray objects. Throughout the marking
4655       phase, tasks attempt to keep the global mark stack at a small
4656       length but not totally empty, so that entries are available for
4657       popping by other tasks. Only when there is no more work will
4658       tasks totally drain the global mark stack.
4659 
4660       (4) Global Region Stack. Entries on it correspond to areas of
4661       the bitmap that need to be scanned since they contain gray
4662       objects. Pushes on the region stack only happen during
4663       evacuation pauses and typically correspond to areas covered by
4664       GC LABs. If it overflows, then the marking phase should restart
4665       and iterate over the bitmap to identify gray objects. Tasks will
4666       try to totally drain the region stack as soon as possible.
4667 
4668       (5) SATB Buffer Queue. This is where completed SATB buffers are
4669       made available. Buffers are regularly removed from this queue
4670       and scanned for roots, so that the queue doesn't get too
4671       long. During remark, all completed buffers are processed, as
4672       well as the filled-in parts of any uncompleted buffers.
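
      To make the role of the two fingers in (1) concrete, here is a
      hedged, illustrative sketch of the graying decision (the helper
      and variable names below are invented for illustration and are
      not the identifiers used elsewhere in this file):

        // obj has just been marked on the next marking bitmap
        if ((HeapWord*) obj < global_finger) {
          // The object lies below the global finger, i.e. inside
          // territory that has already been claimed, so no task will
          // come across it again while scanning a claimed region's
          // bitmap. It must be pushed on a queue/stack to be visited.
          push_for_later_scanning(obj);
        } else {
          // The object lies at or above the global finger: the region
          // containing it has not been claimed yet, so some task will
          // eventually scan over its mark on the bitmap. Simply
          // marking it is enough.
        }
        // (The local, per-region finger refines this decision for the
        // region the task is currently scanning.)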
4673 
4674     The do_marking_step() method tries to abort when the time target
4675     has been reached. There are a few other cases when the
4676     do_marking_step() method also aborts:
4677 
4678       (1) When the marking phase has been aborted (after a Full GC).
4679 
4680       (2) When a global overflow (either on the global stack or the
4681       region stack) has been triggered. Before the task aborts, it
4682       will actually sync up with the other tasks to ensure that all
4683       the marking data structures (local queues, stacks, fingers etc.)
4684       are re-initialised so that when do_marking_step() completes,
4685       the marking phase can immediately restart.
4686 
4687       (3) When enough completed SATB buffers are available. The
4688       do_marking_step() method only tries to drain SATB buffers right
4689       at the beginning. So, if enough buffers are available, the
4690       marking step aborts and the SATB buffers are processed at
4691       the beginning of the next invocation.
4692 
4693       (4) To yield. When we have to yield, we abort and yield
4694       right at the end of do_marking_step(). This saves us a lot
4695       of hassle as, by yielding, we might allow a Full GC. If this
4696       happens, then objects will be compacted underneath our feet, the
4697       heap might shrink, etc. We avoid having to check for this by
4698       just aborting and doing the yield right at the end.
4699 
4700     From the above it follows that the do_marking_step() method should
4701     be called in a loop (or, otherwise, regularly) until it completes.
4702 
4703     If a marking step completes without its has_aborted() flag being
4704     true, it means it has completed the current marking phase (and
4705     also all other marking tasks have done so and have all synced up).
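
    As a hedged, purely illustrative sketch (the driver loop and the
    source of the time target below are not the actual callers in this
    file), the expected calling pattern is roughly:

      CMTask* task = ...;            // the task claimed by this worker
      do {
        double target_ms = ...;      // policy-provided time target
        task->do_marking_step(target_ms,
                              true /* do_stealing */,
                              true /* do_termination */);
        // yield / synchronize with safepoints between steps as needed
      } while (task->has_aborted());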
4706 
4707     A method called regular_clock_call() is invoked "regularly" (in
4708     sub ms intervals) throughout marking. It is this clock method that
4709     checks all the abort conditions which were mentioned above and
4710     decides when the task should abort. A work-based scheme is used to
4711     trigger this clock method: when the number of object words the
4712     marking phase has scanned or the number of references the marking
4713     phase has visited reaches a given limit. Additional invocations of
4714     the clock method have been planted in a few other strategic places
4715     too. The initial reason for the clock method was to avoid calling
4716     vtime too regularly, as it is quite expensive. So, once it was in
4717     place, it was natural to piggy-back all the other conditions on it
4718     too and not constantly check them throughout the code.
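
    A hedged sketch of the work-based trigger (the limit identifiers
    below are illustrative; the counters are the ones reset at the
    start of do_marking_step() and recomputed by recalculate_limits()):

      // called from the object and reference scanning paths
      if (_words_scanned >= words_scanned_limit ||
          _refs_reached  >= refs_reached_limit) {
        regular_clock_call();   // checks all the abort conditions above
      }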
4719 
4720  *****************************************************************************/
4721 
4722 void CMTask::do_marking_step(double time_target_ms,
4723                              bool do_stealing,
4724                              bool do_termination) {
4725   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4726   assert(concurrent() == _cm->concurrent(), "they should be the same");
4727 
4728   assert(concurrent() || _cm->region_stack_empty(),
4729          "the region stack should have been cleared before remark");
4730   assert(concurrent() || !_cm->has_aborted_regions(),
4731          "aborted regions should have been cleared before remark");
4732   assert(_region_finger == NULL,
4733          "this should be non-null only when a region is being scanned");
4734 
4735   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4736   assert(_task_queues != NULL, "invariant");
4737   assert(_task_queue != NULL, "invariant");
4738   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4739 
4740   assert(!_claimed,
4741          "only one thread should claim this task at any one time");
4742 
4743   // OK, this doesn't safeguard against all possible scenarios, as it
4744   // is possible for two threads to set the _claimed flag at the same
4745   // time. But it is only for debugging purposes anyway and it will
4746   // catch most problems.
4747   _claimed = true;
4748 
4749   _start_time_ms = os::elapsedVTime() * 1000.0;
4750   statsOnly( _interval_start_time_ms = _start_time_ms );
4751 
4752   double diff_prediction_ms =
4753     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4754   _time_target_ms = time_target_ms - diff_prediction_ms;
4755 
4756   // set up the variables that are used in the work-based scheme to
4757   // call the regular clock method
4758   _words_scanned = 0;
4759   _refs_reached  = 0;
4760   recalculate_limits();
4761 
4762   // clear all flags
4763   clear_has_aborted();
4764   _has_timed_out = false;
4765   _draining_satb_buffers = false;
4766 
4767   ++_calls;
4768 
4769   if (_cm->verbose_low()) {
4770     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4771                            "target = %1.2lfms >>>>>>>>>>",
4772                            _task_id, _calls, _time_target_ms);
4773   }
4774 
4775   // Set up the bitmap and oop closures. Anything that uses them is
4776   // eventually called from this method, so it is OK to allocate these
4777   // on this method's stack frame.
4778   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4779   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4780   set_cm_oop_closure(&cm_oop_closure);
4781 
4782   if (_cm->has_overflown()) {
4783     // This can happen if the region stack or the mark stack overflows
4784     // during a GC pause and this task, after a yield point,
4785     // restarts. We have to abort as we need to get into the overflow
4786     // protocol which happens right at the end of this task.
4787     set_has_aborted();
4788   }
4789 
4790   // First drain any available SATB buffers. After this, we will not
4791   // look at SATB buffers before the next invocation of this method.
4792   // If enough completed SATB buffers are queued up, the regular clock
4793   // will abort this task so that it restarts.
4794   drain_satb_buffers();
4795   // ...then partially drain the local queue and the global stack
4796   drain_local_queue(true);
4797   drain_global_stack(true);
4798 
4799   // Then totally drain the region stack.  We will not look at
4800   // it again before the next invocation of this method. Entries on
4801   // the region stack are only added during evacuation pauses, for
4802   // which we have to yield. When we do, we abort the task anyway so
4803   // it will look at the region stack again when it restarts.
4804   bitmap_closure.set_scanning_heap_region(false);
4805   drain_region_stack(&bitmap_closure);
4806   // ...then partially drain the local queue and the global stack
4807   drain_local_queue(true);
4808   drain_global_stack(true);
4809 
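  // Main work loop: finish scanning the region we are currently
  // holding on to (if any), then repeatedly claim new regions and
  // scan their bitmaps, until we run out of regions or abort.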
4810   do {
4811     if (!has_aborted() && _curr_region != NULL) {
4812       // This means that we're already holding on to a region.
4813       assert(_finger != NULL, "if region is not NULL, then the finger "
4814              "should not be NULL either");
4815 
4816       // We might have restarted this task after an evacuation pause
4817       // which might have evacuated the region we're holding on to
4818       // underneath our feet. Let's read its limit again to make sure
4819       // that we do not iterate over a region of the heap that
4820       // contains garbage (update_region_limit() will also move
4821       // _finger to the start of the region if it is found empty).
4822       update_region_limit();
4823       // We will start from _finger not from the start of the region,
4824       // as we might be restarting this task after aborting half-way
4825       // through scanning this region. In this case, _finger points to
4826       // the address where we last found a marked object. If this is a
4827       // fresh region, _finger points to start().
4828       MemRegion mr = MemRegion(_finger, _region_limit);
4829 
4830       if (_cm->verbose_low()) {
4831         gclog_or_tty->print_cr("[%d] we're scanning part "
4832                                "["PTR_FORMAT", "PTR_FORMAT") "
4833                                "of region "PTR_FORMAT,
4834                                _task_id, _finger, _region_limit, _curr_region);
4835       }
4836 
4837       // Let's iterate over the bitmap of the part of the
4838       // region that is left.
4839       bitmap_closure.set_scanning_heap_region(true);
4840       if (mr.is_empty() ||
4841           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4842         // We successfully completed iterating over the region. Now,
4843         // let's give up the region.
4844         giveup_current_region();
4845         regular_clock_call();
4846       } else {
4847         assert(has_aborted(), "currently the only way to do so");
4848         // The only way to abort the bitmap iteration is to return
4849         // false from the do_bit() method. However, inside the
4850         // do_bit() method we move the _finger to point to the
4851         // object currently being looked at. So, if we bail out, we
4852         // have definitely set _finger to something non-null.
4853         assert(_finger != NULL, "invariant");
4854 
4855         // Region iteration was actually aborted. So now _finger
4856         // points to the address of the object we last scanned. If we
4857         // leave it there, when we restart this task, we will rescan
4858         // the object. It is easy to avoid this. We move the finger by
4859         // enough to point to the next possible object header (the
4860         // bitmap knows by how much we need to move it as it knows its
4861         // granularity).
4862         assert(_finger < _region_limit, "invariant");
4863         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4864         // Check if bitmap iteration was aborted while scanning the last object
4865         if (new_finger >= _region_limit) {
4866             giveup_current_region();
4867         } else {
4868             move_finger_to(new_finger);
4869         }
4870       }
4871     }
4872     // At this point we have either completed iterating over the
4873     // region we were holding on to, or we have aborted.
4874 
4875     // We then partially drain the local queue and the global stack.
4876     // (Do we really need this?)
4877     drain_local_queue(true);
4878     drain_global_stack(true);
4879 
4880     // Read the note on the claim_region() method on why it might
4881     // return NULL with potentially more regions available for
4882     // claiming and why we have to check out_of_regions() to determine
4883     // whether we're done or not.
4884     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4885       // We are going to try to claim a new region. We should have
4886       // given up on the previous one.
4887       // Separated the asserts so that we know which one fires.
4888       assert(_curr_region  == NULL, "invariant");
4889       assert(_finger       == NULL, "invariant");
4890       assert(_region_limit == NULL, "invariant");
4891       if (_cm->verbose_low()) {
4892         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4893       }
4894       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4895       if (claimed_region != NULL) {
4896         // Yes, we managed to claim one
4897         statsOnly( ++_regions_claimed );
4898 
4899         if (_cm->verbose_low()) {
4900           gclog_or_tty->print_cr("[%d] we successfully claimed "
4901                                  "region "PTR_FORMAT,
4902                                  _task_id, claimed_region);
4903         }
4904 
4905         setup_for_region(claimed_region);
4906         assert(_curr_region == claimed_region, "invariant");
4907       }
4908       // It is important to call the regular clock here. It might take
4909       // a while to claim a region if, for example, we hit a large
4910       // block of empty regions. So we need to call the regular clock
4911       // method once round the loop to make sure it's called
4912       // frequently enough.
4913       regular_clock_call();
4914     }
4915 
4916     if (!has_aborted() && _curr_region == NULL) {
4917       assert(_cm->out_of_regions(),
4918              "at this point we should be out of regions");
4919     }
4920   } while ( _curr_region != NULL && !has_aborted());
4921 
4922   if (!has_aborted()) {
4923     // We cannot check whether the global stack is empty, since other
4924     // tasks might be pushing objects to it concurrently. We also cannot
4925     // check if the region stack is empty because if a thread is aborting
4926     // it can push a partially done region back.
4927     assert(_cm->out_of_regions(),
4928            "at this point we should be out of regions");
4929 
4930     if (_cm->verbose_low()) {
4931       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4932     }
4933 
4934     // Try to reduce the number of available SATB buffers so that
4935     // remark has less work to do.
4936     drain_satb_buffers();
4937   }
4938 
4939   // Since we've done everything else, we can now totally drain the
4940   // local queue and global stack.
4941   drain_local_queue(false);
4942   drain_global_stack(false);
4943 
4944   // Attempt at work stealing from other tasks' queues.
4945   if (do_stealing && !has_aborted()) {
4946     // We have not aborted. This means that we have finished all that
4947     // we could. Let's try to do some stealing...
4948 
4949     // We cannot check whether the global stack is empty, since other
4950     // tasks might be pushing objects to it concurrently. We also cannot
4951     // check if the region stack is empty because if a thread is aborting
4952     // it can push a partially done region back.
4953     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4954            "only way to reach here");
4955 
4956     if (_cm->verbose_low()) {
4957       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4958     }
4959 
4960     while (!has_aborted()) {
4961       oop obj;
4962       statsOnly( ++_steal_attempts );
4963 
4964       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4965         if (_cm->verbose_medium()) {
4966           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4967                                  _task_id, (void*) obj);
4968         }
4969 
4970         statsOnly( ++_steals );
4971 
4972         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4973                "any stolen object should be marked");
4974         scan_object(obj);
4975 
4976         // And since we're towards the end, let's totally drain the
4977         // local queue and global stack.
4978         drain_local_queue(false);
4979         drain_global_stack(false);
4980       } else {
4981         break;
4982       }
4983     }
4984   }
4985 
4986   // If we are about to wrap up and go into termination, check if we
4987   // should raise the overflow flag.
4988   if (do_termination && !has_aborted()) {
4989     if (_cm->force_overflow()->should_force()) {
4990       _cm->set_has_overflown();
4991       regular_clock_call();
4992     }
4993   }
4994 
4995   // We still haven't aborted. Now, let's try to get into the
4996   // termination protocol.
4997   if (do_termination && !has_aborted()) {
4998     // We cannot check whether the global stack is empty, since other
4999     // tasks might be concurrently pushing objects on it. We also cannot
5000     // check if the region stack is empty because if a thread is aborting
5001     // it can push a partially done region back.
5002     // Separated the asserts so that we know which one fires.
5003     assert(_cm->out_of_regions(), "only way to reach here");
5004     assert(_task_queue->size() == 0, "only way to reach here");
5005 
5006     if (_cm->verbose_low()) {
5007       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
5008     }
5009 
5010     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
5011     // The CMTask class also extends the TerminatorTerminator class,
5012     // hence its should_exit_termination() method will also decide
5013     // whether to exit the termination protocol or not.
5014     bool finished = _cm->terminator()->offer_termination(this);
5015     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
5016     _termination_time_ms +=
5017       termination_end_time_ms - _termination_start_time_ms;
5018 
5019     if (finished) {
5020       // We're all done.
5021 
5022       if (_task_id == 0) {
5023         // let's allow task 0 to do this
5024         if (concurrent()) {
5025           assert(_cm->concurrent_marking_in_progress(), "invariant");
5026           // we need to set this to false before the next
5027           // safepoint. This way we ensure that the marking phase
5028           // doesn't observe any more heap expansions.
5029           _cm->clear_concurrent_marking_in_progress();
5030         }
5031       }
5032 
5033       // We can now guarantee that the global stack is empty, since
5034       // all other tasks have finished. We separated the guarantees so
5035       // that, if a condition is false, we can immediately find out
5036       // which one.
5037       guarantee(_cm->out_of_regions(), "only way to reach here");
5038       guarantee(_aborted_region.is_empty(), "only way to reach here");
5039       guarantee(_cm->region_stack_empty(), "only way to reach here");
5040       guarantee(_cm->mark_stack_empty(), "only way to reach here");
5041       guarantee(_task_queue->size() == 0, "only way to reach here");
5042       guarantee(!_cm->has_overflown(), "only way to reach here");
5043       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
5044       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
5045 
5046       if (_cm->verbose_low()) {
5047         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
5048       }
5049     } else {
5050       // Apparently there's more work to do. Let's abort this task. Our
5051       // caller will restart it and we can hopefully find more things to do.
5052 
5053       if (_cm->verbose_low()) {
5054         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
5055                                _task_id);
5056       }
5057 
5058       set_has_aborted();
5059       statsOnly( ++_aborted_termination );
5060     }
5061   }
5062 
5063   // Mainly for debugging purposes to make sure that a pointer to the
5064   // closure which was allocated on this frame's stack doesn't
5065   // escape it by accident.
5066   set_cm_oop_closure(NULL);
5067   double end_time_ms = os::elapsedVTime() * 1000.0;
5068   double elapsed_time_ms = end_time_ms - _start_time_ms;
5069   // Update the step history.
5070   _step_times_ms.add(elapsed_time_ms);
5071 
5072   if (has_aborted()) {
5073     // The task was aborted for some reason.
5074 
5075     statsOnly( ++_aborted );
5076 
5077     if (_has_timed_out) {
5078       double diff_ms = elapsed_time_ms - _time_target_ms;
5079       // Keep statistics of how well we did with respect to hitting
5080       // our target only if we actually timed out (if we aborted for
5081       // other reasons, then the results might get skewed).
5082       _marking_step_diffs_ms.add(diff_ms);
5083     }
5084 
5085     if (_cm->has_overflown()) {
5086       // This is the interesting one. We aborted because a global
5087       // overflow was raised. This means we have to restart the
5088       // marking phase and start iterating over regions. However, in
5089       // order to do this we have to make sure that all tasks stop
5090       // what they are doing and re-initialise in a safe manner. We
5091       // will achieve this with the use of two barrier sync points.
5092 
5093       if (_cm->verbose_low()) {
5094         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
5095       }
5096 
5097       _cm->enter_first_sync_barrier(_task_id);
5098       // When we exit this sync barrier we know that all tasks have
5099       // stopped doing marking work. So, it's now safe to
5100       // re-initialise our data structures. At the end of this method,
5101       // task 0 will clear the global data structures.
5102 
5103       statsOnly( ++_aborted_overflow );
5104 
5105       // We clear the local state of this task...
5106       clear_region_fields();
5107 
5108       // ...and enter the second barrier.
5109       _cm->enter_second_sync_barrier(_task_id);
5110       // At this point everything has been re-initialised and we're
5111       // ready to restart.
5112     }
5113 
5114     if (_cm->verbose_low()) {
5115       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
5116                              "elapsed = %1.2lfms <<<<<<<<<<",
5117                              _task_id, _time_target_ms, elapsed_time_ms);
5118       if (_cm->has_aborted()) {
5119         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
5120                                _task_id);
5121       }
5122     }
5123   } else {
5124     if (_cm->verbose_low()) {
5125       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
5126                              "elapsed = %1.2lfms <<<<<<<<<<",
5127                              _task_id, _time_target_ms, elapsed_time_ms);
5128     }
5129   }
5130 
5131   _claimed = false;
5132 }
5133 
5134 CMTask::CMTask(int task_id,
5135                ConcurrentMark* cm,
5136                CMTaskQueue* task_queue,
5137                CMTaskQueueSet* task_queues)
5138   : _g1h(G1CollectedHeap::heap()),
5139     _task_id(task_id), _cm(cm),
5140     _claimed(false),
5141     _nextMarkBitMap(NULL), _hash_seed(17),
5142     _task_queue(task_queue),
5143     _task_queues(task_queues),
5144     _cm_oop_closure(NULL),
5145     _aborted_region(MemRegion()) {
5146   guarantee(task_queue != NULL, "invariant");
5147   guarantee(task_queues != NULL, "invariant");
5148 
5149   statsOnly( _clock_due_to_scanning = 0;
5150              _clock_due_to_marking  = 0 );
5151 
5152   _marking_step_diffs_ms.add(0.5);
5153 }
5154 
5155 // These are formatting macros that are used below to ensure
5156 // consistent formatting. The *_H_* versions are used to format the
5157 // header for a particular value and they should be kept consistent
5158 // with the corresponding value-formatting macro. Also note that most
5159 // of the macros add the necessary white space (as a prefix) which
5160 // makes them a bit easier to compose.
5161 
5162 // All the output lines are prefixed with this string to be able to
5163 // identify them easily in a large log file.
5164 #define G1PPRL_LINE_PREFIX            "###"
5165 
5166 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
5167 #ifdef _LP64
5168 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
5169 #else // _LP64
5170 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
5171 #endif // _LP64
5172 
5173 // For per-region info
5174 #define G1PPRL_TYPE_FORMAT            "   %-4s"
5175 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
5176 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
5177 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
5178 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
5179 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
5180 
5181 // For summary info
5182 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
5183 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
5184 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
5185 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
5186 
5187 G1PrintRegionLivenessInfoClosure::
5188 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
5189   : _out(out),
5190     _total_used_bytes(0), _total_capacity_bytes(0),
5191     _total_prev_live_bytes(0), _total_next_live_bytes(0),
5192     _hum_used_bytes(0), _hum_capacity_bytes(0),
5193     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
5194   G1CollectedHeap* g1h = G1CollectedHeap::heap();
5195   MemRegion g1_committed = g1h->g1_committed();
5196   MemRegion g1_reserved = g1h->g1_reserved();
5197   double now = os::elapsedTime();
5198 
5199   // Print the header of the output.
5200   _out->cr();
5201   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
5202   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
5203                  G1PPRL_SUM_ADDR_FORMAT("committed")
5204                  G1PPRL_SUM_ADDR_FORMAT("reserved")
5205                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
5206                  g1_committed.start(), g1_committed.end(),
5207                  g1_reserved.start(), g1_reserved.end(),
5208                  (size_t)HeapRegion::GrainBytes);
5209   _out->print_cr(G1PPRL_LINE_PREFIX);
5210   _out->print_cr(G1PPRL_LINE_PREFIX
5211                  G1PPRL_TYPE_H_FORMAT
5212                  G1PPRL_ADDR_BASE_H_FORMAT
5213                  G1PPRL_BYTE_H_FORMAT
5214                  G1PPRL_BYTE_H_FORMAT
5215                  G1PPRL_BYTE_H_FORMAT
5216                  G1PPRL_DOUBLE_H_FORMAT,
5217                  "type", "address-range",
5218                  "used", "prev-live", "next-live", "gc-eff");
5219   _out->print_cr(G1PPRL_LINE_PREFIX
5220                  G1PPRL_TYPE_H_FORMAT
5221                  G1PPRL_ADDR_BASE_H_FORMAT
5222                  G1PPRL_BYTE_H_FORMAT
5223                  G1PPRL_BYTE_H_FORMAT
5224                  G1PPRL_BYTE_H_FORMAT
5225                  G1PPRL_DOUBLE_H_FORMAT,
5226                  "", "",
5227                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
5228 }
5229 
5230 // It takes as a parameter a reference to one of the _hum_* fields,
5231 // deduces the corresponding value for a region in a humongous region
5232 // series (either the region size, or what's left if the _hum_* field
5233 // is < the region size), and updates the _hum_* field accordingly.
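// For example, if the "starts humongous" region sets _hum_used_bytes to
// 2.5 * GrainBytes, the first two calls (for the HUMS region and the
// first HUMC region) each return GrainBytes and the third call returns
// the remaining 0.5 * GrainBytes, leaving the field at zero.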
5234 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
5235   size_t bytes = 0;
5236   // The > 0 check is to deal with the prev and next live bytes which
5237   // could be 0.
5238   if (*hum_bytes > 0) {
5239     bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
5240     *hum_bytes -= bytes;
5241   }
5242   return bytes;
5243 }
5244 
5245 // It deduces the values for a region in a humongous region series
5246 // from the _hum_* fields and updates those accordingly. It assumes
5247 // that the _hum_* fields have already been set up from the "starts
5248 // humongous" region and that we visit the regions in address order.
5249 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
5250                                                      size_t* capacity_bytes,
5251                                                      size_t* prev_live_bytes,
5252                                                      size_t* next_live_bytes) {
5253   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
5254   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
5255   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
5256   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
5257   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
5258 }
5259 
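// Prints a line of liveness information for a single region and adds
// its figures to the totals. Humongous regions are reported one
// region-sized chunk at a time, using the _hum_* fields that are set
// up when the "starts humongous" region is visited.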
5260 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
5261   const char* type = "";
5262   HeapWord* bottom       = r->bottom();
5263   HeapWord* end          = r->end();
5264   size_t capacity_bytes  = r->capacity();
5265   size_t used_bytes      = r->used();
5266   size_t prev_live_bytes = r->live_bytes();
5267   size_t next_live_bytes = r->next_live_bytes();
5268   double gc_eff          = r->gc_efficiency();
5269   if (r->used() == 0) {
5270     type = "FREE";
5271   } else if (r->is_survivor()) {
5272     type = "SURV";
5273   } else if (r->is_young()) {
5274     type = "EDEN";
5275   } else if (r->startsHumongous()) {
5276     type = "HUMS";
5277 
5278     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
5279            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
5280            "they should have been zeroed after the last time we used them");
5281     // Set up the _hum_* fields.
5282     _hum_capacity_bytes  = capacity_bytes;
5283     _hum_used_bytes      = used_bytes;
5284     _hum_prev_live_bytes = prev_live_bytes;
5285     _hum_next_live_bytes = next_live_bytes;
5286     get_hum_bytes(&used_bytes, &capacity_bytes,
5287                   &prev_live_bytes, &next_live_bytes);
5288     end = bottom + HeapRegion::GrainWords;
5289   } else if (r->continuesHumongous()) {
5290     type = "HUMC";
5291     get_hum_bytes(&used_bytes, &capacity_bytes,
5292                   &prev_live_bytes, &next_live_bytes);
5293     assert(end == bottom + HeapRegion::GrainWords, "invariant");
5294   } else {
5295     type = "OLD";
5296   }
5297 
5298   _total_used_bytes      += used_bytes;
5299   _total_capacity_bytes  += capacity_bytes;
5300   _total_prev_live_bytes += prev_live_bytes;
5301   _total_next_live_bytes += next_live_bytes;
5302 
5303   // Print a line for this particular region.
5304   _out->print_cr(G1PPRL_LINE_PREFIX
5305                  G1PPRL_TYPE_FORMAT
5306                  G1PPRL_ADDR_BASE_FORMAT
5307                  G1PPRL_BYTE_FORMAT
5308                  G1PPRL_BYTE_FORMAT
5309                  G1PPRL_BYTE_FORMAT
5310                  G1PPRL_DOUBLE_FORMAT,
5311                  type, bottom, end,
5312                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
5313 
5314   return false;
5315 }
5316 
5317 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
5318   // Print the footer of the output.
5319   _out->print_cr(G1PPRL_LINE_PREFIX);
5320   _out->print_cr(G1PPRL_LINE_PREFIX
5321                  " SUMMARY"
5322                  G1PPRL_SUM_MB_FORMAT("capacity")
5323                  G1PPRL_SUM_MB_PERC_FORMAT("used")
5324                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
5325                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
5326                  bytes_to_mb(_total_capacity_bytes),
5327                  bytes_to_mb(_total_used_bytes),
5328                  perc(_total_used_bytes, _total_capacity_bytes),
5329                  bytes_to_mb(_total_prev_live_bytes),
5330                  perc(_total_prev_live_bytes, _total_capacity_bytes),
5331                  bytes_to_mb(_total_next_live_bytes),
5332                  perc(_total_next_live_bytes, _total_capacity_bytes));
5333   _out->cr();
5334 }