1 /*
   2  * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMarkThread.inline.hpp"
  30 #include "gc/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc/g1/g1CollectorState.hpp"
  32 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  33 #include "gc/g1/g1HeapVerifier.hpp"
  34 #include "gc/g1/g1OopClosures.inline.hpp"
  35 #include "gc/g1/g1CardLiveData.inline.hpp"
  36 #include "gc/g1/g1Policy.hpp"
  37 #include "gc/g1/g1StringDedup.hpp"
  38 #include "gc/g1/heapRegion.inline.hpp"
  39 #include "gc/g1/heapRegionRemSet.hpp"
  40 #include "gc/g1/heapRegionSet.inline.hpp"
  41 #include "gc/g1/suspendibleThreadSet.hpp"
  42 #include "gc/shared/gcId.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.inline.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/taskqueue.inline.hpp"
  50 #include "gc/shared/vmGCOperations.hpp"
  51 #include "logging/log.hpp"
  52 #include "memory/allocation.hpp"
  53 #include "memory/resourceArea.hpp"
  54 #include "oops/oop.inline.hpp"
  55 #include "runtime/atomic.hpp"
  56 #include "runtime/handles.inline.hpp"
  57 #include "runtime/java.hpp"
  58 #include "runtime/prefetch.inline.hpp"
  59 #include "services/memTracker.hpp"
  60 #include "utilities/growableArray.hpp"
  61 
  62 // Concurrent marking bit map wrapper
  63 
  64 G1CMBitMapRO::G1CMBitMapRO(int shifter) :
  65   _bm(),
  66   _shifter(shifter) {
  67   _bmStartWord = 0;
  68   _bmWordSize = 0;
  69 }
  70 
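// Return the address of the first marked word in [addr, limit), or limit if
// no bit is set in that range. The search starts at addr rounded up to the
// next possible object boundary.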
  71 HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  72                                                  const HeapWord* limit) const {
  73   // First we must round addr *up* to a possible object boundary.
  74   addr = (HeapWord*)align_size_up((intptr_t)addr,
  75                                   HeapWordSize << _shifter);
  76   size_t addrOffset = heapWordToOffset(addr);
  77   assert(limit != NULL, "limit must not be NULL");
  78   size_t limitOffset = heapWordToOffset(limit);
  79   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  80   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  81   assert(nextAddr >= addr, "get_next_one postcondition");
  82   assert(nextAddr == limit || isMarked(nextAddr),
  83          "get_next_one postcondition");
  84   return nextAddr;
  85 }
  86 
  87 #ifndef PRODUCT
  88 bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
  89   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  90   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
  91          "size inconsistency");
  92   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
  93          _bmWordSize  == heap_rs.word_size();
  94 }
  95 #endif
  96 
  97 void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  98   _bm.print_on_error(st, prefix);
  99 }
 100 
 101 size_t G1CMBitMap::compute_size(size_t heap_size) {
 102   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 103 }
 104 
 105 size_t G1CMBitMap::mark_distance() {
 106   return MinObjAlignmentInBytes * BitsPerByte;
 107 }
 108 
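// Attach this bitmap to the backing storage reserved for it and register a
// listener so that newly committed, not already zero-filled, parts of that
// storage are cleared before use.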
 109 void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 110   _bmStartWord = heap.start();
 111   _bmWordSize = heap.word_size();
 112 
 113   _bm = BitMapView((BitMap::bm_word_t*) storage->reserved().start(), _bmWordSize >> _shifter);
 114 
 115   storage->set_mapping_changed_listener(&_listener);
 116 }
 117 
 118 void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 119   if (zero_filled) {
 120     return;
 121   }
 122   // We need to clear the bitmap on commit, removing any existing information.
 123   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 124   _bm->clear_range(mr);
 125 }
 126 
 127 void G1CMBitMap::clear_range(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 129   assert(!mr.is_empty(), "unexpected empty region");
 130   // convert address range into offset range
 131   _bm.at_put_range(heapWordToOffset(mr.start()),
 132                    heapWordToOffset(mr.end()), false);
 133 }
 134 
 135 G1CMMarkStack::G1CMMarkStack() :
 136   _max_chunk_capacity(0),
 137   _base(NULL),
 138   _chunk_capacity(0),
 139   _out_of_memory(false),
 140   _should_expand(false) {
 141   set_empty();
 142 }
 143 
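// Replace the backing memory of the (empty) mark stack with a new mapping
// that can hold new_capacity chunks, releasing the old mapping if one exists.
// Returns false if the new mapping could not be reserved.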
 144 bool G1CMMarkStack::resize(size_t new_capacity) {
 145   assert(is_empty(), "Only resize when stack is empty.");
 146   assert(new_capacity <= _max_chunk_capacity,
 147          "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
 148 
 149   OopChunk* new_base = MmapArrayAllocator<OopChunk, mtGC>::allocate_or_null(new_capacity);
 150 
 151   if (new_base == NULL) {
 152     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(OopChunk));
 153     return false;
 154   }
 155   // Release old mapping.
 156   if (_base != NULL) {
 157     MmapArrayAllocator<OopChunk, mtGC>::free(_base, _chunk_capacity);
 158   }
 159 
 160   _base = new_base;
 161   _chunk_capacity = new_capacity;
 162   set_empty();
 163   _should_expand = false;
 164 
 165   return true;
 166 }
 167 
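// Alignment, in oop-sized units, applied to the capacities passed to
// initialize(). Using the lcm of the VM allocation granularity and the chunk
// size ensures the backing memory holds a whole number of chunks.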
 168 size_t G1CMMarkStack::capacity_alignment() {
 169   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(OopChunk)) / sizeof(void*);
 170 }
 171 
 172 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
 173   guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
 174 
 175   size_t const OopChunkSizeInVoidStar = sizeof(OopChunk) / sizeof(void*);
 176 
 177   _max_chunk_capacity = (size_t)align_size_up(max_capacity, capacity_alignment()) / OopChunkSizeInVoidStar;
 178   size_t initial_chunk_capacity = (size_t)align_size_up(initial_capacity, capacity_alignment()) / OopChunkSizeInVoidStar;
 179 
 180   guarantee(initial_chunk_capacity <= _max_chunk_capacity,
 181             "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
 182             _max_chunk_capacity,
 183             initial_chunk_capacity);
 184 
 185   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
 186                 initial_chunk_capacity, _max_chunk_capacity);
 187 
 188   return resize(initial_chunk_capacity);
 189 }
 190 
 191 void G1CMMarkStack::expand() {
 192   // Clear expansion flag
 193   _should_expand = false;
 194 
 195   if (_chunk_capacity == _max_chunk_capacity) {
 196     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
 197     return;
 198   }
 199   size_t old_capacity = _chunk_capacity;
 200   // Double capacity if possible
 201   size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 202 
 203   if (resize(new_capacity)) {
 204     log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 205                   old_capacity, new_capacity);
 206   } else {
 207     log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 208                     old_capacity, new_capacity);
 209   }
 210 }
 211 
 212 G1CMMarkStack::~G1CMMarkStack() {
 213   if (_base != NULL) {
 214     MmapArrayAllocator<OopChunk, mtGC>::free(_base, _chunk_capacity);
 215   }
 216 }
 217 
 218 void G1CMMarkStack::add_chunk_to_list(OopChunk* volatile* list, OopChunk* elem) {
 219   elem->next = *list;
 220   *list = elem;
 221 }
 222 
 223 void G1CMMarkStack::add_chunk_to_chunk_list(OopChunk* elem) {
 224   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 225   add_chunk_to_list(&_chunk_list, elem);
 226   _chunks_in_chunk_list++;
 227 }
 228 
 229 void G1CMMarkStack::add_chunk_to_free_list(OopChunk* elem) {
 230   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 231   add_chunk_to_list(&_free_list, elem);
 232 }
 233 
 234 G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_list(OopChunk* volatile* list) {
 235   OopChunk* result = *list;
 236   if (result != NULL) {
 237     *list = (*list)->next;
 238   }
 239   return result;
 240 }
 241 
 242 G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 243   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 244   OopChunk* result = remove_chunk_from_list(&_chunk_list);
 245   if (result != NULL) {
 246     _chunks_in_chunk_list--;
 247   }
 248   return result;
 249 }
 250 
 251 G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 252   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 253   return remove_chunk_from_list(&_free_list);
 254 }
 255 
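// Carve a so far unused chunk out of the backing memory by bumping the
// high-water mark. Returns NULL if the backing memory is exhausted.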
 256 G1CMMarkStack::OopChunk* G1CMMarkStack::allocate_new_chunk() {
 257   // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
 258   // Further this limits _hwm to a value of _chunk_capacity + #threads, avoiding
 259   // wraparound of _hwm.
 260   if (_hwm >= _chunk_capacity) {
 261     return NULL;
 262   }
 263 
 264   size_t cur_idx = Atomic::add(1, &_hwm) - 1;
 265   if (cur_idx >= _chunk_capacity) {
 266     return NULL;
 267   }
 268 
 269   OopChunk* result = ::new (&_base[cur_idx]) OopChunk;
 270   result->next = NULL;
 271   return result;
 272 }
 273 
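// Push one chunk's worth (OopsPerChunk oops) of data from ptr_arr onto the
// global mark stack. Returns false and records an out-of-memory condition if
// neither the free list nor the backing memory can supply a chunk.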
 274 bool G1CMMarkStack::par_push_chunk(oop* ptr_arr) {
 275   // Get a new chunk.
 276   OopChunk* new_chunk = remove_chunk_from_free_list();
 277 
 278   if (new_chunk == NULL) {
 279     // Did not get a chunk from the free list. Allocate from backing memory.
 280     new_chunk = allocate_new_chunk();
 281   }
 282 
 283   if (new_chunk == NULL) {
 284     _out_of_memory = true;
 285     return false;
 286   }
 287 
 288   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, OopsPerChunk * sizeof(oop));
 289 
 290   add_chunk_to_chunk_list(new_chunk);
 291 
 292   return true;
 293 }
 294 
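// Pop one chunk's worth of oops from the global mark stack into ptr_arr and
// return the chunk to the free list. Returns false if the stack is empty.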
 295 bool G1CMMarkStack::par_pop_chunk(oop* ptr_arr) {
 296   OopChunk* cur = remove_chunk_from_chunk_list();
 297 
 298   if (cur == NULL) {
 299     return false;
 300   }
 301 
 302   Copy::conjoint_memory_atomic(cur->data, ptr_arr, OopsPerChunk * sizeof(oop));
 303 
 304   add_chunk_to_free_list(cur);
 305   return true;
 306 }
 307 
 308 void G1CMMarkStack::set_empty() {
 309   _chunks_in_chunk_list = 0;
 310   _hwm = 0;
 311   clear_out_of_memory();
 312   _chunk_list = NULL;
 313   _free_list = NULL;
 314 }
 315 
 316 G1CMRootRegions::G1CMRootRegions() :
 317   _cm(NULL), _scan_in_progress(false),
 318   _should_abort(false), _claimed_survivor_index(0) { }
 319 
 320 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
 321   _survivors = survivors;
 322   _cm = cm;
 323 }
 324 
 325 void G1CMRootRegions::prepare_for_scan() {
 326   assert(!scan_in_progress(), "pre-condition");
 327 
 328   // Currently, only survivors can be root regions.
 329   _claimed_survivor_index = 0;
 330   _scan_in_progress = _survivors->regions()->is_nonempty();
 331   _should_abort = false;
 332 }
 333 
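// Atomically claim the next root region to be scanned, or return NULL if all
// root regions have been claimed or the scan is being aborted.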
 334 HeapRegion* G1CMRootRegions::claim_next() {
 335   if (_should_abort) {
 336     // If someone has set the should_abort flag, we return NULL to
 337     // force the caller to bail out of their loop.
 338     return NULL;
 339   }
 340 
 341   // Currently, only survivors can be root regions.
 342   const GrowableArray<HeapRegion*>* survivor_regions = _survivors->regions();
 343 
 344   int claimed_index = Atomic::add(1, &_claimed_survivor_index) - 1;
 345   if (claimed_index < survivor_regions->length()) {
 346     return survivor_regions->at(claimed_index);
 347   }
 348   return NULL;
 349 }
 350 
 351 uint G1CMRootRegions::num_root_regions() const {
 352   return (uint)_survivors->regions()->length();
 353 }
 354 
 355 void G1CMRootRegions::notify_scan_done() {
 356   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 357   _scan_in_progress = false;
 358   RootRegionScan_lock->notify_all();
 359 }
 360 
 361 void G1CMRootRegions::cancel_scan() {
 362   notify_scan_done();
 363 }
 364 
 365 void G1CMRootRegions::scan_finished() {
 366   assert(scan_in_progress(), "pre-condition");
 367 
 368   // Currently, only survivors can be root regions.
 369   if (!_should_abort) {
 370     assert(_claimed_survivor_index >= 0, "otherwise comparison is invalid: %d", _claimed_survivor_index);
 371     assert((uint)_claimed_survivor_index >= _survivors->length(),
 372            "we should have claimed all survivors, claimed index = %u, length = %u",
 373            (uint)_claimed_survivor_index, _survivors->length());
 374   }
 375 
 376   notify_scan_done();
 377 }
 378 
 379 bool G1CMRootRegions::wait_until_scan_finished() {
 380   if (!scan_in_progress()) return false;
 381 
 382   {
 383     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 384     while (scan_in_progress()) {
 385       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 386     }
 387   }
 388   return true;
 389 }
 390 
 391 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 392   return MAX2((n_par_threads + 2) / 4, 1U);
 393 }
 394 
 395 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 396   _g1h(g1h),
 397   _markBitMap1(),
 398   _markBitMap2(),
 399   _parallel_marking_threads(0),
 400   _max_parallel_marking_threads(0),
 401   _sleep_factor(0.0),
 402   _marking_task_overhead(1.0),
 403   _cleanup_list("Cleanup List"),
 404 
 405   _prevMarkBitMap(&_markBitMap1),
 406   _nextMarkBitMap(&_markBitMap2),
 407 
 408   _global_mark_stack(),
 409   // _finger set in set_non_marking_state
 410 
 411   _max_worker_id(ParallelGCThreads),
 412   // _active_tasks set in set_non_marking_state
 413   // _tasks set inside the constructor
 414   _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
 415   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 416 
 417   _concurrent(false),
 418   _has_aborted(false),
 419   _restart_for_overflow(false),
 420   _concurrent_marking_in_progress(false),
 421   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 422   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
 423 
 424   // _verbose_level set below
 425 
 426   _init_times(),
 427   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 428   _cleanup_times(),
 429   _total_counting_time(0.0),
 430   _total_rs_scrub_time(0.0),
 431 
 432   _parallel_workers(NULL),
 433 
 434   _completed_initialization(false) {
 435 
 436   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 437   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 438 
 439   // Create & start a ConcurrentMark thread.
 440   _cmThread = new ConcurrentMarkThread(this);
 441   assert(cmThread() != NULL, "CM Thread should have been created");
 442   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 443   if (_cmThread->osthread() == NULL) {
 444       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 445   }
 446 
 447   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 448   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 449   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 450 
 451   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 452   satb_qs.set_buffer_size(G1SATBBufferSize);
 453 
 454   _root_regions.init(_g1h->survivor(), this);
 455 
 456   if (ConcGCThreads > ParallelGCThreads) {
 457     log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
 458                     ConcGCThreads, ParallelGCThreads);
 459     return;
 460   }
 461   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 462     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 463     // if both are set
 464     _sleep_factor             = 0.0;
 465     _marking_task_overhead    = 1.0;
 466   } else if (G1MarkingOverheadPercent > 0) {
 467     // We will calculate the number of parallel marking threads based
 468     // on a target overhead with respect to the soft real-time goal
 469     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 470     double overall_cm_overhead =
 471       (double) MaxGCPauseMillis * marking_overhead /
 472       (double) GCPauseIntervalMillis;
 473     double cpu_ratio = 1.0 / os::initial_active_processor_count();
 474     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 475     double marking_task_overhead =
 476       overall_cm_overhead / marking_thread_num * os::initial_active_processor_count();
 477     double sleep_factor =
 478                        (1.0 - marking_task_overhead) / marking_task_overhead;
 479 
 480     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 481     _sleep_factor             = sleep_factor;
 482     _marking_task_overhead    = marking_task_overhead;
 483   } else {
 484     // Calculate the number of parallel marking threads by scaling
 485     // the number of parallel GC threads.
 486     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 487     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 488     _sleep_factor             = 0.0;
 489     _marking_task_overhead    = 1.0;
 490   }
 491 
 492   assert(ConcGCThreads > 0, "Should have been set");
 493   log_debug(gc)("ConcGCThreads: %u", ConcGCThreads);
 494   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 495   _parallel_marking_threads = ConcGCThreads;
 496   _max_parallel_marking_threads = _parallel_marking_threads;
 497 
 498   _parallel_workers = new WorkGang("G1 Marker",
 499        _max_parallel_marking_threads, false, true);
 500   if (_parallel_workers == NULL) {
 501     vm_exit_during_initialization("Failed necessary allocation.");
 502   } else {
 503     _parallel_workers->initialize_workers();
 504   }
 505 
 506   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 507     size_t mark_stack_size =
 508       MIN2(MarkStackSizeMax,
 509           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 510     // Verify that the calculated value for MarkStackSize is in range.
 511     // It would be nice to use the private utility routine from Arguments.
 512     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 513       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 514                       "must be between 1 and " SIZE_FORMAT,
 515                       mark_stack_size, MarkStackSizeMax);
 516       return;
 517     }
 518     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 519   } else {
 520     // Verify MarkStackSize is in range.
 521     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 522       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 523         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 524           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 525                           "must be between 1 and " SIZE_FORMAT,
 526                           MarkStackSize, MarkStackSizeMax);
 527           return;
 528         }
 529       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 530         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 531           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 532                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 533                           MarkStackSize, MarkStackSizeMax);
 534           return;
 535         }
 536       }
 537     }
 538   }
 539 
 540   if (!_global_mark_stack.initialize(MarkStackSize, MarkStackSizeMax)) {
 541     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 542   }
 543 
 544   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
 545   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 546 
 547   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 548   _active_tasks = _max_worker_id;
 549 
 550   for (uint i = 0; i < _max_worker_id; ++i) {
 551     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 552     task_queue->initialize();
 553     _task_queues->register_queue(i, task_queue);
 554 
 555     _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);
 556 
 557     _accum_task_vtime[i] = 0.0;
 558   }
 559 
 560   // so that the call below can read a sensible value
 561   _heap_start = g1h->reserved_region().start();
 562   set_non_marking_state();
 563   _completed_initialization = true;
 564 }
 565 
 566 void G1ConcurrentMark::reset() {
 567   // Starting values for these two. This should be called in a STW
 568   // phase.
 569   MemRegion reserved = _g1h->g1_reserved();
 570   _heap_start = reserved.start();
 571   _heap_end   = reserved.end();
 572 
 573   // Separated the asserts so that we know which one fires.
 574   assert(_heap_start != NULL, "heap bounds should look ok");
 575   assert(_heap_end != NULL, "heap bounds should look ok");
 576   assert(_heap_start < _heap_end, "heap bounds should look ok");
 577 
 578   // Reset all the marking data structures and any necessary flags
 579   reset_marking_state();
 580 
 581   // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
 583   // of them ready.
 584   for (uint i = 0; i < _max_worker_id; ++i) {
 585     _tasks[i]->reset(_nextMarkBitMap);
 586   }
 587 
  // We need this to make sure that the flag is on during the evacuation
  // pause that the initial mark is piggy-backed on.
 590   set_concurrent_marking_in_progress();
 591 }
 592 
 593 
 594 void G1ConcurrentMark::reset_marking_state() {
 595   _global_mark_stack.set_should_expand(has_overflown());
 596   _global_mark_stack.set_empty();        // Also clears the overflow stack's overflow flag
 597   _finger = _heap_start;
 598 
 599   for (uint i = 0; i < _max_worker_id; ++i) {
 600     G1CMTaskQueue* queue = _task_queues->queue(i);
 601     queue->set_empty();
 602   }
 603 }
 604 
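// Record the number of active tasks for the current phase and resize the
// terminator and the two overflow barriers accordingly.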
 605 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 606   assert(active_tasks <= _max_worker_id, "we should not have more");
 607 
 608   _active_tasks = active_tasks;
 609   // Need to update the three data structures below according to the
 610   // number of active threads for this phase.
 611   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 612   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 613   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 614 }
 615 
 616 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 617   set_concurrency(active_tasks);
 618 
 619   _concurrent = concurrent;
 620   // We propagate this to all tasks, not just the active ones.
 621   for (uint i = 0; i < _max_worker_id; ++i)
 622     _tasks[i]->set_concurrent(concurrent);
 623 
 624   if (concurrent) {
 625     set_concurrent_marking_in_progress();
 626   } else {
 627     // We currently assume that the concurrent flag has been set to
 628     // false before we start remark. At this point we should also be
 629     // in a STW phase.
 630     assert(!concurrent_marking_in_progress(), "invariant");
 631     assert(out_of_regions(),
 632            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 633            p2i(_finger), p2i(_heap_end));
 634   }
 635 }
 636 
 637 void G1ConcurrentMark::set_non_marking_state() {
 638   // We set the global marking state to some default values when we're
 639   // not doing marking.
 640   reset_marking_state();
 641   _active_tasks = 0;
 642   clear_concurrent_marking_in_progress();
 643 }
 644 
 645 G1ConcurrentMark::~G1ConcurrentMark() {
 646   // The G1ConcurrentMark instance is never freed.
 647   ShouldNotReachHere();
 648 }
 649 
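// Gang task that clears the given mark bitmap region by region in chunks of
// chunk_size() bytes, optionally joining the suspendible thread set so that
// the work can yield to safepoints.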
 650 class G1ClearBitMapTask : public AbstractGangTask {
 651 public:
 652   static size_t chunk_size() { return M; }
 653 
 654 private:
 655   // Heap region closure used for clearing the given mark bitmap.
 656   class G1ClearBitmapHRClosure : public HeapRegionClosure {
 657   private:
 658     G1CMBitMap* _bitmap;
 659     G1ConcurrentMark* _cm;
 660   public:
 661     G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) {
 662     }
 663 
 664     virtual bool doHeapRegion(HeapRegion* r) {
 665       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 666 
 667       HeapWord* cur = r->bottom();
 668       HeapWord* const end = r->end();
 669 
 670       while (cur < end) {
 671         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 672         _bitmap->clear_range(mr);
 673 
 674         cur += chunk_size_in_words;
 675 
        // Abort the iteration if, after yielding, the marking has been aborted.
 677         if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
 678           return true;
 679         }
 680         // Repeat the asserts from before the start of the closure. We will do them
 681         // as asserts here to minimize their overhead on the product. However, we
 682         // will have them as guarantees at the beginning / end of the bitmap
 683         // clearing to get some checking in the product.
 684         assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
 685         assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 686       }
 687       assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 688 
 689       return false;
 690     }
 691   };
 692 
 693   G1ClearBitmapHRClosure _cl;
 694   HeapRegionClaimer _hr_claimer;
 695   bool _suspendible; // If the task is suspendible, workers must join the STS.
 696 
 697 public:
 698   G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 699     AbstractGangTask("G1 Clear Bitmap"),
 700     _cl(bitmap, suspendible ? cm : NULL),
 701     _hr_claimer(n_workers),
 702     _suspendible(suspendible)
 703   { }
 704 
 705   void work(uint worker_id) {
 706     SuspendibleThreadSetJoiner sts_join(_suspendible);
 707     G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer, true);
 708   }
 709 
 710   bool is_complete() {
 711     return _cl.complete();
 712   }
 713 };
 714 
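// Clear the given bitmap using the supplied work gang. If may_yield is true
// the work is done as a suspendible task and may be cut short by an abort of
// the marking; otherwise it must be executed at a safepoint and is guaranteed
// to complete.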
 715 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
 716   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 717 
 718   size_t const num_bytes_to_clear = (HeapRegion::GrainBytes * _g1h->num_regions()) / G1CMBitMap::heap_map_factor();
 719   size_t const num_chunks = align_size_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 720 
 721   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 722 
 723   G1ClearBitMapTask cl(bitmap, this, num_workers, may_yield);
 724 
 725   log_debug(gc, ergo)("Running %s with %u workers for " SIZE_FORMAT " work units.", cl.name(), num_workers, num_chunks);
 726   workers->run_task(&cl, num_workers);
 727   guarantee(!may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 728 }
 729 
 730 void G1ConcurrentMark::cleanup_for_next_mark() {
 731   // Make sure that the concurrent mark thread looks to still be in
 732   // the current cycle.
 733   guarantee(cmThread()->during_cycle(), "invariant");
 734 
 735   // We are finishing up the current cycle by clearing the next
 736   // marking bitmap and getting it ready for the next cycle. During
 737   // this time no other cycle can start. So, let's make sure that this
 738   // is the case.
 739   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 740 
 741   clear_bitmap(_nextMarkBitMap, _parallel_workers, true);
 742 
 743   // Clear the live count data. If the marking has been aborted, the abort()
 744   // call already did that.
 745   if (!has_aborted()) {
 746     clear_live_data(_parallel_workers);
 747     DEBUG_ONLY(verify_live_data_clear());
 748   }
 749 
 750   // Repeat the asserts from above.
 751   guarantee(cmThread()->during_cycle(), "invariant");
 752   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 753 }
 754 
 755 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
 756   assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
 757   clear_bitmap((G1CMBitMap*)_prevMarkBitMap, workers, false);
 758 }
 759 
 760 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 761   G1CMBitMap* _bitmap;
 762   bool _error;
 763  public:
 764   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 765   }
 766 
 767   virtual bool doHeapRegion(HeapRegion* r) {
 768     // This closure can be called concurrently to the mutator, so we must make sure
 769     // that the result of the getNextMarkedWordAddress() call is compared to the
 770     // value passed to it as limit to detect any found bits.
 771     // end never changes in G1.
 772     HeapWord* end = r->end();
 773     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 774   }
 775 };
 776 
 777 bool G1ConcurrentMark::nextMarkBitmapIsClear() {
 778   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 779   _g1h->heap_region_iterate(&cl);
 780   return cl.complete();
 781 }
 782 
 783 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 784 public:
 785   bool doHeapRegion(HeapRegion* r) {
 786     r->note_start_of_marking();
 787     return false;
 788   }
 789 };
 790 
 791 void G1ConcurrentMark::checkpointRootsInitialPre() {
 792   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 793   G1Policy* g1p = g1h->g1_policy();
 794 
 795   _has_aborted = false;
 796 
 797   // Initialize marking structures. This has to be done in a STW phase.
 798   reset();
 799 
 800   // For each region note start of marking.
 801   NoteStartOfMarkHRClosure startcl;
 802   g1h->heap_region_iterate(&startcl);
 803 }
 804 
 805 
 806 void G1ConcurrentMark::checkpointRootsInitialPost() {
 807   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 808 
 809   // Start Concurrent Marking weak-reference discovery.
 810   ReferenceProcessor* rp = g1h->ref_processor_cm();
 811   // enable ("weak") refs discovery
 812   rp->enable_discovery();
 813   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 814 
 815   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 818   satb_mq_set.set_active_all_threads(true, /* new active value */
 819                                      false /* expected_active */);
 820 
 821   _root_regions.prepare_for_scan();
 822 
 823   // update_g1_committed() will be called at the end of an evac pause
 824   // when marking is on. So, it's also called at the end of the
 825   // initial-mark pause to update the heap end, if the heap expands
 826   // during it. No need to call it here.
 827 }
 828 
 829 /*
 830  * Notice that in the next two methods, we actually leave the STS
 831  * during the barrier sync and join it immediately afterwards. If we
 832  * do not do this, the following deadlock can occur: one thread could
 833  * be in the barrier sync code, waiting for the other thread to also
 834  * sync up, whereas another one could be trying to yield, while also
 835  * waiting for the other threads to sync up too.
 836  *
 837  * Note, however, that this code is also used during remark and in
 838  * this case we should not attempt to leave / enter the STS, otherwise
 839  * we'll either hit an assert (debug / fastdebug) or deadlock
 840  * (product). So we should only leave / enter the STS if we are
 841  * operating concurrently.
 842  *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 848  */
 849 
 850 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 851   bool barrier_aborted;
 852   {
 853     SuspendibleThreadSetLeaver sts_leave(concurrent());
 854     barrier_aborted = !_first_overflow_barrier_sync.enter();
 855   }
 856 
 857   // at this point everyone should have synced up and not be doing any
 858   // more work
 859 
 860   if (barrier_aborted) {
 861     // If the barrier aborted we ignore the overflow condition and
 862     // just abort the whole marking phase as quickly as possible.
 863     return;
 864   }
 865 
 866   // If we're executing the concurrent phase of marking, reset the marking
 867   // state; otherwise the marking state is reset after reference processing,
 868   // during the remark pause.
 869   // If we reset here as a result of an overflow during the remark we will
 870   // see assertion failures from any subsequent set_concurrency_and_phase()
 871   // calls.
 872   if (concurrent()) {
    // let the task associated with worker 0 do this
 874     if (worker_id == 0) {
 875       // task 0 is responsible for clearing the global data structures
 876       // We should be here because of an overflow. During STW we should
 877       // not clear the overflow flag since we rely on it being true when
 878       // we exit this method to abort the pause and restart concurrent
 879       // marking.
 880       reset_marking_state();
 881 
 882       log_info(gc, marking)("Concurrent Mark reset for overflow");
 883     }
 884   }
 885 
  // After this, each task should reset its own data structures and
  // then go into the second barrier.
 888 }
 889 
 890 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 891   SuspendibleThreadSetLeaver sts_leave(concurrent());
 892   _second_overflow_barrier_sync.enter();
 893 
 894   // at this point everything should be re-initialized and ready to go
 895 }
 896 
 897 class G1CMConcurrentMarkingTask: public AbstractGangTask {
 898 private:
 899   G1ConcurrentMark*     _cm;
 900   ConcurrentMarkThread* _cmt;
 901 
 902 public:
 903   void work(uint worker_id) {
 904     assert(Thread::current()->is_ConcurrentGC_thread(),
 905            "this should only be done by a conc GC thread");
 906     ResourceMark rm;
 907 
 908     double start_vtime = os::elapsedVTime();
 909 
 910     {
 911       SuspendibleThreadSetJoiner sts_join;
 912 
 913       assert(worker_id < _cm->active_tasks(), "invariant");
 914       G1CMTask* the_task = _cm->task(worker_id);
 915       the_task->record_start_time();
 916       if (!_cm->has_aborted()) {
 917         do {
 918           double start_vtime_sec = os::elapsedVTime();
 919           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 920 
 921           the_task->do_marking_step(mark_step_duration_ms,
 922                                     true  /* do_termination */,
 923                                     false /* is_serial*/);
 924 
 925           double end_vtime_sec = os::elapsedVTime();
 926           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 927           _cm->clear_has_overflown();
 928 
 929           _cm->do_yield_check();
 930 
 931           jlong sleep_time_ms;
 932           if (!_cm->has_aborted() && the_task->has_aborted()) {
 933             sleep_time_ms =
 934               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 935             {
 936               SuspendibleThreadSetLeaver sts_leave;
 937               os::sleep(Thread::current(), sleep_time_ms, false);
 938             }
 939           }
 940         } while (!_cm->has_aborted() && the_task->has_aborted());
 941       }
 942       the_task->record_end_time();
 943       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 944     }
 945 
 946     double end_vtime = os::elapsedVTime();
 947     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 948   }
 949 
 950   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
 951                             ConcurrentMarkThread* cmt) :
 952       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 953 
 954   ~G1CMConcurrentMarkingTask() { }
 955 };
 956 
 957 // Calculates the number of active workers for a concurrent
 958 // phase.
 959 uint G1ConcurrentMark::calc_parallel_marking_threads() {
 960   uint n_conc_workers = 0;
 961   if (!UseDynamicNumberOfGCThreads ||
 962       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 963        !ForceDynamicNumberOfGCThreads)) {
 964     n_conc_workers = max_parallel_marking_threads();
 965   } else {
 966     n_conc_workers =
 967       AdaptiveSizePolicy::calc_default_active_workers(max_parallel_marking_threads(),
 968                                                       1, /* Minimum workers */
 969                                                       parallel_marking_threads(),
 970                                                       Threads::number_of_non_daemon_threads());
 971     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 972     // that scaling has already gone into "_max_parallel_marking_threads".
 973   }
 974   assert(n_conc_workers > 0 && n_conc_workers <= max_parallel_marking_threads(),
 975          "Calculated number of workers must be larger than zero and at most the maximum %u, but is %u",
 976          max_parallel_marking_threads(), n_conc_workers);
 977   return n_conc_workers;
 978 }
 979 
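// Scan a single root region by iterating over the objects between its bottom
// and top and applying G1RootRegionScanClosure to the references they contain.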
 980 void G1ConcurrentMark::scanRootRegion(HeapRegion* hr) {
 981   // Currently, only survivors can be root regions.
 982   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 983   G1RootRegionScanClosure cl(_g1h, this);
 984 
 985   const uintx interval = PrefetchScanIntervalInBytes;
 986   HeapWord* curr = hr->bottom();
 987   const HeapWord* end = hr->top();
 988   while (curr < end) {
 989     Prefetch::read(curr, interval);
 990     oop obj = oop(curr);
 991     int size = obj->oop_iterate_size(&cl);
 992     assert(size == obj->size(), "sanity");
 993     curr += size;
 994   }
 995 }
 996 
 997 class G1CMRootRegionScanTask : public AbstractGangTask {
 998 private:
 999   G1ConcurrentMark* _cm;
1000 
1001 public:
1002   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
1003     AbstractGangTask("G1 Root Region Scan"), _cm(cm) { }
1004 
1005   void work(uint worker_id) {
1006     assert(Thread::current()->is_ConcurrentGC_thread(),
1007            "this should only be done by a conc GC thread");
1008 
1009     G1CMRootRegions* root_regions = _cm->root_regions();
1010     HeapRegion* hr = root_regions->claim_next();
1011     while (hr != NULL) {
1012       _cm->scanRootRegion(hr);
1013       hr = root_regions->claim_next();
1014     }
1015   }
1016 };
1017 
1018 void G1ConcurrentMark::scan_root_regions() {
1019   // scan_in_progress() will have been set to true only if there was
1020   // at least one root region to scan. So, if it's false, we
1021   // should not attempt to do any further work.
1022   if (root_regions()->scan_in_progress()) {
1023     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
1024 
1025     _parallel_marking_threads = MIN2(calc_parallel_marking_threads(),
1026                                      // We distribute work on a per-region basis, so starting
1027                                      // more threads than that is useless.
1028                                      root_regions()->num_root_regions());
1029     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1030            "Maximum number of marking threads exceeded");
1031 
1032     G1CMRootRegionScanTask task(this);
1033     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1034                         task.name(), _parallel_marking_threads, root_regions()->num_root_regions());
1035     _parallel_workers->run_task(&task, _parallel_marking_threads);
1036 
1037     // It's possible that has_aborted() is true here without actually
1038     // aborting the survivor scan earlier. This is OK as it's
1039     // mainly used for sanity checking.
1040     root_regions()->scan_finished();
1041   }
1042 }
1043 
1044 void G1ConcurrentMark::concurrent_cycle_start() {
1045   _gc_timer_cm->register_gc_start();
1046 
1047   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1048 
1049   _g1h->trace_heap_before_gc(_gc_tracer_cm);
1050 }
1051 
1052 void G1ConcurrentMark::concurrent_cycle_end() {
1053   _g1h->trace_heap_after_gc(_gc_tracer_cm);
1054 
1055   if (has_aborted()) {
1056     _gc_tracer_cm->report_concurrent_mode_failure();
1057   }
1058 
1059   _gc_timer_cm->register_gc_end();
1060 
1061   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1062 }
1063 
1064 void G1ConcurrentMark::mark_from_roots() {
1065   // we might be tempted to assert that:
1066   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1067   //        "inconsistent argument?");
1068   // However that wouldn't be right, because it's possible that
1069   // a safepoint is indeed in progress as a younger generation
1070   // stop-the-world GC happens even as we mark in this generation.
1071 
1072   _restart_for_overflow = false;
1073 
1074   // _g1h has _n_par_threads
1075   _parallel_marking_threads = calc_parallel_marking_threads();
1076   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1077     "Maximum number of marking threads exceeded");
1078 
1079   uint active_workers = MAX2(1U, parallel_marking_threads());
1080   assert(active_workers > 0, "Should have been set");
1081 
1082   // Setting active workers is not guaranteed since fewer
1083   // worker threads may currently exist and more may not be
1084   // available.
1085   active_workers = _parallel_workers->update_active_workers(active_workers);
1086   log_info(gc, task)("Using %u workers of %u for marking", active_workers, _parallel_workers->total_workers());
1087 
1088   // Parallel task terminator is set in "set_concurrency_and_phase()"
1089   set_concurrency_and_phase(active_workers, true /* concurrent */);
1090 
1091   G1CMConcurrentMarkingTask markingTask(this, cmThread());
1092   _parallel_workers->run_task(&markingTask);
1093   print_stats();
1094 }
1095 
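// The Remark pause: finish up the marking work at a safepoint, process weak
// references, and either complete marking or, if the global mark stack
// overflowed, set things up so that concurrent marking is restarted.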
1096 void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1097   // world is stopped at this checkpoint
1098   assert(SafepointSynchronize::is_at_safepoint(),
1099          "world should be stopped");
1100 
1101   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1102 
1103   // If a full collection has happened, we shouldn't do this.
1104   if (has_aborted()) {
1105     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1106     return;
1107   }
1108 
1109   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1110 
1111   if (VerifyDuringGC) {
1112     HandleMark hm;  // handle scope
1113     g1h->prepare_for_verify();
1114     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1115   }
1116   g1h->verifier()->check_bitmaps("Remark Start");
1117 
1118   G1Policy* g1p = g1h->g1_policy();
1119   g1p->record_concurrent_mark_remark_start();
1120 
1121   double start = os::elapsedTime();
1122 
1123   checkpointRootsFinalWork();
1124 
1125   double mark_work_end = os::elapsedTime();
1126 
1127   weakRefsWork(clear_all_soft_refs);
1128 
1129   if (has_overflown()) {
1130     // We overflowed.  Restart concurrent marking.
1131     _restart_for_overflow = true;
1132 
1133     // Verify the heap w.r.t. the previous marking bitmap.
1134     if (VerifyDuringGC) {
1135       HandleMark hm;  // handle scope
1136       g1h->prepare_for_verify();
1137       Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1138     }
1139 
1140     // Clear the marking state because we will be restarting
1141     // marking due to overflowing the global mark stack.
1142     reset_marking_state();
1143   } else {
1144     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1145     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1148     satb_mq_set.set_active_all_threads(false, /* new active value */
1149                                        true /* expected_active */);
1150 
1151     if (VerifyDuringGC) {
1152       HandleMark hm;  // handle scope
1153       g1h->prepare_for_verify();
1154       Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1155     }
1156     g1h->verifier()->check_bitmaps("Remark End");
1157     assert(!restart_for_overflow(), "sanity");
1158     // Completely reset the marking state since marking completed
1159     set_non_marking_state();
1160   }
1161 
1162   // Expand the marking stack, if we have to and if we can.
1163   if (_global_mark_stack.should_expand()) {
1164     _global_mark_stack.expand();
1165   }
1166 
1167   // Statistics
1168   double now = os::elapsedTime();
1169   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1170   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1171   _remark_times.add((now - start) * 1000.0);
1172 
1173   g1p->record_concurrent_mark_remark_end();
1174 
1175   G1CMIsAliveClosure is_alive(g1h);
1176   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1177 }
1178 
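// Closure used during Cleanup: notes the end of marking in each region and
// either frees the region, if marking found no live data in it, or queues its
// remembered set for cleanup.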
1179 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1180   G1CollectedHeap* _g1;
1181   size_t _freed_bytes;
1182   FreeRegionList* _local_cleanup_list;
1183   uint _old_regions_removed;
1184   uint _humongous_regions_removed;
1185   HRRSCleanupTask* _hrrs_cleanup_task;
1186 
1187 public:
1188   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1189                              FreeRegionList* local_cleanup_list,
1190                              HRRSCleanupTask* hrrs_cleanup_task) :
1191     _g1(g1),
1192     _freed_bytes(0),
1193     _local_cleanup_list(local_cleanup_list),
1194     _old_regions_removed(0),
1195     _humongous_regions_removed(0),
1196     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1197 
1198   size_t freed_bytes() { return _freed_bytes; }
1199   const uint old_regions_removed() { return _old_regions_removed; }
1200   const uint humongous_regions_removed() { return _humongous_regions_removed; }
1201 
1202   bool doHeapRegion(HeapRegion *hr) {
1203     if (hr->is_archive()) {
1204       return false;
1205     }
1206     _g1->reset_gc_time_stamps(hr);
1207     hr->note_end_of_marking();
1208 
1209     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1210       _freed_bytes += hr->used();
1211       hr->set_containing_set(NULL);
1212       if (hr->is_humongous()) {
1213         _humongous_regions_removed++;
1214         _g1->free_humongous_region(hr, _local_cleanup_list, true /* skip_remset */);
1215       } else {
1216         _old_regions_removed++;
1217         _g1->free_region(hr, _local_cleanup_list, true /* skip_remset */);
1218       }
1219     } else {
1220       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1221     }
1222 
1223     return false;
1224   }
1225 };
1226 
1227 class G1ParNoteEndTask: public AbstractGangTask {
1228   friend class G1NoteEndOfConcMarkClosure;
1229 
1230 protected:
1231   G1CollectedHeap* _g1h;
1232   FreeRegionList* _cleanup_list;
1233   HeapRegionClaimer _hrclaimer;
1234 
1235 public:
1236   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1237       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1238   }
1239 
1240   void work(uint worker_id) {
1241     FreeRegionList local_cleanup_list("Local Cleanup List");
1242     HRRSCleanupTask hrrs_cleanup_task;
1243     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1244                                            &hrrs_cleanup_task);
1245     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1246     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1247 
1248     // Now update the lists
1249     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1250     {
1251       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1252       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1253 
1254       // If we iterate over the global cleanup list at the end of
1255       // cleanup to do this printing we will not guarantee to only
1256       // generate output for the newly-reclaimed regions (the list
1257       // might not be empty at the beginning of cleanup; we might
1258       // still be working on its previous contents). So we do the
1259       // printing here, before we append the new regions to the global
1260       // cleanup list.
1261 
1262       G1HRPrinter* hr_printer = _g1h->hr_printer();
1263       if (hr_printer->is_active()) {
1264         FreeRegionListIterator iter(&local_cleanup_list);
1265         while (iter.more_available()) {
1266           HeapRegion* hr = iter.get_next();
1267           hr_printer->cleanup(hr);
1268         }
1269       }
1270 
1271       _cleanup_list->add_ordered(&local_cleanup_list);
1272       assert(local_cleanup_list.is_empty(), "post-condition");
1273 
1274       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1275     }
1276   }
1277 };
1278 
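// The Cleanup pause: finalize and optionally verify the liveness data, swap
// the mark bitmaps, note the end of marking in every region, free completely
// empty regions and, if G1ScrubRemSets is set, scrub the remembered sets.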
1279 void G1ConcurrentMark::cleanup() {
1280   // world is stopped at this checkpoint
1281   assert(SafepointSynchronize::is_at_safepoint(),
1282          "world should be stopped");
1283   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1284 
1285   // If a full collection has happened, we shouldn't do this.
1286   if (has_aborted()) {
1287     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1288     return;
1289   }
1290 
1291   g1h->verifier()->verify_region_sets_optional();
1292 
1293   if (VerifyDuringGC) {
1294     HandleMark hm;  // handle scope
1295     g1h->prepare_for_verify();
1296     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1297   }
1298   g1h->verifier()->check_bitmaps("Cleanup Start");
1299 
1300   G1Policy* g1p = g1h->g1_policy();
1301   g1p->record_concurrent_mark_cleanup_start();
1302 
1303   double start = os::elapsedTime();
1304 
1305   HeapRegionRemSet::reset_for_cleanup_tasks();
1306 
1307   {
1308     GCTraceTime(Debug, gc)("Finalize Live Data");
1309     finalize_live_data();
1310   }
1311 
1312   if (VerifyDuringGC) {
1313     GCTraceTime(Debug, gc)("Verify Live Data");
1314     verify_live_data();
1315   }
1316 
1317   g1h->collector_state()->set_mark_in_progress(false);
1318 
1319   double count_end = os::elapsedTime();
1320   double this_final_counting_time = (count_end - start);
1321   _total_counting_time += this_final_counting_time;
1322 
1323   if (log_is_enabled(Trace, gc, liveness)) {
1324     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1325     _g1h->heap_region_iterate(&cl);
1326   }
1327 
  // Install the just-completed "next" mark bitmap as the "prev" bitmap.
1329   swapMarkBitMaps();
1330 
1331   g1h->reset_gc_time_stamp();
1332 
1333   uint n_workers = _g1h->workers()->active_workers();
1334 
1335   // Note end of marking in all heap regions.
1336   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1337   g1h->workers()->run_task(&g1_par_note_end_task);
1338   g1h->check_gc_time_stamps();
1339 
1340   if (!cleanup_list_is_empty()) {
1341     // The cleanup list is not empty, so we'll have to process it
1342     // concurrently. Notify anyone else that might be wanting free
1343     // regions that there will be more free regions coming soon.
1344     g1h->set_free_regions_coming();
1345   }
1346 
  // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
  // call below, since it affects the metric by which we sort the heap
  // regions.
1349   if (G1ScrubRemSets) {
1350     double rs_scrub_start = os::elapsedTime();
1351     g1h->scrub_rem_set();
1352     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1353   }
1354 
1355   // this will also free any regions totally full of garbage objects,
1356   // and sort the regions.
1357   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1358 
1359   // Statistics.
1360   double end = os::elapsedTime();
1361   _cleanup_times.add((end - start) * 1000.0);
1362 
1363   // Clean up will have freed any regions completely full of garbage.
1364   // Update the soft reference policy with the new heap occupancy.
1365   Universe::update_heap_info_at_gc();
1366 
1367   if (VerifyDuringGC) {
1368     HandleMark hm;  // handle scope
1369     g1h->prepare_for_verify();
1370     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1371   }
1372 
1373   g1h->verifier()->check_bitmaps("Cleanup End");
1374 
1375   g1h->verifier()->verify_region_sets_optional();
1376 
1377   // We need to make this be a "collection" so any collection pause that
1378   // races with it goes around and waits for completeCleanup to finish.
1379   g1h->increment_total_collections();
1380 
1381   // Clean out dead classes and update Metaspace sizes.
1382   if (ClassUnloadingWithConcurrentMark) {
1383     ClassLoaderDataGraph::purge();
1384   }
1385   MetaspaceGC::compute_new_size();
1386 
1387   // We reclaimed old regions so we should calculate the sizes to make
1388   // sure we update the old gen/space data.
1389   g1h->g1mm()->update_sizes();
1390   g1h->allocation_context_stats().update_after_mark();
1391 }
1392 
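// Concurrently clear the regions reclaimed during the Cleanup pause and hand
// them over to the secondary free list in batches, so that they become
// available for allocation again.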
1393 void G1ConcurrentMark::complete_cleanup() {
1394   if (has_aborted()) return;
1395 
1396   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1397 
1398   _cleanup_list.verify_optional();
1399   FreeRegionList tmp_free_list("Tmp Free List");
1400 
1401   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1402                                   "cleanup list has %u entries",
1403                                   _cleanup_list.length());
1404 
1405   // No one else should be accessing the _cleanup_list at this point,
1406   // so it is not necessary to take any locks
1407   while (!_cleanup_list.is_empty()) {
1408     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1409     assert(hr != NULL, "Got NULL from a non-empty list");
1410     hr->par_clear();
1411     tmp_free_list.add_ordered(hr);
1412 
1413     // Instead of adding one region at a time to the secondary_free_list,
1414     // we accumulate them in the local list and move them a few at a
1415     // time. This also cuts down on the number of notify_all() calls
1416     // we do during this process. We'll also append the local list when
1417     // _cleanup_list is empty (which means we just removed the last
1418     // region from the _cleanup_list).
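         // As an illustrative example (the batch size is controlled by the
         // G1SecondaryFreeListAppendLength flag; the value 5 here is hypothetical):
         // with 12 regions on the cleanup list we would hand over two batches of 5
         // and a final batch of 2, taking the SecondaryFreeList_lock and calling
         // notify_all() three times instead of twelve.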
1419     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1420         _cleanup_list.is_empty()) {
1421       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1422                                       "appending %u entries to the secondary_free_list, "
1423                                       "cleanup list still has %u entries",
1424                                       tmp_free_list.length(),
1425                                       _cleanup_list.length());
1426 
1427       {
1428         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1429         g1h->secondary_free_list_add(&tmp_free_list);
1430         SecondaryFreeList_lock->notify_all();
1431       }
1432 #ifndef PRODUCT
1433       if (G1StressConcRegionFreeing) {
1434         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1435           os::sleep(Thread::current(), (jlong) 1, false);
1436         }
1437       }
1438 #endif
1439     }
1440   }
1441   assert(tmp_free_list.is_empty(), "post-condition");
1442 }
1443 
1444 // Supporting Object and Oop closures for reference discovery
1445 // and processing during marking
1446 
1447 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1448   HeapWord* addr = (HeapWord*)obj;
1449   return addr != NULL &&
1450          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1451 }
1452 
1453 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1454 // Uses the G1CMTask associated with a worker thread (for serial reference
1455 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1456 // trace referent objects.
1457 //
1458 // Using the G1CMTask and embedded local queues avoids having the worker
1459 // threads operating on the global mark stack. This reduces the risk
1460 // of overflowing the stack - which we would rather avoid at this late
1461 // stage. Also, using the tasks' local queues removes the potential for
1462 // the workers to interfere with each other, which could occur if they were
1463 // operating on the global stack.
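     //
     // For illustration, this is how instances of this closure are set up further
     // down in this file (a sketch of the existing usage, not new functionality):
     //
     //   // Serial reference processing (VMThread) - uses the G1CMTask for worker 0.
     //   G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
     //
     //   // Parallel reference processing - one instance per worker, inside the gang task.
     //   G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);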
1464 
1465 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1466   G1ConcurrentMark* _cm;
1467   G1CMTask*         _task;
1468   int               _ref_counter_limit;
1469   int               _ref_counter;
1470   bool              _is_serial;
1471  public:
1472   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1473     _cm(cm), _task(task), _is_serial(is_serial),
1474     _ref_counter_limit(G1RefProcDrainInterval) {
1475     assert(_ref_counter_limit > 0, "sanity");
1476     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1477     _ref_counter = _ref_counter_limit;
1478   }
1479 
1480   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1481   virtual void do_oop(      oop* p) { do_oop_work(p); }
1482 
1483   template <class T> void do_oop_work(T* p) {
1484     if (!_cm->has_overflown()) {
1485       oop obj = oopDesc::load_decode_heap_oop(p);
1486       _task->deal_with_reference(obj);
1487       _ref_counter--;
1488 
1489       if (_ref_counter == 0) {
1490         // We have dealt with _ref_counter_limit references, pushing them
1491         // and objects reachable from them on to the local stack (and
1492         // possibly the global stack). Call G1CMTask::do_marking_step() to
1493         // process these entries.
1494         //
1495         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1496         // there's nothing more to do (i.e. we're done with the entries that
1497         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1498         // above) or we overflow.
1499         //
1500         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1501         // flag while there may still be some work to do. (See the comment at
1502         // the beginning of G1CMTask::do_marking_step() for those conditions -
1503         // one of which is reaching the specified time target.) It is only
1504         // when G1CMTask::do_marking_step() returns without setting the
1505         // has_aborted() flag that the marking step has completed.
1506         do {
1507           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1508           _task->do_marking_step(mark_step_duration_ms,
1509                                  false      /* do_termination */,
1510                                  _is_serial);
1511         } while (_task->has_aborted() && !_cm->has_overflown());
1512         _ref_counter = _ref_counter_limit;
1513       }
1514     }
1515   }
1516 };
1517 
1518 // 'Drain' oop closure used by both serial and parallel reference processing.
1519 // Uses the G1CMTask associated with a given worker thread (for serial
1520 // reference processing the G1CMTask for worker 0 is used). Calls the
1521 // do_marking_step routine, with an unbelievably large timeout value,
1522 // to drain the marking data structures of the remaining entries
1523 // added by the 'keep alive' oop closure above.
1524 
1525 class G1CMDrainMarkingStackClosure: public VoidClosure {
1526   G1ConcurrentMark* _cm;
1527   G1CMTask*         _task;
1528   bool              _is_serial;
1529  public:
1530   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1531     _cm(cm), _task(task), _is_serial(is_serial) {
1532     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1533   }
1534 
1535   void do_void() {
1536     do {
1537       // We call G1CMTask::do_marking_step() to completely drain the local
1538       // and global marking stacks of entries pushed by the 'keep alive'
1539       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1540       //
1541       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1542       // if there's nothing more to do (i.e. we've completely drained the
1543       // entries that were pushed as a result of applying the 'keep alive'
1544       // closure to the entries on the discovered ref lists) or we overflow
1545       // the global marking stack.
1546       //
1547       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1548       // flag while there may still be some work to do. (See the comment at
1549       // the beginning of G1CMTask::do_marking_step() for those conditions -
1550       // one of which is reaching the specified time target.) It is only
1551       // when G1CMTask::do_marking_step() returns without setting the
1552       // has_aborted() flag that the marking step has completed.
1553 
1554       _task->do_marking_step(1000000000.0 /* something very large */,
1555                              true         /* do_termination */,
1556                              _is_serial);
1557     } while (_task->has_aborted() && !_cm->has_overflown());
1558   }
1559 };
1560 
1561 // Implementation of AbstractRefProcTaskExecutor for parallel
1562 // reference processing at the end of G1 concurrent marking
1563 
1564 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1565 private:
1566   G1CollectedHeap*  _g1h;
1567   G1ConcurrentMark* _cm;
1568   WorkGang*         _workers;
1569   uint              _active_workers;
1570 
1571 public:
1572   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1573                           G1ConcurrentMark* cm,
1574                           WorkGang* workers,
1575                           uint n_workers) :
1576     _g1h(g1h), _cm(cm),
1577     _workers(workers), _active_workers(n_workers) { }
1578 
1579   // Executes the given task using concurrent marking worker threads.
1580   virtual void execute(ProcessTask& task);
1581   virtual void execute(EnqueueTask& task);
1582 };
1583 
1584 class G1CMRefProcTaskProxy: public AbstractGangTask {
1585   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1586   ProcessTask&      _proc_task;
1587   G1CollectedHeap*  _g1h;
1588   G1ConcurrentMark* _cm;
1589 
1590 public:
1591   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1592                        G1CollectedHeap* g1h,
1593                        G1ConcurrentMark* cm) :
1594     AbstractGangTask("Process reference objects in parallel"),
1595     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1596     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1597     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1598   }
1599 
1600   virtual void work(uint worker_id) {
1601     ResourceMark rm;
1602     HandleMark hm;
1603     G1CMTask* task = _cm->task(worker_id);
1604     G1CMIsAliveClosure g1_is_alive(_g1h);
1605     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1606     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1607 
1608     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1609   }
1610 };
1611 
1612 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1613   assert(_workers != NULL, "Need parallel worker threads.");
1614   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1615 
1616   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1617 
1618   // We need to reset the concurrency level before each
1619   // proxy task execution, so that the termination protocol
1620   // and overflow handling in G1CMTask::do_marking_step() knows
1621   // how many workers to wait for.
1622   _cm->set_concurrency(_active_workers);
1623   _workers->run_task(&proc_task_proxy);
1624 }
1625 
1626 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1627   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1628   EnqueueTask& _enq_task;
1629 
1630 public:
1631   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1632     AbstractGangTask("Enqueue reference objects in parallel"),
1633     _enq_task(enq_task) { }
1634 
1635   virtual void work(uint worker_id) {
1636     _enq_task.work(worker_id);
1637   }
1638 };
1639 
1640 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1641   assert(_workers != NULL, "Need parallel worker threads.");
1642   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1643 
1644   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1645 
1646   // Not strictly necessary but...
1647   //
1648   // We need to reset the concurrency level before each
1649   // proxy task execution, so that the termination protocol
1650   // and overflow handling in G1CMTask::do_marking_step() knows
1651   // how many workers to wait for.
1652   _cm->set_concurrency(_active_workers);
1653   _workers->run_task(&enq_task_proxy);
1654 }
1655 
1656 void G1ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
1657   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
1658 }
1659 
1660 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1661   if (has_overflown()) {
1662     // Skip processing the discovered references if we have
1663     // overflown the global marking stack. Reference objects
1664     // only get discovered once so it is OK to not
1665     // de-populate the discovered reference lists. We could have,
1666     // but the only benefit would be that, when marking restarts,
1667     // fewer reference objects would be discovered.
1668     return;
1669   }
1670 
1671   ResourceMark rm;
1672   HandleMark   hm;
1673 
1674   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1675 
1676   // Is alive closure.
1677   G1CMIsAliveClosure g1_is_alive(g1h);
1678 
1679   // Inner scope to exclude the cleaning of the string and symbol
1680   // tables from the displayed time.
1681   {
1682     GCTraceTime(Debug, gc, phases) trace("Reference Processing", _gc_timer_cm);
1683 
1684     ReferenceProcessor* rp = g1h->ref_processor_cm();
1685 
1686     // See the comment in G1CollectedHeap::ref_processing_init()
1687     // about how reference processing currently works in G1.
1688 
1689     // Set the soft reference policy
1690     rp->setup_policy(clear_all_soft_refs);
1691     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1692 
1693     // Instances of the 'Keep Alive' and 'Complete GC' closures used
1694     // in serial reference processing. Note these closures are also
1695     // used for serially processing (by the current thread) the
1696     // JNI references during parallel reference processing.
1697     //
1698     // These closures do not need to synchronize with the worker
1699     // threads involved in parallel reference processing as these
1700     // instances are executed serially by the current thread (e.g.
1701     // reference processing is not multi-threaded and is thus
1702     // performed by the current thread instead of a gang worker).
1703     //
1704     // The gang tasks involved in parallel reference processing create
1705     // their own instances of these closures, which do their own
1706     // synchronization among themselves.
1707     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
1708     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
1709 
1710     // We need at least one active thread. If reference processing
1711     // is not multi-threaded we use the current (VMThread) thread,
1712     // otherwise we use the work gang from the G1CollectedHeap and
1713     // we utilize all the worker threads we can.
1714     bool processing_is_mt = rp->processing_is_mt();
1715     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
1716     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
1717 
1718     // Parallel processing task executor.
1719     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
1720                                               g1h->workers(), active_workers);
1721     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
1722 
1723     // Set the concurrency level. The phase was already set prior to
1724     // executing the remark task.
1725     set_concurrency(active_workers);
1726 
1727     // Set the degree of MT processing here.  If the discovery was done MT,
1728     // the number of threads involved during discovery could differ from
1729     // the number of active workers.  This is OK as long as the discovered
1730     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
1731     rp->set_active_mt_degree(active_workers);
1732 
1733     // Process the weak references.
1734     const ReferenceProcessorStats& stats =
1735         rp->process_discovered_references(&g1_is_alive,
1736                                           &g1_keep_alive,
1737                                           &g1_drain_mark_stack,
1738                                           executor,
1739                                           _gc_timer_cm);
1740     _gc_tracer_cm->report_gc_reference_stats(stats);
1741 
1742     // The do_oop work routines of the keep_alive and drain_marking_stack
1743     // oop closures will set the has_overflown flag if we overflow the
1744     // global marking stack.
1745 
1746     assert(_global_mark_stack.is_out_of_memory() || _global_mark_stack.is_empty(),
1747             "Mark stack should be empty (unless it is out of memory)");
1748 
1749     assert(rp->num_q() == active_workers, "Reference processor queue count should match the number of active workers");
1750 
1751     rp->enqueue_discovered_references(executor);
1752 
1753     rp->verify_no_references_recorded();
1754     assert(!rp->discovery_enabled(), "Post condition");
1755   }
1756 
1757   if (has_overflown()) {
1758     // We can not trust g1_is_alive if the marking stack overflowed
1759     return;
1760   }
1761 
1762   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1763 
1764   // Unload Klasses, String, Symbols, Code Cache, etc.
1765   if (ClassUnloadingWithConcurrentMark) {
1766     bool purged_classes;
1767 
1768     {
1769       GCTraceTime(Debug, gc, phases) trace("System Dictionary Unloading", _gc_timer_cm);
1770       purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
1771     }
1772 
1773     {
1774       GCTraceTime(Debug, gc, phases) trace("Parallel Unloading", _gc_timer_cm);
1775       weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
1776     }
1777   }
1778 
1779   if (G1StringDedup::is_enabled()) {
1780     GCTraceTime(Debug, gc, phases) trace("String Deduplication Unlink", _gc_timer_cm);
1781     G1StringDedup::unlink(&g1_is_alive);
1782   }
1783 }
1784 
1785 void G1ConcurrentMark::swapMarkBitMaps() {
1786   G1CMBitMapRO* temp = _prevMarkBitMap;
1787   _prevMarkBitMap    = (G1CMBitMapRO*)_nextMarkBitMap;
1788   _nextMarkBitMap    = (G1CMBitMap*)  temp;
1789 }
1790 
1791 // Closure for marking entries in SATB buffers.
1792 class G1CMSATBBufferClosure : public SATBBufferClosure {
1793 private:
1794   G1CMTask* _task;
1795   G1CollectedHeap* _g1h;
1796 
1797   // This is very similar to G1CMTask::deal_with_reference, but with
1798   // more relaxed requirements for the argument, so this must be more
1799   // circumspect about treating the argument as an object.
1800   void do_entry(void* entry) const {
1801     _task->increment_refs_reached();
1802     HeapRegion* hr = _g1h->heap_region_containing(entry);
1803     if (entry < hr->next_top_at_mark_start()) {
1804       // Until we get here, we don't know whether entry refers to a valid
1805       // object; it could instead have been a stale reference.
1806       oop obj = static_cast<oop>(entry);
1807       assert(obj->is_oop(true /* ignore mark word */),
1808              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
1809       _task->make_reference_grey(obj);
1810     }
1811   }
1812 
1813 public:
1814   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1815     : _task(task), _g1h(g1h) { }
1816 
1817   virtual void do_buffer(void** buffer, size_t size) {
1818     for (size_t i = 0; i < size; ++i) {
1819       do_entry(buffer[i]);
1820     }
1821   }
1822 };
1823 
1824 class G1RemarkThreadsClosure : public ThreadClosure {
1825   G1CMSATBBufferClosure _cm_satb_cl;
1826   G1CMOopClosure _cm_cl;
1827   MarkingCodeBlobClosure _code_cl;
1828   int _thread_parity;
1829 
1830  public:
1831   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1832     _cm_satb_cl(task, g1h),
1833     _cm_cl(g1h, g1h->concurrent_mark(), task),
1834     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
1835     _thread_parity(Threads::thread_claim_parity()) {}
1836 
1837   void do_thread(Thread* thread) {
1838     if (thread->is_Java_thread()) {
1839       if (thread->claim_oops_do(true, _thread_parity)) {
1840         JavaThread* jt = (JavaThread*)thread;
1841 
1842         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
1843         // however, the oops reachable from nmethods have very complex lifecycles:
1844         // * Alive if on the stack of an executing method
1845         // * Weakly reachable otherwise
1846         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
1847         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
1848         jt->nmethods_do(&_code_cl);
1849 
1850         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
1851       }
1852     } else if (thread->is_VM_thread()) {
1853       if (thread->claim_oops_do(true, _thread_parity)) {
1854         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
1855       }
1856     }
1857   }
1858 };
1859 
1860 class G1CMRemarkTask: public AbstractGangTask {
1861 private:
1862   G1ConcurrentMark* _cm;
1863 public:
1864   void work(uint worker_id) {
1865     // Since all available tasks are actually started, we should
1866     // only proceed if we're supposed to be active.
1867     if (worker_id < _cm->active_tasks()) {
1868       G1CMTask* task = _cm->task(worker_id);
1869       task->record_start_time();
1870       {
1871         ResourceMark rm;
1872         HandleMark hm;
1873 
1874         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1875         Threads::threads_do(&threads_f);
1876       }
1877 
1878       do {
1879         task->do_marking_step(1000000000.0 /* something very large */,
1880                               true         /* do_termination       */,
1881                               false        /* is_serial            */);
1882       } while (task->has_aborted() && !_cm->has_overflown());
1883       // If we overflow, then we do not want to restart. We instead
1884       // want to abort remark and do concurrent marking again.
1885       task->record_end_time();
1886     }
1887   }
1888 
1889   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1890     AbstractGangTask("Par Remark"), _cm(cm) {
1891     _cm->terminator()->reset_for_reuse(active_workers);
1892   }
1893 };
1894 
1895 void G1ConcurrentMark::checkpointRootsFinalWork() {
1896   ResourceMark rm;
1897   HandleMark   hm;
1898   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1899 
1900   GCTraceTime(Debug, gc, phases) trace("Finalize Marking", _gc_timer_cm);
1901 
1902   g1h->ensure_parsability(false);
1903 
1904   // this is remark, so we'll use up all active threads
1905   uint active_workers = g1h->workers()->active_workers();
1906   set_concurrency_and_phase(active_workers, false /* concurrent */);
1907   // Leave _parallel_marking_threads at its
1908   // value originally calculated in the G1ConcurrentMark
1909   // constructor and pass the number of active workers
1910   // through the gang in the task.
1911 
1912   {
1913     StrongRootsScope srs(active_workers);
1914 
1915     G1CMRemarkTask remarkTask(this, active_workers);
1916     // We will start all available threads, even if we decide that the
1917     // active_workers will be fewer. The extra ones will just bail out
1918     // immediately.
1919     g1h->workers()->run_task(&remarkTask);
1920   }
1921 
1922   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1923   guarantee(has_overflown() ||
1924             satb_mq_set.completed_buffers_num() == 0,
1925             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
1926             BOOL_TO_STR(has_overflown()),
1927             satb_mq_set.completed_buffers_num());
1928 
1929   print_stats();
1930 }
1931 
1932 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
1933   // Note we are overriding the read-only view of the prev map here, via
1934   // the cast.
1935   ((G1CMBitMap*)_prevMarkBitMap)->clear_range(mr);
1936 }
1937 
1938 HeapRegion*
1939 G1ConcurrentMark::claim_region(uint worker_id) {
1940   // "checkpoint" the finger
1941   HeapWord* finger = _finger;
1942 
1943   // _heap_end will not change underneath our feet; it only changes at
1944   // yield points.
1945   while (finger < _heap_end) {
1946     assert(_g1h->is_in_g1_reserved(finger), "invariant");
1947 
1948     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
1949     // Make sure that the reads below do not float before loading curr_region.
1950     OrderAccess::loadload();
1951     // heap_region_containing() above may return NULL as we always scan and
1952     // claim regions up to the end of the heap. In this case, just jump to the next region.
1953     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
1954 
1955     // Is the gap between reading the finger and doing the CAS too long?
1956     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
1957     if (res == finger && curr_region != NULL) {
1958       // we succeeded
1959       HeapWord*   bottom        = curr_region->bottom();
1960       HeapWord*   limit         = curr_region->next_top_at_mark_start();
1961 
1962       // notice that _finger == end cannot be guaranteed here since
1963       // someone else might have moved the finger even further
1964       assert(_finger >= end, "the finger should have moved forward");
1965 
1966       if (limit > bottom) {
1967         return curr_region;
1968       } else {
1969         assert(limit == bottom,
1970                "the region limit should be at bottom");
1971         // we return NULL and the caller should try calling
1972         // claim_region() again.
1973         return NULL;
1974       }
1975     } else {
1976       assert(_finger > finger, "the finger should have moved forward");
1977       // read it again
1978       finger = _finger;
1979     }
1980   }
1981 
1982   return NULL;
1983 }
1984 
1985 #ifndef PRODUCT
1986 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1987 private:
1988   G1CollectedHeap* _g1h;
1989   const char* _phase;
1990   int _info;
1991 
1992 public:
1993   VerifyNoCSetOops(const char* phase, int info = -1) :
1994     _g1h(G1CollectedHeap::heap()),
1995     _phase(phase),
1996     _info(info)
1997   { }
1998 
1999   void operator()(oop obj) const {
2000     guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || obj->is_oop(),
2001               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2002               p2i(obj), _phase, _info);
2003     guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_in_cset(obj),
2004               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2005               p2i(obj), _phase, _info);
2006   }
2007 };
2008 
2009 void G1ConcurrentMark::verify_no_cset_oops() {
2010   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2011   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2012     return;
2013   }
2014 
2015   // Verify entries on the global mark stack
2016   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
2017 
2018   // Verify entries on the task queues
2019   for (uint i = 0; i < _max_worker_id; ++i) {
2020     G1CMTaskQueue* queue = _task_queues->queue(i);
2021     queue->iterate(VerifyNoCSetOops("Queue", i));
2022   }
2023 
2024   // Verify the global finger
2025   HeapWord* global_finger = finger();
2026   if (global_finger != NULL && global_finger < _heap_end) {
2027     // Since we always iterate over all regions, we might get a NULL HeapRegion
2028     // here.
2029     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2030     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2031               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2032               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2033   }
2034 
2035   // Verify the task fingers
2036   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2037   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2038     G1CMTask* task = _tasks[i];
2039     HeapWord* task_finger = task->finger();
2040     if (task_finger != NULL && task_finger < _heap_end) {
2041       // See above note on the global finger verification.
2042       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2043       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2044                 !task_hr->in_collection_set(),
2045                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2046                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2047     }
2048   }
2049 }
2050 #endif // PRODUCT
2051 void G1ConcurrentMark::create_live_data() {
2052   _g1h->g1_rem_set()->create_card_live_data(_parallel_workers, _nextMarkBitMap);
2053 }
2054 
2055 void G1ConcurrentMark::finalize_live_data() {
2056   _g1h->g1_rem_set()->finalize_card_live_data(_g1h->workers(), _nextMarkBitMap);
2057 }
2058 
2059 void G1ConcurrentMark::verify_live_data() {
2060   _g1h->g1_rem_set()->verify_card_live_data(_g1h->workers(), _nextMarkBitMap);
2061 }
2062 
2063 void G1ConcurrentMark::clear_live_data(WorkGang* workers) {
2064   _g1h->g1_rem_set()->clear_card_live_data(workers);
2065 }
2066 
2067 #ifdef ASSERT
2068 void G1ConcurrentMark::verify_live_data_clear() {
2069   _g1h->g1_rem_set()->verify_card_live_data_is_clear();
2070 }
2071 #endif
2072 
2073 void G1ConcurrentMark::print_stats() {
2074   if (!log_is_enabled(Debug, gc, stats)) {
2075     return;
2076   }
2077   log_debug(gc, stats)("---------------------------------------------------------------------");
2078   for (size_t i = 0; i < _active_tasks; ++i) {
2079     _tasks[i]->print_stats();
2080     log_debug(gc, stats)("---------------------------------------------------------------------");
2081   }
2082 }
2083 
2084 void G1ConcurrentMark::abort() {
2085   if (!cmThread()->during_cycle() || _has_aborted) {
2086     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2087     return;
2088   }
2089 
2090   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2091   // concurrent bitmap clearing.
2092   {
2093     GCTraceTime(Debug, gc) trace("Clear Next Bitmap");
2094     clear_bitmap(_nextMarkBitMap, _g1h->workers(), false);
2095   }
2096   // Note we cannot clear the previous marking bitmap here
2097   // since VerifyDuringGC verifies the objects marked during
2098   // a full GC against the previous bitmap.
2099 
2100   {
2101     GCTraceTime(Debug, gc) trace("Clear Live Data");
2102     clear_live_data(_g1h->workers());
2103   }
2104   DEBUG_ONLY({
2105     GCTraceTime(Debug, gc) trace("Verify Live Data Clear");
2106     verify_live_data_clear();
2107   })
2108   // Empty mark stack
2109   reset_marking_state();
2110   for (uint i = 0; i < _max_worker_id; ++i) {
2111     _tasks[i]->clear_region_fields();
2112   }
2113   _first_overflow_barrier_sync.abort();
2114   _second_overflow_barrier_sync.abort();
2115   _has_aborted = true;
2116 
2117   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2118   satb_mq_set.abandon_partial_marking();
2119   // This can be called either during or outside marking; we'll read
2120   // the expected_active value from the SATB queue set.
2121   satb_mq_set.set_active_all_threads(
2122                                  false, /* new active value */
2123                                  satb_mq_set.is_active() /* expected_active */);
2124 }
2125 
2126 static void print_ms_time_info(const char* prefix, const char* name,
2127                                NumberSeq& ns) {
2128   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2129                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2130   if (ns.num() > 0) {
2131     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2132                            prefix, ns.sd(), ns.maximum());
2133   }
2134 }
2135 
2136 void G1ConcurrentMark::print_summary_info() {
2137   Log(gc, marking) log;
2138   if (!log.is_trace()) {
2139     return;
2140   }
2141 
2142   log.trace(" Concurrent marking:");
2143   print_ms_time_info("  ", "init marks", _init_times);
2144   print_ms_time_info("  ", "remarks", _remark_times);
2145   {
2146     print_ms_time_info("     ", "final marks", _remark_mark_times);
2147     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2148 
2149   }
2150   print_ms_time_info("  ", "cleanups", _cleanup_times);
2151   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2152             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2153   if (G1ScrubRemSets) {
2154     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2155               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2156   }
2157   log.trace("  Total stop_world time = %8.2f s.",
2158             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2159   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2160             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2161 }
2162 
2163 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2164   _parallel_workers->print_worker_threads_on(st);
2165 }
2166 
2167 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2168   _parallel_workers->threads_do(tc);
2169 }
2170 
2171 void G1ConcurrentMark::print_on_error(outputStream* st) const {
2172   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2173       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2174   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2175   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2176 }
2177 
2178 // Closure for iteration over bitmaps
2179 class G1CMBitMapClosure : public BitMapClosure {
2180 private:
2181   // the bitmap that is being iterated over
2182   G1CMBitMap*                 _nextMarkBitMap;
2183   G1ConcurrentMark*           _cm;
2184   G1CMTask*                   _task;
2185 
2186 public:
2187   G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
2188     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2189 
2190   bool do_bit(size_t offset) {
2191     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2192     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2193     assert(addr < _cm->finger(), "invariant");
2194     assert(addr >= _task->finger(), "invariant");
2195 
2196     // We move that task's local finger along.
2197     _task->move_finger_to(addr);
2198 
2199     _task->scan_object(oop(addr));
2200     // we only partially drain the local queue and global stack
2201     _task->drain_local_queue(true);
2202     _task->drain_global_stack(true);
2203 
2204     // if the has_aborted flag has been raised, we need to bail out of
2205     // the iteration
2206     return !_task->has_aborted();
2207   }
2208 };
2209 
2210 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2211   ReferenceProcessor* result = g1h->ref_processor_cm();
2212   assert(result != NULL, "CM reference processor should not be NULL");
2213   return result;
2214 }
2215 
2216 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2217                                G1ConcurrentMark* cm,
2218                                G1CMTask* task)
2219   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2220     _g1h(g1h), _cm(cm), _task(task)
2221 { }
2222 
2223 void G1CMTask::setup_for_region(HeapRegion* hr) {
2224   assert(hr != NULL,
2225         "claim_region() should have filtered out NULL regions");
2226   _curr_region  = hr;
2227   _finger       = hr->bottom();
2228   update_region_limit();
2229 }
2230 
2231 void G1CMTask::update_region_limit() {
2232   HeapRegion* hr            = _curr_region;
2233   HeapWord* bottom          = hr->bottom();
2234   HeapWord* limit           = hr->next_top_at_mark_start();
2235 
2236   if (limit == bottom) {
2237     // The region was collected underneath our feet.
2238     // We set the finger to bottom to ensure that the bitmap
2239     // iteration that will follow this will not do anything.
2240     // (this is not a condition that holds when we set the region up,
2241     // as the region is not supposed to be empty in the first place)
2242     _finger = bottom;
2243   } else if (limit >= _region_limit) {
2244     assert(limit >= _finger, "peace of mind");
2245   } else {
2246     assert(limit < _region_limit, "only way to get here");
2247     // This can happen under some pretty unusual circumstances.  An
2248     // evacuation pause empties the region underneath our feet (NTAMS
2249     // at bottom). We then do some allocation in the region (NTAMS
2250     // stays at bottom), followed by the region being used as a GC
2251     // alloc region (NTAMS will move to top() and the objects
2252     // originally below it will be grayed). All objects now marked in
2253     // the region are explicitly grayed, if below the global finger,
2254     // and we do not need in fact to scan anything else. So, we simply
2255     // set _finger to be limit to ensure that the bitmap iteration
2256     // doesn't do anything.
2257     _finger = limit;
2258   }
2259 
2260   _region_limit = limit;
2261 }
2262 
2263 void G1CMTask::giveup_current_region() {
2264   assert(_curr_region != NULL, "invariant");
2265   clear_region_fields();
2266 }
2267 
2268 void G1CMTask::clear_region_fields() {
2269   // Values for these three fields that indicate that we're not
2270   // holding on to a region.
2271   _curr_region   = NULL;
2272   _finger        = NULL;
2273   _region_limit  = NULL;
2274 }
2275 
2276 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2277   if (cm_oop_closure == NULL) {
2278     assert(_cm_oop_closure != NULL, "invariant");
2279   } else {
2280     assert(_cm_oop_closure == NULL, "invariant");
2281   }
2282   _cm_oop_closure = cm_oop_closure;
2283 }
2284 
2285 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2286   guarantee(nextMarkBitMap != NULL, "invariant");
2287   _nextMarkBitMap                = nextMarkBitMap;
2288   clear_region_fields();
2289 
2290   _calls                         = 0;
2291   _elapsed_time_ms               = 0.0;
2292   _termination_time_ms           = 0.0;
2293   _termination_start_time_ms     = 0.0;
2294 }
2295 
2296 bool G1CMTask::should_exit_termination() {
2297   regular_clock_call();
2298   // This is called when we are in the termination protocol. We should
2299   // quit if, for some reason, this task wants to abort or the global
2300   // stack is not empty (this means that we can get work from it).
2301   return !_cm->mark_stack_empty() || has_aborted();
2302 }
2303 
2304 void G1CMTask::reached_limit() {
2305   assert(_words_scanned >= _words_scanned_limit ||
2306          _refs_reached >= _refs_reached_limit,
2307          "shouldn't have been called otherwise");
2308   regular_clock_call();
2309 }
2310 
2311 void G1CMTask::regular_clock_call() {
2312   if (has_aborted()) return;
2313 
2314   // First, we need to recalculate the words scanned and refs reached
2315   // limits for the next clock call.
2316   recalculate_limits();
2317 
2318   // During the regular clock call we do the following
2319 
2320   // (1) If an overflow has been flagged, then we abort.
2321   if (_cm->has_overflown()) {
2322     set_has_aborted();
2323     return;
2324   }
2325 
2326   // If we are not concurrent (i.e. we're doing remark) we don't need
2327   // to check anything else. The other steps are only needed during
2328   // the concurrent marking phase.
2329   if (!concurrent()) return;
2330 
2331   // (2) If marking has been aborted for Full GC, then we also abort.
2332   if (_cm->has_aborted()) {
2333     set_has_aborted();
2334     return;
2335   }
2336 
2337   double curr_time_ms = os::elapsedVTime() * 1000.0;
2338 
2339   // (3) We check whether we should yield. If we have to, then we abort.
2340   if (SuspendibleThreadSet::should_yield()) {
2341     // We should yield. To do this we abort the task. The caller is
2342     // responsible for yielding.
2343     set_has_aborted();
2344     return;
2345   }
2346 
2347   // (4) We check whether we've reached our time quota. If we have,
2348   // then we abort.
2349   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2350   if (elapsed_time_ms > _time_target_ms) {
2351     set_has_aborted();
2352     _has_timed_out = true;
2353     return;
2354   }
2355 
2356   // (5) Finally, we check whether there are enough completed SATB
2357   // buffers available for processing. If there are, we abort.
2358   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2359   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2360     // We do need to process SATB buffers, so we'll abort and restart
2361     // the marking task to do so.
2362     set_has_aborted();
2363     return;
2364   }
2365 }
2366 
2367 void G1CMTask::recalculate_limits() {
2368   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2369   _words_scanned_limit      = _real_words_scanned_limit;
2370 
2371   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2372   _refs_reached_limit       = _real_refs_reached_limit;
2373 }
2374 
2375 void G1CMTask::decrease_limits() {
2376   // This is called when we believe that we're going to do an infrequent
2377   // operation which will increase the per byte scanned cost (i.e. move
2378   // entries to/from the global stack). It basically tries to decrease the
2379   // scanning limit so that the clock is called earlier.
2380 
2381   _words_scanned_limit = _real_words_scanned_limit -
2382     3 * words_scanned_period / 4;
2383   _refs_reached_limit  = _real_refs_reached_limit -
2384     3 * refs_reached_period / 4;
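       // For example (illustrative numbers only; the real periods are constants
       // defined for G1CMTask): if words_scanned_period were 1024 and
       // _real_words_scanned_limit were 10240, the new _words_scanned_limit would
       // be 10240 - 768 = 9472, so the next regular_clock_call() comes after
       // roughly a quarter of the usual scanning period.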
2385 }
2386 
2387 void G1CMTask::move_entries_to_global_stack() {
2388   // Local array where we'll store the entries that will be popped
2389   // from the local queue.
2390   oop buffer[G1CMMarkStack::OopsPerChunk];
2391 
2392   size_t n = 0;
2393   oop obj;
2394   while (n < G1CMMarkStack::OopsPerChunk && _task_queue->pop_local(obj)) {
2395     buffer[n] = obj;
2396     ++n;
2397   }
2398   if (n < G1CMMarkStack::OopsPerChunk) {
2399     buffer[n] = NULL;
2400   }
2401 
2402   if (n > 0) {
2403     if (!_cm->mark_stack_push(buffer)) {
2404       set_has_aborted();
2405     }
2406   }
2407 
2408   // This operation was quite expensive, so decrease the limits.
2409   decrease_limits();
2410 }
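     // Note on the chunk format used by move_entries_to_global_stack() and
     // get_entries_from_global_stack(): a chunk transfers up to
     // G1CMMarkStack::OopsPerChunk entries at a time, and a partially filled
     // chunk is terminated by a NULL entry so the consumer knows where the
     // valid entries end.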
2411 
2412 bool G1CMTask::get_entries_from_global_stack() {
2413   // Local array where we'll store the entries that will be popped
2414   // from the global stack.
2415   oop buffer[G1CMMarkStack::OopsPerChunk];
2416 
2417   if (!_cm->mark_stack_pop(buffer)) {
2418     return false;
2419   }
2420 
2421   // We did actually pop at least one entry.
2422   for (size_t i = 0; i < G1CMMarkStack::OopsPerChunk; ++i) {
2423     oop elem = buffer[i];
2424     if (elem == NULL) {
2425       break;
2426     }
2427     assert(G1CMObjArrayProcessor::is_array_slice(elem) || elem->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(elem));
2428     bool success = _task_queue->push(elem);
2429     // We only call this when the local queue is empty or under a
2430     // given target limit. So, we do not expect this push to fail.
2431     assert(success, "invariant");
2432   }
2433 
2434   // This operation was quite expensive, so decrease the limits
2435   decrease_limits();
2436   return true;
2437 }
2438 
2439 void G1CMTask::drain_local_queue(bool partially) {
2440   if (has_aborted()) {
2441     return;
2442   }
2443 
2444   // Decide what the target size is, depending whether we're going to
2445   // drain it partially (so that other tasks can steal if they run out
2446   // of things to do) or totally (at the very end).
2447   size_t target_size;
2448   if (partially) {
2449     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2450   } else {
2451     target_size = 0;
2452   }
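       // For example (hypothetical values): with max_elems() == 16*1024 and
       // GCDrainStackTargetSize == 64, a partial drain stops once the queue is
       // down to MIN2(16*1024/3, 64) == 64 entries, leaving some work available
       // for stealing by other tasks.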
2453 
2454   if (_task_queue->size() > target_size) {
2455     oop obj;
2456     bool ret = _task_queue->pop_local(obj);
2457     while (ret) {
2458       scan_object(obj);
2459       if (_task_queue->size() <= target_size || has_aborted()) {
2460         ret = false;
2461       } else {
2462         ret = _task_queue->pop_local(obj);
2463       }
2464     }
2465   }
2466 }
2467 
2468 void G1CMTask::drain_global_stack(bool partially) {
2469   if (has_aborted()) return;
2470 
2471   // We have a policy to drain the local queue before we attempt to
2472   // drain the global stack.
2473   assert(partially || _task_queue->size() == 0, "invariant");
2474 
2475   // Decide what the target size is, depending whether we're going to
2476   // drain it partially (so that other tasks can steal if they run out
2477   // of things to do) or totally (at the very end).
2478   // Notice that when draining the global mark stack partially, due to the raciness
2479   // of the mark stack size update we might in fact drop below the target. But,
2480   // this is not a problem.
2481   // In case of total draining, we simply process until the global mark stack is
2482   // totally empty, disregarding the size counter.
2483   if (partially) {
2484     size_t const target_size = _cm->partial_mark_stack_size_target();
2485     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2486       if (get_entries_from_global_stack()) {
2487         drain_local_queue(partially);
2488       }
2489     }
2490   } else {
2491     while (!has_aborted() && get_entries_from_global_stack()) {
2492       drain_local_queue(partially);
2493     }
2494   }
2495 }
2496 
2497 // SATB Queue has several assumptions on whether to call the par or
2498 // non-par versions of the methods. This is why some of the code is
2499 // replicated. We should really get rid of the single-threaded version
2500 // of the code to simplify things.
2501 void G1CMTask::drain_satb_buffers() {
2502   if (has_aborted()) return;
2503 
2504   // We set this so that the regular clock knows that we're in the
2505   // middle of draining buffers and doesn't set the abort flag when it
2506   // notices that SATB buffers are available for draining. It'd be
2507   // very counterproductive if it did that. :-)
2508   _draining_satb_buffers = true;
2509 
2510   G1CMSATBBufferClosure satb_cl(this, _g1h);
2511   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2512 
2513   // This keeps claiming and applying the closure to completed buffers
2514   // until we run out of buffers or we need to abort.
2515   while (!has_aborted() &&
2516          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2517     regular_clock_call();
2518   }
2519 
2520   _draining_satb_buffers = false;
2521 
2522   assert(has_aborted() ||
2523          concurrent() ||
2524          satb_mq_set.completed_buffers_num() == 0, "invariant");
2525 
2526   // Again, this was a potentially expensive operation; decrease the
2527   // limits to get the regular clock call early.
2528   decrease_limits();
2529 }
2530 
2531 void G1CMTask::print_stats() {
2532   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2533                        _worker_id, _calls);
2534   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2535                        _elapsed_time_ms, _termination_time_ms);
2536   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2537                        _step_times_ms.num(), _step_times_ms.avg(),
2538                        _step_times_ms.sd());
2539   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
2540                        _step_times_ms.maximum(), _step_times_ms.sum());
2541 }
2542 
2543 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
2544   return _task_queues->steal(worker_id, hash_seed, obj);
2545 }
2546 
2547 /*****************************************************************************
2548 
2549     The do_marking_step(time_target_ms, ...) method is the building
2550     block of the parallel marking framework. It can be called in parallel
2551     with other invocations of do_marking_step() on different tasks
2552     (but only one per task, obviously) and concurrently with the
2553     mutator threads, or during remark, hence it eliminates the need
2554     for two versions of the code. When called during remark, it will
2555     pick up from where the task left off during the concurrent marking
2556     phase. Interestingly, tasks are also claimable during evacuation
2557     pauses, since do_marking_step() ensures that it aborts before
2558     it needs to yield.
2559 
2560     The data structures that it uses to do marking work are the
2561     following:
2562 
2563       (1) Marking Bitmap. If there are gray objects that appear only
2564       on the bitmap (this happens either when dealing with an overflow
2565       or when the initial marking phase has simply marked the roots
2566       and didn't push them on the stack), then tasks claim heap
2567       regions whose bitmap they then scan to find gray objects. A
2568       global finger indicates where the end of the last claimed region
2569       is. A local finger indicates how far into the region a task has
2570       scanned. The two fingers are used to determine how to gray an
2571       object (i.e. whether simply marking it is OK, as it will be
2572       visited by a task in the future, or whether it needs to be also
2573       pushed on a stack).
2574 
2575       (2) Local Queue. The local queue of the task which is accessed
2576       reasonably efficiently by the task. Other tasks can steal from
2577       it when they run out of work. Throughout the marking phase, a
2578       task attempts to keep its local queue short but not totally
2579       empty, so that entries are available for stealing by other
2580       tasks. Only when there is no more work, a task will totally
2581       drain its local queue.
2582 
2583       (3) Global Mark Stack. This handles local queue overflow. During
2584       marking only sets of entries are moved between it and the local
2585       queues, as access to it requires a mutex and more fine-grained
2586       interaction with it might cause contention. If it
2587       overflows, then the marking phase should restart and iterate
2588       over the bitmap to identify gray objects. Throughout the marking
2589       phase, tasks attempt to keep the global mark stack at a small
2590       length but not totally empty, so that entries are available for
2591       popping by other tasks. Only when there is no more work, tasks
2592       will totally drain the global mark stack.
2593 
2594       (4) SATB Buffer Queue. This is where completed SATB buffers are
2595       made available. Buffers are regularly removed from this queue
2596       and scanned for roots, so that the queue doesn't get too
2597       long. During remark, all completed buffers are processed, as
2598       well as the filled in parts of any uncompleted buffers.
2599 
2600     The do_marking_step() method tries to abort when the time target
2601     has been reached. There are a few other cases when the
2602     do_marking_step() method also aborts:
2603 
2604       (1) When the marking phase has been aborted (after a Full GC).
2605 
2606       (2) When a global overflow (on the global stack) has been
2607       triggered. Before the task aborts, it will actually sync up with
2608       the other tasks to ensure that all the marking data structures
2609       (local queues, stacks, fingers etc.)  are re-initialized so that
2610       when do_marking_step() completes, the marking phase can
2611       immediately restart.
2612 
2613       (3) When enough completed SATB buffers are available. The
2614       do_marking_step() method only tries to drain SATB buffers right
2615       at the beginning. So, if enough buffers are available, the
2616       marking step aborts and the SATB buffers are processed at
2617       the beginning of the next invocation.
2618 
2619       (4) To yield. When we have to yield, we abort and yield
2620       right at the end of do_marking_step(). This saves us from a lot
2621       of hassle as, by yielding we might allow a Full GC. If this
2622       happens then objects will be compacted underneath our feet, the
2623       heap might shrink, etc. We save checking for this by just
2624       aborting and doing the yield right at the end.
2625 
2626     From the above it follows that the do_marking_step() method should
2627     be called in a loop (or, otherwise, regularly) until it completes.
2628 
2629     If a marking step completes without its has_aborted() flag being
2630     true, it means it has completed the current marking phase (and
2631     also all other marking tasks have done so and have all synced up).
2632 
2633     A method called regular_clock_call() is invoked "regularly" (in
2634     sub ms intervals) throughout marking. It is this clock method that
2635     checks all the abort conditions which were mentioned above and
2636     decides when the task should abort. A work-based scheme is used to
2637     trigger this clock method: when the number of object words the
2638     marking phase has scanned or the number of references the marking
2639     phase has visited reach a given limit. Additional invocations to
2640     the clock method have been planted in a few other strategic places
2641     too. The initial reason for the clock method was to avoid calling
2642     vtime too regularly, as it is quite expensive. So, once it was in
2643     place, it was natural to piggy-back all the other conditions on it
2644     too and not constantly check them throughout the code.
2645 
2646     If do_termination is true then do_marking_step will enter its
2647     termination protocol.
2648 
2649     The value of is_serial must be true when do_marking_step is being
2650     called serially (i.e. by the VMThread) and do_marking_step should
2651     skip any synchronization in the termination and overflow code.
2652     Examples include the serial remark code and the serial reference
2653     processing closures.
2654 
2655     The value of is_serial must be false when do_marking_step is
2656     being called by any of the worker threads in a work gang.
2657     Examples include the concurrent marking code (CMMarkingTask),
2658     the MT remark code, and the MT reference processing closures.
2659 
2660  *****************************************************************************/
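     // As a concrete illustration of the calling convention described above, the
     // remark task and the reference processing closures in this file drive
     // do_marking_step() with the following pattern (a sketch; 'task' and 'cm'
     // stand in for the G1CMTask and G1ConcurrentMark at hand):
     //
     //   do {
     //     task->do_marking_step(1000000000.0 /* something very large */,
     //                           true         /* do_termination */,
     //                           is_serial);
     //   } while (task->has_aborted() && !cm->has_overflown());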
2661 
2662 void G1CMTask::do_marking_step(double time_target_ms,
2663                                bool do_termination,
2664                                bool is_serial) {
2665   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2666   assert(concurrent() == _cm->concurrent(), "they should be the same");
2667 
2668   G1Policy* g1_policy = _g1h->g1_policy();
2669   assert(_task_queues != NULL, "invariant");
2670   assert(_task_queue != NULL, "invariant");
2671   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
2672 
2673   assert(!_claimed,
2674          "only one thread should claim this task at any one time");
2675 
2676   // OK, this doesn't safeguard against all possible scenarios, as it is
2677   // possible for two threads to set the _claimed flag at the same
2678   // time. But it is only for debugging purposes anyway and it will
2679   // catch most problems.
2680   _claimed = true;
2681 
2682   _start_time_ms = os::elapsedVTime() * 1000.0;
2683 
2684   // If do_stealing is true then do_marking_step will attempt to
2685   // steal work from the other G1CMTasks. It only makes sense to
2686   // enable stealing when the termination protocol is enabled
2687   // and do_marking_step() is not being called serially.
2688   bool do_stealing = do_termination && !is_serial;
2689 
2690   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
2691   _time_target_ms = time_target_ms - diff_prediction_ms;
2692 
2693   // set up the variables that are used in the work-based scheme to
2694   // call the regular clock method
2695   _words_scanned = 0;
2696   _refs_reached  = 0;
2697   recalculate_limits();
2698 
2699   // clear all flags
2700   clear_has_aborted();
2701   _has_timed_out = false;
2702   _draining_satb_buffers = false;
2703 
2704   ++_calls;
2705 
2706   // Set up the bitmap and oop closures. Anything that uses them is
2707   // eventually called from this method, so it is OK to allocate these
2708   // statically.
2709   G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
2710   G1CMOopClosure    cm_oop_closure(_g1h, _cm, this);
2711   set_cm_oop_closure(&cm_oop_closure);
2712 
2713   if (_cm->has_overflown()) {
2714     // This can happen if the mark stack overflows during a GC pause
2715     // and this task, after a yield point, restarts. We have to abort
2716     // as we need to get into the overflow protocol which happens
2717     // right at the end of this task.
2718     set_has_aborted();
2719   }
2720 
2721   // First drain any available SATB buffers. After this, we will not
2722   // look at SATB buffers before the next invocation of this method.
2723   // If enough completed SATB buffers are queued up, the regular clock
2724   // will abort this task so that it restarts.
2725   drain_satb_buffers();
2726   // ...then partially drain the local queue and the global stack
2727   drain_local_queue(true);
2728   drain_global_stack(true);
2729 
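       // Main loop: finish scanning the region we are currently holding on to
       // (if any), then keep claiming and scanning new regions until we either
       // run out of regions or abort (e.g. due to the time target or a mark
       // stack overflow).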
2730   do {
2731     if (!has_aborted() && _curr_region != NULL) {
2732       // This means that we're already holding on to a region.
2733       assert(_finger != NULL, "if region is not NULL, then the finger "
2734              "should not be NULL either");
2735 
2736       // We might have restarted this task after an evacuation pause
2737       // which might have evacuated the region we're holding on to
2738       // underneath our feet. Let's read its limit again to make sure
2739       // that we do not iterate over a region of the heap that
2740       // contains garbage (update_region_limit() will also move
2741       // _finger to the start of the region if it is found empty).
2742       update_region_limit();
2743       // We will start from _finger not from the start of the region,
2744       // as we might be restarting this task after aborting half-way
2745       // through scanning this region. In this case, _finger points to
2746       // the address where we last found a marked object. If this is a
2747       // fresh region, _finger points to start().
2748       MemRegion mr = MemRegion(_finger, _region_limit);
2749 
2750       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2751              "humongous regions should go around the loop only once");
2752 
2753       // Some special cases:
2754       // If the memory region is empty, we can just give up the region.
2755       // If the current region is humongous then we only need to check
2756       // the bitmap for the bit associated with the start of the object,
2757       // scan the object if it's live, and give up the region.
2758       // Otherwise, let's iterate over the bitmap of the part of the region
2759       // that is left.
2760       // If the iteration is successful, give up the region.
2761       if (mr.is_empty()) {
2762         giveup_current_region();
2763         regular_clock_call();
2764       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2765         if (_nextMarkBitMap->isMarked(mr.start())) {
2766           // The object is marked - apply the closure
2767           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
2768           bitmap_closure.do_bit(offset);
2769         }
2770         // Even if this task aborted while scanning the humongous object
2771         // we can (and should) give up the current region.
2772         giveup_current_region();
2773         regular_clock_call();
2774       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
2775         giveup_current_region();
2776         regular_clock_call();
2777       } else {
2778         assert(has_aborted(), "currently the only way to do so");
2779         // The only way to abort the bitmap iteration is to return
2780         // false from the do_bit() method. However, inside the
2781         // do_bit() method we move the _finger to point to the
2782         // object currently being looked at. So, if we bail out, we
2783         // have definitely set _finger to something non-null.
2784         assert(_finger != NULL, "invariant");
2785 
2786         // Region iteration was actually aborted. So now _finger
2787         // points to the address of the object we last scanned. If we
2788         // leave it there, when we restart this task, we will rescan
2789         // the object. It is easy to avoid this. We move the finger by
2790         // enough to point to the next possible object header (the
2791         // bitmap knows by how much we need to move it as it knows its
2792         // granularity).
2793         assert(_finger < _region_limit, "invariant");
2794         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
2795         // Check if bitmap iteration was aborted while scanning the last object
2796         if (new_finger >= _region_limit) {
2797           giveup_current_region();
2798         } else {
2799           move_finger_to(new_finger);
2800         }
2801       }
2802     }
2803     // At this point we have either completed iterating over the
2804     // region we were holding on to, or we have aborted.
2805 
2806     // We then partially drain the local queue and the global stack.
2807     // (Do we really need this?)
2808     drain_local_queue(true);
2809     drain_global_stack(true);
2810 
2811     // Read the note on the claim_region() method on why it might
2812     // return NULL with potentially more regions available for
2813     // claiming and why we have to check out_of_regions() to determine
2814     // whether we're done or not.
2815     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
2816       // We are going to try to claim a new region. We should have
2817       // given up on the previous one.
2818       // Separated the asserts so that we know which one fires.
2819       assert(_curr_region  == NULL, "invariant");
2820       assert(_finger       == NULL, "invariant");
2821       assert(_region_limit == NULL, "invariant");
2822       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2823       if (claimed_region != NULL) {
2824         // Yes, we managed to claim one
2825         setup_for_region(claimed_region);
2826         assert(_curr_region == claimed_region, "invariant");
2827       }
2828       // It is important to call the regular clock here. It might take
2829       // a while to claim a region if, for example, we hit a large
2830       // block of empty regions. So we need to call the regular clock
2831       // method once round the loop to make sure it's called
2832       // frequently enough.
2833       regular_clock_call();
2834     }
2835 
2836     if (!has_aborted() && _curr_region == NULL) {
2837       assert(_cm->out_of_regions(),
2838              "at this point we should be out of regions");
2839     }
2840   } while (_curr_region != NULL && !has_aborted());
2841 
2842   if (!has_aborted()) {
2843     // We cannot check whether the global stack is empty, since other
2844     // tasks might be pushing objects to it concurrently.
2845     assert(_cm->out_of_regions(),
2846            "at this point we should be out of regions");
2847     // Try to reduce the number of available SATB buffers so that
2848     // remark has less work to do.
2849     drain_satb_buffers();
2850   }
2851 
2852   // Since we've done everything else, we can now totally drain the
2853   // local queue and global stack.
2854   drain_local_queue(false);
2855   drain_global_stack(false);
2856 
2857   // Attempt to steal work from other tasks' queues.
2858   if (do_stealing && !has_aborted()) {
2859     // We have not aborted. This means that we have finished all that
2860     // we could. Let's try to do some stealing...
2861 
2862     // We cannot check whether the global stack is empty, since other
2863     // tasks might be pushing objects to it concurrently.
2864     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2865            "only way to reach here");
2866     while (!has_aborted()) {
2867       oop obj;
2868       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
2869         scan_object(obj);
2870 
2871         // And since we're towards the end, let's totally drain the
2872         // local queue and global stack.
2873         drain_local_queue(false);
2874         drain_global_stack(false);
2875       } else {
2876         break;
2877       }
2878     }
2879   }
2880 
2881   // If we still haven't aborted, let's try to get into the
2882   // termination protocol.
2883   if (do_termination && !has_aborted()) {
2884     // We cannot check whether the global stack is empty, since other
2885     // tasks might be concurrently pushing objects on it.
2886     // Separated the asserts so that we know which one fires.
2887     assert(_cm->out_of_regions(), "only way to reach here");
2888     assert(_task_queue->size() == 0, "only way to reach here");
2889     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
2890 
2891     // The G1CMTask class also extends the TerminatorTerminator class,
2892     // hence its should_exit_termination() method will also decide
2893     // whether to exit the termination protocol or not.
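         // offer_termination() returns true once all tasks have offered
         // termination (i.e. global marking has finished) and false if this
         // task should leave the protocol and resume work.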
2894     bool finished = (is_serial ||
2895                      _cm->terminator()->offer_termination(this));
2896     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
2897     _termination_time_ms +=
2898       termination_end_time_ms - _termination_start_time_ms;
2899 
2900     if (finished) {
2901       // We're all done.
2902 
2903       if (_worker_id == 0) {
2904         // let's allow task 0 to do this
2905         if (concurrent()) {
2906           assert(_cm->concurrent_marking_in_progress(), "invariant");
2907           // we need to set this to false before the next
2908           // safepoint. This way we ensure that the marking phase
2909           // doesn't observe any more heap expansions.
2910           _cm->clear_concurrent_marking_in_progress();
2911         }
2912       }
2913 
2914       // We can now guarantee that the global stack is empty, since
2915       // all other tasks have finished. We separated the guarantees so
2916       // that, if a condition is false, we can immediately find out
2917       // which one.
2918       guarantee(_cm->out_of_regions(), "only way to reach here");
2919       guarantee(_cm->mark_stack_empty(), "only way to reach here");
2920       guarantee(_task_queue->size() == 0, "only way to reach here");
2921       guarantee(!_cm->has_overflown(), "only way to reach here");
2922     } else {
2923       // Apparently there's more work to do. Let's abort this task. The
2924       // caller will restart it and hopefully we will find more things to do.
2925       set_has_aborted();
2926     }
2927   }
2928 
2929   // Mainly for debugging purposes to make sure that a pointer to the
2930   // closure which was allocated on this frame's stack doesn't
2931   // escape it by accident.
2932   set_cm_oop_closure(NULL);
2933   double end_time_ms = os::elapsedVTime() * 1000.0;
2934   double elapsed_time_ms = end_time_ms - _start_time_ms;
2935   // Update the step history.
2936   _step_times_ms.add(elapsed_time_ms);
2937 
2938   if (has_aborted()) {
2939     // The task was aborted for some reason.
2940     if (_has_timed_out) {
2941       double diff_ms = elapsed_time_ms - _time_target_ms;
2942       // Keep statistics of how well we did with respect to hitting
2943       // our target only if we actually timed out (if we aborted for
2944       // other reasons, then the results might get skewed).
2945       _marking_step_diffs_ms.add(diff_ms);
2946     }
2947 
2948     if (_cm->has_overflown()) {
2949       // This is the interesting one. We aborted because a global
2950       // overflow was raised. This means we have to restart the
2951       // marking phase and start iterating over regions. However, in
2952       // order to do this we have to make sure that all tasks stop
2953       // what they are doing and re-initialize in a safe manner. We
2954       // will achieve this with the use of two barrier sync points.
2955 
2956       if (!is_serial) {
2957         // We only need to enter the sync barrier if being called
2958         // from a parallel context
2959         _cm->enter_first_sync_barrier(_worker_id);
2960 
2961         // When we exit this sync barrier we know that all tasks have
2962         // stopped doing marking work. So, it's now safe to
2963         // re-initialize our data structures. At the end of this method,
2964         // task 0 will clear the global data structures.
2965       }
2966 
2967       // We clear the local state of this task...
2968       clear_region_fields();
2969 
2970       if (!is_serial) {
2971         // ...and enter the second barrier.
2972         _cm->enter_second_sync_barrier(_worker_id);
2973       }
2974       // At this point, if we're in the concurrent phase of
2975       // marking, everything has been re-initialized and we're
2976       // ready to restart.
2977     }
2978   }
2979 
2980   _claimed = false;
2981 }
2982 
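     // Only the task's identity and queue wiring are established here; the
     // per-region marking state (_curr_region, _finger, _region_limit) is set
     // up later, e.g. via setup_for_region(), as regions are claimed during a
     // marking step.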
2983 G1CMTask::G1CMTask(uint worker_id,
2984                    G1ConcurrentMark* cm,
2985                    G1CMTaskQueue* task_queue,
2986                    G1CMTaskQueueSet* task_queues)
2987   : _g1h(G1CollectedHeap::heap()),
2988     _worker_id(worker_id), _cm(cm),
2989     _objArray_processor(this),
2990     _claimed(false),
2991     _nextMarkBitMap(NULL), _hash_seed(17),
2992     _task_queue(task_queue),
2993     _task_queues(task_queues),
2994     _cm_oop_closure(NULL) {
2995   guarantee(task_queue != NULL, "invariant");
2996   guarantee(task_queues != NULL, "invariant");
2997 
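       // Seed the step-time difference history so that the very first
       // prediction in do_marking_step() has a sample to work with.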
2998   _marking_step_diffs_ms.add(0.5);
2999 }
3000 
3001 // These are formatting macros that are used below to ensure
3002 // consistent formatting. The *_H_* versions are used to format the
3003 // header for a particular value and they should be kept consistent
3004 // with the corresponding macro. Also note that most of the macros add
3005 // the necessary whitespace (as a prefix), which makes them a bit
3006 // easier to compose.
3007 
3008 // All the output lines are prefixed with this string to be able to
3009 // identify them easily in a large log file.
3010 #define G1PPRL_LINE_PREFIX            "###"
3011 
3012 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3013 #ifdef _LP64
3014 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3015 #else // _LP64
3016 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3017 #endif // _LP64
3018 
3019 // For per-region info
3020 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3021 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3022 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3023 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3024 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3025 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
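     // Put together, the per-region macros above produce lines of the form
     // (illustrative, actual column widths differ):
     //   ### <type> <bottom>-<end>  <used>  <prev-live>  <next-live>  <gc-eff>  <remset>  <code-roots>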
3026 
3027 // For summary info
3028 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3029 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3030 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3031 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
3032 
3033 G1PrintRegionLivenessInfoClosure::
3034 G1PrintRegionLivenessInfoClosure(const char* phase_name)
3035   : _total_used_bytes(0), _total_capacity_bytes(0),
3036     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3037     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3038   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3039   MemRegion g1_reserved = g1h->g1_reserved();
3040   double now = os::elapsedTime();
3041 
3042   // Print the header of the output.
3043   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3044   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3045                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3046                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3047                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3048                           HeapRegion::GrainBytes);
3049   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3050   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3051                           G1PPRL_TYPE_H_FORMAT
3052                           G1PPRL_ADDR_BASE_H_FORMAT
3053                           G1PPRL_BYTE_H_FORMAT
3054                           G1PPRL_BYTE_H_FORMAT
3055                           G1PPRL_BYTE_H_FORMAT
3056                           G1PPRL_DOUBLE_H_FORMAT
3057                           G1PPRL_BYTE_H_FORMAT
3058                           G1PPRL_BYTE_H_FORMAT,
3059                           "type", "address-range",
3060                           "used", "prev-live", "next-live", "gc-eff",
3061                           "remset", "code-roots");
3062   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3063                           G1PPRL_TYPE_H_FORMAT
3064                           G1PPRL_ADDR_BASE_H_FORMAT
3065                           G1PPRL_BYTE_H_FORMAT
3066                           G1PPRL_BYTE_H_FORMAT
3067                           G1PPRL_BYTE_H_FORMAT
3068                           G1PPRL_DOUBLE_H_FORMAT
3069                           G1PPRL_BYTE_H_FORMAT
3070                           G1PPRL_BYTE_H_FORMAT,
3071                           "", "",
3072                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3073                           "(bytes)", "(bytes)");
3074 }
3075 
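     // Called for every region during the heap region iteration: accumulates
     // the per-region values into the totals reported by the destructor and
     // logs one line per region. Returning false keeps the iteration going.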
3076 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3077   const char* type       = r->get_type_str();
3078   HeapWord* bottom       = r->bottom();
3079   HeapWord* end          = r->end();
3080   size_t capacity_bytes  = r->capacity();
3081   size_t used_bytes      = r->used();
3082   size_t prev_live_bytes = r->live_bytes();
3083   size_t next_live_bytes = r->next_live_bytes();
3084   double gc_eff          = r->gc_efficiency();
3085   size_t remset_bytes    = r->rem_set()->mem_size();
3086   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3087 
3088   _total_used_bytes      += used_bytes;
3089   _total_capacity_bytes  += capacity_bytes;
3090   _total_prev_live_bytes += prev_live_bytes;
3091   _total_next_live_bytes += next_live_bytes;
3092   _total_remset_bytes    += remset_bytes;
3093   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3094 
3095   // Print a line for this particular region.
3096   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3097                           G1PPRL_TYPE_FORMAT
3098                           G1PPRL_ADDR_BASE_FORMAT
3099                           G1PPRL_BYTE_FORMAT
3100                           G1PPRL_BYTE_FORMAT
3101                           G1PPRL_BYTE_FORMAT
3102                           G1PPRL_DOUBLE_FORMAT
3103                           G1PPRL_BYTE_FORMAT
3104                           G1PPRL_BYTE_FORMAT,
3105                           type, p2i(bottom), p2i(end),
3106                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3107                           remset_bytes, strong_code_roots_bytes);
3108 
3109   return false;
3110 }
3111 
3112 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3113   // Add the free list and static memory usage to the total remembered set size.
3114   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3115   // Print the footer of the output.
3116   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3117   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3118                          " SUMMARY"
3119                          G1PPRL_SUM_MB_FORMAT("capacity")
3120                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3121                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3122                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3123                          G1PPRL_SUM_MB_FORMAT("remset")
3124                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3125                          bytes_to_mb(_total_capacity_bytes),
3126                          bytes_to_mb(_total_used_bytes),
3127                          perc(_total_used_bytes, _total_capacity_bytes),
3128                          bytes_to_mb(_total_prev_live_bytes),
3129                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3130                          bytes_to_mb(_total_next_live_bytes),
3131                          perc(_total_next_live_bytes, _total_capacity_bytes),
3132                          bytes_to_mb(_total_remset_bytes),
3133                          bytes_to_mb(_total_strong_code_roots_bytes));
3134 }