1 /*
   2  * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/classLoaderData.hpp"
  27 #include "classfile/stringTable.hpp"
  28 #include "classfile/systemDictionary.hpp"
  29 #include "code/codeCache.hpp"
  30 #include "gc_implementation/shared/adaptiveSizePolicy.hpp"
  31 #include "gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.hpp"
  32 #include "gc_implementation/concurrentMarkSweep/cmsOopClosures.inline.hpp"
  33 #include "gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp"
  34 #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.inline.hpp"
  35 #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp"
  36 #include "gc_implementation/concurrentMarkSweep/vmCMSOperations.hpp"
  37 #include "gc_implementation/parNew/parNewGeneration.hpp"
  38 #include "gc_implementation/shared/collectorCounters.hpp"
  39 #include "gc_implementation/shared/gcTimer.hpp"
  40 #include "gc_implementation/shared/gcTrace.hpp"
  41 #include "gc_implementation/shared/gcTraceTime.hpp"
  42 #include "gc_implementation/shared/isGCActiveMark.hpp"
  43 #include "gc_interface/collectedHeap.inline.hpp"
  44 #include "memory/allocation.hpp"
  45 #include "memory/cardTableRS.hpp"
  46 #include "memory/collectorPolicy.hpp"
  47 #include "memory/gcLocker.inline.hpp"
  48 #include "memory/genCollectedHeap.hpp"
  49 #include "memory/genMarkSweep.hpp"
  50 #include "memory/genOopClosures.inline.hpp"
  51 #include "memory/iterator.inline.hpp"
  52 #include "memory/padded.hpp"
  53 #include "memory/referencePolicy.hpp"
  54 #include "memory/resourceArea.hpp"
  55 #include "memory/tenuredGeneration.hpp"
  56 #include "oops/oop.inline.hpp"
  57 #include "prims/jvmtiExport.hpp"
  58 #include "runtime/atomic.inline.hpp"
  59 #include "runtime/globals_extension.hpp"
  60 #include "runtime/handles.inline.hpp"
  61 #include "runtime/java.hpp"
  62 #include "runtime/orderAccess.inline.hpp"
  63 #include "runtime/vmThread.hpp"
  64 #include "services/memoryService.hpp"
  65 #include "services/runtimeService.hpp"
  66 
  67 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
  68 
  69 // statics
  70 CMSCollector* ConcurrentMarkSweepGeneration::_collector = NULL;
  71 bool CMSCollector::_full_gc_requested = false;
  72 GCCause::Cause CMSCollector::_full_gc_cause = GCCause::_no_gc;
  73 
  74 //////////////////////////////////////////////////////////////////
  75 // In support of CMS/VM thread synchronization
  76 //////////////////////////////////////////////////////////////////
  77 // We split use of the CGC_lock into 2 "levels".
  78 // The low-level locking is of the usual CGC_lock monitor. We introduce
  79 // a higher level "token" (hereafter "CMS token") built on top of the
  80 // low level monitor (hereafter "CGC lock").
  81 // The token-passing protocol gives priority to the VM thread. The
  82 // CMS-lock doesn't provide any fairness guarantees, but clients
  83 // should ensure that it is only held for very short, bounded
  84 // durations.
  85 //
  86 // When either of the CMS thread or the VM thread is involved in
  87 // collection operations during which it does not want the other
  88 // thread to interfere, it obtains the CMS token.
  89 //
  90 // If either thread tries to get the token while the other has
  91 // it, that thread waits. However, if the VM thread and CMS thread
  92 // both want the token, then the VM thread gets priority while the
  93 // CMS thread waits. This ensures, for instance, that the "concurrent"
  94 // phases of the CMS thread's work do not block out the VM thread
  95 // for long periods of time as the CMS thread continues to hog
  96 // the token. (See bug 4616232).
  97 //
  98 // The baton-passing functions are, however, controlled by the
  99 // flags _foregroundGCShouldWait and _foregroundGCIsActive,
 100 // and here the low-level CMS lock, not the high level token,
 101 // ensures mutual exclusion.
 102 //
 103 // Two important conditions that we have to satisfy:
 104 // 1. if a thread does a low-level wait on the CMS lock, then it
 105 //    relinquishes the CMS token if it were holding that token
 106 //    when it acquired the low-level CMS lock.
 107 // 2. any low-level notifications on the low-level lock
 108 //    should only be sent when a thread has relinquished the token.
 109 //
 110 // In the absence of either property, we'd have potential deadlock.
 111 //
 112 // We protect each of the CMS (concurrent and sequential) phases
 113 // with the CMS _token_, not the CMS _lock_.
 114 //
 115 // The only code protected by CMS lock is the token acquisition code
 116 // itself, see ConcurrentMarkSweepThread::[de]synchronize(), and the
 117 // baton-passing code.
 118 //
// Unfortunately, I couldn't come up with a good abstraction to factor and
 120 // hide the naked CGC_lock manipulation in the baton-passing code
 121 // further below. That's something we should try to do. Also, the proof
 122 // of correctness of this 2-level locking scheme is far from obvious,
 123 // and potentially quite slippery. We have an uneasy suspicion, for instance,
 124 // that there may be a theoretical possibility of delay/starvation in the
 125 // low-level lock/wait/notify scheme used for the baton-passing because of
 126 // potential interference with the priority scheme embodied in the
 127 // CMS-token-passing protocol. See related comments at a CGC_lock->wait()
 128 // invocation further below and marked with "XXX 20011219YSR".
 129 // Indeed, as we note elsewhere, this may become yet more slippery
 130 // in the presence of multiple CMS and/or multiple VM threads. XXX
 131 
 132 class CMSTokenSync: public StackObj {
 133  private:
 134   bool _is_cms_thread;
 135  public:
 136   CMSTokenSync(bool is_cms_thread):
 137     _is_cms_thread(is_cms_thread) {
 138     assert(is_cms_thread == Thread::current()->is_ConcurrentGC_thread(),
 139            "Incorrect argument to constructor");
 140     ConcurrentMarkSweepThread::synchronize(_is_cms_thread);
 141   }
 142 
 143   ~CMSTokenSync() {
 144     assert(_is_cms_thread ?
 145              ConcurrentMarkSweepThread::cms_thread_has_cms_token() :
 146              ConcurrentMarkSweepThread::vm_thread_has_cms_token(),
 147           "Incorrect state");
 148     ConcurrentMarkSweepThread::desynchronize(_is_cms_thread);
 149   }
 150 };
 151 
 152 // Convenience class that does a CMSTokenSync, and then acquires
// up to three locks.
 154 class CMSTokenSyncWithLocks: public CMSTokenSync {
 155  private:
 156   // Note: locks are acquired in textual declaration order
 157   // and released in the opposite order
 158   MutexLockerEx _locker1, _locker2, _locker3;
 159  public:
 160   CMSTokenSyncWithLocks(bool is_cms_thread, Mutex* mutex1,
 161                         Mutex* mutex2 = NULL, Mutex* mutex3 = NULL):
 162     CMSTokenSync(is_cms_thread),
 163     _locker1(mutex1, Mutex::_no_safepoint_check_flag),
 164     _locker2(mutex2, Mutex::_no_safepoint_check_flag),
 165     _locker3(mutex3, Mutex::_no_safepoint_check_flag)
 166   { }
 167 };
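
// Illustrative usage sketch (an exposition aid, not a quote of any one call
// site): a phase that must exclude the other thread brackets its work with
// the token and whatever locks it needs, e.g.
//
//   {
//     CMSTokenSyncWithLocks ts(true /* is_cms_thread */, bitMapLock());
//     ... work that must not be interleaved with the VM thread ...
//   }  // locks released in reverse order, then the CMS token is given up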
 168 
 169 
 170 //////////////////////////////////////////////////////////////////
 171 //  Concurrent Mark-Sweep Generation /////////////////////////////
 172 //////////////////////////////////////////////////////////////////
 173 
 174 NOT_PRODUCT(CompactibleFreeListSpace* debug_cms_space;)
 175 
 176 // This struct contains per-thread things necessary to support parallel
 177 // young-gen collection.
 178 class CMSParGCThreadState: public CHeapObj<mtGC> {
 179  public:
 180   CFLS_LAB lab;
 181   PromotionInfo promo;
 182 
 183   // Constructor.
 184   CMSParGCThreadState(CompactibleFreeListSpace* cfls) : lab(cfls) {
 185     promo.setSpace(cfls);
 186   }
 187 };
 188 
 189 ConcurrentMarkSweepGeneration::ConcurrentMarkSweepGeneration(
 190      ReservedSpace rs, size_t initial_byte_size, int level,
 191      CardTableRS* ct, bool use_adaptive_freelists,
 192      FreeBlockDictionary<FreeChunk>::DictionaryChoice dictionaryChoice) :
 193   CardGeneration(rs, initial_byte_size, level, ct),
 194   _dilatation_factor(((double)MinChunkSize)/((double)(CollectedHeap::min_fill_size()))),
 195   _did_compact(false)
 196 {
 197   HeapWord* bottom = (HeapWord*) _virtual_space.low();
 198   HeapWord* end    = (HeapWord*) _virtual_space.high();
 199 
 200   _direct_allocated_words = 0;
 201   NOT_PRODUCT(
 202     _numObjectsPromoted = 0;
 203     _numWordsPromoted = 0;
 204     _numObjectsAllocated = 0;
 205     _numWordsAllocated = 0;
 206   )
 207 
 208   _cmsSpace = new CompactibleFreeListSpace(_bts, MemRegion(bottom, end),
 209                                            use_adaptive_freelists,
 210                                            dictionaryChoice);
 211   NOT_PRODUCT(debug_cms_space = _cmsSpace;)
 212   if (_cmsSpace == NULL) {
 213     vm_exit_during_initialization(
 214       "CompactibleFreeListSpace allocation failure");
 215   }
 216   _cmsSpace->_gen = this;
 217 
 218   _gc_stats = new CMSGCStats();
 219 
 220   // Verify the assumption that FreeChunk::_prev and OopDesc::_klass
 221   // offsets match. The ability to tell free chunks from objects
 222   // depends on this property.
 223   debug_only(
 224     FreeChunk* junk = NULL;
 225     assert(UseCompressedClassPointers ||
 226            junk->prev_addr() == (void*)(oop(junk)->klass_addr()),
 227            "Offset of FreeChunk::_prev within FreeChunk must match"
 228            "  that of OopDesc::_klass within OopDesc");
 229   )
 230   if (CollectedHeap::use_parallel_gc_threads()) {
 231     typedef CMSParGCThreadState* CMSParGCThreadStatePtr;
 232     _par_gc_thread_states =
 233       NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads, mtGC);
 234     if (_par_gc_thread_states == NULL) {
 235       vm_exit_during_initialization("Could not allocate par gc structs");
 236     }
 237     for (uint i = 0; i < ParallelGCThreads; i++) {
 238       _par_gc_thread_states[i] = new CMSParGCThreadState(cmsSpace());
 239       if (_par_gc_thread_states[i] == NULL) {
 240         vm_exit_during_initialization("Could not allocate par gc structs");
 241       }
 242     }
 243   } else {
 244     _par_gc_thread_states = NULL;
 245   }
 246   _incremental_collection_failed = false;
 247   // The "dilatation_factor" is the expansion that can occur on
 248   // account of the fact that the minimum object size in the CMS
 249   // generation may be larger than that in, say, a contiguous young
 250   //  generation.
 251   // Ideally, in the calculation below, we'd compute the dilatation
 252   // factor as: MinChunkSize/(promoting_gen's min object size)
 253   // Since we do not have such a general query interface for the
 254   // promoting generation, we'll instead just use the minimum
 255   // object size (which today is a header's worth of space);
 256   // note that all arithmetic is in units of HeapWords.
 257   assert(MinChunkSize >= CollectedHeap::min_fill_size(), "just checking");
 258   assert(_dilatation_factor >= 1.0, "from previous assert");
 259 }
 260 
 261 
 262 // The field "_initiating_occupancy" represents the occupancy percentage
 263 // at which we trigger a new collection cycle.  Unless explicitly specified
 264 // via CMSInitiatingOccupancyFraction (argument "io" below), it
 265 // is calculated by:
 266 //
 267 //   Let "f" be MinHeapFreeRatio in
 268 //
 269 //    _initiating_occupancy = 100-f +
 270 //                           f * (CMSTriggerRatio/100)
 271 //   where CMSTriggerRatio is the argument "tr" below.
 272 //
 273 // That is, if we assume the heap is at its desired maximum occupancy at the
// end of a collection, we let CMSTriggerRatio percent of the (purported) free
 275 // space be allocated before initiating a new collection cycle.
 276 //
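// For illustration only (assuming the usual default flag values, which may
// differ in a particular build): with CMSInitiatingOccupancyFraction left at
// its default of -1, MinHeapFreeRatio = 40 and CMSTriggerRatio = 80,
//
//   _initiating_occupancy = ((100 - 40) + (80 * 40) / 100) / 100 = 0.92
//
// i.e. a concurrent collection cycle is initiated once this generation is
// roughly 92% occupied.
//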
 277 void ConcurrentMarkSweepGeneration::init_initiating_occupancy(intx io, uintx tr) {
 278   assert(io <= 100 && tr <= 100, "Check the arguments");
 279   if (io >= 0) {
 280     _initiating_occupancy = (double)io / 100.0;
 281   } else {
 282     _initiating_occupancy = ((100 - MinHeapFreeRatio) +
 283                              (double)(tr * MinHeapFreeRatio) / 100.0)
 284                             / 100.0;
 285   }
 286 }
 287 
 288 void ConcurrentMarkSweepGeneration::ref_processor_init() {
 289   assert(collector() != NULL, "no collector");
 290   collector()->ref_processor_init();
 291 }
 292 
 293 void CMSCollector::ref_processor_init() {
 294   if (_ref_processor == NULL) {
 295     // Allocate and initialize a reference processor
 296     _ref_processor =
 297       new ReferenceProcessor(_span,                               // span
 298                              (ParallelGCThreads > 1) && ParallelRefProcEnabled, // mt processing
 299                              (int) ParallelGCThreads,             // mt processing degree
 300                              _cmsGen->refs_discovery_is_mt(),     // mt discovery
 301                              (int) MAX2(ConcGCThreads, ParallelGCThreads), // mt discovery degree
                             _cmsGen->refs_discovery_is_atomic(), // whether discovery is atomic
 303                              &_is_alive_closure);                 // closure for liveness info
 304     // Initialize the _ref_processor field of CMSGen
 305     _cmsGen->set_ref_processor(_ref_processor);
 306 
 307   }
 308 }
 309 
 310 AdaptiveSizePolicy* CMSCollector::size_policy() {
 311   GenCollectedHeap* gch = GenCollectedHeap::heap();
 312   assert(gch->kind() == CollectedHeap::GenCollectedHeap,
 313     "Wrong type of heap");
 314   return gch->gen_policy()->size_policy();
 315 }
 316 
 317 void ConcurrentMarkSweepGeneration::initialize_performance_counters() {
 318 
 319   const char* gen_name = "old";
 320   GenCollectorPolicy* gcp = (GenCollectorPolicy*) GenCollectedHeap::heap()->collector_policy();
 321 
 322   // Generation Counters - generation 1, 1 subspace
 323   _gen_counters = new GenerationCounters(gen_name, 1, 1,
 324       gcp->min_old_size(), gcp->max_old_size(), &_virtual_space);
 325 
 326   _space_counters = new GSpaceCounters(gen_name, 0,
 327                                        _virtual_space.reserved_size(),
 328                                        this, _gen_counters);
 329 }
 330 
 331 CMSStats::CMSStats(ConcurrentMarkSweepGeneration* cms_gen, unsigned int alpha):
 332   _cms_gen(cms_gen)
 333 {
 334   assert(alpha <= 100, "bad value");
 335   _saved_alpha = alpha;
 336 
 337   // Initialize the alphas to the bootstrap value of 100.
 338   _gc0_alpha = _cms_alpha = 100;
 339 
 340   _cms_begin_time.update();
 341   _cms_end_time.update();
 342 
 343   _gc0_duration = 0.0;
 344   _gc0_period = 0.0;
 345   _gc0_promoted = 0;
 346 
 347   _cms_duration = 0.0;
 348   _cms_period = 0.0;
 349   _cms_allocated = 0;
 350 
 351   _cms_used_at_gc0_begin = 0;
 352   _cms_used_at_gc0_end = 0;
 353   _allow_duty_cycle_reduction = false;
 354   _valid_bits = 0;
 355 }
 356 
 357 double CMSStats::cms_free_adjustment_factor(size_t free) const {
 358   // TBD: CR 6909490
 359   return 1.0;
 360 }
 361 
 362 void CMSStats::adjust_cms_free_adjustment_factor(bool fail, size_t free) {
 363 }
 364 
// If promotion failure handling is on, use
// the padded average size of the promotion for each
// young generation collection.
 368 double CMSStats::time_until_cms_gen_full() const {
 369   size_t cms_free = _cms_gen->cmsSpace()->free();
 370   GenCollectedHeap* gch = GenCollectedHeap::heap();
 371   size_t expected_promotion = MIN2(gch->get_gen(0)->capacity(),
 372                                    (size_t) _cms_gen->gc_stats()->avg_promoted()->padded_average());
 373   if (cms_free > expected_promotion) {
 374     // Start a cms collection if there isn't enough space to promote
 375     // for the next minor collection.  Use the padded average as
 376     // a safety factor.
 377     cms_free -= expected_promotion;
 378 
 379     // Adjust by the safety factor.
 380     double cms_free_dbl = (double)cms_free;
 381     double cms_adjustment = (100.0 - CMSIncrementalSafetyFactor)/100.0;
 382     // Apply a further correction factor which tries to adjust
    // for recent occurrences of concurrent mode failures.
 384     cms_adjustment = cms_adjustment * cms_free_adjustment_factor(cms_free);
 385     cms_free_dbl = cms_free_dbl * cms_adjustment;
 386 
 387     if (PrintGCDetails && Verbose) {
 388       gclog_or_tty->print_cr("CMSStats::time_until_cms_gen_full: cms_free "
 389         SIZE_FORMAT " expected_promotion " SIZE_FORMAT,
 390         cms_free, expected_promotion);
 391       gclog_or_tty->print_cr("  cms_free_dbl %f cms_consumption_rate %f",
 392         cms_free_dbl, cms_consumption_rate() + 1.0);
 393     }
 394     // Add 1 in case the consumption rate goes to zero.
 395     return cms_free_dbl / (cms_consumption_rate() + 1.0);
 396   }
 397   return 0.0;
 398 }
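
// Illustrative arithmetic only (made-up numbers): with cms_free = 200M,
// expected_promotion = 20M, a free-space adjustment factor of 0.9 and a
// cms_consumption_rate() of 8M per second, the estimate above would be
// roughly (200M - 20M) * 0.9 / (8M + 1) ~= 20 seconds until the generation
// fills up.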
 399 
 400 // Compare the duration of the cms collection to the
 401 // time remaining before the cms generation is empty.
// Note that the time from the start of the cms collection
// to the start of the cms sweep (less than the total
// duration of the cms collection) could be used instead.  This
// was tried, and some applications then experienced
// promotion failures early in execution, possibly because
// the averages were not yet accurate enough at the
// beginning of the run.
 409 double CMSStats::time_until_cms_start() const {
 410   // We add "gc0_period" to the "work" calculation
 411   // below because this query is done (mostly) at the
 412   // end of a scavenge, so we need to conservatively
 413   // account for that much possible delay
 414   // in the query so as to avoid concurrent mode failures
 415   // due to starting the collection just a wee bit too
 416   // late.
 417   double work = cms_duration() + gc0_period();
 418   double deadline = time_until_cms_gen_full();
 419   // If a concurrent mode failure occurred recently, we want to be
 420   // more conservative and halve our expected time_until_cms_gen_full()
 421   if (work > deadline) {
 422     if (Verbose && PrintGCDetails) {
 423       gclog_or_tty->print(
 424         " CMSCollector: collect because of anticipated promotion "
 425         "before full %3.7f + %3.7f > %3.7f ", cms_duration(),
 426         gc0_period(), time_until_cms_gen_full());
 427     }
 428     return 0.0;
 429   }
  return deadline - work;
 431 }
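
// Illustrative arithmetic only (made-up numbers): if a cycle is expected to
// take cms_duration() = 4.0s and the scavenge period gc0_period() is 1.0s
// (so work = 5.0s), while time_until_cms_gen_full() estimates 12.0s, this
// method returns 12.0 - 5.0 = 7.0s; once the estimated time remaining drops
// to (or below) the estimated work it returns 0.0, which is the value
// shouldConcurrentCollect() checks for when deciding to start a cycle.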
 432 
 433 #ifndef PRODUCT
 434 void CMSStats::print_on(outputStream *st) const {
 435   st->print(" gc0_alpha=%d,cms_alpha=%d", _gc0_alpha, _cms_alpha);
 436   st->print(",gc0_dur=%g,gc0_per=%g,gc0_promo=" SIZE_FORMAT,
 437                gc0_duration(), gc0_period(), gc0_promoted());
 438   st->print(",cms_dur=%g,cms_per=%g,cms_alloc=" SIZE_FORMAT,
 439             cms_duration(), cms_period(), cms_allocated());
 440   st->print(",cms_since_beg=%g,cms_since_end=%g",
 441             cms_time_since_begin(), cms_time_since_end());
 442   st->print(",cms_used_beg=" SIZE_FORMAT ",cms_used_end=" SIZE_FORMAT,
 443             _cms_used_at_gc0_begin, _cms_used_at_gc0_end);
 444 
 445   if (valid()) {
 446     st->print(",promo_rate=%g,cms_alloc_rate=%g",
 447               promotion_rate(), cms_allocation_rate());
 448     st->print(",cms_consumption_rate=%g,time_until_full=%g",
 449               cms_consumption_rate(), time_until_cms_gen_full());
 450   }
 451   st->print(" ");
 452 }
 453 #endif // #ifndef PRODUCT
 454 
 455 CMSCollector::CollectorState CMSCollector::_collectorState =
 456                              CMSCollector::Idling;
 457 bool CMSCollector::_foregroundGCIsActive = false;
 458 bool CMSCollector::_foregroundGCShouldWait = false;
 459 
 460 CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
 461                            CardTableRS*                   ct,
 462                            ConcurrentMarkSweepPolicy*     cp):
 463   _cmsGen(cmsGen),
 464   _ct(ct),
 465   _ref_processor(NULL),    // will be set later
 466   _conc_workers(NULL),     // may be set later
 467   _abort_preclean(false),
 468   _start_sampling(false),
 469   _between_prologue_and_epilogue(false),
 470   _markBitMap(0, Mutex::leaf + 1, "CMS_markBitMap_lock"),
 471   _modUnionTable((CardTableModRefBS::card_shift - LogHeapWordSize),
 472                  -1 /* lock-free */, "No_lock" /* dummy */),
 473   _modUnionClosure(&_modUnionTable),
 474   _modUnionClosurePar(&_modUnionTable),
 475   // Adjust my span to cover old (cms) gen
 476   _span(cmsGen->reserved()),
 477   // Construct the is_alive_closure with _span & markBitMap
 478   _is_alive_closure(_span, &_markBitMap),
 479   _restart_addr(NULL),
 480   _overflow_list(NULL),
 481   _stats(cmsGen),
 482   _eden_chunk_lock(new Mutex(Mutex::leaf + 1, "CMS_eden_chunk_lock", true)),
 483   _eden_chunk_array(NULL),     // may be set in ctor body
 484   _eden_chunk_capacity(0),     // -- ditto --
 485   _eden_chunk_index(0),        // -- ditto --
 486   _survivor_plab_array(NULL),  // -- ditto --
 487   _survivor_chunk_array(NULL), // -- ditto --
 488   _survivor_chunk_capacity(0), // -- ditto --
 489   _survivor_chunk_index(0),    // -- ditto --
 490   _ser_pmc_preclean_ovflw(0),
 491   _ser_kac_preclean_ovflw(0),
 492   _ser_pmc_remark_ovflw(0),
 493   _par_pmc_remark_ovflw(0),
 494   _ser_kac_ovflw(0),
 495   _par_kac_ovflw(0),
 496 #ifndef PRODUCT
 497   _num_par_pushes(0),
 498 #endif
 499   _collection_count_start(0),
 500   _verifying(false),
 501   _verification_mark_bm(0, Mutex::leaf + 1, "CMS_verification_mark_bm_lock"),
 502   _completed_initialization(false),
 503   _collector_policy(cp),
 504   _should_unload_classes(CMSClassUnloadingEnabled),
 505   _concurrent_cycles_since_last_unload(0),
 506   _roots_scanning_options(SharedHeap::SO_None),
 507   _inter_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding),
 508   _intra_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding),
 509   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) CMSTracer()),
 510   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 511   _cms_start_registered(false)
 512 {
 513   if (ExplicitGCInvokesConcurrentAndUnloadsClasses) {
 514     ExplicitGCInvokesConcurrent = true;
 515   }
 516   // Now expand the span and allocate the collection support structures
 517   // (MUT, marking bit map etc.) to cover both generations subject to
 518   // collection.
 519 
 520   // For use by dirty card to oop closures.
 521   _cmsGen->cmsSpace()->set_collector(this);
 522 
 523   // Allocate MUT and marking bit map
 524   {
 525     MutexLockerEx x(_markBitMap.lock(), Mutex::_no_safepoint_check_flag);
 526     if (!_markBitMap.allocate(_span)) {
 527       warning("Failed to allocate CMS Bit Map");
 528       return;
 529     }
 530     assert(_markBitMap.covers(_span), "_markBitMap inconsistency?");
 531   }
 532   {
 533     _modUnionTable.allocate(_span);
 534     assert(_modUnionTable.covers(_span), "_modUnionTable inconsistency?");
 535   }
 536 
 537   if (!_markStack.allocate(MarkStackSize)) {
 538     warning("Failed to allocate CMS Marking Stack");
 539     return;
 540   }
 541 
 542   // Support for multi-threaded concurrent phases
 543   if (CMSConcurrentMTEnabled) {
 544     if (FLAG_IS_DEFAULT(ConcGCThreads)) {
 545       // just for now
 546       FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
 547     }
 548     if (ConcGCThreads > 1) {
 549       _conc_workers = new YieldingFlexibleWorkGang("Parallel CMS Threads",
 550                                  ConcGCThreads, true);
 551       if (_conc_workers == NULL) {
 552         warning("GC/CMS: _conc_workers allocation failure: "
 553               "forcing -CMSConcurrentMTEnabled");
 554         CMSConcurrentMTEnabled = false;
 555       } else {
 556         _conc_workers->initialize_workers();
 557       }
 558     } else {
 559       CMSConcurrentMTEnabled = false;
 560     }
 561   }
 562   if (!CMSConcurrentMTEnabled) {
 563     ConcGCThreads = 0;
 564   } else {
 565     // Turn off CMSCleanOnEnter optimization temporarily for
 566     // the MT case where it's not fixed yet; see 6178663.
 567     CMSCleanOnEnter = false;
 568   }
 569   assert((_conc_workers != NULL) == (ConcGCThreads > 1),
 570          "Inconsistency");
 571 
 572   // Parallel task queues; these are shared for the
 573   // concurrent and stop-world phases of CMS, but
 574   // are not shared with parallel scavenge (ParNew).
 575   {
 576     uint i;
 577     uint num_queues = (uint) MAX2(ParallelGCThreads, ConcGCThreads);
 578 
 579     if ((CMSParallelRemarkEnabled || CMSConcurrentMTEnabled
 580          || ParallelRefProcEnabled)
 581         && num_queues > 0) {
 582       _task_queues = new OopTaskQueueSet(num_queues);
 583       if (_task_queues == NULL) {
 584         warning("task_queues allocation failure.");
 585         return;
 586       }
 587       _hash_seed = NEW_C_HEAP_ARRAY(int, num_queues, mtGC);
 588       if (_hash_seed == NULL) {
 589         warning("_hash_seed array allocation failure");
 590         return;
 591       }
 592 
 593       typedef Padded<OopTaskQueue> PaddedOopTaskQueue;
 594       for (i = 0; i < num_queues; i++) {
 595         PaddedOopTaskQueue *q = new PaddedOopTaskQueue();
 596         if (q == NULL) {
 597           warning("work_queue allocation failure.");
 598           return;
 599         }
 600         _task_queues->register_queue(i, q);
 601       }
 602       for (i = 0; i < num_queues; i++) {
 603         _task_queues->queue(i)->initialize();
 604         _hash_seed[i] = 17;  // copied from ParNew
 605       }
 606     }
 607   }
 608 
  _cmsGen->init_initiating_occupancy(CMSInitiatingOccupancyFraction, CMSTriggerRatio);
 610 
  // CMSBootstrapOccupancy is a percentage; convert it to a fraction.
 612   _bootstrap_occupancy = ((double)CMSBootstrapOccupancy)/(double)100;
 613 
 614   // Now tell CMS generations the identity of their collector
 615   ConcurrentMarkSweepGeneration::set_collector(this);
 616 
 617   // Create & start a CMS thread for this CMS collector
 618   _cmsThread = ConcurrentMarkSweepThread::start(this);
 619   assert(cmsThread() != NULL, "CMS Thread should have been created");
 620   assert(cmsThread()->collector() == this,
 621          "CMS Thread should refer to this gen");
 622   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 623 
 624   // Support for parallelizing young gen rescan
 625   GenCollectedHeap* gch = GenCollectedHeap::heap();
 626   assert(gch->prev_gen(_cmsGen)->kind() == Generation::ParNew, "CMS can only be used with ParNew");
 627   _young_gen = (ParNewGeneration*)gch->prev_gen(_cmsGen);
 628   if (gch->supports_inline_contig_alloc()) {
 629     _top_addr = gch->top_addr();
 630     _end_addr = gch->end_addr();
 631     assert(_young_gen != NULL, "no _young_gen");
 632     _eden_chunk_index = 0;
 633     _eden_chunk_capacity = (_young_gen->max_capacity()+CMSSamplingGrain)/CMSSamplingGrain;
 634     _eden_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, _eden_chunk_capacity, mtGC);
 635     if (_eden_chunk_array == NULL) {
 636       _eden_chunk_capacity = 0;
 637       warning("GC/CMS: _eden_chunk_array allocation failure");
 638     }
 639   }
 640   assert(_eden_chunk_array != NULL || _eden_chunk_capacity == 0, "Error");
 641 
 642   // Support for parallelizing survivor space rescan
 643   if ((CMSParallelRemarkEnabled && CMSParallelSurvivorRemarkEnabled) || CMSParallelInitialMarkEnabled) {
 644     const size_t max_plab_samples =
 645       ((DefNewGeneration*)_young_gen)->max_survivor_size()/MinTLABSize;
 646 
 647     _survivor_plab_array  = NEW_C_HEAP_ARRAY(ChunkArray, ParallelGCThreads, mtGC);
 648     _survivor_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, 2*max_plab_samples, mtGC);
 649     _cursor               = NEW_C_HEAP_ARRAY(size_t, ParallelGCThreads, mtGC);
 650     if (_survivor_plab_array == NULL || _survivor_chunk_array == NULL
 651         || _cursor == NULL) {
 652       warning("Failed to allocate survivor plab/chunk array");
 653       if (_survivor_plab_array  != NULL) {
 654         FREE_C_HEAP_ARRAY(ChunkArray, _survivor_plab_array, mtGC);
 655         _survivor_plab_array = NULL;
 656       }
 657       if (_survivor_chunk_array != NULL) {
 658         FREE_C_HEAP_ARRAY(HeapWord*, _survivor_chunk_array, mtGC);
 659         _survivor_chunk_array = NULL;
 660       }
 661       if (_cursor != NULL) {
 662         FREE_C_HEAP_ARRAY(size_t, _cursor, mtGC);
 663         _cursor = NULL;
 664       }
 665     } else {
 666       _survivor_chunk_capacity = 2*max_plab_samples;
 667       for (uint i = 0; i < ParallelGCThreads; i++) {
 668         HeapWord** vec = NEW_C_HEAP_ARRAY(HeapWord*, max_plab_samples, mtGC);
 669         if (vec == NULL) {
 670           warning("Failed to allocate survivor plab array");
 671           for (int j = i; j > 0; j--) {
 672             FREE_C_HEAP_ARRAY(HeapWord*, _survivor_plab_array[j-1].array(), mtGC);
 673           }
 674           FREE_C_HEAP_ARRAY(ChunkArray, _survivor_plab_array, mtGC);
 675           FREE_C_HEAP_ARRAY(HeapWord*, _survivor_chunk_array, mtGC);
 676           _survivor_plab_array = NULL;
 677           _survivor_chunk_array = NULL;
 678           _survivor_chunk_capacity = 0;
 679           break;
 680         } else {
 681           ChunkArray* cur =
 682             ::new (&_survivor_plab_array[i]) ChunkArray(vec,
 683                                                         max_plab_samples);
 684           assert(cur->end() == 0, "Should be 0");
 685           assert(cur->array() == vec, "Should be vec");
 686           assert(cur->capacity() == max_plab_samples, "Error");
 687         }
 688       }
 689     }
 690   }
 691   assert(   (   _survivor_plab_array  != NULL
 692              && _survivor_chunk_array != NULL)
 693          || (   _survivor_chunk_capacity == 0
 694              && _survivor_chunk_index == 0),
 695          "Error");
 696 
 697   NOT_PRODUCT(_overflow_counter = CMSMarkStackOverflowInterval;)
 698   _gc_counters = new CollectorCounters("CMS", 1);
 699   _completed_initialization = true;
 700   _inter_sweep_timer.start();  // start of time
 701 }
 702 
 703 const char* ConcurrentMarkSweepGeneration::name() const {
 704   return "concurrent mark-sweep generation";
 705 }
 706 void ConcurrentMarkSweepGeneration::update_counters() {
 707   if (UsePerfData) {
 708     _space_counters->update_all();
 709     _gen_counters->update_all();
 710   }
 711 }
 712 
 713 // this is an optimized version of update_counters(). it takes the
 714 // used value as a parameter rather than computing it.
 715 //
 716 void ConcurrentMarkSweepGeneration::update_counters(size_t used) {
 717   if (UsePerfData) {
 718     _space_counters->update_used(used);
 719     _space_counters->update_capacity();
 720     _gen_counters->update_all();
 721   }
 722 }
 723 
 724 void ConcurrentMarkSweepGeneration::print() const {
 725   Generation::print();
 726   cmsSpace()->print();
 727 }
 728 
 729 #ifndef PRODUCT
 730 void ConcurrentMarkSweepGeneration::print_statistics() {
 731   cmsSpace()->printFLCensus(0);
 732 }
 733 #endif
 734 
 735 void ConcurrentMarkSweepGeneration::printOccupancy(const char *s) {
 736   GenCollectedHeap* gch = GenCollectedHeap::heap();
 737   if (PrintGCDetails) {
 738     if (Verbose) {
 739       gclog_or_tty->print("[%d %s-%s: "SIZE_FORMAT"("SIZE_FORMAT")]",
 740         level(), short_name(), s, used(), capacity());
 741     } else {
 742       gclog_or_tty->print("[%d %s-%s: "SIZE_FORMAT"K("SIZE_FORMAT"K)]",
 743         level(), short_name(), s, used() / K, capacity() / K);
 744     }
 745   }
 746   if (Verbose) {
 747     gclog_or_tty->print(" "SIZE_FORMAT"("SIZE_FORMAT")",
 748               gch->used(), gch->capacity());
 749   } else {
 750     gclog_or_tty->print(" "SIZE_FORMAT"K("SIZE_FORMAT"K)",
 751               gch->used() / K, gch->capacity() / K);
 752   }
 753 }
 754 
 755 size_t
 756 ConcurrentMarkSweepGeneration::contiguous_available() const {
 757   // dld proposes an improvement in precision here. If the committed
 758   // part of the space ends in a free block we should add that to
 759   // uncommitted size in the calculation below. Will make this
 760   // change later, staying with the approximation below for the
 761   // time being. -- ysr.
 762   return MAX2(_virtual_space.uncommitted_size(), unsafe_max_alloc_nogc());
 763 }
 764 
 765 size_t
 766 ConcurrentMarkSweepGeneration::unsafe_max_alloc_nogc() const {
 767   return _cmsSpace->max_alloc_in_words() * HeapWordSize;
 768 }
 769 
 770 size_t ConcurrentMarkSweepGeneration::max_available() const {
 771   return free() + _virtual_space.uncommitted_size();
 772 }
 773 
 774 bool ConcurrentMarkSweepGeneration::promotion_attempt_is_safe(size_t max_promotion_in_bytes) const {
 775   size_t available = max_available();
 776   size_t av_promo  = (size_t)gc_stats()->avg_promoted()->padded_average();
 777   bool   res = (available >= av_promo) || (available >= max_promotion_in_bytes);
 778   if (Verbose && PrintGCDetails) {
 779     gclog_or_tty->print_cr(
 780       "CMS: promo attempt is%s safe: available("SIZE_FORMAT") %s av_promo("SIZE_FORMAT"),"
 781       "max_promo("SIZE_FORMAT")",
 782       res? "":" not", available, res? ">=":"<",
 783       av_promo, max_promotion_in_bytes);
 784   }
 785   return res;
 786 }
 787 
// At a promotion failure, dump information on block layout in the heap
// (CMS old generation).
 790 void ConcurrentMarkSweepGeneration::promotion_failure_occurred() {
 791   if (CMSDumpAtPromotionFailure) {
 792     cmsSpace()->dump_at_safepoint_with_locks(collector(), gclog_or_tty);
 793   }
 794 }
 795 
 796 void ConcurrentMarkSweepGeneration::reset_after_compaction() {
  // Clear the promotion information.  These pointers can be adjusted
  // along with all the other pointers into the heap, but
  // compaction is expected to be a rare event with
  // a heap using CMS, so don't do it without seeing the need.
 801   for (uint i = 0; i < ParallelGCThreads; i++) {
 802     _par_gc_thread_states[i]->promo.reset();
 803   }
 804 }
 805 
 806 void ConcurrentMarkSweepGeneration::compute_new_size() {
 807   assert_locked_or_safepoint(Heap_lock);
 808 
 809   // If incremental collection failed, we just want to expand
 810   // to the limit.
 811   if (incremental_collection_failed()) {
 812     clear_incremental_collection_failed();
 813     grow_to_reserved();
 814     return;
 815   }
 816 
 817   // The heap has been compacted but not reset yet.
 818   // Any metric such as free() or used() will be incorrect.
 819 
 820   CardGeneration::compute_new_size();
 821 
 822   // Reset again after a possible resizing
 823   if (did_compact()) {
 824     cmsSpace()->reset_after_compaction();
 825   }
 826 }
 827 
 828 void ConcurrentMarkSweepGeneration::compute_new_size_free_list() {
 829   assert_locked_or_safepoint(Heap_lock);
 830 
 831   // If incremental collection failed, we just want to expand
 832   // to the limit.
 833   if (incremental_collection_failed()) {
 834     clear_incremental_collection_failed();
 835     grow_to_reserved();
 836     return;
 837   }
 838 
 839   double free_percentage = ((double) free()) / capacity();
 840   double desired_free_percentage = (double) MinHeapFreeRatio / 100;
 841   double maximum_free_percentage = (double) MaxHeapFreeRatio / 100;
 842 
 843   // compute expansion delta needed for reaching desired free percentage
 844   if (free_percentage < desired_free_percentage) {
 845     size_t desired_capacity = (size_t)(used() / ((double) 1 - desired_free_percentage));
 846     assert(desired_capacity >= capacity(), "invalid expansion size");
 847     size_t expand_bytes = MAX2(desired_capacity - capacity(), MinHeapDeltaBytes);
 848     if (PrintGCDetails && Verbose) {
 849       size_t desired_capacity = (size_t)(used() / ((double) 1 - desired_free_percentage));
 850       gclog_or_tty->print_cr("\nFrom compute_new_size: ");
 851       gclog_or_tty->print_cr("  Free fraction %f", free_percentage);
 852       gclog_or_tty->print_cr("  Desired free fraction %f",
 853         desired_free_percentage);
 854       gclog_or_tty->print_cr("  Maximum free fraction %f",
 855         maximum_free_percentage);
 856       gclog_or_tty->print_cr("  Capacity "SIZE_FORMAT, capacity()/1000);
 857       gclog_or_tty->print_cr("  Desired capacity "SIZE_FORMAT,
 858         desired_capacity/1000);
 859       int prev_level = level() - 1;
 860       if (prev_level >= 0) {
 861         size_t prev_size = 0;
 862         GenCollectedHeap* gch = GenCollectedHeap::heap();
 863         Generation* prev_gen = gch->_gens[prev_level];
 864         prev_size = prev_gen->capacity();
 865           gclog_or_tty->print_cr("  Younger gen size "SIZE_FORMAT,
 866                                  prev_size/1000);
 867       }
 868       gclog_or_tty->print_cr("  unsafe_max_alloc_nogc "SIZE_FORMAT,
 869         unsafe_max_alloc_nogc()/1000);
 870       gclog_or_tty->print_cr("  contiguous available "SIZE_FORMAT,
 871         contiguous_available()/1000);
 872       gclog_or_tty->print_cr("  Expand by "SIZE_FORMAT" (bytes)",
 873         expand_bytes);
 874     }
 875     // safe if expansion fails
 876     expand_for_gc_cause(expand_bytes, 0, CMSExpansionCause::_satisfy_free_ratio);
 877     if (PrintGCDetails && Verbose) {
 878       gclog_or_tty->print_cr("  Expanded free fraction %f",
 879         ((double) free()) / capacity());
 880     }
 881   } else {
 882     size_t desired_capacity = (size_t)(used() / ((double) 1 - desired_free_percentage));
 883     assert(desired_capacity <= capacity(), "invalid expansion size");
 884     size_t shrink_bytes = capacity() - desired_capacity;
 885     // Don't shrink unless the delta is greater than the minimum shrink we want
 886     if (shrink_bytes >= MinHeapDeltaBytes) {
 887       shrink_free_list_by(shrink_bytes);
 888     }
 889   }
 890 }
 891 
 892 Mutex* ConcurrentMarkSweepGeneration::freelistLock() const {
 893   return cmsSpace()->freelistLock();
 894 }
 895 
 896 HeapWord* ConcurrentMarkSweepGeneration::allocate(size_t size,
 897                                                   bool   tlab) {
 898   CMSSynchronousYieldRequest yr;
 899   MutexLockerEx x(freelistLock(),
 900                   Mutex::_no_safepoint_check_flag);
 901   return have_lock_and_allocate(size, tlab);
 902 }
 903 
 904 HeapWord* ConcurrentMarkSweepGeneration::have_lock_and_allocate(size_t size,
 905                                                   bool   tlab /* ignored */) {
 906   assert_lock_strong(freelistLock());
 907   size_t adjustedSize = CompactibleFreeListSpace::adjustObjectSize(size);
 908   HeapWord* res = cmsSpace()->allocate(adjustedSize);
 909   // Allocate the object live (grey) if the background collector has
 910   // started marking. This is necessary because the marker may
 911   // have passed this address and consequently this object will
 912   // not otherwise be greyed and would be incorrectly swept up.
 913   // Note that if this object contains references, the writing
 914   // of those references will dirty the card containing this object
 915   // allowing the object to be blackened (and its references scanned)
 916   // either during a preclean phase or at the final checkpoint.
 917   if (res != NULL) {
 918     // We may block here with an uninitialized object with
 919     // its mark-bit or P-bits not yet set. Such objects need
 920     // to be safely navigable by block_start().
 921     assert(oop(res)->klass_or_null() == NULL, "Object should be uninitialized here.");
 922     assert(!((FreeChunk*)res)->is_free(), "Error, block will look free but show wrong size");
 923     collector()->direct_allocated(res, adjustedSize);
 924     _direct_allocated_words += adjustedSize;
 925     // allocation counters
 926     NOT_PRODUCT(
 927       _numObjectsAllocated++;
 928       _numWordsAllocated += (int)adjustedSize;
 929     )
 930   }
 931   return res;
 932 }
 933 
 934 // In the case of direct allocation by mutators in a generation that
 935 // is being concurrently collected, the object must be allocated
 936 // live (grey) if the background collector has started marking.
 937 // This is necessary because the marker may
 938 // have passed this address and consequently this object will
 939 // not otherwise be greyed and would be incorrectly swept up.
 940 // Note that if this object contains references, the writing
 941 // of those references will dirty the card containing this object
 942 // allowing the object to be blackened (and its references scanned)
 943 // either during a preclean phase or at the final checkpoint.
 944 void CMSCollector::direct_allocated(HeapWord* start, size_t size) {
 945   assert(_markBitMap.covers(start, size), "Out of bounds");
 946   if (_collectorState >= Marking) {
 947     MutexLockerEx y(_markBitMap.lock(),
 948                     Mutex::_no_safepoint_check_flag);
 949     // [see comments preceding SweepClosure::do_blk() below for details]
 950     //
 951     // Can the P-bits be deleted now?  JJJ
 952     //
 953     // 1. need to mark the object as live so it isn't collected
 954     // 2. need to mark the 2nd bit to indicate the object may be uninitialized
 955     // 3. need to mark the end of the object so marking, precleaning or sweeping
 956     //    can skip over uninitialized or unparsable objects. An allocated
 957     //    object is considered uninitialized for our purposes as long as
 958     //    its klass word is NULL.  All old gen objects are parsable
    //    as soon as they are initialized.
 960     _markBitMap.mark(start);          // object is live
 961     _markBitMap.mark(start + 1);      // object is potentially uninitialized?
 962     _markBitMap.mark(start + size - 1);
 963                                       // mark end of object
 964   }
 965   // check that oop looks uninitialized
 966   assert(oop(start)->klass_or_null() == NULL, "_klass should be NULL");
 967 }
 968 
 969 void CMSCollector::promoted(bool par, HeapWord* start,
 970                             bool is_obj_array, size_t obj_size) {
 971   assert(_markBitMap.covers(start), "Out of bounds");
 972   // See comment in direct_allocated() about when objects should
 973   // be allocated live.
 974   if (_collectorState >= Marking) {
 975     // we already hold the marking bit map lock, taken in
 976     // the prologue
 977     if (par) {
 978       _markBitMap.par_mark(start);
 979     } else {
 980       _markBitMap.mark(start);
 981     }
 982     // We don't need to mark the object as uninitialized (as
 983     // in direct_allocated above) because this is being done with the
 984     // world stopped and the object will be initialized by the
 985     // time the marking, precleaning or sweeping get to look at it.
 986     // But see the code for copying objects into the CMS generation,
 987     // where we need to ensure that concurrent readers of the
 988     // block offset table are able to safely navigate a block that
 989     // is in flux from being free to being allocated (and in
 990     // transition while being copied into) and subsequently
 991     // becoming a bona-fide object when the copy/promotion is complete.
 992     assert(SafepointSynchronize::is_at_safepoint(),
 993            "expect promotion only at safepoints");
 994 
 995     if (_collectorState < Sweeping) {
 996       // Mark the appropriate cards in the modUnionTable, so that
 997       // this object gets scanned before the sweep. If this is
 998       // not done, CMS generation references in the object might
 999       // not get marked.
1000       // For the case of arrays, which are otherwise precisely
1001       // marked, we need to dirty the entire array, not just its head.
1002       if (is_obj_array) {
1003         // The [par_]mark_range() method expects mr.end() below to
1004         // be aligned to the granularity of a bit's representation
1005         // in the heap. In the case of the MUT below, that's a
1006         // card size.
1007         MemRegion mr(start,
1008                      (HeapWord*)round_to((intptr_t)(start + obj_size),
1009                         CardTableModRefBS::card_size /* bytes */));
1010         if (par) {
1011           _modUnionTable.par_mark_range(mr);
1012         } else {
1013           _modUnionTable.mark_range(mr);
1014         }
1015       } else {  // not an obj array; we can just mark the head
1016         if (par) {
1017           _modUnionTable.par_mark(start);
1018         } else {
1019           _modUnionTable.mark(start);
1020         }
1021       }
1022     }
1023   }
1024 }
1025 
1026 oop ConcurrentMarkSweepGeneration::promote(oop obj, size_t obj_size) {
1027   assert(obj_size == (size_t)obj->size(), "bad obj_size passed in");
1028   // allocate, copy and if necessary update promoinfo --
1029   // delegate to underlying space.
1030   assert_lock_strong(freelistLock());
1031 
1032 #ifndef PRODUCT
1033   if (Universe::heap()->promotion_should_fail()) {
1034     return NULL;
1035   }
1036 #endif  // #ifndef PRODUCT
1037 
1038   oop res = _cmsSpace->promote(obj, obj_size);
1039   if (res == NULL) {
1040     // expand and retry
1041     size_t s = _cmsSpace->expansionSpaceRequired(obj_size);  // HeapWords
1042     expand_for_gc_cause(s*HeapWordSize, MinHeapDeltaBytes, CMSExpansionCause::_satisfy_promotion);
1043     // Since there's currently no next generation, we don't try to promote
1044     // into a more senior generation.
1045     assert(next_gen() == NULL, "assumption, based upon which no attempt "
1046                                "is made to pass on a possibly failing "
1047                                "promotion to next generation");
1048     res = _cmsSpace->promote(obj, obj_size);
1049   }
1050   if (res != NULL) {
1051     // See comment in allocate() about when objects should
1052     // be allocated live.
1053     assert(obj->is_oop(), "Will dereference klass pointer below");
1054     collector()->promoted(false,           // Not parallel
1055                           (HeapWord*)res, obj->is_objArray(), obj_size);
1056     // promotion counters
1057     NOT_PRODUCT(
1058       _numObjectsPromoted++;
1059       _numWordsPromoted +=
1060         (int)(CompactibleFreeListSpace::adjustObjectSize(obj->size()));
1061     )
1062   }
1063   return res;
1064 }
1065 
1066 
1067 // IMPORTANT: Notes on object size recognition in CMS.
1068 // ---------------------------------------------------
1069 // A block of storage in the CMS generation is always in
1070 // one of three states. A free block (FREE), an allocated
1071 // object (OBJECT) whose size() method reports the correct size,
1072 // and an intermediate state (TRANSIENT) in which its size cannot
1073 // be accurately determined.
1074 // STATE IDENTIFICATION:   (32 bit and 64 bit w/o COOPS)
1075 // -----------------------------------------------------
1076 // FREE:      klass_word & 1 == 1; mark_word holds block size
1077 //
1078 // OBJECT:    klass_word installed; klass_word != 0 && klass_word & 1 == 0;
1079 //            obj->size() computes correct size
1080 //
1081 // TRANSIENT: klass_word == 0; size is indeterminate until we become an OBJECT
1082 //
1083 // STATE IDENTIFICATION: (64 bit+COOPS)
1084 // ------------------------------------
1085 // FREE:      mark_word & CMS_FREE_BIT == 1; mark_word & ~CMS_FREE_BIT gives block_size
1086 //
1087 // OBJECT:    klass_word installed; klass_word != 0;
1088 //            obj->size() computes correct size
1089 //
1090 // TRANSIENT: klass_word == 0; size is indeterminate until we become an OBJECT
1091 //
1092 //
1093 // STATE TRANSITION DIAGRAM
1094 //
1095 //        mut / parnew                     mut  /  parnew
1096 // FREE --------------------> TRANSIENT ---------------------> OBJECT --|
1097 //  ^                                                                   |
1098 //  |------------------------ DEAD <------------------------------------|
1099 //         sweep                            mut
1100 //
1101 // While a block is in TRANSIENT state its size cannot be determined
1102 // so readers will either need to come back later or stall until
1103 // the size can be determined. Note that for the case of direct
1104 // allocation, P-bits, when available, may be used to determine the
1105 // size of an object that may not yet have been initialized.
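//
// Illustrative reader-side sketch (an exposition aid, not a quote of any
// particular helper): a block walker can classify a block roughly as
//
//   if (((FreeChunk*)addr)->is_free()) {
//     size = ((FreeChunk*)addr)->size();           // FREE: size kept in chunk
//   } else if (oop(addr)->klass_or_null() != NULL) {
//     size = oop(addr)->size();                    // OBJECT: size via klass
//   } else {
//     // TRANSIENT: retry later, or consult the P-bits when available
//   }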
1106 
1107 // Things to support parallel young-gen collection.
1108 oop
1109 ConcurrentMarkSweepGeneration::par_promote(int thread_num,
1110                                            oop old, markOop m,
1111                                            size_t word_sz) {
1112 #ifndef PRODUCT
1113   if (Universe::heap()->promotion_should_fail()) {
1114     return NULL;
1115   }
1116 #endif  // #ifndef PRODUCT
1117 
1118   CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1119   PromotionInfo* promoInfo = &ps->promo;
1120   // if we are tracking promotions, then first ensure space for
1121   // promotion (including spooling space for saving header if necessary).
1122   // then allocate and copy, then track promoted info if needed.
1123   // When tracking (see PromotionInfo::track()), the mark word may
1124   // be displaced and in this case restoration of the mark word
1125   // occurs in the (oop_since_save_marks_)iterate phase.
1126   if (promoInfo->tracking() && !promoInfo->ensure_spooling_space()) {
1127     // Out of space for allocating spooling buffers;
1128     // try expanding and allocating spooling buffers.
1129     if (!expand_and_ensure_spooling_space(promoInfo)) {
1130       return NULL;
1131     }
1132   }
1133   assert(promoInfo->has_spooling_space(), "Control point invariant");
1134   const size_t alloc_sz = CompactibleFreeListSpace::adjustObjectSize(word_sz);
1135   HeapWord* obj_ptr = ps->lab.alloc(alloc_sz);
1136   if (obj_ptr == NULL) {
1137      obj_ptr = expand_and_par_lab_allocate(ps, alloc_sz);
1138      if (obj_ptr == NULL) {
1139        return NULL;
1140      }
1141   }
1142   oop obj = oop(obj_ptr);
1143   OrderAccess::storestore();
1144   assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
1145   assert(!((FreeChunk*)obj_ptr)->is_free(), "Error, block will look free but show wrong size");
1146   // IMPORTANT: See note on object initialization for CMS above.
1147   // Otherwise, copy the object.  Here we must be careful to insert the
1148   // klass pointer last, since this marks the block as an allocated object.
1149   // Except with compressed oops it's the mark word.
1150   HeapWord* old_ptr = (HeapWord*)old;
1151   // Restore the mark word copied above.
1152   obj->set_mark(m);
1153   assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
1154   assert(!((FreeChunk*)obj_ptr)->is_free(), "Error, block will look free but show wrong size");
1155   OrderAccess::storestore();
1156 
1157   if (UseCompressedClassPointers) {
1158     // Copy gap missed by (aligned) header size calculation below
1159     obj->set_klass_gap(old->klass_gap());
1160   }
1161   if (word_sz > (size_t)oopDesc::header_size()) {
1162     Copy::aligned_disjoint_words(old_ptr + oopDesc::header_size(),
1163                                  obj_ptr + oopDesc::header_size(),
1164                                  word_sz - oopDesc::header_size());
1165   }
1166 
1167   // Now we can track the promoted object, if necessary.  We take care
1168   // to delay the transition from uninitialized to full object
1169   // (i.e., insertion of klass pointer) until after, so that it
1170   // atomically becomes a promoted object.
1171   if (promoInfo->tracking()) {
1172     promoInfo->track((PromotedObject*)obj, old->klass());
1173   }
1174   assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
1175   assert(!((FreeChunk*)obj_ptr)->is_free(), "Error, block will look free but show wrong size");
1176   assert(old->is_oop(), "Will use and dereference old klass ptr below");
1177 
1178   // Finally, install the klass pointer (this should be volatile).
1179   OrderAccess::storestore();
1180   obj->set_klass(old->klass());
1181   // We should now be able to calculate the right size for this object
1182   assert(obj->is_oop() && obj->size() == (int)word_sz, "Error, incorrect size computed for promoted object");
1183 
1184   collector()->promoted(true,          // parallel
1185                         obj_ptr, old->is_objArray(), word_sz);
1186 
1187   NOT_PRODUCT(
1188     Atomic::inc_ptr(&_numObjectsPromoted);
1189     Atomic::add_ptr(alloc_sz, &_numWordsPromoted);
1190   )
1191 
1192   return obj;
1193 }
1194 
1195 void
1196 ConcurrentMarkSweepGeneration::
1197 par_promote_alloc_done(int thread_num) {
1198   CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1199   ps->lab.retire(thread_num);
1200 }
1201 
1202 void
1203 ConcurrentMarkSweepGeneration::
1204 par_oop_since_save_marks_iterate_done(int thread_num) {
1205   CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1206   ParScanWithoutBarrierClosure* dummy_cl = NULL;
1207   ps->promo.promoted_oops_iterate_nv(dummy_cl);
1208 }
1209 
1210 bool ConcurrentMarkSweepGeneration::should_collect(bool   full,
1211                                                    size_t size,
1212                                                    bool   tlab)
1213 {
1214   // We allow a STW collection only if a full
1215   // collection was requested.
1216   return full || should_allocate(size, tlab); // FIX ME !!!
1217   // This and promotion failure handling are connected at the
1218   // hip and should be fixed by untying them.
1219 }
1220 
1221 bool CMSCollector::shouldConcurrentCollect() {
1222   if (_full_gc_requested) {
1223     if (Verbose && PrintGCDetails) {
      gclog_or_tty->print_cr("CMSCollector: collect because of explicit "
                             "gc request (or gc_locker)");
1226     }
1227     return true;
1228   }
1229 
1230   FreelistLocker x(this);
1231   // ------------------------------------------------------------------
1232   // Print out lots of information which affects the initiation of
1233   // a collection.
1234   if (PrintCMSInitiationStatistics && stats().valid()) {
1235     gclog_or_tty->print("CMSCollector shouldConcurrentCollect: ");
1236     gclog_or_tty->stamp();
1237     gclog_or_tty->cr();
1238     stats().print_on(gclog_or_tty);
1239     gclog_or_tty->print_cr("time_until_cms_gen_full %3.7f",
1240       stats().time_until_cms_gen_full());
1241     gclog_or_tty->print_cr("free="SIZE_FORMAT, _cmsGen->free());
1242     gclog_or_tty->print_cr("contiguous_available="SIZE_FORMAT,
1243                            _cmsGen->contiguous_available());
1244     gclog_or_tty->print_cr("promotion_rate=%g", stats().promotion_rate());
1245     gclog_or_tty->print_cr("cms_allocation_rate=%g", stats().cms_allocation_rate());
1246     gclog_or_tty->print_cr("occupancy=%3.7f", _cmsGen->occupancy());
1247     gclog_or_tty->print_cr("initiatingOccupancy=%3.7f", _cmsGen->initiating_occupancy());
1248     gclog_or_tty->print_cr("cms_time_since_begin=%3.7f", stats().cms_time_since_begin());
1249     gclog_or_tty->print_cr("cms_time_since_end=%3.7f", stats().cms_time_since_end());
1250     gclog_or_tty->print_cr("metadata initialized %d",
1251       MetaspaceGC::should_concurrent_collect());
1252   }
1253   // ------------------------------------------------------------------
1254 
1255   // If the estimated time to complete a cms collection (cms_duration())
1256   // is less than the estimated time remaining until the cms generation
1257   // is full, start a collection.
1258   if (!UseCMSInitiatingOccupancyOnly) {
1259     if (stats().valid()) {
1260       if (stats().time_until_cms_start() == 0.0) {
1261         return true;
1262       }
1263     } else {
1264       // We want to conservatively collect somewhat early in order
1265       // to try to "bootstrap" our CMS/promotion statistics;
1266       // this branch will not fire after the first successful CMS
1267       // collection because the stats should then be valid.
1268       if (_cmsGen->occupancy() >= _bootstrap_occupancy) {
1269         if (Verbose && PrintGCDetails) {
1270           gclog_or_tty->print_cr(
1271             " CMSCollector: collect for bootstrapping statistics:"
1272             " occupancy = %f, boot occupancy = %f", _cmsGen->occupancy(),
1273             _bootstrap_occupancy);
1274         }
1275         return true;
1276       }
1277     }
1278   }
1279 
1280   // Otherwise, we start a collection cycle if the
1281   // old gen wants a collection cycle started. It may use
1282   // an appropriate criterion for making this decision.
1283   // XXX We need to make sure that the gen expansion
1284   // criterion dovetails well with this. XXX NEED TO FIX THIS
1285   if (_cmsGen->should_concurrent_collect()) {
1286     if (Verbose && PrintGCDetails) {
1287       gclog_or_tty->print_cr("CMS old gen initiated");
1288     }
1289     return true;
1290   }
1291 
1292   // We start a collection if we believe an incremental collection may fail;
1293   // this is not likely to be productive in practice because it's probably too
1294   // late anyway.
1295   GenCollectedHeap* gch = GenCollectedHeap::heap();
1296   assert(gch->collector_policy()->is_generation_policy(),
1297          "You may want to check the correctness of the following");
1298   if (gch->incremental_collection_will_fail(true /* consult_young */)) {
1299     if (Verbose && PrintGCDetails) {
1300       gclog_or_tty->print("CMSCollector: collect because incremental collection will fail ");
1301     }
1302     return true;
1303   }
1304 
1305   if (MetaspaceGC::should_concurrent_collect()) {
1306     if (Verbose && PrintGCDetails) {
1307       gclog_or_tty->print("CMSCollector: collect for metadata allocation ");
1308     }
1309     return true;
1310   }
1311 
1312   // CMSTriggerInterval starts a CMS cycle if enough time has passed.
1313   if (CMSTriggerInterval >= 0) {
1314     if (CMSTriggerInterval == 0) {
1315       // Trigger always
1316       return true;
1317     }
1318 
1319     // Check the CMS time since begin (we do not check the stats validity
1320     // as we want to be able to trigger the first CMS cycle as well)
1321     if (stats().cms_time_since_begin() >= (CMSTriggerInterval / ((double) MILLIUNITS))) {
1322       if (Verbose && PrintGCDetails) {
1323         if (stats().valid()) {
1324           gclog_or_tty->print_cr("CMSCollector: collect because of trigger interval (time since last begin %3.7f secs)",
1325                                  stats().cms_time_since_begin());
1326         } else {
1327           gclog_or_tty->print_cr("CMSCollector: collect because of trigger interval (first collection)");
1328         }
1329       }
1330       return true;
1331     }
1332   }
1333 
1334   return false;
1335 }
1336 
1337 void CMSCollector::set_did_compact(bool v) { _cmsGen->set_did_compact(v); }
1338 
1339 // Clear _expansion_cause fields of constituent generations
1340 void CMSCollector::clear_expansion_cause() {
1341   _cmsGen->clear_expansion_cause();
1342 }
1343 
1344 // We should be conservative in starting a collection cycle.  Starting
1345 // too eagerly runs the risk of collecting too often in the
1346 // extreme.  Collecting too rarely falls back on full collections,
1347 // which works, even if not optimal in terms of concurrent work.
1348 // As a workaround for collecting too eagerly, use the flag
1349 // UseCMSInitiatingOccupancyOnly.  This also has the advantage of
1350 // giving the user an easily understandable way of controlling the
1351 // collections.
1352 // We want to start a new collection cycle if any of the following
1353 // conditions hold:
1354 // . our current occupancy exceeds the configured initiating occupancy
1355 //   for this generation, or
1356 // . we recently needed to expand this space and have not, since that
1357 //   expansion, done a collection of this generation, or
1358 // . the underlying space believes that it may be a good idea to initiate
1359 //   a concurrent collection (this may be based on criteria such as the
1360 //   following: the space uses linear allocation and linear allocation is
1361 //   going to fail, or there is believed to be excessive fragmentation in
1362 //   the generation, etc... or ...
1363 // [.(currently done by CMSCollector::shouldConcurrentCollect() only for
1364 //   the case of the old generation; see CR 6543076):
1365 //   we may be approaching a point at which allocation requests may fail because
1366 //   we will be out of sufficient free space given allocation rate estimates.]
1367 bool ConcurrentMarkSweepGeneration::should_concurrent_collect() const {
1368 
1369   assert_lock_strong(freelistLock());
1370   if (occupancy() > initiating_occupancy()) {
1371     if (PrintGCDetails && Verbose) {
1372       gclog_or_tty->print(" %s: collect because of occupancy %f / %f  ",
1373         short_name(), occupancy(), initiating_occupancy());
1374     }
1375     return true;
1376   }
1377   if (UseCMSInitiatingOccupancyOnly) {
1378     return false;
1379   }
1380   if (expansion_cause() == CMSExpansionCause::_satisfy_allocation) {
1381     if (PrintGCDetails && Verbose) {
1382       gclog_or_tty->print(" %s: collect because expanded for allocation ",
1383         short_name());
1384     }
1385     return true;
1386   }
1387   if (_cmsSpace->should_concurrent_collect()) {
1388     if (PrintGCDetails && Verbose) {
1389       gclog_or_tty->print(" %s: collect because cmsSpace says so ",
1390         short_name());
1391     }
1392     return true;
1393   }
1394   return false;
1395 }
1396 
1397 void ConcurrentMarkSweepGeneration::collect(bool   full,
1398                                             bool   clear_all_soft_refs,
1399                                             size_t size,
1400                                             bool   tlab)
1401 {
1402   collector()->collect(full, clear_all_soft_refs, size, tlab);
1403 }
1404 
1405 void CMSCollector::collect(bool   full,
1406                            bool   clear_all_soft_refs,
1407                            size_t size,
1408                            bool   tlab)
1409 {
1410   // The following "if" branch is present for defensive reasons.
1411   // In the current uses of this interface, it can be replaced with:
1412   // assert(!GC_locker::is_active(), "Can't be called otherwise");
1413   // But I am not placing that assert here to allow future
1414   // generality in invoking this interface.
1415   if (GC_locker::is_active()) {
1416     // A consistency test for GC_locker
1417     assert(GC_locker::needs_gc(), "Should have been set already");
1418     // Skip this foreground collection, instead
1419     // expanding the heap if necessary.
1420     // Need the free list locks for the call to free() in compute_new_size()
1421     compute_new_size();
1422     return;
1423   }
1424   acquire_control_and_collect(full, clear_all_soft_refs);
1425 }
1426 
1427 void CMSCollector::request_full_gc(unsigned int full_gc_count, GCCause::Cause cause) {
1428   GenCollectedHeap* gch = GenCollectedHeap::heap();
1429   unsigned int gc_count = gch->total_full_collections();
1430   if (gc_count == full_gc_count) {
1431     MutexLockerEx y(CGC_lock, Mutex::_no_safepoint_check_flag);
1432     _full_gc_requested = true;
1433     _full_gc_cause = cause;
1434     CGC_lock->notify();   // nudge CMS thread
1435   } else {
1436     assert(gc_count > full_gc_count, "Error: causal loop");
1437   }
1438 }
1439 
1440 bool CMSCollector::is_external_interruption() {
1441   GCCause::Cause cause = GenCollectedHeap::heap()->gc_cause();
1442   return GCCause::is_user_requested_gc(cause) ||
1443          GCCause::is_serviceability_requested_gc(cause);
1444 }
1445 
1446 void CMSCollector::report_concurrent_mode_interruption() {
1447   if (is_external_interruption()) {
1448     if (PrintGCDetails) {
1449       gclog_or_tty->print(" (concurrent mode interrupted)");
1450     }
1451   } else {
1452     if (PrintGCDetails) {
1453       gclog_or_tty->print(" (concurrent mode failure)");
1454     }
1455     _gc_tracer_cm->report_concurrent_mode_failure();
1456   }
1457 }
1458 
1459 
1460 // The foreground and background collectors need to coordinate in order
1461 // to make sure that they do not mutually interfere with CMS collections.
1462 // When a background collection is active,
1463 // the foreground collector may need to take over (preempt) and
1464 // synchronously complete an ongoing collection. Depending on the
1465 // frequency of the background collections and the heap usage
1466 // of the application, this preemption can be rare or frequent.
1467 // There are only certain
1468 // points in the background collection at which the "collection-baton"
1469 // can be passed to the foreground collector.
1470 //
1471 // The foreground collector will wait for the baton before
1472 // starting any part of the collection.  The foreground collector
1473 // will only wait at one location.
1474 //
1475 // The background collector will yield the baton before starting a new
1476 // phase of the collection (e.g., before initial marking, marking from roots,
1477 // precleaning, final re-mark, sweep etc.)  This is normally done at the head
1478 // of the loop which switches the phases. The background collector does some
1479 // of the phases (initial mark, final re-mark) with the world stopped.
1480 // Because of locking involved in stopping the world,
1481 // the foreground collector should not block waiting for the background
1482 // collector when it is doing a stop-the-world phase.  The background
1483 // collector will yield the baton at an additional point just before
1484 // it enters a stop-the-world phase.  Once the world is stopped, the
1485 // background collector checks the phase of the collection.  If the
1486 // phase has not changed, it proceeds with the collection.  If the
1487 // phase has changed, it skips that phase of the collection.  See
1488 // the comments on the use of the Heap_lock in collect_in_background().
1489 //
1490 // Variables used in baton passing.
1491 //   _foregroundGCIsActive - Set to true by the foreground collector when
1492 //      it wants the baton.  The foreground collector clears it when it has
1493 //      finished the collection.
1494 //   _foregroundGCShouldWait - Set to true by the background collector
1495 //      when it is running.  The foreground collector waits while
1496 //      _foregroundGCShouldWait is true.
1497 //  CGC_lock - monitor used to protect access to the above variables
1498 //      and to notify the foreground and background collectors.
1499 //  _collectorState - current state of the CMS collection.
1500 //
1501 // The foreground collector
1502 //   acquires the CGC_lock
1503 //   sets _foregroundGCIsActive
1504 //   waits on the CGC_lock for _foregroundGCShouldWait to be false
1505 //     various locks acquired in preparation for the collection
1506 //     are released so as not to block the background collector
1507 //     that is in the midst of a collection
1508 //   proceeds with the collection
1509 //   clears _foregroundGCIsActive
1510 //   returns
1511 //
1512 // The background collector in a loop iterating on the phases of the
1513 //      collection
1514 //   acquires the CGC_lock
1515 //   sets _foregroundGCShouldWait
1516 //   if _foregroundGCIsActive is set
1517 //     clears _foregroundGCShouldWait, notifies CGC_lock
1518 //     waits on CGC_lock for _foregroundGCIsActive to become false
1519 //     and exits the loop.
1520 //   otherwise
1521 //     proceed with that phase of the collection
1522 //     if the phase is a stop-the-world phase,
1523 //       yield the baton once more just before enqueueing
1524 //       the stop-world CMS operation (executed by the VM thread).
1525 //   returns after all phases of the collection are done
1526 //
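     //
     // A condensed sketch of the handshake (illustrative pseudocode only; the
     // real protocol, including CMS-token management, is implemented in
     // acquire_control_and_collect() and waitForForegroundGC() below):
     //
     //   foreground (VM thread):
     //     _foregroundGCIsActive = true;
     //     { lock CGC_lock; notify; while (_foregroundGCShouldWait) wait; }
     //     ... do the foreground collection ...
     //     _foregroundGCIsActive = false;
     //
     //   background (CMS thread), at each yield point:
     //     { lock CGC_lock;
     //       _foregroundGCShouldWait = true;
     //       if (_foregroundGCIsActive) {
     //         _foregroundGCShouldWait = false; notify;
     //         while (_foregroundGCIsActive) wait;   // baton handed over
     //       } }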
1527 
1528 void CMSCollector::acquire_control_and_collect(bool full,
1529         bool clear_all_soft_refs) {
1530   assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint");
1531   assert(!Thread::current()->is_ConcurrentGC_thread(),
1532          "shouldn't try to acquire control from self!");
1533 
1534   // Start the protocol for acquiring control of the
1535   // collection from the background collector (aka CMS thread).
1536   assert(ConcurrentMarkSweepThread::vm_thread_has_cms_token(),
1537          "VM thread should have CMS token");
1538   // Remember the possibly interrupted state of an ongoing
1539   // concurrent collection
1540   CollectorState first_state = _collectorState;
1541 
1542   // Signal to a possibly ongoing concurrent collection that
1543   // we want to do a foreground collection.
1544   _foregroundGCIsActive = true;
1545 
1546   // Release locks and wait for a notify from the background collector;
1547   // releasing the locks is only necessary for phases that
1548   // yield to improve the granularity of the collection.
1549   assert_lock_strong(bitMapLock());
1550   // We need to lock the Free list lock for the space that we are
1551   // currently collecting.
1552   assert(haveFreelistLocks(), "Must be holding free list locks");
1553   bitMapLock()->unlock();
1554   releaseFreelistLocks();
1555   {
1556     MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
1557     if (_foregroundGCShouldWait) {
1558       // We are going to be waiting for action for the CMS thread;
1559       // it had better not be gone (for instance at shutdown)!
1560       assert(ConcurrentMarkSweepThread::cmst() != NULL,
1561              "CMS thread must be running");
1562       // Wait here until the background collector gives us the go-ahead
1563       ConcurrentMarkSweepThread::clear_CMS_flag(
1564         ConcurrentMarkSweepThread::CMS_vm_has_token);  // release token
1565       // Get a possibly blocked CMS thread going:
1566       //   Note that we set _foregroundGCIsActive true above,
1567       //   without protection of the CGC_lock.
1568       CGC_lock->notify();
1569       assert(!ConcurrentMarkSweepThread::vm_thread_wants_cms_token(),
1570              "Possible deadlock");
1571       while (_foregroundGCShouldWait) {
1572         // wait for notification
1573         CGC_lock->wait(Mutex::_no_safepoint_check_flag);
1574         // Possibility of delay/starvation here, since the CMS token does
1575         // not know to give priority to the VM thread? Actually, I think
1576         // there wouldn't be any delay/starvation, but the proof of
1577         // that "fact" (?) appears non-trivial. XXX 20011219YSR
1578       }
1579       ConcurrentMarkSweepThread::set_CMS_flag(
1580         ConcurrentMarkSweepThread::CMS_vm_has_token);
1581     }
1582   }
1583   // The CMS_token is already held.  Get back the other locks.
1584   assert(ConcurrentMarkSweepThread::vm_thread_has_cms_token(),
1585          "VM thread should have CMS token");
1586   getFreelistLocks();
1587   bitMapLock()->lock_without_safepoint_check();
1588   if (TraceCMSState) {
1589     gclog_or_tty->print_cr("CMS foreground collector has asked for control "
1590       INTPTR_FORMAT " with first state %d", Thread::current(), first_state);
1591     gclog_or_tty->print_cr("    gets control with state %d", _collectorState);
1592   }
1593 
1594   // Inform cms gen if this was due to partial collection failing.
1595   // The CMS gen may use this fact to determine its expansion policy.
1596   GenCollectedHeap* gch = GenCollectedHeap::heap();
1597   if (gch->incremental_collection_will_fail(false /* don't consult_young */)) {
1598     assert(!_cmsGen->incremental_collection_failed(),
1599            "Should have been noticed, reacted to and cleared");
1600     _cmsGen->set_incremental_collection_failed();
1601   }
1602 
1603   if (first_state > Idling) {
1604     report_concurrent_mode_interruption();
1605   }
1606 
1607   set_did_compact(true);
1608 
1609   // If the collection is being acquired from the background
1610   // collector, there may be references on the discovered
1611   // reference lists that have NULL referents (being those
1612   // that were concurrently cleared by a mutator) or
1613   // that are no longer active (having been enqueued concurrently
1614   // by the mutator).
1615   // Scrub the list of those references because Mark-Sweep-Compact
1616   // code assumes referents are not NULL and that all discovered
1617   // Reference objects are active.
1618   ref_processor()->clean_up_discovered_references();
1619 
1620   if (first_state > Idling) {
1621     save_heap_summary();
1622   }
1623 
1624   do_compaction_work(clear_all_soft_refs);
1625 
1626   // Has the GC time limit been exceeded?
1627   size_t max_eden_size = _young_gen->max_capacity() -
1628                          _young_gen->to()->capacity() -
1629                          _young_gen->from()->capacity();
1630   GCCause::Cause gc_cause = gch->gc_cause();
1631   size_policy()->check_gc_overhead_limit(_young_gen->used(),
1632                                          _young_gen->eden()->used(),
1633                                          _cmsGen->max_capacity(),
1634                                          max_eden_size,
1635                                          full,
1636                                          gc_cause,
1637                                          gch->collector_policy());
1638 
1639   // Reset the expansion cause, now that we just completed
1640   // a collection cycle.
1641   clear_expansion_cause();
1642   _foregroundGCIsActive = false;
1643   return;
1644 }
1645 
1646 // Resize the tenured generation
1647 // after obtaining the free list locks for the
1648 // two generations.
1649 void CMSCollector::compute_new_size() {
1650   assert_locked_or_safepoint(Heap_lock);
1651   FreelistLocker z(this);
1652   MetaspaceGC::compute_new_size();
1653   _cmsGen->compute_new_size_free_list();
1654 }
1655 
1656 // A work method used by the foreground collector to do
1657 // a mark-sweep-compact.
1658 void CMSCollector::do_compaction_work(bool clear_all_soft_refs) {
1659   GenCollectedHeap* gch = GenCollectedHeap::heap();
1660 
1661   STWGCTimer* gc_timer = GenMarkSweep::gc_timer();
1662   gc_timer->register_gc_start();
1663 
1664   SerialOldTracer* gc_tracer = GenMarkSweep::gc_tracer();
1665   gc_tracer->report_gc_start(gch->gc_cause(), gc_timer->gc_start());
1666 
1667   GCTraceTime t("CMS:MSC ", PrintGCDetails && Verbose, true, NULL, gc_tracer->gc_id());
1668 
1669   // Temporarily widen the span of the weak reference processing to
1670   // the entire heap.
1671   MemRegion new_span(GenCollectedHeap::heap()->reserved_region());
1672   ReferenceProcessorSpanMutator rp_mut_span(ref_processor(), new_span);
1673   // Temporarily, clear the "is_alive_non_header" field of the
1674   // reference processor.
1675   ReferenceProcessorIsAliveMutator rp_mut_closure(ref_processor(), NULL);
1676   // Temporarily make reference _processing_ single threaded (non-MT).
1677   ReferenceProcessorMTProcMutator rp_mut_mt_processing(ref_processor(), false);
1678   // Temporarily make refs discovery atomic
1679   ReferenceProcessorAtomicMutator rp_mut_atomic(ref_processor(), true);
1680   // Temporarily make reference _discovery_ single threaded (non-MT)
1681   ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false);
1682 
1683   ref_processor()->set_enqueuing_is_done(false);
1684   ref_processor()->enable_discovery(false /*verify_disabled*/, false /*check_no_refs*/);
1685   ref_processor()->setup_policy(clear_all_soft_refs);
1686   // If an asynchronous collection finishes, the _modUnionTable is
1687   // all clear.  If we are taking over the collection from an asynchronous
1688   // collection, clear the _modUnionTable.
1689   assert(_collectorState != Idling || _modUnionTable.isAllClear(),
1690     "_modUnionTable should be clear if the baton was not passed");
1691   _modUnionTable.clear_all();
1692   assert(_collectorState != Idling || _ct->klass_rem_set()->mod_union_is_clear(),
1693     "mod union for klasses should be clear if the baton was passed");
1694   _ct->klass_rem_set()->clear_mod_union();
1695 
1696   // We must adjust the allocation statistics being maintained
1697   // in the free list space. We do so by reading and clearing
1698   // the sweep timer and updating the block flux rate estimates below.
1699   assert(!_intra_sweep_timer.is_active(), "_intra_sweep_timer should be inactive");
1700   if (_inter_sweep_timer.is_active()) {
1701     _inter_sweep_timer.stop();
1702     // Note that we do not use this sample to update the _inter_sweep_estimate.
1703     _cmsGen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
1704                                             _inter_sweep_estimate.padded_average(),
1705                                             _intra_sweep_estimate.padded_average());
1706   }
1707 
1708   GenMarkSweep::invoke_at_safepoint(_cmsGen->level(),
1709     ref_processor(), clear_all_soft_refs);
1710   #ifdef ASSERT
1711     CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace();
1712     size_t free_size = cms_space->free();
1713     assert(free_size ==
1714            pointer_delta(cms_space->end(), cms_space->compaction_top())
1715            * HeapWordSize,
1716       "All the free space should be compacted into one chunk at top");
1717     assert(cms_space->dictionary()->total_chunk_size(
1718                                       debug_only(cms_space->freelistLock())) == 0 ||
1719            cms_space->totalSizeInIndexedFreeLists() == 0,
1720       "All the free space should be in a single chunk");
1721     size_t num = cms_space->totalCount();
1722     assert((free_size == 0 && num == 0) ||
1723            (free_size > 0  && (num == 1 || num == 2)),
1724          "There should be at most 2 free chunks after compaction");
1725   #endif // ASSERT
1726   _collectorState = Resetting;
1727   assert(_restart_addr == NULL,
1728          "Should have been NULL'd before baton was passed");
1729   reset(false /* == !concurrent */);
1730   _cmsGen->reset_after_compaction();
1731   _concurrent_cycles_since_last_unload = 0;
1732 
1733   // Clear any data recorded in the PLAB chunk arrays.
1734   if (_survivor_plab_array != NULL) {
1735     reset_survivor_plab_arrays();
1736   }
1737 
1738   // Adjust the per-size allocation stats for the next epoch.
1739   _cmsGen->cmsSpace()->endSweepFLCensus(sweep_count() /* fake */);
1740   // Restart the "inter sweep timer" for the next epoch.
1741   _inter_sweep_timer.reset();
1742   _inter_sweep_timer.start();
1743 
1744   gc_timer->register_gc_end();
1745 
1746   gc_tracer->report_gc_end(gc_timer->gc_end(), gc_timer->time_partitions());
1747 
1748   // For a mark-sweep-compact, compute_new_size() will be called
1749   // in the heap's do_collection() method.
1750 }
1751 
1752 void CMSCollector::print_eden_and_survivor_chunk_arrays() {
1753   ContiguousSpace* eden_space = _young_gen->eden();
1754   ContiguousSpace* from_space = _young_gen->from();
1755   ContiguousSpace* to_space   = _young_gen->to();
1756   // Eden
1757   if (_eden_chunk_array != NULL) {
1758     gclog_or_tty->print_cr("eden " PTR_FORMAT "-" PTR_FORMAT "-" PTR_FORMAT "(" SIZE_FORMAT ")",
1759                            eden_space->bottom(), eden_space->top(),
1760                            eden_space->end(), eden_space->capacity());
1761     gclog_or_tty->print_cr("_eden_chunk_index=" SIZE_FORMAT ", "
1762                            "_eden_chunk_capacity=" SIZE_FORMAT,
1763                            _eden_chunk_index, _eden_chunk_capacity);
1764     for (size_t i = 0; i < _eden_chunk_index; i++) {
1765       gclog_or_tty->print_cr("_eden_chunk_array[" SIZE_FORMAT "]=" PTR_FORMAT,
1766                              i, _eden_chunk_array[i]);
1767     }
1768   }
1769   // Survivor
1770   if (_survivor_chunk_array != NULL) {
1771     gclog_or_tty->print_cr("survivor " PTR_FORMAT "-" PTR_FORMAT "-" PTR_FORMAT "(" SIZE_FORMAT ")",
1772                            from_space->bottom(), from_space->top(),
1773                            from_space->end(), from_space->capacity());
1774     gclog_or_tty->print_cr("_survivor_chunk_index=" SIZE_FORMAT ", "
1775                            "_survivor_chunk_capacity=" SIZE_FORMAT,
1776                            _survivor_chunk_index, _survivor_chunk_capacity);
1777     for (size_t i = 0; i < _survivor_chunk_index; i++) {
1778       gclog_or_tty->print_cr("_survivor_chunk_array[" SIZE_FORMAT "]=" PTR_FORMAT,
1779                              i, _survivor_chunk_array[i]);
1780     }
1781   }
1782 }
1783 
1784 void CMSCollector::getFreelistLocks() const {
1785   // Get locks for all free lists in all generations that this
1786   // collector is responsible for
1787   _cmsGen->freelistLock()->lock_without_safepoint_check();
1788 }
1789 
1790 void CMSCollector::releaseFreelistLocks() const {
1791   // Release locks for all free lists in all generations that this
1792   // collector is responsible for
1793   _cmsGen->freelistLock()->unlock();
1794 }
1795 
1796 bool CMSCollector::haveFreelistLocks() const {
1797   // Check locks for all free lists in all generations that this
1798   // collector is responsible for
1799   assert_lock_strong(_cmsGen->freelistLock());
1800   PRODUCT_ONLY(ShouldNotReachHere());
1801   return true;
1802 }
1803 
1804 // A utility class that is used by the CMS collector to
1805 // temporarily "release" the foreground collector from its
1806 // usual obligation to wait for the background collector to
1807 // complete an ongoing phase before proceeding.
1808 class ReleaseForegroundGC: public StackObj {
1809  private:
1810   CMSCollector* _c;
1811  public:
1812   ReleaseForegroundGC(CMSCollector* c) : _c(c) {
1813     assert(_c->_foregroundGCShouldWait, "Else should not need to call");
1814     MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
1815     // allow a potentially blocked foreground collector to proceed
1816     _c->_foregroundGCShouldWait = false;
1817     if (_c->_foregroundGCIsActive) {
1818       CGC_lock->notify();
1819     }
1820     assert(!ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
1821            "Possible deadlock");
1822   }
1823 
1824   ~ReleaseForegroundGC() {
1825     assert(!_c->_foregroundGCShouldWait, "Usage protocol violation?");
1826     MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
1827     _c->_foregroundGCShouldWait = true;
1828   }
1829 };
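     // Typical use (see collect_in_background() below): the background
     // collector wraps each stop-the-world VM operation (initial mark,
     // final remark) and the resizing step in a ReleaseForegroundGC scope,
     // so that a foreground collector waiting on _foregroundGCShouldWait is
     // not blocked for the duration of that step.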
1830 
1831 void CMSCollector::collect_in_background(GCCause::Cause cause) {
1832   assert(Thread::current()->is_ConcurrentGC_thread(),
1833     "A CMS asynchronous collection is only allowed on a CMS thread.");
1834 
1835   GenCollectedHeap* gch = GenCollectedHeap::heap();
1836   {
1837     bool safepoint_check = Mutex::_no_safepoint_check_flag;
1838     MutexLockerEx hl(Heap_lock, safepoint_check);
1839     FreelistLocker fll(this);
1840     MutexLockerEx x(CGC_lock, safepoint_check);
1841     if (_foregroundGCIsActive || !UseAsyncConcMarkSweepGC) {
1842       // The foreground collector is active or we're
1843       // not using asynchronous collections.  Skip this
1844       // background collection.
1845       assert(!_foregroundGCShouldWait, "Should be clear");
1846       return;
1847     } else {
1848       assert(_collectorState == Idling, "Should be idling before start.");
1849       _collectorState = InitialMarking;
1850       register_gc_start(cause);
1851       // Reset the expansion cause, now that we are about to begin
1852       // a new cycle.
1853       clear_expansion_cause();
1854 
1855       // Clear the MetaspaceGC flag since a concurrent collection
1856       // is starting; it is cleared again after the collection.
1857       MetaspaceGC::set_should_concurrent_collect(false);
1858     }
1859     // Decide if we want to enable class unloading as part of the
1860     // ensuing concurrent GC cycle.
1861     update_should_unload_classes();
1862     _full_gc_requested = false;           // acks all outstanding full gc requests
1863     _full_gc_cause = GCCause::_no_gc;
1864     // Signal that we are about to start a collection
1865     gch->increment_total_full_collections();  // ... starting a collection cycle
1866     _collection_count_start = gch->total_full_collections();
1867   }
1868 
1869   // Used for PrintGC
1870   size_t prev_used;
1871   if (PrintGC && Verbose) {
1872     prev_used = _cmsGen->used();
1873   }
1874 
1875   // The change of the collection state is normally done at this level;
1876   // the exceptions are phases that are executed while the world is
1877   // stopped.  For those phases the change of state is done while the
1878   // world is stopped, which, for baton passing purposes, allows the
1879   // background collector to finish the phase and change state atomically.
1880   // The foreground collector cannot wait on a phase that is done
1881   // while the world is stopped because the foreground collector already
1882   // has the world stopped and would deadlock.
1883   while (_collectorState != Idling) {
1884     if (TraceCMSState) {
1885       gclog_or_tty->print_cr("Thread " INTPTR_FORMAT " in CMS state %d",
1886         Thread::current(), _collectorState);
1887     }
1888     // The foreground collector
1889     //   holds the Heap_lock throughout its collection.
1890     //   holds the CMS token (but not the lock)
1891     //     except while it is waiting for the background collector to yield.
1892     //
1893     // The foreground collector should be blocked (not for long)
1894     //   if the background collector is about to start a phase
1895     //   executed with world stopped.  If the background
1896     //   collector has already started such a phase, the
1897     //   foreground collector is blocked waiting for the
1898     //   Heap_lock.  The stop-world phases (InitialMarking and FinalMarking)
1899     //   are executed in the VM thread.
1900     //
1901     // The locking order is
1902     //   PendingListLock (PLL)  -- if applicable (FinalMarking)
1903     //   Heap_lock  (both this & PLL locked in VM_CMS_Operation::prologue())
1904     //   CMS token  (claimed in
1905     //                stop_world_and_do() -->
1906     //                  safepoint_synchronize() -->
1907     //                    CMSThread::synchronize())
1908 
1909     {
1910       // Check if the FG collector wants us to yield.
1911       CMSTokenSync x(true); // is cms thread
1912       if (waitForForegroundGC()) {
1913         // We yielded to a foreground GC, nothing more to be
1914         // done this round.
1915         assert(_foregroundGCShouldWait == false, "We set it to false in "
1916                "waitForForegroundGC()");
1917         if (TraceCMSState) {
1918           gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT
1919             " exiting collection CMS state %d",
1920             Thread::current(), _collectorState);
1921         }
1922         return;
1923       } else {
1924         // The background collector can run but check to see if the
1925         // foreground collector has done a collection while the
1926         // background collector was waiting to get the CGC_lock
1927         // above.  If yes, break so that _foregroundGCShouldWait
1928         // is cleared before returning.
1929         if (_collectorState == Idling) {
1930           break;
1931         }
1932       }
1933     }
1934 
1935     assert(_foregroundGCShouldWait, "Foreground collector, if active, "
1936       "should be waiting");
1937 
1938     switch (_collectorState) {
1939       case InitialMarking:
1940         {
1941           ReleaseForegroundGC x(this);
1942           stats().record_cms_begin();
1943           VM_CMS_Initial_Mark initial_mark_op(this);
1944           VMThread::execute(&initial_mark_op);
1945         }
1946         // The collector state may be any legal state at this point
1947         // since the background collector may have yielded to the
1948         // foreground collector.
1949         break;
1950       case Marking:
1951         // initial marking in checkpointRootsInitialWork has been completed
1952         if (markFromRoots()) { // we were successful
1953           assert(_collectorState == Precleaning, "Collector state should "
1954             "have changed");
1955         } else {
1956           assert(_foregroundGCIsActive, "Internal state inconsistency");
1957         }
1958         break;
1959       case Precleaning:
1960         // marking from roots in markFromRoots has been completed
1961         preclean();
1962         assert(_collectorState == AbortablePreclean ||
1963                _collectorState == FinalMarking,
1964                "Collector state should have changed");
1965         break;
1966       case AbortablePreclean:
1967         abortable_preclean();
1968         assert(_collectorState == FinalMarking, "Collector state should "
1969           "have changed");
1970         break;
1971       case FinalMarking:
1972         {
1973           ReleaseForegroundGC x(this);
1974 
1975           VM_CMS_Final_Remark final_remark_op(this);
1976           VMThread::execute(&final_remark_op);
1977         }
1978         assert(_foregroundGCShouldWait, "block post-condition");
1979         break;
1980       case Sweeping:
1981         // final marking in checkpointRootsFinal has been completed
1982         sweep();
1983         assert(_collectorState == Resizing, "Collector state change "
1984           "to Resizing must be done under the free_list_lock");
1985 
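             // Note: intentional fall-through into the Resizing case.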
1986       case Resizing: {
1987         // Sweeping has been completed...
1988         // At this point the background collection has completed.
1989         // Don't move the call to compute_new_size() down
1990         // into code that might be executed if the background
1991         // collection was preempted.
1992         {
1993           ReleaseForegroundGC x(this);   // unblock FG collection
1994           MutexLockerEx       y(Heap_lock, Mutex::_no_safepoint_check_flag);
1995           CMSTokenSync        z(true);   // not strictly needed.
1996           if (_collectorState == Resizing) {
1997             compute_new_size();
1998             save_heap_summary();
1999             _collectorState = Resetting;
2000           } else {
2001             assert(_collectorState == Idling, "The state should only change"
2002                    " because the foreground collector has finished the collection");
2003           }
2004         }
2005         break;
2006       }
2007       case Resetting:
2008         // CMS heap resizing has been completed
2009         reset(true);
2010         assert(_collectorState == Idling, "Collector state should "
2011           "have changed");
2012 
2013         MetaspaceGC::set_should_concurrent_collect(false);
2014 
2015         stats().record_cms_end();
2016         // Don't move the concurrent_phases_end() and compute_new_size()
2017         // calls to here because a preempted background collection
2018         // has its state set to "Resetting".
2019         break;
2020       case Idling:
2021       default:
2022         ShouldNotReachHere();
2023         break;
2024     }
2025     if (TraceCMSState) {
2026       gclog_or_tty->print_cr("  Thread " INTPTR_FORMAT " done - next CMS state %d",
2027         Thread::current(), _collectorState);
2028     }
2029     assert(_foregroundGCShouldWait, "block post-condition");
2030   }
2031 
2032   // Should this be in gc_epilogue?
2033   collector_policy()->counters()->update_counters();
2034 
2035   {
2036     // Clear _foregroundGCShouldWait and, in the event that the
2037     // foreground collector is waiting, notify it, before
2038     // returning.
2039     MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
2040     _foregroundGCShouldWait = false;
2041     if (_foregroundGCIsActive) {
2042       CGC_lock->notify();
2043     }
2044     assert(!ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
2045            "Possible deadlock");
2046   }
2047   if (TraceCMSState) {
2048     gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT
2049       " exiting collection CMS state %d",
2050       Thread::current(), _collectorState);
2051   }
2052   if (PrintGC && Verbose) {
2053     _cmsGen->print_heap_change(prev_used);
2054   }
2055 }
2056 
2057 void CMSCollector::register_gc_start(GCCause::Cause cause) {
2058   _cms_start_registered = true;
2059   _gc_timer_cm->register_gc_start();
2060   _gc_tracer_cm->report_gc_start(cause, _gc_timer_cm->gc_start());
2061 }
2062 
2063 void CMSCollector::register_gc_end() {
2064   if (_cms_start_registered) {
2065     report_heap_summary(GCWhen::AfterGC);
2066 
2067     _gc_timer_cm->register_gc_end();
2068     _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
2069     _cms_start_registered = false;
2070   }
2071 }
2072 
2073 void CMSCollector::save_heap_summary() {
2074   GenCollectedHeap* gch = GenCollectedHeap::heap();
2075   _last_heap_summary = gch->create_heap_summary();
2076   _last_metaspace_summary = gch->create_metaspace_summary();
2077 }
2078 
2079 void CMSCollector::report_heap_summary(GCWhen::Type when) {
2080   _gc_tracer_cm->report_gc_heap_summary(when, _last_heap_summary);
2081   _gc_tracer_cm->report_metaspace_summary(when, _last_metaspace_summary);
2082 }
2083 
2084 bool CMSCollector::waitForForegroundGC() {
2085   bool res = false;
2086   assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
2087          "CMS thread should have CMS token");
2088   // Block the foreground collector until the
2089   // background collector decides whether to
2090   // yield.
2091   MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
2092   _foregroundGCShouldWait = true;
2093   if (_foregroundGCIsActive) {
2094     // The background collector yields to the
2095     // foreground collector and returns a value
2096     // indicating that it has yielded.  The foreground
2097     // collector can proceed.
2098     res = true;
2099     _foregroundGCShouldWait = false;
2100     ConcurrentMarkSweepThread::clear_CMS_flag(
2101       ConcurrentMarkSweepThread::CMS_cms_has_token);
2102     ConcurrentMarkSweepThread::set_CMS_flag(
2103       ConcurrentMarkSweepThread::CMS_cms_wants_token);
2104     // Get a possibly blocked foreground thread going
2105     CGC_lock->notify();
2106     if (TraceCMSState) {
2107       gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT " waiting at CMS state %d",
2108         Thread::current(), _collectorState);
2109     }
2110     while (_foregroundGCIsActive) {
2111       CGC_lock->wait(Mutex::_no_safepoint_check_flag);
2112     }
2113     ConcurrentMarkSweepThread::set_CMS_flag(
2114       ConcurrentMarkSweepThread::CMS_cms_has_token);
2115     ConcurrentMarkSweepThread::clear_CMS_flag(
2116       ConcurrentMarkSweepThread::CMS_cms_wants_token);
2117   }
2118   if (TraceCMSState) {
2119     gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT " continuing at CMS state %d",
2120       Thread::current(), _collectorState);
2121   }
2122   return res;
2123 }
2124 
2125 // Because of the need to lock the free lists and other structures in
2126 // the collector, common to all the generations that the collector is
2127 // collecting, we need the gc_prologues of individual CMS generations
2128 // to delegate to their collector. It may have been simpler had the
2129 // current infrastructure allowed one to call a prologue on a
2130 // collector. In the absence of that we have the generation's
2131 // prologue delegate to the collector, which delegates back
2132 // some "local" work to a worker method in the individual generations
2133 // that it's responsible for collecting, while itself doing any
2134 // work common to all generations it's responsible for. A similar
2135 // comment applies to the gc_epilogue()s.
2136 // The role of the variable _between_prologue_and_epilogue is to
2137 // enforce the invocation protocol.
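     //
     // The resulting call chain is, roughly (a sketch for orientation only):
     //   GenCollectedHeap
     //     -> ConcurrentMarkSweepGeneration::gc_prologue()
     //       -> CMSCollector::gc_prologue()   (claims the shared locks)
     //         -> ConcurrentMarkSweepGeneration::gc_prologue_work()
     // and symmetrically for gc_epilogue()/gc_epilogue_work().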
2138 void CMSCollector::gc_prologue(bool full) {
2139   // Call gc_prologue_work() for the CMSGen
2140   // we are responsible for.
2141 
2142   // The following locking discipline assumes that we are only called
2143   // when the world is stopped.
2144   assert(SafepointSynchronize::is_at_safepoint(), "world is stopped assumption");
2145 
2146   // The CMSCollector prologue must call the gc_prologues for the
2147   // "generations" that it's responsible for.
2149 
2150   assert(   Thread::current()->is_VM_thread()
2151          || (   CMSScavengeBeforeRemark
2152              && Thread::current()->is_ConcurrentGC_thread()),
2153          "Incorrect thread type for prologue execution");
2154 
2155   if (_between_prologue_and_epilogue) {
2156     // We have already been invoked; this is a gc_prologue delegation
2157     // from yet another CMS generation that we are responsible for, just
2158     // ignore it since all relevant work has already been done.
2159     return;
2160   }
2161 
2162   // set a bit saying prologue has been called; cleared in epilogue
2163   _between_prologue_and_epilogue = true;
2164   // Claim locks for common data structures, then call gc_prologue_work()
2165   // for each CMSGen.
2166 
2167   getFreelistLocks();   // gets free list locks on constituent spaces
2168   bitMapLock()->lock_without_safepoint_check();
2169 
2170   // Should call gc_prologue_work() for all cms gens we are responsible for
2171   bool duringMarking =    _collectorState >= Marking
2172                          && _collectorState < Sweeping;
2173 
2174   // The young collections clear the modified oops state, which tells if
2175   // there are any modified oops in the class. The remark phase also needs
2176   // that information. Tell the young collection to save the union of all
2177   // modified klasses.
2178   if (duringMarking) {
2179     _ct->klass_rem_set()->set_accumulate_modified_oops(true);
2180   }
2181 
2182   bool registerClosure = duringMarking;
2183 
2184   ModUnionClosure* muc = CollectedHeap::use_parallel_gc_threads() ?
2185                                                &_modUnionClosurePar
2186                                                : &_modUnionClosure;
2187   _cmsGen->gc_prologue_work(full, registerClosure, muc);
2188 
2189   if (!full) {
2190     stats().record_gc0_begin();
2191   }
2192 }
2193 
2194 void ConcurrentMarkSweepGeneration::gc_prologue(bool full) {
2195 
2196   _capacity_at_prologue = capacity();
2197   _used_at_prologue = used();
2198 
2199   // Delegate to the CMSCollector, which knows how to coordinate between
2200   // this and any other CMS generations that it is responsible for
2201   // collecting.
2202   collector()->gc_prologue(full);
2203 }
2204 
2205 // This is a "private" interface for use by this generation's CMSCollector.
2206 // Not to be called directly by any other entity (for instance,
2207 // GenCollectedHeap, which calls the "public" gc_prologue method above).
2208 void ConcurrentMarkSweepGeneration::gc_prologue_work(bool full,
2209   bool registerClosure, ModUnionClosure* modUnionClosure) {
2210   assert(!incremental_collection_failed(), "Shouldn't be set yet");
2211   assert(cmsSpace()->preconsumptionDirtyCardClosure() == NULL,
2212     "Should be NULL");
2213   if (registerClosure) {
2214     cmsSpace()->setPreconsumptionDirtyCardClosure(modUnionClosure);
2215   }
2216   cmsSpace()->gc_prologue();
2217   // Clear stat counters
2218   NOT_PRODUCT(
2219     assert(_numObjectsPromoted == 0, "check");
2220     assert(_numWordsPromoted   == 0, "check");
2221     if (Verbose && PrintGC) {
2222       gclog_or_tty->print("Allocated "SIZE_FORMAT" objects, "
2223                           SIZE_FORMAT" bytes concurrently",
2224       _numObjectsAllocated, _numWordsAllocated*sizeof(HeapWord));
2225     }
2226     _numObjectsAllocated = 0;
2227     _numWordsAllocated   = 0;
2228   )
2229 }
2230 
2231 void CMSCollector::gc_epilogue(bool full) {
2232   // The following locking discipline assumes that we are only called
2233   // when the world is stopped.
2234   assert(SafepointSynchronize::is_at_safepoint(),
2235          "world is stopped assumption");
2236 
2237   // Currently the CMS epilogue (see CompactibleFreeListSpace) merely checks
2238   // if linear allocation blocks need to be appropriately marked to allow
2239   // the blocks to be parsable. We also check here whether we need to nudge the
2240   // CMS collector thread to start a new cycle (if it's not already active).
2241   assert(   Thread::current()->is_VM_thread()
2242          || (   CMSScavengeBeforeRemark
2243              && Thread::current()->is_ConcurrentGC_thread()),
2244          "Incorrect thread type for epilogue execution");
2245 
2246   if (!_between_prologue_and_epilogue) {
2247     // We have already been invoked; this is a gc_epilogue delegation
2248     // from yet another CMS generation that we are responsible for, just
2249     // ignore it since all relevant work has already been done.
2250     return;
2251   }
2252   assert(haveFreelistLocks(), "must have freelist locks");
2253   assert_lock_strong(bitMapLock());
2254 
2255   _ct->klass_rem_set()->set_accumulate_modified_oops(false);
2256 
2257   _cmsGen->gc_epilogue_work(full);
2258 
2259   if (_collectorState == AbortablePreclean || _collectorState == Precleaning) {
2260     // in case sampling was not already enabled, enable it
2261     _start_sampling = true;
2262   }
2263   // reset _eden_chunk_array so sampling starts afresh
2264   _eden_chunk_index = 0;
2265 
2266   size_t cms_used   = _cmsGen->cmsSpace()->used();
2267 
2268   // update performance counters - this uses a special version of
2269   // update_counters() that allows the utilization to be passed as a
2270   // parameter, avoiding multiple calls to used().
2271   //
2272   _cmsGen->update_counters(cms_used);
2273 
2274   bitMapLock()->unlock();
2275   releaseFreelistLocks();
2276 
2277   if (!CleanChunkPoolAsync) {
2278     Chunk::clean_chunk_pool();
2279   }
2280 
2281   set_did_compact(false);
2282   _between_prologue_and_epilogue = false;  // ready for next cycle
2283 }
2284 
2285 void ConcurrentMarkSweepGeneration::gc_epilogue(bool full) {
2286   collector()->gc_epilogue(full);
2287 
2288   // Also reset promotion tracking in par gc thread states.
2289   for (uint i = 0; i < ParallelGCThreads; i++) {
2290     _par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
2291   }
2292 }
2293 
2294 void ConcurrentMarkSweepGeneration::gc_epilogue_work(bool full) {
2295   assert(!incremental_collection_failed(), "Should have been cleared");
2296   cmsSpace()->setPreconsumptionDirtyCardClosure(NULL);
2297   cmsSpace()->gc_epilogue();
2298   // Print stat counters
2299   NOT_PRODUCT(
2300     assert(_numObjectsAllocated == 0, "check");
2301     assert(_numWordsAllocated == 0, "check");
2302     if (Verbose && PrintGC) {
2303       gclog_or_tty->print("Promoted "SIZE_FORMAT" objects, "
2304                           SIZE_FORMAT" bytes",
2305                  _numObjectsPromoted, _numWordsPromoted*sizeof(HeapWord));
2306     }
2307     _numObjectsPromoted = 0;
2308     _numWordsPromoted   = 0;
2309   )
2310 
2311   if (PrintGC && Verbose) {
2312     // The call down the chain in contiguous_available() needs the
2313     // freelistLock, so print this out before releasing the freelistLock.
2314     gclog_or_tty->print(" Contiguous available "SIZE_FORMAT" bytes ",
2315                         contiguous_available());
2316   }
2317 }
2318 
2319 #ifndef PRODUCT
2320 bool CMSCollector::have_cms_token() {
2321   Thread* thr = Thread::current();
2322   if (thr->is_VM_thread()) {
2323     return ConcurrentMarkSweepThread::vm_thread_has_cms_token();
2324   } else if (thr->is_ConcurrentGC_thread()) {
2325     return ConcurrentMarkSweepThread::cms_thread_has_cms_token();
2326   } else if (thr->is_GC_task_thread()) {
2327     return ConcurrentMarkSweepThread::vm_thread_has_cms_token() &&
2328            ParGCRareEvent_lock->owned_by_self();
2329   }
2330   return false;
2331 }
2332 #endif
2333 
2334 // Check reachability of the given heap address in CMS generation,
2335 // treating all other generations as roots.
2336 bool CMSCollector::is_cms_reachable(HeapWord* addr) {
2337   // We could "guarantee" below, rather than assert, but I'll
2338   // leave these as "asserts" so that an adventurous debugger
2339   // could try this in the product build, provided some subset of
2340   // the conditions were met, they were interested in the
2341   // results and knew that the computation below wouldn't interfere
2342   // with other concurrent computations mutating the structures
2343   // being read or written.
2344   assert(SafepointSynchronize::is_at_safepoint(),
2345          "Else mutations in object graph will make answer suspect");
2346   assert(have_cms_token(), "Should hold cms token");
2347   assert(haveFreelistLocks(), "must hold free list locks");
2348   assert_lock_strong(bitMapLock());
2349 
2350   // Clear the marking bit map array before starting, but, just
2351   // for kicks, first report if the given address is already marked
2352   gclog_or_tty->print_cr("Start: Address " PTR_FORMAT " is%s marked", addr,
2353                 _markBitMap.isMarked(addr) ? "" : " not");
2354 
2355   if (verify_after_remark()) {
2356     MutexLockerEx x(verification_mark_bm()->lock(), Mutex::_no_safepoint_check_flag);
2357     bool result = verification_mark_bm()->isMarked(addr);
2358     gclog_or_tty->print_cr("TransitiveMark: Address " PTR_FORMAT " %s marked", addr,
2359                            result ? "IS" : "is NOT");
2360     return result;
2361   } else {
2362     gclog_or_tty->print_cr("Could not compute result");
2363     return false;
2364   }
2365 }
2366 
2367 
2368 void
2369 CMSCollector::print_on_error(outputStream* st) {
2370   CMSCollector* collector = ConcurrentMarkSweepGeneration::_collector;
2371   if (collector != NULL) {
2372     CMSBitMap* bitmap = &collector->_markBitMap;
2373     st->print_cr("Marking Bits: (CMSBitMap*) " PTR_FORMAT, bitmap);
2374     bitmap->print_on_error(st, " Bits: ");
2375 
2376     st->cr();
2377 
2378     CMSBitMap* mut_bitmap = &collector->_modUnionTable;
2379     st->print_cr("Mod Union Table: (CMSBitMap*) " PTR_FORMAT, mut_bitmap);
2380     mut_bitmap->print_on_error(st, " Bits: ");
2381   }
2382 }
2383 
2384 ////////////////////////////////////////////////////////
2385 // CMS Verification Support
2386 ////////////////////////////////////////////////////////
2387 // Following the remark phase, the following invariant
2388 // should hold -- each object in the CMS heap which is
2389 // marked in the verification_mark_bm() should also be marked in markBitMap().
2390 
2391 class VerifyMarkedClosure: public BitMapClosure {
2392   CMSBitMap* _marks;
2393   bool       _failed;
2394 
2395  public:
2396   VerifyMarkedClosure(CMSBitMap* bm): _marks(bm), _failed(false) {}
2397 
2398   bool do_bit(size_t offset) {
2399     HeapWord* addr = _marks->offsetToHeapWord(offset);
2400     if (!_marks->isMarked(addr)) {
2401       oop(addr)->print_on(gclog_or_tty);
2402       gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr);
2403       _failed = true;
2404     }
2405     return true;
2406   }
2407 
2408   bool failed() { return _failed; }
2409 };
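     // Note: a VerifyMarkedClosure is constructed over markBitMap() and then
     // iterated over the bits of verification_mark_bm() (see
     // verify_after_remark_work_1/2 below), so it checks that every object
     // marked during verification is also marked in the CMS bit map.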
2410 
2411 bool CMSCollector::verify_after_remark(bool silent) {
2412   if (!silent) gclog_or_tty->print(" [Verifying CMS Marking... ");
2413   MutexLockerEx ml(verification_mark_bm()->lock(), Mutex::_no_safepoint_check_flag);
2414   static bool init = false;
2415 
2416   assert(SafepointSynchronize::is_at_safepoint(),
2417          "Else mutations in object graph will make answer suspect");
2418   assert(have_cms_token(),
2419          "Else there may be mutual interference in use of "
2420          " verification data structures");
2421   assert(_collectorState > Marking && _collectorState <= Sweeping,
2422          "Else marking info checked here may be obsolete");
2423   assert(haveFreelistLocks(), "must hold free list locks");
2424   assert_lock_strong(bitMapLock());
2425 
2426 
2427   // Allocate marking bit map if not already allocated
2428   if (!init) { // first time
2429     if (!verification_mark_bm()->allocate(_span)) {
2430       return false;
2431     }
2432     init = true;
2433   }
2434 
2435   assert(verification_mark_stack()->isEmpty(), "Should be empty");
2436 
2437   // Turn off refs discovery -- so we will be tracing through refs.
2438   // This is as intended, because by this time
2439   // GC must already have cleared any refs that need to be cleared,
2440   // and traced those that need to be marked; moreover,
2441   // the marking done here is not going to interfere in any
2442   // way with the marking information used by GC.
2443   NoRefDiscovery no_discovery(ref_processor());
2444 
2445   COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;)
2446 
2447   // Clear any marks from a previous round
2448   verification_mark_bm()->clear_all();
2449   assert(verification_mark_stack()->isEmpty(), "markStack should be empty");
2450   verify_work_stacks_empty();
2451 
2452   GenCollectedHeap* gch = GenCollectedHeap::heap();
2453   gch->ensure_parsability(false);  // fill TLABs, but no need to retire them
2454   // Update the saved marks which may affect the root scans.
2455   gch->save_marks();
2456 
2457   if (CMSRemarkVerifyVariant == 1) {
2458     // In this first variant of verification, we complete
2459     // all marking, then check if the new marks-vector is
2460     // a subset of the CMS marks-vector.
2461     verify_after_remark_work_1();
2462   } else if (CMSRemarkVerifyVariant == 2) {
2463     // In this second variant of verification, we flag an error
2464     // (i.e., an object marked in the new marks-vector but not marked
2465     // in the CMS marks-vector) immediately, also indicating the
2466     // identity of an object (A) that references the unmarked object (B) --
2467     // presumably, a mutation to A failed to be picked up by preclean/remark?
2468     verify_after_remark_work_2();
2469   } else {
2470     warning("Unrecognized value %d for CMSRemarkVerifyVariant",
2471             CMSRemarkVerifyVariant);
2472   }
2473   if (!silent) gclog_or_tty->print(" done] ");
2474   return true;
2475 }
2476 
2477 void CMSCollector::verify_after_remark_work_1() {
2478   ResourceMark rm;
2479   HandleMark  hm;
2480   GenCollectedHeap* gch = GenCollectedHeap::heap();
2481 
2482   // Get a clear set of claim bits for the roots processing to work with.
2483   ClassLoaderDataGraph::clear_claimed_marks();
2484 
2485   // Mark from roots one level into CMS
2486   MarkRefsIntoClosure notOlder(_span, verification_mark_bm());
2487   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
2488 
2489   gch->gen_process_roots(_cmsGen->level(),
2490                          true,   // younger gens are roots
2491                          true,   // activate StrongRootsScope
2492                          SharedHeap::ScanningOption(roots_scanning_options()),
2493                          should_unload_classes(),
2494                          &notOlder,
2495                          NULL,
2496                          NULL);  // SSS: Provide correct closure
2497 
2498   // Now mark from the roots
2499   MarkFromRootsClosure markFromRootsClosure(this, _span,
2500     verification_mark_bm(), verification_mark_stack(),
2501     false /* don't yield */, true /* verifying */);
2502   assert(_restart_addr == NULL, "Expected pre-condition");
2503   verification_mark_bm()->iterate(&markFromRootsClosure);
2504   while (_restart_addr != NULL) {
2505     // Deal with stack overflow: by restarting at the indicated
2506     // address.
2507     HeapWord* ra = _restart_addr;
2508     markFromRootsClosure.reset(ra);
2509     _restart_addr = NULL;
2510     verification_mark_bm()->iterate(&markFromRootsClosure, ra, _span.end());
2511   }
2512   assert(verification_mark_stack()->isEmpty(), "Should have been drained");
2513   verify_work_stacks_empty();
2514 
2515   // Marking completed -- now verify that each bit marked in
2516   // verification_mark_bm() is also marked in markBitMap(); flag all
2517   // errors by printing corresponding objects.
2518   VerifyMarkedClosure vcl(markBitMap());
2519   verification_mark_bm()->iterate(&vcl);
2520   if (vcl.failed()) {
2521     gclog_or_tty->print("Verification failed");
2522     Universe::heap()->print_on(gclog_or_tty);
2523     fatal("CMS: failed marking verification after remark");
2524   }
2525 }
2526 
2527 class VerifyKlassOopsKlassClosure : public KlassClosure {
2528   class VerifyKlassOopsClosure : public OopClosure {
2529     CMSBitMap* _bitmap;
2530    public:
2531     VerifyKlassOopsClosure(CMSBitMap* bitmap) : _bitmap(bitmap) { }
2532     void do_oop(oop* p)       { guarantee(*p == NULL || _bitmap->isMarked((HeapWord*) *p), "Should be marked"); }
2533     void do_oop(narrowOop* p) { ShouldNotReachHere(); }
2534   } _oop_closure;
2535  public:
2536   VerifyKlassOopsKlassClosure(CMSBitMap* bitmap) : _oop_closure(bitmap) {}
2537   void do_klass(Klass* k) {
2538     k->oops_do(&_oop_closure);
2539   }
2540 };
2541 
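     // Variant 2 of the after-remark verification (see the variant descriptions
     // above): the marking closures used here are also handed markBitMap(), so a
     // reachable-but-unmarked object is flagged as soon as it is encountered, and
     // the oops embedded in Klasses are additionally checked against the
     // verification bit map via VerifyKlassOopsKlassClosure below.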
2542 void CMSCollector::verify_after_remark_work_2() {
2543   ResourceMark rm;
2544   HandleMark  hm;
2545   GenCollectedHeap* gch = GenCollectedHeap::heap();
2546 
2547   // Get a clear set of claim bits for the roots processing to work with.
2548   ClassLoaderDataGraph::clear_claimed_marks();
2549 
2550   // Mark from roots one level into CMS
2551   MarkRefsIntoVerifyClosure notOlder(_span, verification_mark_bm(),
2552                                      markBitMap());
2553   CLDToOopClosure cld_closure(&notOlder, true);
2554 
2555   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
2556 
2557   gch->gen_process_roots(_cmsGen->level(),
2558                          true,   // younger gens are roots
2559                          true,   // activate StrongRootsScope
2560                          SharedHeap::ScanningOption(roots_scanning_options()),
2561                          should_unload_classes(),
2562                          &notOlder,
2563                          NULL,
2564                          &cld_closure);
2565 
2566   // Now mark from the roots
2567   MarkFromRootsVerifyClosure markFromRootsClosure(this, _span,
2568     verification_mark_bm(), markBitMap(), verification_mark_stack());
2569   assert(_restart_addr == NULL, "Expected pre-condition");
2570   verification_mark_bm()->iterate(&markFromRootsClosure);
2571   while (_restart_addr != NULL) {
2572     // Deal with stack overflow by restarting at the indicated
2573     // address.
2574     HeapWord* ra = _restart_addr;
2575     markFromRootsClosure.reset(ra);
2576     _restart_addr = NULL;
2577     verification_mark_bm()->iterate(&markFromRootsClosure, ra, _span.end());
2578   }
2579   assert(verification_mark_stack()->isEmpty(), "Should have been drained");
2580   verify_work_stacks_empty();
2581 
2582   VerifyKlassOopsKlassClosure verify_klass_oops(verification_mark_bm());
2583   ClassLoaderDataGraph::classes_do(&verify_klass_oops);
2584 
2585   // Marking completed -- now verify that each bit marked in
2586   // verification_mark_bm() is also marked in markBitMap(); flag all
2587   // errors by printing corresponding objects.
2588   VerifyMarkedClosure vcl(markBitMap());
2589   verification_mark_bm()->iterate(&vcl);
2590   assert(!vcl.failed(), "Else verification above should not have succeeded");
2591 }
2592 
2593 void ConcurrentMarkSweepGeneration::save_marks() {
2594   // delegate to CMS space
2595   cmsSpace()->save_marks();
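       // Also have each per-thread (par GC) state start tracking promotions
       // into this generation from this point on.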
2596   for (uint i = 0; i < ParallelGCThreads; i++) {
2597     _par_gc_thread_states[i]->promo.startTrackingPromotions();
2598   }
2599 }
2600 
2601 bool ConcurrentMarkSweepGeneration::no_allocs_since_save_marks() {
2602   return cmsSpace()->no_allocs_since_save_marks();
2603 }
2604 
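     // The macro below defines, for each "since save marks" closure type, an
     // oop_since_save_marks_iterate##nv_suffix() method that points the closure
     // at this generation, delegates the iteration to the CMS space, resets the
     // closure's generation, and then re-saves marks.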
2605 #define CMS_SINCE_SAVE_MARKS_DEFN(OopClosureType, nv_suffix)    \
2606                                                                 \
2607 void ConcurrentMarkSweepGeneration::                            \
2608 oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl) {   \
2609   cl->set_generation(this);                                     \
2610   cmsSpace()->oop_since_save_marks_iterate##nv_suffix(cl);      \
2611   cl->reset_generation();                                       \
2612   save_marks();                                                 \
2613 }
2614 
2615 ALL_SINCE_SAVE_MARKS_CLOSURES(CMS_SINCE_SAVE_MARKS_DEFN)
2616 
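     // The iteration methods below take the free list lock unless the caller
     // already holds it (compare the locking pattern and the comment in
     // prepare_for_verify() further down).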
2617 void
2618 ConcurrentMarkSweepGeneration::oop_iterate(ExtendedOopClosure* cl) {
2619   if (freelistLock()->owned_by_self()) {
2620     Generation::oop_iterate(cl);
2621   } else {
2622     MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
2623     Generation::oop_iterate(cl);
2624   }
2625 }
2626 
2627 void
2628 ConcurrentMarkSweepGeneration::object_iterate(ObjectClosure* cl) {
2629   if (freelistLock()->owned_by_self()) {
2630     Generation::object_iterate(cl);
2631   } else {
2632     MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
2633     Generation::object_iterate(cl);
2634   }
2635 }
2636 
2637 void
2638 ConcurrentMarkSweepGeneration::safe_object_iterate(ObjectClosure* cl) {
2639   if (freelistLock()->owned_by_self()) {
2640     Generation::safe_object_iterate(cl);
2641   } else {
2642     MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
2643     Generation::safe_object_iterate(cl);
2644   }
2645 }
2646 
2647 void
2648 ConcurrentMarkSweepGeneration::post_compact() {
2649 }
2650 
2651 void
2652 ConcurrentMarkSweepGeneration::prepare_for_verify() {
2653   // Fix the linear allocation blocks to look like free blocks.
2654 
2655   // Locks are normally acquired/released in gc_prologue/gc_epilogue, but those
2656   // are not called when the heap is verified during universe initialization and
2657   // at vm shutdown.
2658   if (freelistLock()->owned_by_self()) {
2659     cmsSpace()->prepare_for_verify();
2660   } else {
2661     MutexLockerEx fll(freelistLock(), Mutex::_no_safepoint_check_flag);
2662     cmsSpace()->prepare_for_verify();
2663   }
2664 }
2665 
2666 void
2667 ConcurrentMarkSweepGeneration::verify() {
2668   // Locks are normally acquired/released in gc_prologue/gc_epilogue, but those
2669   // are not called when the heap is verified during universe initialization and
2670   // at vm shutdown.
2671   if (freelistLock()->owned_by_self()) {
2672     cmsSpace()->verify();
2673   } else {
2674     MutexLockerEx fll(freelistLock(), Mutex::_no_safepoint_check_flag);
2675     cmsSpace()->verify();
2676   }
2677 }
2678 
2679 void CMSCollector::verify() {
2680   _cmsGen->verify();
2681 }
2682 
2683 #ifndef PRODUCT
2684 bool CMSCollector::overflow_list_is_empty() const {
2685   assert(_num_par_pushes >= 0, "Inconsistency");
2686   if (_overflow_list == NULL) {
2687     assert(_num_par_pushes == 0, "Inconsistency");
2688   }
2689   return _overflow_list == NULL;
2690 }
2691 
2692 // The methods verify_work_stacks_empty() and verify_overflow_empty()
2693 // merely consolidate assertion checks that frequently occur together.
2694 void CMSCollector::verify_work_stacks_empty() const {
2695   assert(_markStack.isEmpty(), "Marking stack should be empty");
2696   assert(overflow_list_is_empty(), "Overflow list should be empty");
2697 }
2698 
2699 void CMSCollector::verify_overflow_empty() const {
2700   assert(overflow_list_is_empty(), "Overflow list should be empty");
2701   assert(no_preserved_marks(), "No preserved marks");
2702 }
2703 #endif // PRODUCT
2704 
2705 // Decide if we want to enable class unloading as part of the
2706 // ensuing concurrent GC cycle. We will collect and
2707 // unload classes if it's the case that:
2708 // (1) an explicit gc request has been made and the flag
2709 //     ExplicitGCInvokesConcurrentAndUnloadsClasses is set, OR
2710 // (2) (a) class unloading is enabled at the command line, and (b) either
2711 //     (i) enough cycles have passed since the last unload, or (ii) old gen is too full
2712 // NOTE: Provided there is no change in the state of the heap between
2713 // calls to this method, it should have idempotent results. Moreover,
2714 // its results should be monotonically increasing (i.e. going from 0 to 1,
2715 // but not 1 to 0) between successive calls between which the heap was
2716 // not collected. The implementation below therefore relies on the
2717 // property that concurrent_cycles_since_last_unload() will not decrease
2718 // unless a collection cycle happened, and on _cmsGen->is_too_full()
2719 // being itself monotonic in that sense (see check_monotonicity()
2720 // below), so that the value computed here is likewise monotonic
2721 // between collections.
2722 void CMSCollector::update_should_unload_classes() {
2723   _should_unload_classes = false;
2724   // Condition 1 above
2725   if (_full_gc_requested && ExplicitGCInvokesConcurrentAndUnloadsClasses) {
2726     _should_unload_classes = true;
2727   } else if (CMSClassUnloadingEnabled) { // Condition 2.a above
2728     // Disjuncts 2.b.(i,ii) above
2729     _should_unload_classes = (concurrent_cycles_since_last_unload() >=
2730                               CMSClassUnloadingMaxInterval)
2731                            || _cmsGen->is_too_full();
2732   }
2733 }
2734 
2735 bool ConcurrentMarkSweepGeneration::is_too_full() const {
2736   bool res = should_concurrent_collect();
2737   res = res && (occupancy() > (double)CMSIsTooFullPercentage/100.0);
2738   return res;
2739 }
2740 
2741 void CMSCollector::setup_cms_unloading_and_verification_state() {
2742   const  bool should_verify =   VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
2743                              || VerifyBeforeExit;
2744   const  int  rso           =   SharedHeap::SO_AllCodeCache;
2745 
2746   // We set the proper root for this CMS cycle here.
2747   if (should_unload_classes()) {   // Should unload classes this cycle
2748     remove_root_scanning_option(rso);  // Shrink the root set appropriately
2749     set_verifying(should_verify);      // Set verification state for this cycle
2750     return;                            // Nothing else needs to be done at this time
2751   }
2752 
2753   // Not unloading classes this cycle
2754   assert(!should_unload_classes(), "Inconsistency!");
2755 
2756   if ((!verifying() || unloaded_classes_last_cycle()) && should_verify) {
2757     // Include symbols, strings and code cache elements to prevent their resurrection.
2758     add_root_scanning_option(rso);
2759     set_verifying(true);
2760   } else if (verifying() && !should_verify) {
2761     // We were verifying, but some verification flags got disabled.
2762     set_verifying(false);
2763     // Exclude symbols, strings and code cache elements from root scanning to
2764     // reduce IM and RM pauses.
2765     remove_root_scanning_option(rso);
2766   }
2767 }
2768 
2769 
2770 #ifndef PRODUCT
2771 HeapWord* CMSCollector::block_start(const void* p) const {
2772   const HeapWord* addr = (HeapWord*)p;
2773   if (_span.contains(p)) {
2774     if (_cmsGen->cmsSpace()->is_in_reserved(addr)) {
2775       return _cmsGen->cmsSpace()->block_start(p);
2776     }
2777   }
2778   return NULL;
2779 }
2780 #endif
2781 
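     // Expand the generation in response to a failed allocation and then retry
     // the allocation of "word_size" words under the free list lock; a delay of
     // GCExpandToAllocateDelayMillis may be interposed between the expansion and
     // the retried allocation.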
2782 HeapWord*
2783 ConcurrentMarkSweepGeneration::expand_and_allocate(size_t word_size,
2784                                                    bool   tlab,
2785                                                    bool   parallel) {
2786   CMSSynchronousYieldRequest yr;
2787   assert(!tlab, "Can't deal with TLAB allocation");
2788   MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
2789   expand_for_gc_cause(word_size*HeapWordSize, MinHeapDeltaBytes, CMSExpansionCause::_satisfy_allocation);
2790   if (GCExpandToAllocateDelayMillis > 0) {
2791     os::sleep(Thread::current(), GCExpandToAllocateDelayMillis, false);
2792   }
2793   return have_lock_and_allocate(word_size, tlab);
2794 }
2795 
2796 void ConcurrentMarkSweepGeneration::expand_for_gc_cause(
2797     size_t bytes,
2798     size_t expand_bytes,
2799     CMSExpansionCause::Cause cause)
2800 {
2801 
2802   bool success = expand(bytes, expand_bytes);
2803 
2804   // remember why we expanded; this information is used
2805   // by shouldConcurrentCollect() when making decisions on whether to start
2806   // a new CMS cycle.
2807   if (success) {
2808     set_expansion_cause(cause);
2809     if (PrintGCDetails && Verbose) {
2810       gclog_or_tty->print_cr("Expanded CMS gen for %s",
2811         CMSExpansionCause::to_string(cause));
2812     }
2813   }
2814 }
2815 
2816 HeapWord* ConcurrentMarkSweepGeneration::expand_and_par_lab_allocate(CMSParGCThreadState* ps, size_t word_sz) {
2817   HeapWord* res = NULL;
2818   MutexLocker x(ParGCRareEvent_lock);
2819   while (true) {
2820     // Expansion by some other thread might make alloc OK now:
2821     res = ps->lab.alloc(word_sz);
2822     if (res != NULL) return res;
2823     // If there's not enough expansion space available, give up.
2824     if (_virtual_space.uncommitted_size() < (word_sz * HeapWordSize)) {
2825       return NULL;
2826     }
2827     // Otherwise, we try expansion.
2828     expand_for_gc_cause(word_sz*HeapWordSize, MinHeapDeltaBytes, CMSExpansionCause::_allocate_par_lab);
2829     // Now go around the loop and try alloc again;
2830     // A competing par_promote might beat us to the expansion space,
2831     // so we may go around the loop again if promotion fails again.
2832     if (GCExpandToAllocateDelayMillis > 0) {
2833       os::sleep(Thread::current(), GCExpandToAllocateDelayMillis, false);
2834     }
2835   }
2836 }
2837 
2838 
2839 bool ConcurrentMarkSweepGeneration::expand_and_ensure_spooling_space(
2840   PromotionInfo* promo) {
2841   MutexLocker x(ParGCRareEvent_lock);
2842   size_t refill_size_bytes = promo->refillSize() * HeapWordSize;
2843   while (true) {
2844     // Expansion by some other thread might make alloc OK now:
2845     if (promo->ensure_spooling_space()) {
2846       assert(promo->has_spooling_space(),
2847              "Post-condition of successful ensure_spooling_space()");
2848       return true;
2849     }
2850     // If there's not enough expansion space available, give up.
2851     if (_virtual_space.uncommitted_size() < refill_size_bytes) {
2852       return false;
2853     }
2854     // Otherwise, we try expansion.
2855     expand_for_gc_cause(refill_size_bytes, MinHeapDeltaBytes, CMSExpansionCause::_allocate_par_spooling_space);
2856     // Now go around the loop and try alloc again;
2857     // A competing allocation might beat us to the expansion space,
2858     // so we may go around the loop again if allocation fails again.
2859     if (GCExpandToAllocateDelayMillis > 0) {
2860       os::sleep(Thread::current(), GCExpandToAllocateDelayMillis, false);
2861     }
2862   }
2863 }
2864 
2865 void ConcurrentMarkSweepGeneration::shrink(size_t bytes) {
2866   // Only shrink if a compaction was done so that all the free space
2867   // in the generation is in a contiguous block at the end.
2868   if (did_compact()) {
2869     CardGeneration::shrink(bytes);
2870   }
2871 }
2872 
2873 void ConcurrentMarkSweepGeneration::assert_correct_size_change_locking() {
2874   assert_locked_or_safepoint(Heap_lock);
2875 }
2876 
2877 void ConcurrentMarkSweepGeneration::shrink_free_list_by(size_t bytes) {
2878   assert_locked_or_safepoint(Heap_lock);
2879   assert_lock_strong(freelistLock());
2880   if (PrintGCDetails && Verbose) {
2881     warning("Shrinking of CMS not yet implemented");
2882   }
2883   return;
2884 }
2885 
2886 
2887 // Simple ctor/dtor wrapper for accounting & timer chores around concurrent
2888 // phases.
2889 class CMSPhaseAccounting: public StackObj {
2890  public:
2891   CMSPhaseAccounting(CMSCollector *collector,
2892                      const char *phase,
2893                      const GCId gc_id,
2894                      bool print_cr = true);
2895   ~CMSPhaseAccounting();
2896 
2897  private:
2898   CMSCollector *_collector;
2899   const char *_phase;
2900   elapsedTimer _wallclock;
2901   bool _print_cr;
2902   const GCId _gc_id;
2903 
2904  public:
2905   // Not MT-safe, so do not pass these StackObjs around
2906   // where they may be accessed by other threads.
2907   jlong wallclock_millis() {
2908     assert(_wallclock.is_active(), "Wall clock should not stop");
2909     _wallclock.stop();  // to record time
2910     jlong ret = _wallclock.milliseconds();
2911     _wallclock.start(); // restart
2912     return ret;
2913   }
2914 };
2915 
2916 CMSPhaseAccounting::CMSPhaseAccounting(CMSCollector *collector,
2917                                        const char *phase,
2918                                        const GCId gc_id,
2919                                        bool print_cr) :
2920   _collector(collector), _phase(phase), _print_cr(print_cr), _gc_id(gc_id) {
2921 
2922   if (PrintCMSStatistics != 0) {
2923     _collector->resetYields();
2924   }
2925   if (PrintGCDetails) {
2926     gclog_or_tty->gclog_stamp(_gc_id);
2927     gclog_or_tty->print_cr("[%s-concurrent-%s-start]",
2928       _collector->cmsGen()->short_name(), _phase);
2929   }
2930   _collector->resetTimer();
2931   _wallclock.start();
2932   _collector->startTimer();
2933 }
2934 
2935 CMSPhaseAccounting::~CMSPhaseAccounting() {
2936   assert(_wallclock.is_active(), "Wall clock should not have stopped");
2937   _collector->stopTimer();
2938   _wallclock.stop();
2939   if (PrintGCDetails) {
2940     gclog_or_tty->gclog_stamp(_gc_id);
2941     gclog_or_tty->print("[%s-concurrent-%s: %3.3f/%3.3f secs]",
2942                  _collector->cmsGen()->short_name(),
2943                  _phase, _collector->timerValue(), _wallclock.seconds());
2944     if (_print_cr) {
2945       gclog_or_tty->cr();
2946     }
2947     if (PrintCMSStatistics != 0) {
2948       gclog_or_tty->print_cr(" (CMS-concurrent-%s yielded %d times)", _phase,
2949                     _collector->yields());
2950     }
2951   }
2952 }
2953 
2954 // CMS work
2955 
2956 // The common parts of CMSParInitialMarkTask and CMSParRemarkTask.
2957 class CMSParMarkTask : public AbstractGangTask {
2958  protected:
2959   CMSCollector*     _collector;
2960   int               _n_workers;
2961   CMSParMarkTask(const char* name, CMSCollector* collector, int n_workers) :
2962       AbstractGangTask(name),
2963       _collector(collector),
2964       _n_workers(n_workers) {}
2965   // Work method in support of parallel rescan of young gen spaces
2966   void do_young_space_rescan(uint worker_id, OopsInGenClosure* cl,
2967                              ContiguousSpace* space,
2968                              HeapWord** chunk_array, size_t chunk_top);
2969   void work_on_young_gen_roots(uint worker_id, OopsInGenClosure* cl);
2970 };
2971 
2972 // Parallel initial mark task
2973 class CMSParInitialMarkTask: public CMSParMarkTask {
2974  public:
2975   CMSParInitialMarkTask(CMSCollector* collector, int n_workers) :
2976       CMSParMarkTask("Scan roots and young gen for initial mark in parallel",
2977                      collector, n_workers) {}
2978   void work(uint worker_id);
2979 };
2980 
2981 // Checkpoint the roots into this generation from outside
2982 // this generation. [Note this initial checkpoint need only
2983 // be approximate -- we'll do a catch up phase subsequently.]
2984 void CMSCollector::checkpointRootsInitial() {
2985   assert(_collectorState == InitialMarking, "Wrong collector state");
2986   check_correct_thread_executing();
2987   TraceCMSMemoryManagerStats tms(_collectorState,GenCollectedHeap::heap()->gc_cause());
2988 
2989   save_heap_summary();
2990   report_heap_summary(GCWhen::BeforeGC);
2991 
2992   ReferenceProcessor* rp = ref_processor();
2993   SpecializationStats::clear();
2994   assert(_restart_addr == NULL, "Control point invariant");
2995   {
2996     // acquire locks for subsequent manipulations
2997     MutexLockerEx x(bitMapLock(),
2998                     Mutex::_no_safepoint_check_flag);
2999     checkpointRootsInitialWork();
3000     // enable ("weak") refs discovery
3001     rp->enable_discovery(true /*verify_disabled*/, true /*check_no_refs*/);
3002     _collectorState = Marking;
3003   }
3004   SpecializationStats::print();
3005 }
3006 
3007 void CMSCollector::checkpointRootsInitialWork() {
3008   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
3009   assert(_collectorState == InitialMarking, "just checking");
3010 
3011   // If there has not been a GC[n-1] since last GC[n] cycle completed,
3012   // precede our marking with a collection of all
3013   // younger generations to keep floating garbage to a minimum.
3014   // XXX: we won't do this for now -- it's an optimization to be done later.
3015 
3016   // already have locks
3017   assert_lock_strong(bitMapLock());
3018   assert(_markBitMap.isAllClear(), "was reset at end of previous cycle");
3019 
3020   // Setup the verification and class unloading state for this
3021   // CMS collection cycle.
3022   setup_cms_unloading_and_verification_state();
3023 
3024   NOT_PRODUCT(GCTraceTime t("\ncheckpointRootsInitialWork",
3025     PrintGCDetails && Verbose, true, _gc_timer_cm, _gc_tracer_cm->gc_id());)
3026 
3027   // Reset all the PLAB chunk arrays if necessary.
3028   if (_survivor_plab_array != NULL && !CMSPLABRecordAlways) {
3029     reset_survivor_plab_arrays();
3030   }
3031 
3032   ResourceMark rm;
3033   HandleMark  hm;
3034 
3035   MarkRefsIntoClosure notOlder(_span, &_markBitMap);
3036   GenCollectedHeap* gch = GenCollectedHeap::heap();
3037 
3038   verify_work_stacks_empty();
3039   verify_overflow_empty();
3040 
3041   gch->ensure_parsability(false);  // fill TLABs, but no need to retire them
3042   // Update the saved marks which may affect the root scans.
3043   gch->save_marks();
3044 
3045   // weak reference processing has not started yet.
3046   ref_processor()->set_enqueuing_is_done(false);
3047 
3048   // Need to remember all newly created CLDs,
3049   // so that we can guarantee that the remark finds them.
3050   ClassLoaderDataGraph::remember_new_clds(true);
3051 
3052   // Whenever a CLD is found, it will be claimed before proceeding to mark
3053   // the klasses. The claimed marks need to be cleared before marking starts.
3054   ClassLoaderDataGraph::clear_claimed_marks();
3055 
3056   if (CMSPrintEdenSurvivorChunks) {
3057     print_eden_and_survivor_chunk_arrays();
3058   }
3059 
3060   {
3061     COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;)
3062     if (CMSParallelInitialMarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
3063       // The parallel version.
3064       FlexibleWorkGang* workers = gch->workers();
3065       assert(workers != NULL, "Need parallel worker threads.");
3066       int n_workers = workers->active_workers();
3067       CMSParInitialMarkTask tsk(this, n_workers);
3068       gch->set_par_threads(n_workers);
3069       initialize_sequential_subtasks_for_young_gen_rescan(n_workers);
3070       if (n_workers > 1) {
3071         GenCollectedHeap::StrongRootsScope srs(gch);
3072         workers->run_task(&tsk);
3073       } else {
3074         GenCollectedHeap::StrongRootsScope srs(gch);
3075         tsk.work(0);
3076       }
3077       gch->set_par_threads(0);
3078     } else {
3079       // The serial version.
3080       CLDToOopClosure cld_closure(&notOlder, true);
3081       gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
3082       gch->gen_process_roots(_cmsGen->level(),
3083                              true,   // younger gens are roots
3084                              true,   // activate StrongRootsScope
3085                              SharedHeap::ScanningOption(roots_scanning_options()),
3086                              should_unload_classes(),
3087                              &notOlder,
3088                              NULL,
3089                              &cld_closure);
3090     }
3091   }
3092 
3093   // The mod-union table should be clear here; it will be dirtied in the
3094   // CMS generation's gc_prologue for each younger generation collection.
3095 
3096   assert(_modUnionTable.isAllClear(),
3097        "Was cleared in most recent final checkpoint phase"
3098        " or no bits are set in the gc_prologue before the start of the next "
3099        "marking phase.");
3100 
3101   assert(_ct->klass_rem_set()->mod_union_is_clear(), "Must be");
3102 
3103   // Save the end of the used_region of the constituent generations
3104   // to be used to limit the extent of sweep in each generation.
3105   save_sweep_limits();
3106   verify_overflow_empty();
3107 }
3108 
3109 bool CMSCollector::markFromRoots() {
3110   // we might be tempted to assert that:
3111   // assert(!SafepointSynchronize::is_at_safepoint(),
3112   //        "inconsistent argument?");
3113   // However that wouldn't be right, because it's possible that
3114   // a safepoint is indeed in progress as a younger generation
3115   // stop-the-world GC happens even as we mark in this generation.
3116   assert(_collectorState == Marking, "inconsistent state?");
3117   check_correct_thread_executing();
3118   verify_overflow_empty();
3119 
3120   // Weak ref discovery note: We may be discovering weak
3121   // refs in this generation concurrent (but interleaved) with
3122   // weak ref discovery by a younger generation collector.
3123 
3124   CMSTokenSyncWithLocks ts(true, bitMapLock());
3125   TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
3126   CMSPhaseAccounting pa(this, "mark", _gc_tracer_cm->gc_id(), !PrintGCDetails);
3127   bool res = markFromRootsWork();
3128   if (res) {
3129     _collectorState = Precleaning;
3130   } else { // We failed and a foreground collection wants to take over
3131     assert(_foregroundGCIsActive, "internal state inconsistency");
3132     assert(_restart_addr == NULL,  "foreground will restart from scratch");
3133     if (PrintGCDetails) {
3134       gclog_or_tty->print_cr("bailing out to foreground collection");
3135     }
3136   }
3137   verify_overflow_empty();
3138   return res;
3139 }
3140 
3141 bool CMSCollector::markFromRootsWork() {
3142   // iterate over marked bits in bit map, doing a full scan and mark
3143   // from these roots using the following algorithm:
3144   // . if oop is to the right of the current scan pointer,
3145   //   mark corresponding bit (we'll process it later)
3146   // . else (oop is to left of current scan pointer)
3147   //   push oop on marking stack
3148   // . drain the marking stack
3149 
3150   // Note that when we do a marking step we need to hold the
3151   // bit map lock -- recall that direct allocation (by mutators)
3152   // and promotion (by younger generation collectors) is also
3153   // marking the bit map. [the so-called allocate live policy.]
3154   // Because the implementation of bit map marking is not
3155   // robust wrt simultaneous marking of bits in the same word,
3156   // we need to make sure that there is no such interference
3157   // between concurrent such updates.
3158 
3159   // already have locks
3160   assert_lock_strong(bitMapLock());
3161 
3162   verify_work_stacks_empty();
3163   verify_overflow_empty();
3164   bool result = false;
3165   if (CMSConcurrentMTEnabled && ConcGCThreads > 0) {
3166     result = do_marking_mt();
3167   } else {
3168     result = do_marking_st();
3169   }
3170   return result;
3171 }
3172 
3173 // Forward decl
3174 class CMSConcMarkingTask;
3175 
3176 class CMSConcMarkingTerminator: public ParallelTaskTerminator {
3177   CMSCollector*       _collector;
3178   CMSConcMarkingTask* _task;
3179  public:
3180   virtual void yield();
3181 
3182   // "n_threads" is the number of threads to be terminated.
3183   // "queue_set" is a set of work queues of other threads.
3184   // "collector" is the CMS collector associated with this task terminator.
3185   // Yielding of the gang as a whole is handled by the yield() method above.
3186   CMSConcMarkingTerminator(int n_threads, TaskQueueSetSuper* queue_set, CMSCollector* collector) :
3187     ParallelTaskTerminator(n_threads, queue_set),
3188     _collector(collector) { }
3189 
3190   void set_task(CMSConcMarkingTask* task) {
3191     _task = task;
3192   }
3193 };
3194 
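     // Lets terminating worker threads exit the termination protocol while the
     // associated marking task is yielding; see should_exit_termination() below.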
3195 class CMSConcMarkingTerminatorTerminator: public TerminatorTerminator {
3196   CMSConcMarkingTask* _task;
3197  public:
3198   bool should_exit_termination();
3199   void set_task(CMSConcMarkingTask* task) {
3200     _task = task;
3201   }
3202 };
3203 
3204 // MT Concurrent Marking Task
3205 class CMSConcMarkingTask: public YieldingFlexibleGangTask {
3206   CMSCollector* _collector;
3207   int           _n_workers;                  // requested/desired # workers
3208   bool          _result;
3209   CompactibleFreeListSpace*  _cms_space;
3210   char          _pad_front[64];   // padding to ...
3211   HeapWord*     _global_finger;   // ... avoid sharing cache line
3212   char          _pad_back[64];
3213   HeapWord*     _restart_addr;
3214 
3215   //  Exposed here for yielding support
3216   Mutex* const _bit_map_lock;
3217 
3218   // The per thread work queues, available here for stealing
3219   OopTaskQueueSet*  _task_queues;
3220 
3221   // Termination (and yielding) support
3222   CMSConcMarkingTerminator _term;
3223   CMSConcMarkingTerminatorTerminator _term_term;
3224 
3225  public:
3226   CMSConcMarkingTask(CMSCollector* collector,
3227                  CompactibleFreeListSpace* cms_space,
3228                  YieldingFlexibleWorkGang* workers,
3229                  OopTaskQueueSet* task_queues):
3230     YieldingFlexibleGangTask("Concurrent marking done multi-threaded"),
3231     _collector(collector),
3232     _cms_space(cms_space),
3233     _n_workers(0), _result(true),
3234     _task_queues(task_queues),
3235     _term(_n_workers, task_queues, _collector),
3236     _bit_map_lock(collector->bitMapLock())
3237   {
3238     _requested_size = _n_workers;
3239     _term.set_task(this);
3240     _term_term.set_task(this);
3241     _restart_addr = _global_finger = _cms_space->bottom();
3242   }
3243 
3244 
3245   OopTaskQueueSet* task_queues()  { return _task_queues; }
3246 
3247   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
3248 
3249   HeapWord** global_finger_addr() { return &_global_finger; }
3250 
3251   CMSConcMarkingTerminator* terminator() { return &_term; }
3252 
3253   virtual void set_for_termination(int active_workers) {
3254     terminator()->reset_for_reuse(active_workers);
3255   }
3256 
3257   void work(uint worker_id);
3258   bool should_yield() {
3259     return    ConcurrentMarkSweepThread::should_yield()
3260            && !_collector->foregroundGCIsActive();
3261   }
3262 
3263   virtual void coordinator_yield();  // stuff done by coordinator
3264   bool result() { return _result; }
3265 
3266   void reset(HeapWord* ra) {
3267     assert(_global_finger >= _cms_space->end(),  "Postcondition of ::work(i)");
3268     _restart_addr = _global_finger = ra;
3269     _term.reset_for_reuse();
3270   }
3271 
3272   static bool get_work_from_overflow_stack(CMSMarkStack* ovflw_stk,
3273                                            OopTaskQueue* work_q);
3274 
3275  private:
3276   void do_scan_and_mark(int i, CompactibleFreeListSpace* sp);
3277   void do_work_steal(int i);
3278   void bump_global_finger(HeapWord* f);
3279 };
3280 
3281 bool CMSConcMarkingTerminatorTerminator::should_exit_termination() {
3282   assert(_task != NULL, "Error");
3283   return _task->yielding();
3284   // Note that we do not need the disjunct || _task->should_yield() above
3285   // because we want terminating threads to yield only if the task
3286   // is already in the midst of yielding, which happens only after at least one
3287   // thread has yielded.
3288 }
3289 
3290 void CMSConcMarkingTerminator::yield() {
3291   if (_task->should_yield()) {
3292     _task->yield();
3293   } else {
3294     ParallelTaskTerminator::yield();
3295   }
3296 }
3297 
3298 ////////////////////////////////////////////////////////////////
3299 // Concurrent Marking Algorithm Sketch
3300 ////////////////////////////////////////////////////////////////
3301 // Until all tasks exhausted (both spaces):
3302 // -- claim next available chunk
3303 // -- bump global finger via CAS
3304 // -- find first object that starts in this chunk
3305 //    and start scanning bitmap from that position
3306 // -- scan marked objects for oops
3307 // -- CAS-mark target, and if successful:
3308 //    . if target oop is above global finger (volatile read)
3309 //      nothing to do
3310 //    . if target oop is in chunk and above local finger
3311 //        then nothing to do
3312 //    . else push on work-queue
3313 // -- Deal with possible overflow issues:
3314 //    . local work-queue overflow causes stuff to be pushed on
3315 //      global (common) overflow queue
3316 //    . always first empty local work queue
3317 //    . then get a batch of oops from global work queue if any
3318 //    . then do work stealing
3319 // -- When all tasks claimed (both spaces)
3320 //    and local work queue empty,
3321 //    then in a loop do:
3322 //    . check global overflow stack; steal a batch of oops and trace
3323 //    . try to steal from other threads if the global overflow stack is empty
3324 //    . if neither is available, offer termination
3325 // -- Terminate and return result
3326 //
3327 void CMSConcMarkingTask::work(uint worker_id) {
3328   elapsedTimer _timer;
3329   ResourceMark rm;
3330   HandleMark hm;
3331 
3332   DEBUG_ONLY(_collector->verify_overflow_empty();)
3333 
3334   // Before we begin work, our work queue should be empty
3335   assert(work_queue(worker_id)->size() == 0, "Expected to be empty");
3336   // Scan the bitmap covering _cms_space, tracing through grey objects.
3337   _timer.start();
3338   do_scan_and_mark(worker_id, _cms_space);
3339   _timer.stop();
3340   if (PrintCMSStatistics != 0) {
3341     gclog_or_tty->print_cr("Finished cms space scanning in %dth thread: %3.3f sec",
3342       worker_id, _timer.seconds());
3343       // XXX: need xxx/xxx type of notation, two timers
3344   }
3345 
3346   // ... do work stealing
3347   _timer.reset();
3348   _timer.start();
3349   do_work_steal(worker_id);
3350   _timer.stop();
3351   if (PrintCMSStatistics != 0) {
3352     gclog_or_tty->print_cr("Finished work stealing in %dth thread: %3.3f sec",
3353       worker_id, _timer.seconds());
3354       // XXX: need xxx/xxx type of notation, two timers
3355   }
3356   assert(_collector->_markStack.isEmpty(), "Should have been emptied");
3357   assert(work_queue(worker_id)->size() == 0, "Should have been emptied");
3358   // Note that under the current task protocol, the
3359   // following assertion is true even if the spaces have
3360   // expanded since the completion of the concurrent
3361   // marking. XXX This will likely change under a strict
3362   // ABORT semantics.
3363   // After perm removal the comparison was changed to
3364   // greater than or equal to from strictly greater than.
3365   // Before perm removal the highest address sweep would
3366   // have been at the end of perm gen but now is at the
3367   // end of the tenured gen.
3368   assert(_global_finger >=  _cms_space->end(),
3369          "All tasks have been completed");
3370   DEBUG_ONLY(_collector->verify_overflow_empty();)
3371 }
3372 
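     // Lock-free, monotone advance of _global_finger to at least "f": keep
     // attempting a CAS from the currently observed value to "f" until either the
     // CAS succeeds or another thread has already pushed the finger past "f".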
3373 void CMSConcMarkingTask::bump_global_finger(HeapWord* f) {
3374   HeapWord* read = _global_finger;
3375   HeapWord* cur  = read;
3376   while (f > read) {
3377     cur = read;
3378     read = (HeapWord*) Atomic::cmpxchg_ptr(f, &_global_finger, cur);
3379     if (cur == read) {
3380       // our cas succeeded
3381       assert(_global_finger >= f, "protocol consistency");
3382       break;
3383     }
3384   }
3385 }
3386 
3387 // This is really inefficient, and should be redone by
3388 // using (not yet available) block-read and -write interfaces to the
3389 // stack and the work_queue. XXX FIX ME !!!
3390 bool CMSConcMarkingTask::get_work_from_overflow_stack(CMSMarkStack* ovflw_stk,
3391                                                       OopTaskQueue* work_q) {
3392   // Fast lock-free check
3393   if (ovflw_stk->length() == 0) {
3394     return false;
3395   }
3396   assert(work_q->size() == 0, "Shouldn't steal");
3397   MutexLockerEx ml(ovflw_stk->par_lock(),
3398                    Mutex::_no_safepoint_check_flag);
3399   // Grab up to 1/4 the size of the work queue
3400   size_t num = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
3401                     (size_t)ParGCDesiredObjsFromOverflowList);
3402   num = MIN2(num, ovflw_stk->length());
3403   for (int i = (int) num; i > 0; i--) {
3404     oop cur = ovflw_stk->pop();
3405     assert(cur != NULL, "Counted wrong?");
3406     work_q->push(cur);
3407   }
3408   return num > 0;
3409 }
3410 
3411 void CMSConcMarkingTask::do_scan_and_mark(int i, CompactibleFreeListSpace* sp) {
3412   SequentialSubTasksDone* pst = sp->conc_par_seq_tasks();
3413   int n_tasks = pst->n_tasks();
3414   // We allow that there may be no tasks to do here because
3415   // we are restarting after a stack overflow.
3416   assert(pst->valid() || n_tasks == 0, "Uninitialized use?");
3417   uint nth_task = 0;
3418 
3419   HeapWord* aligned_start = sp->bottom();
3420   if (sp->used_region().contains(_restart_addr)) {
3421     // Align down to a card boundary for the start of 0th task
3422     // for this space.
3423     aligned_start =
3424       (HeapWord*)align_size_down((uintptr_t)_restart_addr,
3425                                  CardTableModRefBS::card_size);
3426   }
3427 
3428   size_t chunk_size = sp->marking_task_size();
3429   while (!pst->is_task_claimed(/* reference */ nth_task)) {
3430     // Having claimed the nth task in this space,
3431     // compute the chunk that it corresponds to:
3432     MemRegion span = MemRegion(aligned_start + nth_task*chunk_size,
3433                                aligned_start + (nth_task+1)*chunk_size);
3434     // Try and bump the global finger via a CAS;
3435     // note that we need to do the global finger bump
3436     // _before_ taking the intersection below, because
3437     // the task corresponding to that region will be
3438     // deemed done even if the used_region() expands
3439     // because of allocation -- as it almost certainly will
3440     // during start-up while the threads yield in the
3441     // closure below.
3442     HeapWord* finger = span.end();
3443     bump_global_finger(finger);   // atomically
3444     // There are null tasks here corresponding to chunks
3445     // beyond the "top" address of the space.
3446     span = span.intersection(sp->used_region());
3447     if (!span.is_empty()) {  // Non-null task
3448       HeapWord* prev_obj;
3449       assert(!span.contains(_restart_addr) || nth_task == 0,
3450              "Inconsistency");
3451       if (nth_task == 0) {
3452         // For the 0th task, we'll not need to compute a block_start.
3453         if (span.contains(_restart_addr)) {
3454           // In the case of a restart because of stack overflow,
3455           // we might additionally skip a chunk prefix.
3456           prev_obj = _restart_addr;
3457         } else {
3458           prev_obj = span.start();
3459         }
3460       } else {
3461         // We want to skip the first object because
3462         // the protocol is to scan any object in its entirety
3463         // that _starts_ in this span; a fortiori, any
3464         // object starting in an earlier span is scanned
3465         // as part of an earlier claimed task.
3466         // Below we use the "careful" version of block_start
3467         // so we do not try to navigate uninitialized objects.
3468         prev_obj = sp->block_start_careful(span.start());
3469         // Below we use a variant of block_size that uses the
3470         // Printezis bits to avoid waiting for allocated
3471         // objects to become initialized/parsable.
3472         while (prev_obj < span.start()) {
3473           size_t sz = sp->block_size_no_stall(prev_obj, _collector);
3474           if (sz > 0) {
3475             prev_obj += sz;
3476           } else {
3477             // In this case we may end up doing a bit of redundant
3478             // scanning, but that appears unavoidable, short of
3479             // locking the free list locks; see bug 6324141.
3480             break;
3481           }
3482         }
3483       }
3484       if (prev_obj < span.end()) {
3485         MemRegion my_span = MemRegion(prev_obj, span.end());
3486         // Do the marking work within a non-empty span --
3487         // the last argument to the constructor indicates whether the
3488         // iteration should be incremental with periodic yields.
3489         Par_MarkFromRootsClosure cl(this, _collector, my_span,
3490                                     &_collector->_markBitMap,
3491                                     work_queue(i),
3492                                     &_collector->_markStack);
3493         _collector->_markBitMap.iterate(&cl, my_span.start(), my_span.end());
3494       } // else nothing to do for this task
3495     }   // else nothing to do for this task
3496   }
3497   // We'd be tempted to assert here that since there are no
3498   // more tasks left to claim in this space, the global_finger
3499   // must exceed space->top() and a fortiori space->end(). However,
3500   // that would not quite be correct because the bumping of
3501   // global_finger occurs strictly after the claiming of a task,
3502   // so by the time we reach here the global finger may not yet
3503   // have been bumped up by the thread that claimed the last
3504   // task.
3505   pst->all_tasks_completed();
3506 }
3507 
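     // Closure used by the work-stealing phase of multi-threaded concurrent
     // marking: do_oop() below CAS-marks unmarked objects in the CMS generation
     // and pushes them on the local work queue, spilling to the shared overflow
     // stack (or recording a restart address) when the queues are full.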
3508 class Par_ConcMarkingClosure: public MetadataAwareOopClosure {
3509  private:
3510   CMSCollector* _collector;
3511   CMSConcMarkingTask* _task;
3512   MemRegion     _span;
3513   CMSBitMap*    _bit_map;
3514   CMSMarkStack* _overflow_stack;
3515   OopTaskQueue* _work_queue;
3516  protected:
3517   DO_OOP_WORK_DEFN
3518  public:
3519   Par_ConcMarkingClosure(CMSCollector* collector, CMSConcMarkingTask* task, OopTaskQueue* work_queue,
3520                          CMSBitMap* bit_map, CMSMarkStack* overflow_stack):
3521     MetadataAwareOopClosure(collector->ref_processor()),
3522     _collector(collector),
3523     _task(task),
3524     _span(collector->_span),
3525     _work_queue(work_queue),
3526     _bit_map(bit_map),
3527     _overflow_stack(overflow_stack)
3528   { }
3529   virtual void do_oop(oop* p);
3530   virtual void do_oop(narrowOop* p);
3531 
3532   void trim_queue(size_t max);
3533   void handle_stack_overflow(HeapWord* lost);
3534   void do_yield_check() {
3535     if (_task->should_yield()) {
3536       _task->yield();
3537     }
3538   }
3539 };
3540 
3541 // Grey object scanning during work stealing phase --
3542 // the salient assumption here is that any references
3543 // that are in these stolen objects being scanned must
3544 // already have been initialized (else they would not have
3545 // been published), so we do not need to check for
3546 // uninitialized objects before pushing here.
3547 void Par_ConcMarkingClosure::do_oop(oop obj) {
3548   assert(obj->is_oop_or_null(true), err_msg("Expected an oop or NULL at " PTR_FORMAT, p2i(obj)));
3549   HeapWord* addr = (HeapWord*)obj;
3550   // Check if oop points into the CMS generation
3551   // and is not marked
3552   if (_span.contains(addr) && !_bit_map->isMarked(addr)) {
3553     // a white object ...
3554     // If we manage to "claim" the object, by being the
3555     // first thread to mark it, then we push it on our
3556     // marking stack
3557     if (_bit_map->par_mark(addr)) {     // ... now grey
3558       // push on work queue (grey set)
3559       bool simulate_overflow = false;
3560       NOT_PRODUCT(
3561         if (CMSMarkStackOverflowALot &&
3562             _collector->simulate_overflow()) {
3563           // simulate a stack overflow
3564           simulate_overflow = true;
3565         }
3566       )
3567       if (simulate_overflow ||
3568           !(_work_queue->push(obj) || _overflow_stack->par_push(obj))) {
3569         // stack overflow
3570         if (PrintCMSStatistics != 0) {
3571           gclog_or_tty->print_cr("CMS marking stack overflow (benign) at "
3572                                  SIZE_FORMAT, _overflow_stack->capacity());
3573         }
3574         // We cannot assert that the overflow stack is full because
3575         // it may have been emptied since.
3576         assert(simulate_overflow ||
3577                _work_queue->size() == _work_queue->max_elems(),
3578               "Else push should have succeeded");
3579         handle_stack_overflow(addr);
3580       }
3581     } // Else, some other thread got there first
3582     do_yield_check();
3583   }
3584 }
3585 
3586 void Par_ConcMarkingClosure::do_oop(oop* p)       { Par_ConcMarkingClosure::do_oop_work(p); }
3587 void Par_ConcMarkingClosure::do_oop(narrowOop* p) { Par_ConcMarkingClosure::do_oop_work(p); }
3588 
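     // Drain the local work queue down to at most "max" entries, tracing each
     // popped (grey) object via do_oop() above and yielding when requested;
     // do_work_steal() below uses max == 0 to empty the queue completely.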
3589 void Par_ConcMarkingClosure::trim_queue(size_t max) {
3590   while (_work_queue->size() > max) {
3591     oop new_oop;
3592     if (_work_queue->pop_local(new_oop)) {
3593       assert(new_oop->is_oop(), "Should be an oop");
3594       assert(_bit_map->isMarked((HeapWord*)new_oop), "Grey object");
3595       assert(_span.contains((HeapWord*)new_oop), "Not in span");
3596       new_oop->oop_iterate(this);  // do_oop() above
3597       do_yield_check();
3598     }
3599   }
3600 }
3601 
3602 // Upon stack overflow, we discard (part of) the stack,
3603 // remembering the least address amongst those discarded
3604 // in CMSCollector's _restart_address.
3605 void Par_ConcMarkingClosure::handle_stack_overflow(HeapWord* lost) {
3606   // We need to do this under a mutex to prevent other
3607   // workers from interfering with the work done below.
3608   MutexLockerEx ml(_overflow_stack->par_lock(),
3609                    Mutex::_no_safepoint_check_flag);
3610   // Remember the least grey address discarded
3611   HeapWord* ra = (HeapWord*)_overflow_stack->least_value(lost);
3612   _collector->lower_restart_addr(ra);
3613   _overflow_stack->reset();  // discard stack contents
3614   _overflow_stack->expand(); // expand the stack if possible
3615 }
3616 
3617 
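     // Work-stealing loop: repeatedly drain the local queue, then refill it from
     // the global overflow stack if possible, then try to steal from other
     // workers' queues, and finally offer termination, yielding along the way
     // when requested.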
3618 void CMSConcMarkingTask::do_work_steal(int i) {
3619   OopTaskQueue* work_q = work_queue(i);
3620   oop obj_to_scan;
3621   CMSBitMap* bm = &(_collector->_markBitMap);
3622   CMSMarkStack* ovflw = &(_collector->_markStack);
3623   int* seed = _collector->hash_seed(i);
3624   Par_ConcMarkingClosure cl(_collector, this, work_q, bm, ovflw);
3625   while (true) {
3626     cl.trim_queue(0);
3627     assert(work_q->size() == 0, "Should have been emptied above");
3628     if (get_work_from_overflow_stack(ovflw, work_q)) {
3629       // Can't assert below because the work obtained from the
3630       // overflow stack may already have been stolen from us.
3631       // assert(work_q->size() > 0, "Work from overflow stack");
3632       continue;
3633     } else if (task_queues()->steal(i, seed, /* reference */ obj_to_scan)) {
3634       assert(obj_to_scan->is_oop(), "Should be an oop");
3635       assert(bm->isMarked((HeapWord*)obj_to_scan), "Grey object");
3636       obj_to_scan->oop_iterate(&cl);
3637     } else if (terminator()->offer_termination(&_term_term)) {
3638       assert(work_q->size() == 0, "Impossible!");
3639       break;
3640     } else if (yielding() || should_yield()) {
3641       yield();
3642     }
3643   }
3644 }
3645 
3646 // This is run by the CMS (coordinator) thread.
3647 void CMSConcMarkingTask::coordinator_yield() {
3648   assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
3649          "CMS thread should hold CMS token");
3650   // First give up the locks, then yield, then re-lock
3651   // We should probably use a constructor/destructor idiom to
3652   // do this unlock/lock or modify the MutexUnlocker class to
3653   // serve our purpose. XXX
3654   assert_lock_strong(_bit_map_lock);
3655   _bit_map_lock->unlock();
3656   ConcurrentMarkSweepThread::desynchronize(true);
3657   _collector->stopTimer();
3658   if (PrintCMSStatistics != 0) {
3659     _collector->incrementYields();
3660   }
3661 
3662   // It is possible for whichever thread initiated the yield request
3663   // not to get a chance to wake up and take the bitmap lock between
3664   // this thread releasing it and reacquiring it. So, while the
3665   // should_yield() flag is on, let's sleep for a bit to give the
3666   // other thread a chance to wake up. The limit imposed on the number
3667   // of iterations is defensive, to avoid any unforeseen circumstances
3668   // putting us into an infinite loop. Since it's always been this
3669   // (coordinator_yield()) method that was observed to cause the
3670   // problem, we are using a parameter (CMSCoordinatorYieldSleepCount)
3671   // which is by default non-zero. For the other seven methods that
3672   // also perform the yield operation, we are using a different
3673   // parameter (CMSYieldSleepCount) which is by default zero. This way we
3674   // can enable the sleeping for those methods too, if necessary.
3675   // See 6442774.
3676   //
3677   // We really need to reconsider the synchronization between the GC
3678   // thread and the yield-requesting threads in the future and we
3679   // should really use wait/notify, which is the recommended
3680   // way of doing this type of interaction. Additionally, we should
3681   // consolidate the eight methods that do the yield operation and they
3682   // are almost identical into one for better maintainability and
3683   // readability. See 6445193.
3684   //
3685   // Tony 2006.06.29
3686   for (unsigned i = 0; i < CMSCoordinatorYieldSleepCount &&
3687                    ConcurrentMarkSweepThread::should_yield() &&
3688                    !CMSCollector::foregroundGCIsActive(); ++i) {
3689     os::sleep(Thread::current(), 1, false);
3690   }
3691 
3692   ConcurrentMarkSweepThread::synchronize(true);
3693   _bit_map_lock->lock_without_safepoint_check();
3694   _collector->startTimer();
3695 }
3696 
3697 bool CMSCollector::do_marking_mt() {
3698   assert(ConcGCThreads > 0 && conc_workers() != NULL, "precondition");
3699   int num_workers = AdaptiveSizePolicy::calc_active_conc_workers(
3700                                        conc_workers()->total_workers(),
3701                                        conc_workers()->active_workers(),
3702                                        Threads::number_of_non_daemon_threads());
3703   conc_workers()->set_active_workers(num_workers);
3704 
3705   CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
3706 
3707   CMSConcMarkingTask tsk(this,
3708                          cms_space,
3709                          conc_workers(),
3710                          task_queues());
3711 
3712   // Since the actual number of workers we get may be different
3713   // from the number we requested above, do we need to do anything different
3714   // below? In particular, maybe we need to subclass the SequentialSubTasksDone
3715   // class?? XXX
3716   cms_space->initialize_sequential_subtasks_for_marking(num_workers);
3717 
3718   // Refs discovery is already non-atomic.
3719   assert(!ref_processor()->discovery_is_atomic(), "Should be non-atomic");
3720   assert(ref_processor()->discovery_is_mt(), "Discovery should be MT");
3721   conc_workers()->start_task(&tsk);
3722   while (tsk.yielded()) {
3723     tsk.coordinator_yield();
3724     conc_workers()->continue_task(&tsk);
3725   }
3726   // If the task was aborted, _restart_addr will be non-NULL
3727   assert(tsk.completed() || _restart_addr != NULL, "Inconsistency");
3728   while (_restart_addr != NULL) {
3729     // XXX For now we do not make use of ABORTED state and have not
3730     // yet implemented the right abort semantics (even in the original
3731     // single-threaded CMS case). That needs some more investigation
3732     // and is deferred for now; see CR# TBF. 07252005YSR. XXX
3733     assert(!CMSAbortSemantics || tsk.aborted(), "Inconsistency");
3734     // If _restart_addr is non-NULL, a marking stack overflow
3735     // occurred; we need to do a fresh marking iteration from the
3736     // indicated restart address.
3737     if (_foregroundGCIsActive) {
3738       // We may be running into repeated stack overflows, having
3739       // reached the limit of the stack size, while making very
3740       // slow forward progress. It may be best to bail out and
3741       // let the foreground collector do its job.
3742       // Clear _restart_addr, so that foreground GC
3743       // works from scratch. This avoids the headache of
3744       // a "rescan" which would otherwise be needed because
3745       // of the dirty mod union table & card table.
3746       _restart_addr = NULL;
3747       return false;
3748     }
3749     // Adjust the task to restart from _restart_addr
3750     tsk.reset(_restart_addr);
3751     cms_space->initialize_sequential_subtasks_for_marking(num_workers,
3752                   _restart_addr);
3753     _restart_addr = NULL;
3754     // Get the workers going again
3755     conc_workers()->start_task(&tsk);
3756     while (tsk.yielded()) {
3757       tsk.coordinator_yield();
3758       conc_workers()->continue_task(&tsk);
3759     }
3760   }
3761   assert(tsk.completed(), "Inconsistency");
3762   assert(tsk.result() == true, "Inconsistency");
3763   return true;
3764 }
3765 
3766 bool CMSCollector::do_marking_st() {
3767   ResourceMark rm;
3768   HandleMark   hm;
3769 
3770   // Temporarily make refs discovery single threaded (non-MT)
3771   ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false);
3772   MarkFromRootsClosure markFromRootsClosure(this, _span, &_markBitMap,
3773     &_markStack, CMSYield);
3774   // the last argument to iterate indicates whether the iteration
3775   // should be incremental with periodic yields.
3776   _markBitMap.iterate(&markFromRootsClosure);
3777   // If _restart_addr is non-NULL, a marking stack overflow
3778   // occurred; we need to do a fresh iteration from the
3779   // indicated restart address.
3780   while (_restart_addr != NULL) {
3781     if (_foregroundGCIsActive) {
3782       // We may be running into repeated stack overflows, having
3783       // reached the limit of the stack size, while making very
3784       // slow forward progress. It may be best to bail out and
3785       // let the foreground collector do its job.
3786       // Clear _restart_addr, so that foreground GC
3787       // works from scratch. This avoids the headache of
3788       // a "rescan" which would otherwise be needed because
3789       // of the dirty mod union table & card table.
3790       _restart_addr = NULL;
3791       return false;  // indicating failure to complete marking
3792     }
3793     // Deal with stack overflow:
3794     // we restart marking from _restart_addr
3795     HeapWord* ra = _restart_addr;
3796     markFromRootsClosure.reset(ra);
3797     _restart_addr = NULL;
3798     _markBitMap.iterate(&markFromRootsClosure, ra, _span.end());
3799   }
3800   return true;
3801 }
3802 
3803 void CMSCollector::preclean() {
3804   check_correct_thread_executing();
3805   assert(Thread::current()->is_ConcurrentGC_thread(), "Wrong thread");
3806   verify_work_stacks_empty();
3807   verify_overflow_empty();
3808   _abort_preclean = false;
3809   if (CMSPrecleaningEnabled) {
3810     if (!CMSEdenChunksRecordAlways) {
3811       _eden_chunk_index = 0;
3812     }
3813     size_t used = get_eden_used();
3814     size_t capacity = get_eden_capacity();
3815     // Don't start sampling unless we will get sufficiently
3816     // many samples.
3817     if (used < (capacity/(CMSScheduleRemarkSamplingRatio * 100)
3818                 * CMSScheduleRemarkEdenPenetration)) {
3819       _start_sampling = true;
3820     } else {
3821       _start_sampling = false;
3822     }
3823     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
3824     CMSPhaseAccounting pa(this, "preclean", _gc_tracer_cm->gc_id(), !PrintGCDetails);
3825     preclean_work(CMSPrecleanRefLists1, CMSPrecleanSurvivors1);
3826   }
3827   CMSTokenSync x(true); // is cms thread
3828   if (CMSPrecleaningEnabled) {
3829     sample_eden();
3830     _collectorState = AbortablePreclean;
3831   } else {
3832     _collectorState = FinalMarking;
3833   }
3834   verify_work_stacks_empty();
3835   verify_overflow_empty();
3836 }
3837 
3838 // Try and schedule the remark such that young gen
3839 // occupancy is CMSScheduleRemarkEdenPenetration %.
3840 void CMSCollector::abortable_preclean() {
3841   check_correct_thread_executing();
3842   assert(CMSPrecleaningEnabled,  "Inconsistent control state");
3843   assert(_collectorState == AbortablePreclean, "Inconsistent control state");
3844 
3845   // If Eden's current occupancy is below this threshold,
3846   // immediately schedule the remark; else preclean
3847   // past the next scavenge in an effort to
3848   // schedule the pause as described above. By choosing
3849   // CMSScheduleRemarkEdenSizeThreshold >= max eden size
3850   // we will never do an actual abortable preclean cycle.
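       // A hedged, illustrative example of this scheduling decision, assuming
       // the usual product defaults of CMSScheduleRemarkEdenSizeThreshold = 2M
       // and CMSScheduleRemarkEdenPenetration = 50: with a 512M eden that is
       // currently 100M full, used (100M) exceeds the 2M threshold, so we enter
       // the loop below and keep precleaning until sample_eden() observes eden
       // occupancy above 512M * 50 / 100 = 256M and sets _abort_preclean, at
       // which point we stop precleaning and schedule the remark.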
3851   if (get_eden_used() > CMSScheduleRemarkEdenSizeThreshold) {
3852     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
3853     CMSPhaseAccounting pa(this, "abortable-preclean", _gc_tracer_cm->gc_id(), !PrintGCDetails);
3854     // We need more smarts in the abortable preclean
3855     // loop below to deal with cases where allocation
3856     // in young gen is very very slow, and our precleaning
3857     // is running a losing race against a horde of
3858     // mutators intent on flooding us with CMS updates
3859     // (dirty cards).
3860     // One, admittedly dumb, strategy is to give up
3861     // after a certain number of abortable precleaning loops
3862     // or after a certain maximum time. We want to make
3863     // this smarter in the next iteration.
3864     // XXX FIX ME!!! YSR
3865     size_t loops = 0, workdone = 0, cumworkdone = 0, waited = 0;
3866     while (!(should_abort_preclean() ||
3867              ConcurrentMarkSweepThread::should_terminate())) {
3868       workdone = preclean_work(CMSPrecleanRefLists2, CMSPrecleanSurvivors2);
3869       cumworkdone += workdone;
3870       loops++;
3871       // Voluntarily terminate abortable preclean phase if we have
3872       // been at it for too long.
3873       if ((CMSMaxAbortablePrecleanLoops != 0) &&
3874           loops >= CMSMaxAbortablePrecleanLoops) {
3875         if (PrintGCDetails) {
3876           gclog_or_tty->print(" CMS: abort preclean due to loops ");
3877         }
3878         break;
3879       }
3880       if (pa.wallclock_millis() > CMSMaxAbortablePrecleanTime) {
3881         if (PrintGCDetails) {
3882           gclog_or_tty->print(" CMS: abort preclean due to time ");
3883         }
3884         break;
3885       }
3886       // If we are doing little work each iteration, we should
3887       // take a short break.
3888       if (workdone < CMSAbortablePrecleanMinWorkPerIteration) {
3889         // Sleep for some time, waiting for work to accumulate
3890         stopTimer();
3891         cmsThread()->wait_on_cms_lock(CMSAbortablePrecleanWaitMillis);
3892         startTimer();
3893         waited++;
3894       }
3895     }
3896     if (PrintCMSStatistics > 0) {
3897       gclog_or_tty->print(" [" SIZE_FORMAT " iterations, " SIZE_FORMAT " waits, " SIZE_FORMAT " cards] ",
3898                           loops, waited, cumworkdone);
3899     }
3900   }
3901   CMSTokenSync x(true); // is cms thread
3902   if (_collectorState != Idling) {
3903     assert(_collectorState == AbortablePreclean,
3904            "Spontaneous state transition?");
3905     _collectorState = FinalMarking;
3906   } // Else, a foreground collection completed this CMS cycle.
3907   return;
3908 }
3909 
3910 // Respond to an Eden sampling opportunity
3911 void CMSCollector::sample_eden() {
3912   // Make sure a young gc cannot sneak in between our
3913   // reading and recording of a sample.
3914   assert(Thread::current()->is_ConcurrentGC_thread(),
3915          "Only the cms thread may collect Eden samples");
3916   assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
3917          "Should collect samples while holding CMS token");
3918   if (!_start_sampling) {
3919     return;
3920   }
3921   // When CMSEdenChunksRecordAlways is true, the eden chunk array
3922   // is populated by the young generation.
3923   if (_eden_chunk_array != NULL && !CMSEdenChunksRecordAlways) {
3924     if (_eden_chunk_index < _eden_chunk_capacity) {
3925       _eden_chunk_array[_eden_chunk_index] = *_top_addr;   // take sample
3926       assert(_eden_chunk_array[_eden_chunk_index] <= *_end_addr,
3927              "Unexpected state of Eden");
3928       // We'd like to check that what we just sampled is an oop-start address;
3929       // however, we cannot do that here since the object may not yet have been
3930       // initialized. So we'll instead do the check when we _use_ this sample
3931       // later.
3932       if (_eden_chunk_index == 0 ||
3933           (pointer_delta(_eden_chunk_array[_eden_chunk_index],
3934                          _eden_chunk_array[_eden_chunk_index-1])
3935            >= CMSSamplingGrain)) {
3936         _eden_chunk_index++;  // commit sample
3937       }
3938     }
3939   }
3940   if ((_collectorState == AbortablePreclean) && !_abort_preclean) {
3941     size_t used = get_eden_used();
3942     size_t capacity = get_eden_capacity();
3943     assert(used <= capacity, "Unexpected state of Eden");
3944     if (used >  (capacity/100 * CMSScheduleRemarkEdenPenetration)) {
3945       _abort_preclean = true;
3946     }
3947   }
3948 }
3949 
3950 
3951 size_t CMSCollector::preclean_work(bool clean_refs, bool clean_survivor) {
3952   assert(_collectorState == Precleaning ||
3953          _collectorState == AbortablePreclean, "incorrect state");
3954   ResourceMark rm;
3955   HandleMark   hm;
3956 
3957   // Precleaning is currently not MT but the reference processor
3958   // may be set for MT.  Disable it temporarily here.
3959   ReferenceProcessor* rp = ref_processor();
3960   ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
3961 
3962   // Do one pass of scrubbing the discovered reference lists
3963   // to remove any reference objects with strongly-reachable
3964   // referents.
3965   if (clean_refs) {
3966     CMSPrecleanRefsYieldClosure yield_cl(this);
3967     assert(rp->span().equals(_span), "Spans should be equal");
3968     CMSKeepAliveClosure keep_alive(this, _span, &_markBitMap,
3969                                    &_markStack, true /* preclean */);
3970     CMSDrainMarkingStackClosure complete_trace(this,
3971                                    _span, &_markBitMap, &_markStack,
3972                                    &keep_alive, true /* preclean */);
3973 
3974     // We don't want this step to interfere with a young
3975     // collection because we don't want to take CPU
3976     // or memory bandwidth away from the young GC threads
3977     // (which may be as many as there are CPUs).
3978     // Note that we don't need to protect ourselves from
3979     // interference with mutators because they can't
3980     // manipulate the discovered reference lists nor affect
3981     // the computed reachability of the referents, the
3982     // only properties manipulated by the precleaning
3983     // of these reference lists.
3984     stopTimer();
3985     CMSTokenSyncWithLocks x(true /* is cms thread */,
3986                             bitMapLock());
3987     startTimer();
3988     sample_eden();
3989 
3990     // The following will yield to allow foreground
3991     // collection to proceed promptly. XXX YSR:
3992     // The code in this method may need further
3993     // tweaking for better performance and some restructuring
3994     // for cleaner interfaces.
3995     GCTimer *gc_timer = NULL; // Currently not tracing concurrent phases
3996     rp->preclean_discovered_references(
3997           rp->is_alive_non_header(), &keep_alive, &complete_trace, &yield_cl,
3998           gc_timer, _gc_tracer_cm->gc_id());
3999   }
4000 
4001   if (clean_survivor) {  // preclean the active survivor space(s)
4002     PushAndMarkClosure pam_cl(this, _span, ref_processor(),
4003                              &_markBitMap, &_modUnionTable,
4004                              &_markStack, true /* precleaning phase */);
4005     stopTimer();
4006     CMSTokenSyncWithLocks ts(true /* is cms thread */,
4007                              bitMapLock());
4008     startTimer();
4009     unsigned int before_count =
4010       GenCollectedHeap::heap()->total_collections();
4011     SurvivorSpacePrecleanClosure
4012       sss_cl(this, _span, &_markBitMap, &_markStack,
4013              &pam_cl, before_count, CMSYield);
4014     _young_gen->from()->object_iterate_careful(&sss_cl);
4015     _young_gen->to()->object_iterate_careful(&sss_cl);
4016   }
4017   MarkRefsIntoAndScanClosure
4018     mrias_cl(_span, ref_processor(), &_markBitMap, &_modUnionTable,
4019              &_markStack, this, CMSYield,
4020              true /* precleaning phase */);
4021   // CAUTION: The following closure has persistent state that may need to
4022   // be reset upon a decrease in the sequence of addresses it
4023   // processes.
4024   ScanMarkedObjectsAgainCarefullyClosure
4025     smoac_cl(this, _span,
4026       &_markBitMap, &_markStack, &mrias_cl, CMSYield);
4027 
4028   // Preclean dirty cards in ModUnionTable and CardTable using
4029   // appropriate convergence criterion;
4030   // repeat CMSPrecleanIter times unless we find that
4031   // we are losing.
4032   assert(CMSPrecleanIter < 10, "CMSPrecleanIter is too large");
4033   assert(CMSPrecleanNumerator < CMSPrecleanDenominator,
4034          "Bad convergence multiplier");
4035   assert(CMSPrecleanThreshold >= 100,
4036          "Unreasonably low CMSPrecleanThreshold");
4037 
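       // A hedged worked example of the convergence test in the loop below,
       // assuming the defaults CMSPrecleanNumerator = 2 and
       // CMSPrecleanDenominator = 3: if the previous pass cleaned
       // lastNumCards = 3000 cards and the current pass cleaned
       // curNumCards = 2500, then 2500 * 3 = 7500 > 3000 * 2 = 6000, i.e.
       // this pass cleaned more than 2/3 of the previous pass's work, so
       // mutators are re-dirtying cards nearly as fast as we preclean them
       // and we stop iterating.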
4038   size_t numIter, cumNumCards, lastNumCards, curNumCards;
4039   for (numIter = 0, cumNumCards = lastNumCards = curNumCards = 0;
4040        numIter < CMSPrecleanIter;
4041        numIter++, lastNumCards = curNumCards, cumNumCards += curNumCards) {
4042     curNumCards  = preclean_mod_union_table(_cmsGen, &smoac_cl);
4043     if (Verbose && PrintGCDetails) {
4044       gclog_or_tty->print(" (modUnionTable: " SIZE_FORMAT " cards)", curNumCards);
4045     }
4046     // Either there are very few dirty cards, so re-mark
4047     // pause will be small anyway, or our pre-cleaning isn't
4048     // that much faster than the rate at which cards are being
4049     // dirtied, so we might as well stop and re-mark since
4050     // precleaning won't improve our re-mark time by much.
4051     if (curNumCards <= CMSPrecleanThreshold ||
4052         (numIter > 0 &&
4053          (curNumCards * CMSPrecleanDenominator >
4054          lastNumCards * CMSPrecleanNumerator))) {
4055       numIter++;
4056       cumNumCards += curNumCards;
4057       break;
4058     }
4059   }
4060 
4061   preclean_klasses(&mrias_cl, _cmsGen->freelistLock());
4062 
4063   curNumCards = preclean_card_table(_cmsGen, &smoac_cl);
4064   cumNumCards += curNumCards;
4065   if (PrintGCDetails && PrintCMSStatistics != 0) {
4066     gclog_or_tty->print_cr(" (cardTable: " SIZE_FORMAT " cards, re-scanned " SIZE_FORMAT " cards, " SIZE_FORMAT " iterations)",
4067                   curNumCards, cumNumCards, numIter);
4068   }
4069   return cumNumCards;   // as a measure of useful work done
4070 }
4071 
4072 // PRECLEANING NOTES:
4073 // Precleaning involves:
4074 // . reading the bits of the modUnionTable and clearing the set bits.
4075 // . For the cards corresponding to the set bits, we scan the
4076 //   objects on those cards. This means we need the free_list_lock
4077 //   so that we can safely iterate over the CMS space when scanning
4078 //   for oops.
4079 // . When we scan the objects, we'll be both reading and setting
4080 //   marks in the marking bit map, so we'll need the marking bit map.
4081 // . For protecting _collector_state transitions, we take the CGC_lock.
4082 //   Note that any races in the reading of card table entries by the
4083 //   CMS thread on the one hand and the clearing of those entries by the
4084 //   VM thread or the setting of those entries by the mutator threads on the
4085 //   other are quite benign. However, for efficiency it makes sense to keep
4086 //   the VM thread from racing with the CMS thread while the latter is
4087 //   transferring dirty card info to the modUnionTable. We therefore also use the
4088 //   CGC_lock to protect the reading of the card table and the mod union
4089 //   table by the CMS thread.
4090 // . We run concurrently with mutator updates, so scanning
4091 //   needs to be done carefully  -- we should not try to scan
4092 //   potentially uninitialized objects.
4093 //
4094 // Locking strategy: While holding the CGC_lock, we scan over and
4095 // reset a maximal dirty range of the mod union / card tables, then lock
4096 // the free_list_lock and bitmap lock to do a full marking, then
4097 // release these locks; and repeat the cycle. This allows for a
4098 // certain amount of fairness in the sharing of these locks between
4099 // the CMS collector on the one hand, and the VM thread and the
4100 // mutators on the other.
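     //
     // A hedged pseudo-code sketch of that cadence (the two helper names in
     // the sketch are illustrative only; the real loops are in the two
     // methods that follow):
     //
     //   while (there may be more dirty cards) {
     //     { CMSTokenSync ts(true);                      // CGC_lock only
     //       dirty = get_and_clear_next_dirty_range();   // MUT or card table
     //     }
     //     if (dirty.is_empty()) break;
     //     { CMSTokenSyncWithLocks ts(true, freelistLock(), bitMapLock());
     //       scan_objects_covering(dirty);   // may redirty & abort early
     //     }
     //   }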
4101 
4102 // NOTE: preclean_mod_union_table() and preclean_card_table()
4103 // further below are largely identical; if you need to modify
4104 // one of these methods, please check the other method too.
4105 
4106 size_t CMSCollector::preclean_mod_union_table(
4107   ConcurrentMarkSweepGeneration* gen,
4108   ScanMarkedObjectsAgainCarefullyClosure* cl) {
4109   verify_work_stacks_empty();
4110   verify_overflow_empty();
4111 
4112   // strategy: starting with the first card, accumulate contiguous
4113   // ranges of dirty cards; clear these cards, then scan the region
4114   // covered by these cards.
4115 
4116   // Since all of the MUT is committed ahead, we can just use
4117   // that, in case the generations expand while we are precleaning.
4118   // It might also be fine to just use the committed part of the
4119   // generation, but we might potentially miss cards when the
4120   // generation is rapidly expanding while we are in the midst
4121   // of precleaning.
4122   HeapWord* startAddr = gen->reserved().start();
4123   HeapWord* endAddr   = gen->reserved().end();
4124 
4125   cl->setFreelistLock(gen->freelistLock());   // needed for yielding
4126 
4127   size_t numDirtyCards, cumNumDirtyCards;
4128   HeapWord *nextAddr, *lastAddr;
4129   for (cumNumDirtyCards = numDirtyCards = 0,
4130        nextAddr = lastAddr = startAddr;
4131        nextAddr < endAddr;
4132        nextAddr = lastAddr, cumNumDirtyCards += numDirtyCards) {
4133 
4134     ResourceMark rm;
4135     HandleMark   hm;
4136 
4137     MemRegion dirtyRegion;
4138     {
4139       stopTimer();
4140       // Potential yield point
4141       CMSTokenSync ts(true);
4142       startTimer();
4143       sample_eden();
4144       // Get dirty region starting at nextAddr (inclusive),
4145       // simultaneously clearing it.
4146       dirtyRegion =
4147         _modUnionTable.getAndClearMarkedRegion(nextAddr, endAddr);
4148       assert(dirtyRegion.start() >= nextAddr,
4149              "returned region inconsistent?");
4150     }
4151     // Remember where the next search should begin.
4152     // The returned region (if non-empty) is a right open interval,
4153     // so lastAddr is obtained from the right end of that
4154     // interval.
4155     lastAddr = dirtyRegion.end();
4156     // Should do something more transparent and less hacky XXX
4157     numDirtyCards =
4158       _modUnionTable.heapWordDiffToOffsetDiff(dirtyRegion.word_size());
4159 
4160     // We'll scan the cards in the dirty region (with periodic
4161     // yields for foreground GC as needed).
4162     if (!dirtyRegion.is_empty()) {
4163       assert(numDirtyCards > 0, "consistency check");
4164       HeapWord* stop_point = NULL;
4165       stopTimer();
4166       // Potential yield point
4167       CMSTokenSyncWithLocks ts(true, gen->freelistLock(),
4168                                bitMapLock());
4169       startTimer();
4170       {
4171         verify_work_stacks_empty();
4172         verify_overflow_empty();
4173         sample_eden();
4174         stop_point =
4175           gen->cmsSpace()->object_iterate_careful_m(dirtyRegion, cl);
4176       }
4177       if (stop_point != NULL) {
4178         // The careful iteration stopped early either because it found an
4179         // uninitialized object, or because we were in the midst of an
4180         // "abortable preclean", which should now be aborted. Redirty
4181         // the bits corresponding to the partially-scanned or unscanned
4182         // cards. We'll either restart at the next block boundary or
4183         // abort the preclean.
4184         assert((_collectorState == AbortablePreclean && should_abort_preclean()),
4185                "Should only be AbortablePreclean.");
4186         _modUnionTable.mark_range(MemRegion(stop_point, dirtyRegion.end()));
4187         if (should_abort_preclean()) {
4188           break; // out of preclean loop
4189         } else {
4190           // Compute the next address at which preclean should pick up;
4191           // might need bitMapLock in order to read P-bits.
4192           lastAddr = next_card_start_after_block(stop_point);
4193         }
4194       }
4195     } else {
4196       assert(lastAddr == endAddr, "consistency check");
4197       assert(numDirtyCards == 0, "consistency check");
4198       break;
4199     }
4200   }
4201   verify_work_stacks_empty();
4202   verify_overflow_empty();
4203   return cumNumDirtyCards;
4204 }
4205 
4206 // NOTE: preclean_mod_union_table() above and preclean_card_table()
4207 // below are largely identical; if you need to modify
4208 // one of these methods, please check the other method too.
4209 
4210 size_t CMSCollector::preclean_card_table(ConcurrentMarkSweepGeneration* gen,
4211   ScanMarkedObjectsAgainCarefullyClosure* cl) {
4212   // strategy: it's similar to preclean_mod_union_table() above, in that
4213   // we accumulate contiguous ranges of dirty cards, mark these cards
4214   // precleaned, then scan the region covered by these cards.
4215   HeapWord* endAddr   = (HeapWord*)(gen->_virtual_space.high());
4216   HeapWord* startAddr = (HeapWord*)(gen->_virtual_space.low());
4217 
4218   cl->setFreelistLock(gen->freelistLock());   // needed for yielding
4219 
4220   size_t numDirtyCards, cumNumDirtyCards;
4221   HeapWord *lastAddr, *nextAddr;
4222 
4223   for (cumNumDirtyCards = numDirtyCards = 0,
4224        nextAddr = lastAddr = startAddr;
4225        nextAddr < endAddr;
4226        nextAddr = lastAddr, cumNumDirtyCards += numDirtyCards) {
4227 
4228     ResourceMark rm;
4229     HandleMark   hm;
4230 
4231     MemRegion dirtyRegion;
4232     {
4233       // See comments in "Precleaning notes" above on why we
4234       // do this locking. XXX Could the locking overheads be
4235       // too high when dirty cards are sparse? [I don't think so.]
4236       stopTimer();
4237       CMSTokenSync x(true); // is cms thread
4238       startTimer();
4239       sample_eden();
4240       // Get and clear dirty region from card table
4241       dirtyRegion = _ct->ct_bs()->dirty_card_range_after_reset(
4242                                     MemRegion(nextAddr, endAddr),
4243                                     true,
4244                                     CardTableModRefBS::precleaned_card_val());
4245 
4246       assert(dirtyRegion.start() >= nextAddr,
4247              "returned region inconsistent?");
4248     }
4249     lastAddr = dirtyRegion.end();
4250     numDirtyCards =
4251       dirtyRegion.word_size()/CardTableModRefBS::card_size_in_words;
4252 
4253     if (!dirtyRegion.is_empty()) {
4254       stopTimer();
4255       CMSTokenSyncWithLocks ts(true, gen->freelistLock(), bitMapLock());
4256       startTimer();
4257       sample_eden();
4258       verify_work_stacks_empty();
4259       verify_overflow_empty();
4260       HeapWord* stop_point =
4261         gen->cmsSpace()->object_iterate_careful_m(dirtyRegion, cl);
4262       if (stop_point != NULL) {
4263         assert((_collectorState == AbortablePreclean && should_abort_preclean()),
4264                "Should only be AbortablePreclean.");
4265         _ct->ct_bs()->invalidate(MemRegion(stop_point, dirtyRegion.end()));
4266         if (should_abort_preclean()) {
4267           break; // out of preclean loop
4268         } else {
4269           // Compute the next address at which preclean should pick up.
4270           lastAddr = next_card_start_after_block(stop_point);
4271         }
4272       }
4273     } else {
4274       break;
4275     }
4276   }
4277   verify_work_stacks_empty();
4278   verify_overflow_empty();
4279   return cumNumDirtyCards;
4280 }
4281 
4282 class PrecleanKlassClosure : public KlassClosure {
4283   KlassToOopClosure _cm_klass_closure;
4284  public:
4285   PrecleanKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
4286   void do_klass(Klass* k) {
4287     if (k->has_accumulated_modified_oops()) {
4288       k->clear_accumulated_modified_oops();
4289 
4290       _cm_klass_closure.do_klass(k);
4291     }
4292   }
4293 };
4294 
4295 // The freelist lock is needed to prevent asserts; is it really needed?
4296 void CMSCollector::preclean_klasses(MarkRefsIntoAndScanClosure* cl, Mutex* freelistLock) {
4297 
4298   cl->set_freelistLock(freelistLock);
4299 
4300   CMSTokenSyncWithLocks ts(true, freelistLock, bitMapLock());
4301 
4302   // SSS: Add equivalent to ScanMarkedObjectsAgainCarefullyClosure::do_yield_check and should_abort_preclean?
4303   // SSS: We should probably check if precleaning should be aborted, at suitable intervals?
4304   PrecleanKlassClosure preclean_klass_closure(cl);
4305   ClassLoaderDataGraph::classes_do(&preclean_klass_closure);
4306 
4307   verify_work_stacks_empty();
4308   verify_overflow_empty();
4309 }
4310 
4311 void CMSCollector::checkpointRootsFinal() {
4312   assert(_collectorState == FinalMarking, "incorrect state transition?");
4313   check_correct_thread_executing();
4314   // world is stopped at this checkpoint
4315   assert(SafepointSynchronize::is_at_safepoint(),
4316          "world should be stopped");
4317   TraceCMSMemoryManagerStats tms(_collectorState,GenCollectedHeap::heap()->gc_cause());
4318 
4319   verify_work_stacks_empty();
4320   verify_overflow_empty();
4321 
4322   SpecializationStats::clear();
4323   if (PrintGCDetails) {
4324     gclog_or_tty->print("[YG occupancy: "SIZE_FORMAT" K ("SIZE_FORMAT" K)]",
4325                         _young_gen->used() / K,
4326                         _young_gen->capacity() / K);
4327   }
4328   {
4329     if (CMSScavengeBeforeRemark) {
4330       GenCollectedHeap* gch = GenCollectedHeap::heap();
4331       // Temporarily set the flag to false; GCH->do_collection expects
4332       // it to be false and will set it to true itself.
4333       FlagSetting fl(gch->_is_gc_active, false);
4334       NOT_PRODUCT(GCTraceTime t("Scavenge-Before-Remark",
4335         PrintGCDetails && Verbose, true, _gc_timer_cm, _gc_tracer_cm->gc_id());)
4336       int level = _cmsGen->level() - 1;
4337       if (level >= 0) {
4338         gch->do_collection(true,        // full (i.e. force, see below)
4339                            false,       // !clear_all_soft_refs
4340                            0,           // size
4341                            false,       // is_tlab
4342                            level        // max_level
4343                           );
4344       }
4345     }
4346     FreelistLocker x(this);
4347     MutexLockerEx y(bitMapLock(),
4348                     Mutex::_no_safepoint_check_flag);
4349     checkpointRootsFinalWork();
4350   }
4351   verify_work_stacks_empty();
4352   verify_overflow_empty();
4353   SpecializationStats::print();
4354 }
4355 
4356 void CMSCollector::checkpointRootsFinalWork() {
4357   NOT_PRODUCT(GCTraceTime tr("checkpointRootsFinalWork", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());)
4358 
4359   assert(haveFreelistLocks(), "must have free list locks");
4360   assert_lock_strong(bitMapLock());
4361 
4362   ResourceMark rm;
4363   HandleMark   hm;
4364 
4365   GenCollectedHeap* gch = GenCollectedHeap::heap();
4366 
4367   if (should_unload_classes()) {
4368     CodeCache::gc_prologue();
4369   }
4370   assert(haveFreelistLocks(), "must have free list locks");
4371   assert_lock_strong(bitMapLock());
4372 
4373   // We might assume that we need not fill TLAB's when
4374   // CMSScavengeBeforeRemark is set, because we may have just done
4375   // a scavenge which would have filled all TLAB's -- and besides
4376   // Eden would be empty. This however may not always be the case --
4377   // for instance although we asked for a scavenge, it may not have
4378   // happened because of a JNI critical section. We probably need
4379   // a policy for deciding whether we can in that case wait until
4380   // the critical section releases and then do the remark following
4381   // the scavenge, and skip it here. In the absence of that policy,
4382   // or of an indication of whether the scavenge did indeed occur,
4383   // we cannot rely on TLAB's having been filled and must do
4384   // so here just in case a scavenge did not happen.
4385   gch->ensure_parsability(false);  // fill TLAB's, but no need to retire them
4386   // Update the saved marks which may affect the root scans.
4387   gch->save_marks();
4388 
4389   if (CMSPrintEdenSurvivorChunks) {
4390     print_eden_and_survivor_chunk_arrays();
4391   }
4392 
4393   {
4394     COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;)
4395 
4396     // Note on the role of the mod union table:
4397     // Since the marker in "markFromRoots" marks concurrently with
4398     // mutators, it is possible for some reachable objects not to have been
4399     // scanned. For instance, an only reference to an object A was
4400     // placed in object B after the marker scanned B. Unless B is rescanned,
4401     // A would be collected. Such updates to references in marked objects
4402     // are detected via the mod union table which is the set of all cards
4403     // dirtied since the first checkpoint in this GC cycle and prior to
4404     // the most recent young generation GC, minus those cleaned up by the
4405     // concurrent precleaning.
4406     if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
4407       GCTraceTime t("Rescan (parallel) ", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
4408       do_remark_parallel();
4409     } else {
4410       GCTraceTime t("Rescan (non-parallel) ", PrintGCDetails, false,
4411                   _gc_timer_cm, _gc_tracer_cm->gc_id());
4412       do_remark_non_parallel();
4413     }
4414   }
4415   verify_work_stacks_empty();
4416   verify_overflow_empty();
4417 
4418   {
4419     NOT_PRODUCT(GCTraceTime ts("refProcessingWork", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());)
4420     refProcessingWork();
4421   }
4422   verify_work_stacks_empty();
4423   verify_overflow_empty();
4424 
4425   if (should_unload_classes()) {
4426     CodeCache::gc_epilogue();
4427   }
4428   JvmtiExport::gc_epilogue();
4429 
4430   // If we encountered any (marking stack / work queue) overflow
4431   // events during the current CMS cycle, take appropriate
4432   // remedial measures, where possible, so as to try and avoid
4433   // recurrence of that condition.
4434   assert(_markStack.isEmpty(), "No grey objects");
4435   size_t ser_ovflw = _ser_pmc_remark_ovflw + _ser_pmc_preclean_ovflw +
4436                      _ser_kac_ovflw        + _ser_kac_preclean_ovflw;
4437   if (ser_ovflw > 0) {
4438     if (PrintCMSStatistics != 0) {
4439       gclog_or_tty->print_cr("Marking stack overflow (benign) "
4440         "(pmc_pc="SIZE_FORMAT", pmc_rm="SIZE_FORMAT", kac="SIZE_FORMAT
4441         ", kac_preclean="SIZE_FORMAT")",
4442         _ser_pmc_preclean_ovflw, _ser_pmc_remark_ovflw,
4443         _ser_kac_ovflw, _ser_kac_preclean_ovflw);
4444     }
4445     _markStack.expand();
4446     _ser_pmc_remark_ovflw = 0;
4447     _ser_pmc_preclean_ovflw = 0;
4448     _ser_kac_preclean_ovflw = 0;
4449     _ser_kac_ovflw = 0;
4450   }
4451   if (_par_pmc_remark_ovflw > 0 || _par_kac_ovflw > 0) {
4452     if (PrintCMSStatistics != 0) {
4453       gclog_or_tty->print_cr("Work queue overflow (benign) "
4454         "(pmc_rm="SIZE_FORMAT", kac="SIZE_FORMAT")",
4455         _par_pmc_remark_ovflw, _par_kac_ovflw);
4456     }
4457     _par_pmc_remark_ovflw = 0;
4458     _par_kac_ovflw = 0;
4459   }
4460   if (PrintCMSStatistics != 0) {
4461      if (_markStack._hit_limit > 0) {
4462        gclog_or_tty->print_cr(" (benign) Hit max stack size limit ("SIZE_FORMAT")",
4463                               _markStack._hit_limit);
4464      }
4465      if (_markStack._failed_double > 0) {
4466        gclog_or_tty->print_cr(" (benign) Failed stack doubling ("SIZE_FORMAT"),"
4467                               " current capacity "SIZE_FORMAT,
4468                               _markStack._failed_double,
4469                               _markStack.capacity());
4470      }
4471   }
4472   _markStack._hit_limit = 0;
4473   _markStack._failed_double = 0;
4474 
4475   if ((VerifyAfterGC || VerifyDuringGC) &&
4476       GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
4477     verify_after_remark();
4478   }
4479 
4480   _gc_tracer_cm->report_object_count_after_gc(&_is_alive_closure);
4481 
4482   // Change under the freelistLocks.
4483   _collectorState = Sweeping;
4484   // Call isAllClear() under bitMapLock
4485   assert(_modUnionTable.isAllClear(),
4486       "Should be clear by end of the final marking");
4487   assert(_ct->klass_rem_set()->mod_union_is_clear(),
4488       "Should be clear by end of the final marking");
4489 }
4490 
4491 void CMSParInitialMarkTask::work(uint worker_id) {
4492   elapsedTimer _timer;
4493   ResourceMark rm;
4494   HandleMark   hm;
4495 
4496   // ---------- scan from roots --------------
4497   _timer.start();
4498   GenCollectedHeap* gch = GenCollectedHeap::heap();
4499   Par_MarkRefsIntoClosure par_mri_cl(_collector->_span, &(_collector->_markBitMap));
4500 
4501   // ---------- young gen roots --------------
4502   {
4503     work_on_young_gen_roots(worker_id, &par_mri_cl);
4504     _timer.stop();
4505     if (PrintCMSStatistics != 0) {
4506       gclog_or_tty->print_cr(
4507         "Finished young gen initial mark scan work in %dth thread: %3.3f sec",
4508         worker_id, _timer.seconds());
4509     }
4510   }
4511 
4512   // ---------- remaining roots --------------
4513   _timer.reset();
4514   _timer.start();
4515 
4516   CLDToOopClosure cld_closure(&par_mri_cl, true);
4517 
4518   gch->gen_process_roots(_collector->_cmsGen->level(),
4519                          false,     // yg was scanned above
4520                          false,     // this is parallel code
4521                          SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
4522                          _collector->should_unload_classes(),
4523                          &par_mri_cl,
4524                          NULL,
4525                          &cld_closure);
4526   assert(_collector->should_unload_classes()
4527          || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache),
4528          "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
4529   _timer.stop();
4530   if (PrintCMSStatistics != 0) {
4531     gclog_or_tty->print_cr(
4532       "Finished remaining root initial mark scan work in %dth thread: %3.3f sec",
4533       worker_id, _timer.seconds());
4534   }
4535 }
4536 
4537 // Parallel remark task
4538 class CMSParRemarkTask: public CMSParMarkTask {
4539   CompactibleFreeListSpace* _cms_space;
4540 
4541   // The per-thread work queues, available here for stealing.
4542   OopTaskQueueSet*       _task_queues;
4543   ParallelTaskTerminator _term;
4544 
4545  public:
4546   // A value of 0 passed to n_workers will cause the number of
4547   // workers to be taken from the active workers in the work gang.
4548   CMSParRemarkTask(CMSCollector* collector,
4549                    CompactibleFreeListSpace* cms_space,
4550                    int n_workers, FlexibleWorkGang* workers,
4551                    OopTaskQueueSet* task_queues):
4552     CMSParMarkTask("Rescan roots and grey objects in parallel",
4553                    collector, n_workers),
4554     _cms_space(cms_space),
4555     _task_queues(task_queues),
4556     _term(n_workers, task_queues) { }
4557 
4558   OopTaskQueueSet* task_queues() { return _task_queues; }
4559 
4560   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
4561 
4562   ParallelTaskTerminator* terminator() { return &_term; }
4563   int n_workers() { return _n_workers; }
4564 
4565   void work(uint worker_id);
4566 
4567  private:
4568   // ... of  dirty cards in old space
4569   void do_dirty_card_rescan_tasks(CompactibleFreeListSpace* sp, int i,
4570                                   Par_MarkRefsIntoAndScanClosure* cl);
4571 
4572   // ... work stealing for the above
4573   void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed);
4574 };
4575 
4576 class RemarkKlassClosure : public KlassClosure {
4577   KlassToOopClosure _cm_klass_closure;
4578  public:
4579   RemarkKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
4580   void do_klass(Klass* k) {
4581     // Check if we have modified any oops in the Klass during the concurrent marking.
4582     if (k->has_accumulated_modified_oops()) {
4583       k->clear_accumulated_modified_oops();
4584 
4585       // We could have transferred the current modified marks to the accumulated marks,
4586       // like we do with the Card Table to Mod Union Table. But it's not really necessary.
4587     } else if (k->has_modified_oops()) {
4588       // Don't clear anything, this info is needed by the next young collection.
4589     } else {
4590       // No modified oops in the Klass.
4591       return;
4592     }
4593 
4594     // The klass has modified fields, need to scan the klass.
4595     _cm_klass_closure.do_klass(k);
4596   }
4597 };
4598 
4599 void CMSParMarkTask::work_on_young_gen_roots(uint worker_id, OopsInGenClosure* cl) {
4600   ParNewGeneration* young_gen = _collector->_young_gen;
4601   ContiguousSpace* eden_space = young_gen->eden();
4602   ContiguousSpace* from_space = young_gen->from();
4603   ContiguousSpace* to_space   = young_gen->to();
4604 
4605   HeapWord** eca = _collector->_eden_chunk_array;
4606   size_t     ect = _collector->_eden_chunk_index;
4607   HeapWord** sca = _collector->_survivor_chunk_array;
4608   size_t     sct = _collector->_survivor_chunk_index;
4609 
4610   assert(ect <= _collector->_eden_chunk_capacity, "out of bounds");
4611   assert(sct <= _collector->_survivor_chunk_capacity, "out of bounds");
4612 
4613   do_young_space_rescan(worker_id, cl, to_space, NULL, 0);
4614   do_young_space_rescan(worker_id, cl, from_space, sca, sct);
4615   do_young_space_rescan(worker_id, cl, eden_space, eca, ect);
4616 }
4617 
4618 // work_queue(i) is passed to the closure
4619 // Par_MarkRefsIntoAndScanClosure.  The "i" parameter
4620 // also is passed to do_dirty_card_rescan_tasks() and to
4621 // do_work_steal() to select the i-th task_queue.
4622 
4623 void CMSParRemarkTask::work(uint worker_id) {
4624   elapsedTimer _timer;
4625   ResourceMark rm;
4626   HandleMark   hm;
4627 
4628   // ---------- rescan from roots --------------
4629   _timer.start();
4630   GenCollectedHeap* gch = GenCollectedHeap::heap();
4631   Par_MarkRefsIntoAndScanClosure par_mrias_cl(_collector,
4632     _collector->_span, _collector->ref_processor(),
4633     &(_collector->_markBitMap),
4634     work_queue(worker_id));
4635 
4636   // Rescan young gen roots first since these are likely
4637   // coarsely partitioned and may, on that account, constitute
4638   // the critical path; thus, it's best to start off that
4639   // work first.
4640   // ---------- young gen roots --------------
4641   {
4642     work_on_young_gen_roots(worker_id, &par_mrias_cl);
4643     _timer.stop();
4644     if (PrintCMSStatistics != 0) {
4645       gclog_or_tty->print_cr(
4646         "Finished young gen rescan work in %dth thread: %3.3f sec",
4647         worker_id, _timer.seconds());
4648     }
4649   }
4650 
4651   // ---------- remaining roots --------------
4652   _timer.reset();
4653   _timer.start();
4654   gch->gen_process_roots(_collector->_cmsGen->level(),
4655                          false,     // yg was scanned above
4656                          false,     // this is parallel code
4657                          SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
4658                          _collector->should_unload_classes(),
4659                          &par_mrias_cl,
4660                          NULL,
4661                          NULL);     // The dirty klasses will be handled below
4662 
4663   assert(_collector->should_unload_classes()
4664          || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache),
4665          "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
4666   _timer.stop();
4667   if (PrintCMSStatistics != 0) {
4668     gclog_or_tty->print_cr(
4669       "Finished remaining root rescan work in %dth thread: %3.3f sec",
4670       worker_id, _timer.seconds());
4671   }
4672 
4673   // ---------- unhandled CLD scanning ----------
4674   if (worker_id == 0) { // Single threaded at the moment.
4675     _timer.reset();
4676     _timer.start();
4677 
4678     // Scan all new class loader data objects and new dependencies that were
4679     // introduced during concurrent marking.
4680     ResourceMark rm;
4681     GrowableArray<ClassLoaderData*>* array = ClassLoaderDataGraph::new_clds();
4682     for (int i = 0; i < array->length(); i++) {
4683       par_mrias_cl.do_class_loader_data(array->at(i));
4684     }
4685 
4686     // We don't need to keep track of new CLDs anymore.
4687     ClassLoaderDataGraph::remember_new_clds(false);
4688 
4689     _timer.stop();
4690     if (PrintCMSStatistics != 0) {
4691       gclog_or_tty->print_cr(
4692           "Finished unhandled CLD scanning work in %dth thread: %3.3f sec",
4693           worker_id, _timer.seconds());
4694     }
4695   }
4696 
4697   // ---------- dirty klass scanning ----------
4698   if (worker_id == 0) { // Single threaded at the moment.
4699     _timer.reset();
4700     _timer.start();
4701 
4702     // Scan all classes that were dirtied during the concurrent marking phase.
4703     RemarkKlassClosure remark_klass_closure(&par_mrias_cl);
4704     ClassLoaderDataGraph::classes_do(&remark_klass_closure);
4705 
4706     _timer.stop();
4707     if (PrintCMSStatistics != 0) {
4708       gclog_or_tty->print_cr(
4709           "Finished dirty klass scanning work in %dth thread: %3.3f sec",
4710           worker_id, _timer.seconds());
4711     }
4712   }
4713 
4714   // We might have added oops to ClassLoaderData::_handles during the
4715   // concurrent marking phase. These oops point to newly allocated objects
4716   // that are guaranteed to be kept alive. Either by the direct allocation
4717   // code, or when the young collector processes the roots. Hence,
4718   // we don't have to revisit the _handles block during the remark phase.
4719 
4720   // ---------- rescan dirty cards ------------
4721   _timer.reset();
4722   _timer.start();
4723 
4724   // Do the rescan tasks for each of the two spaces
4725   // (cms_space) in turn.
4726   // "worker_id" is passed to select the task_queue for "worker_id"
4727   do_dirty_card_rescan_tasks(_cms_space, worker_id, &par_mrias_cl);
4728   _timer.stop();
4729   if (PrintCMSStatistics != 0) {
4730     gclog_or_tty->print_cr(
4731       "Finished dirty card rescan work in %dth thread: %3.3f sec",
4732       worker_id, _timer.seconds());
4733   }
4734 
4735   // ---------- steal work from other threads ...
4736   // ---------- ... and drain overflow list.
4737   _timer.reset();
4738   _timer.start();
4739   do_work_steal(worker_id, &par_mrias_cl, _collector->hash_seed(worker_id));
4740   _timer.stop();
4741   if (PrintCMSStatistics != 0) {
4742     gclog_or_tty->print_cr(
4743       "Finished work stealing in %dth thread: %3.3f sec",
4744       worker_id, _timer.seconds());
4745   }
4746 }
4747 
4748 // Note that the worker_id parameter is not used.
4749 void
4750 CMSParMarkTask::do_young_space_rescan(uint worker_id,
4751   OopsInGenClosure* cl, ContiguousSpace* space,
4752   HeapWord** chunk_array, size_t chunk_top) {
4753   // Until all tasks completed:
4754   // . claim an unclaimed task
4755   // . compute region boundaries corresponding to task claimed
4756   //   using chunk_array
4757   // . par_oop_iterate(cl) over that region
4758 
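       // A hedged example of the boundary computation in the claiming loop
       // below: with chunk_array = {a0, a1, a2} and chunk_top = 3 there are
       // four tasks, covering [bottom, a0), [a0, a1), [a1, a2) and [a2, top);
       // with chunk_top == 0 (no samples were taken) there is a single task
       // covering [bottom, top).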
4759   ResourceMark rm;
4760   HandleMark   hm;
4761 
4762   SequentialSubTasksDone* pst = space->par_seq_tasks();
4763 
4764   uint nth_task = 0;
4765   uint n_tasks  = pst->n_tasks();
4766 
4767   if (n_tasks > 0) {
4768     assert(pst->valid(), "Uninitialized use?");
4769     HeapWord *start, *end;
4770     while (!pst->is_task_claimed(/* reference */ nth_task)) {
4771       // We claimed task # nth_task; compute its boundaries.
4772       if (chunk_top == 0) {  // no samples were taken
4773         assert(nth_task == 0 && n_tasks == 1, "Can have only 1 eden task");
4774         start = space->bottom();
4775         end   = space->top();
4776       } else if (nth_task == 0) {
4777         start = space->bottom();
4778         end   = chunk_array[nth_task];
4779       } else if (nth_task < (uint)chunk_top) {
4780         assert(nth_task >= 1, "Control point invariant");
4781         start = chunk_array[nth_task - 1];
4782         end   = chunk_array[nth_task];
4783       } else {
4784         assert(nth_task == (uint)chunk_top, "Control point invariant");
4785         start = chunk_array[chunk_top - 1];
4786         end   = space->top();
4787       }
4788       MemRegion mr(start, end);
4789       // Verify that mr is in space
4790       assert(mr.is_empty() || space->used_region().contains(mr),
4791              "Should be in space");
4792       // Verify that "start" is an object boundary
4793       assert(mr.is_empty() || oop(mr.start())->is_oop(),
4794              "Should be an oop");
4795       space->par_oop_iterate(mr, cl);
4796     }
4797     pst->all_tasks_completed();
4798   }
4799 }
4800 
4801 void
4802 CMSParRemarkTask::do_dirty_card_rescan_tasks(
4803   CompactibleFreeListSpace* sp, int i,
4804   Par_MarkRefsIntoAndScanClosure* cl) {
4805   // Until all tasks completed:
4806   // . claim an unclaimed task
4807   // . compute region boundaries corresponding to task claimed
4808   // . transfer dirty bits ct->mut for that region
4809   // . apply rescanclosure to dirty mut bits for that region
4810 
4811   ResourceMark rm;
4812   HandleMark   hm;
4813 
4814   OopTaskQueue* work_q = work_queue(i);
4815   ModUnionClosure modUnionClosure(&(_collector->_modUnionTable));
4816   // CAUTION! CAUTION! CAUTION! CAUTION! CAUTION! CAUTION! CAUTION!
4817   // CAUTION: This closure has state that persists across calls to
4818   // the work method dirty_range_iterate_clear() in that it has
4819   // embedded in it a (subtype of) UpwardsObjectClosure. The
4820   // use of that state in the embedded UpwardsObjectClosure instance
4821   // assumes that the cards are always iterated (even if in parallel
4822   // by several threads) in monotonically increasing order per each
4823   // thread. This is true of the implementation below which picks
4824   // card ranges (chunks) in monotonically increasing order globally
4825   // and, a-fortiori, in monotonically increasing order per thread
4826   // (the latter order being a subsequence of the former).
4827   // If the work code below is ever reorganized into a more chaotic
4828   // work-partitioning form than the current "sequential tasks"
4829   // paradigm, the use of that persistent state will have to be
4830   // revisited and modified appropriately. See also related
4831   // bug 4756801 work on which should examine this code to make
4832   // sure that the changes there do not run counter to the
4833   // assumptions made here and necessary for correctness and
4834   // efficiency. Note also that this code might yield inefficient
4835   // behavior in the case of very large objects that span one or
4836   // more work chunks. Such objects would potentially be scanned
4837   // several times redundantly. Work on 4756801 should try and
4838   // address that performance anomaly if at all possible. XXX
4839   MemRegion  full_span  = _collector->_span;
4840   CMSBitMap* bm    = &(_collector->_markBitMap);     // shared
4841   MarkFromDirtyCardsClosure
4842     greyRescanClosure(_collector, full_span, // entire span of interest
4843                       sp, bm, work_q, cl);
4844 
4845   SequentialSubTasksDone* pst = sp->conc_par_seq_tasks();
4846   assert(pst->valid(), "Uninitialized use?");
4847   uint nth_task = 0;
4848   const int alignment = CardTableModRefBS::card_size * BitsPerWord;
4849   MemRegion span = sp->used_region();
4850   HeapWord* start_addr = span.start();
4851   HeapWord* end_addr = (HeapWord*)round_to((intptr_t)span.end(),
4852                                            alignment);
4853   const size_t chunk_size = sp->rescan_task_size(); // in HeapWord units
4854   assert((HeapWord*)round_to((intptr_t)start_addr, alignment) ==
4855          start_addr, "Check alignment");
4856   assert((size_t)round_to((intptr_t)chunk_size, alignment) ==
4857          chunk_size, "Check alignment");
4858 
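       // A hedged note on the chunk geometry set up above, assuming the
       // common values CardTableModRefBS::card_size = 512 bytes and
       // BitsPerWord = 64: chunks are then aligned at 512 * 64 = 32K byte
       // boundaries, so each chunk covers a whole number of mod union table
       // words, which is what allows workers to set and clear MUT bits for
       // their own chunk without synchronizing with each other.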
4859   while (!pst->is_task_claimed(/* reference */ nth_task)) {
4860     // Having claimed the nth_task, compute corresponding mem-region,
4861     // which is a-fortiori aligned correctly (i.e. at a MUT boundary).
4862     // The alignment restriction ensures that we do not need any
4863     // synchronization with other gang-workers while setting or
4864     // clearing bits in this chunk of the MUT.
4865     MemRegion this_span = MemRegion(start_addr + nth_task*chunk_size,
4866                                     start_addr + (nth_task+1)*chunk_size);
4867     // The last chunk's end might be way beyond end of the
4868     // used region. In that case pull back appropriately.
4869     if (this_span.end() > end_addr) {
4870       this_span.set_end(end_addr);
4871       assert(!this_span.is_empty(), "Program logic (calculation of n_tasks)");
4872     }
4873     // Iterate over the dirty cards covering this chunk, marking them
4874     // precleaned, and setting the corresponding bits in the mod union
4875     // table. Since we have been careful to partition at Card and MUT-word
4876     // boundaries no synchronization is needed between parallel threads.
4877     _collector->_ct->ct_bs()->dirty_card_iterate(this_span,
4878                                                  &modUnionClosure);
4879 
4880     // Having transferred these marks into the modUnionTable,
4881     // rescan the marked objects on the dirty cards in the modUnionTable.
4882     // Even if this is at a synchronous collection, the initial marking
4883     // may have been done during an asynchronous collection so there
4884     // may be dirty bits in the mod-union table.
4885     _collector->_modUnionTable.dirty_range_iterate_clear(
4886                   this_span, &greyRescanClosure);
4887     _collector->_modUnionTable.verifyNoOneBitsInRange(
4888                                  this_span.start(),
4889                                  this_span.end());
4890   }
4891   pst->all_tasks_completed();  // declare that i am done
4892 }
4893 
4894 // . see if we can share work_queues with ParNew? XXX
4895 void
4896 CMSParRemarkTask::do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl,
4897                                 int* seed) {
4898   OopTaskQueue* work_q = work_queue(i);
4899   NOT_PRODUCT(int num_steals = 0;)
4900   oop obj_to_scan;
4901   CMSBitMap* bm = &(_collector->_markBitMap);
4902 
4903   while (true) {
4904     // Completely finish any left over work from (an) earlier round(s)
4905     cl->trim_queue(0);
4906     size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
4907                                          (size_t)ParGCDesiredObjsFromOverflowList);
4908     // Now check if there's any work in the overflow list
4909     // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
4910     // only affects the number of attempts made to get work from the
4911     // overflow list and does not affect the number of workers.  Just
4912     // pass ParallelGCThreads so this behavior is unchanged.
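         // A hedged numeric example of the refill size computed above,
         // assuming the default ParGCDesiredObjsFromOverflowList of 20: with
         // a work queue of capacity 16K currently holding 1K entries,
         // (16K - 1K) / 4 is about 3.8K, so the MIN2 clamps the request to
         // 20 objects per refill from the global overflow list.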
4913     if (_collector->par_take_from_overflow_list(num_from_overflow_list,
4914                                                 work_q,
4915                                                 ParallelGCThreads)) {
4916       // found something in global overflow list;
4917       // not yet ready to go stealing work from others.
4918       // We'd like to assert(work_q->size() != 0, ...)
4919       // because we just took work from the overflow list,
4920       // but of course we can't since all of that could have
4921       // been already stolen from us.
4922       // "He giveth and He taketh away."
4923       continue;
4924     }
4925     // Verify that we have no work before we resort to stealing
4926     assert(work_q->size() == 0, "Have work, shouldn't steal");
4927     // Try to steal from other queues that have work
4928     if (task_queues()->steal(i, seed, /* reference */ obj_to_scan)) {
4929       NOT_PRODUCT(num_steals++;)
4930       assert(obj_to_scan->is_oop(), "Oops, not an oop!");
4931       assert(bm->isMarked((HeapWord*)obj_to_scan), "Stole an unmarked oop?");
4932       // Do scanning work
4933       obj_to_scan->oop_iterate(cl);
4934       // Loop around, finish this work, and try to steal some more
4935     } else if (terminator()->offer_termination()) {
4936         break;  // nirvana from the infinite cycle
4937     }
4938   }
4939   NOT_PRODUCT(
4940     if (PrintCMSStatistics != 0) {
4941       gclog_or_tty->print("\n\t(%d: stole %d oops)", i, num_steals);
4942     }
4943   )
4944   assert(work_q->size() == 0 && _collector->overflow_list_is_empty(),
4945          "Else our work is not yet done");
4946 }
4947 
4948 // Record object boundaries in _eden_chunk_array by sampling the eden
4949 // top in the slow-path eden object allocation code path and record
4950 // the boundaries, if CMSEdenChunksRecordAlways is true. If
4951 // CMSEdenChunksRecordAlways is false, we use the other asynchronous
4952 // sampling in sample_eden() that is active during part of the
4953 // preclean phase.
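     // A hedged note on the commit rule shared by both samplers: the very
     // first sample is always committed; after that, a newly read top is
     // committed as a fresh boundary only if it lies at least
     // CMSSamplingGrain heap words past the previously recorded one,
     // otherwise it simply overwrites the same (uncommitted) slot on the
     // next call. This keeps the chunk array coarse enough to yield
     // usefully sized parallel rescan tasks.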
4954 void CMSCollector::sample_eden_chunk() {
4955   if (CMSEdenChunksRecordAlways && _eden_chunk_array != NULL) {
4956     if (_eden_chunk_lock->try_lock()) {
4957       // Record a sample. This is the critical section. The contents
4958       // of the _eden_chunk_array have to be non-decreasing in the
4959       // address order.
4960       _eden_chunk_array[_eden_chunk_index] = *_top_addr;
4961       assert(_eden_chunk_array[_eden_chunk_index] <= *_end_addr,
4962              "Unexpected state of Eden");
4963       if (_eden_chunk_index == 0 ||
4964           ((_eden_chunk_array[_eden_chunk_index] > _eden_chunk_array[_eden_chunk_index-1]) &&
4965            (pointer_delta(_eden_chunk_array[_eden_chunk_index],
4966                           _eden_chunk_array[_eden_chunk_index-1]) >= CMSSamplingGrain))) {
4967         _eden_chunk_index++;  // commit sample
4968       }
4969       _eden_chunk_lock->unlock();
4970     }
4971   }
4972 }
4973 
4974 // Return a thread-local PLAB recording array, as appropriate.
4975 void* CMSCollector::get_data_recorder(int thr_num) {
4976   if (_survivor_plab_array != NULL &&
4977       (CMSPLABRecordAlways ||
4978        (_collectorState > Marking && _collectorState < FinalMarking))) {
4979     assert(thr_num < (int)ParallelGCThreads, "thr_num is out of bounds");
4980     ChunkArray* ca = &_survivor_plab_array[thr_num];
4981     ca->reset();   // clear it so that fresh data is recorded
4982     return (void*) ca;
4983   } else {
4984     return NULL;
4985   }
4986 }
4987 
4988 // Reset all the thread-local PLAB recording arrays
4989 void CMSCollector::reset_survivor_plab_arrays() {
4990   for (uint i = 0; i < ParallelGCThreads; i++) {
4991     _survivor_plab_array[i].reset();
4992   }
4993 }
4994 
4995 // Merge the per-thread plab arrays into the global survivor chunk
4996 // array which will provide the partitioning of the survivor space
4997 // for CMS initial scan and rescan.
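     // In effect this is a k-way merge of the per-thread (already sorted)
     // PLAB sample arrays into one sorted chunk array. A hedged sketch of
     // the loop below, in illustrative pseudo-code:
     //
     //   for (i = 0; i < _survivor_chunk_capacity; i++) {
     //     pick the thread j whose next unconsumed sample is the smallest;
     //     if (no thread has any samples left) break;
     //     _survivor_chunk_array[i] = that sample;
     //     _cursor[j]++;
     //   }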
4998 void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv,
4999                                               int no_of_gc_threads) {
5000   assert(_survivor_plab_array  != NULL, "Error");
5001   assert(_survivor_chunk_array != NULL, "Error");
5002   assert(_collectorState == FinalMarking ||
5003          (CMSParallelInitialMarkEnabled && _collectorState == InitialMarking), "Error");
5004   for (int j = 0; j < no_of_gc_threads; j++) {
5005     _cursor[j] = 0;
5006   }
5007   HeapWord* top = surv->top();
5008   size_t i;
5009   for (i = 0; i < _survivor_chunk_capacity; i++) {  // all sca entries
5010     HeapWord* min_val = top;          // Higher than any PLAB address
5011     uint      min_tid = 0;            // position of min_val this round
5012     for (int j = 0; j < no_of_gc_threads; j++) {
5013       ChunkArray* cur_sca = &_survivor_plab_array[j];
5014       if (_cursor[j] == cur_sca->end()) {
5015         continue;
5016       }
5017       assert(_cursor[j] < cur_sca->end(), "ctl pt invariant");
5018       HeapWord* cur_val = cur_sca->nth(_cursor[j]);
5019       assert(surv->used_region().contains(cur_val), "Out of bounds value");
5020       if (cur_val < min_val) {
5021         min_tid = j;
5022         min_val = cur_val;
5023       } else {
5024         assert(cur_val < top, "All recorded addresses should be less");
5025       }
5026     }
5027     // At this point min_val and min_tid are respectively
5028     // the least address in _survivor_plab_array[j]->nth(_cursor[j])
5029     // and the thread (j) that witnesses that address.
5030     // We record this address in the _survivor_chunk_array[i]
5031     // and increment _cursor[min_tid] prior to the next round i.
5032     if (min_val == top) {
5033       break;
5034     }
5035     _survivor_chunk_array[i] = min_val;
5036     _cursor[min_tid]++;
5037   }
5038   // We are all done; record the size of the _survivor_chunk_array
5039   _survivor_chunk_index = i; // exclusive: [0, i)
5040   if (PrintCMSStatistics > 0) {
5041     gclog_or_tty->print(" (Survivor:" SIZE_FORMAT " chunks) ", i);
5042   }
5043   // Verify that we used up all the recorded entries
5044   #ifdef ASSERT
5045     size_t total = 0;
5046     for (int j = 0; j < no_of_gc_threads; j++) {
5047       assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant");
5048       total += _cursor[j];
5049     }
5050     assert(total == _survivor_chunk_index, "Ctl Pt Invariant");
5051     // Check that the merged array is in sorted order
5052     if (total > 0) {
5053       for (size_t i = 0; i < total - 1; i++) {
5054         if (PrintCMSStatistics > 0) {
5055           gclog_or_tty->print(" (chunk" SIZE_FORMAT ":" INTPTR_FORMAT ") ",
5056                               i, _survivor_chunk_array[i]);
5057         }
5058         assert(_survivor_chunk_array[i] < _survivor_chunk_array[i+1],
5059                "Not sorted");
5060       }
5061     }
5062   #endif // ASSERT
5063 }
5064 
5065 // Set up the space's par_seq_tasks structure for work claiming
5066 // for parallel initial scan and rescan of young gen.
5067 // See ParRescanTask where this is currently used.
5068 void
5069 CMSCollector::
5070 initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
5071   assert(n_threads > 0, "Unexpected n_threads argument");
5072 
5073   // Eden space
5074   if (!_young_gen->eden()->is_empty()) {
5075     SequentialSubTasksDone* pst = _young_gen->eden()->par_seq_tasks();
5076     assert(!pst->valid(), "Clobbering existing data?");
5077     // The entries in [0, _eden_chunk_index) are recorded chunk boundaries; they delimit _eden_chunk_index + 1 task regions.
5078     size_t n_tasks = _eden_chunk_index + 1;
5079     assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error");
5080     // Sets the condition for completion of the subtask (how many threads
5081     // need to finish in order to be done).
5082     pst->set_n_threads(n_threads);
5083     pst->set_n_tasks((int)n_tasks);
5084   }
5085 
5086   // Merge the survivor plab arrays into _survivor_chunk_array
5087   if (_survivor_plab_array != NULL) {
5088     merge_survivor_plab_arrays(_young_gen->from(), n_threads);
5089   } else {
5090     assert(_survivor_chunk_index == 0, "Error");
5091   }
5092 
5093   // To space
5094   {
5095     SequentialSubTasksDone* pst = _young_gen->to()->par_seq_tasks();
5096     assert(!pst->valid(), "Clobbering existing data?");
5097     // Sets the condition for completion of the subtask (how many threads
5098     // need to finish in order to be done).
5099     pst->set_n_threads(n_threads);
5100     pst->set_n_tasks(1);
5101     assert(pst->valid(), "Error");
5102   }
5103 
5104   // From space
5105   {
5106     SequentialSubTasksDone* pst = _young_gen->from()->par_seq_tasks();
5107     assert(!pst->valid(), "Clobbering existing data?");
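         // As with eden above, the merged survivor chunk boundaries yield one
         // more scanning interval than there are recorded entries, hence the +1.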
5108     size_t n_tasks = _survivor_chunk_index + 1;
5109     assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error");
5110     // Sets the condition for completion of the subtask (how many threads
5111     // need to finish in order to be done).
5112     pst->set_n_threads(n_threads);
5113     pst->set_n_tasks((int)n_tasks);
5114     assert(pst->valid(), "Error");
5115   }
5116 }
5117 
5118 // Parallel version of remark
5119 void CMSCollector::do_remark_parallel() {
5120   GenCollectedHeap* gch = GenCollectedHeap::heap();
5121   FlexibleWorkGang* workers = gch->workers();
5122   assert(workers != NULL, "Need parallel worker threads.");
5123   // Choose to use the number of GC workers most recently set
5124   // into "active_workers".  If active_workers is not set, set it
5125   // to ParallelGCThreads.
5126   int n_workers = workers->active_workers();
5127   if (n_workers == 0) {
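         // Debug builds flag the unexpected state via the assert below;
         // product builds fall back to ParallelGCThreads.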
5128     assert(n_workers > 0, "Should have been set during scavenge");
5129     n_workers = ParallelGCThreads;
5130     workers->set_active_workers(n_workers);
5131   }
5132   CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
5133 
5134   CMSParRemarkTask tsk(this,
5135     cms_space,
5136     n_workers, workers, task_queues());
5137 
5138   // Set up for parallel process_roots work.
5139   gch->set_par_threads(n_workers);
5140   // We won't be iterating over the cards in the card table updating
5141   // the younger_gen cards, so we shouldn't call the following; otherwise
5142   // the verification code, as well as the subsequent younger_refs_iterate
5143   // code, would get confused. XXX
5144   // gch->rem_set()->prepare_for_younger_refs_iterate(true); // parallel
5145 
5146   // The young gen rescan work will not be done as part of
5147   // process_roots (which currently doesn't know how to
5148   // parallelize such a scan), but rather will be broken up into
5149   // a set of parallel tasks (via the sampling that the [abortable]
5150   // preclean phase did of eden, plus the [two] tasks of
5151   // scanning the [two] survivor spaces). Further fine-grained
5152   // parallelization of the scanning of the survivor spaces
5153   // themselves, and of precleaning of the younger gen itself
5154   // is deferred to the future.
5155   initialize_sequential_subtasks_for_young_gen_rescan(n_workers);
5156 
5157   // The dirty card rescan work is broken up into a "sequence"
5158   // of parallel tasks (per constituent space) that are dynamically
5159   // claimed by the parallel threads.
5160   cms_space->initialize_sequential_subtasks_for_rescan(n_workers);
5161 
5162   // It turns out that even when we're using 1 thread, doing the work in a
5163   // separate thread causes wide variance in run times.  We can't help this
5164   // in the multi-threaded case, but we special-case n=1 here to get
5165   // repeatable measurements of the 1-thread overhead of the parallel code.
5166   if (n_workers > 1) {
5167     // Make refs discovery MT-safe, if it isn't already: it may not
5168     // necessarily be so, since it's possible that we are doing
5169     // ST marking.
5170     ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), true);
5171     GenCollectedHeap::StrongRootsScope srs(gch);
5172     workers->run_task(&tsk);
5173   } else {
5174     ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), false);
5175     GenCollectedHeap::StrongRootsScope srs(gch);
5176     tsk.work(0);
5177   }
5178 
5179   gch->set_par_threads(0);  // 0 ==> non-parallel.
5180   // restore, single-threaded for now, any preserved marks
5181   // as a result of work_q overflow
5182   restore_preserved_marks_if_any();
5183 }
5184 
5185 // Non-parallel version of remark
5186 void CMSCollector::do_remark_non_parallel() {
5187   ResourceMark rm;
5188   HandleMark   hm;
5189   GenCollectedHeap* gch = GenCollectedHeap::heap();
5190   ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), false);
5191 
5192   MarkRefsIntoAndScanClosure
5193     mrias_cl(_span, ref_processor(), &_markBitMap, NULL /* not precleaning */,
5194              &_markStack, this,
5195              false /* should_yield */, false /* not precleaning */);
5196   MarkFromDirtyCardsClosure
5197     markFromDirtyCardsClosure(this, _span,
5198                               NULL,  // space is set further below
5199                               &_markBitMap, &_markStack, &mrias_cl);
5200   {
5201     GCTraceTime t("grey object rescan", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
5202     // Iterate over the dirty cards, setting the corresponding bits in the
5203     // mod union table.
5204     {
5205       ModUnionClosure modUnionClosure(&_modUnionTable);
5206       _ct->ct_bs()->dirty_card_iterate(
5207                       _cmsGen->used_region(),
5208                       &modUnionClosure);
5209     }
5210     // Having transferred these marks into the modUnionTable, we just need
5211     // to rescan the marked objects on the dirty cards in the modUnionTable.
5212     // The initial marking may have been done during an asynchronous
5213     // collection so there may be dirty bits in the mod-union table.
5214     const int alignment =
5215       CardTableModRefBS::card_size * BitsPerWord;
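         // One mod union table bit covers a card, so one table word covers
         // BitsPerWord cards; rounding the upper bound up to
         // card_size * BitsPerWord bytes aligns it to a whole table word.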
5216     {
5217       // ... First handle dirty cards in CMS gen
5218       markFromDirtyCardsClosure.set_space(_cmsGen->cmsSpace());
5219       MemRegion ur = _cmsGen->used_region();
5220       HeapWord* lb = ur.start();
5221       HeapWord* ub = (HeapWord*)round_to((intptr_t)ur.end(), alignment);
5222       MemRegion cms_span(lb, ub);
5223       _modUnionTable.dirty_range_iterate_clear(cms_span,
5224                                                &markFromDirtyCardsClosure);
5225       verify_work_stacks_empty();
5226       if (PrintCMSStatistics != 0) {
5227         gclog_or_tty->print(" (re-scanned "SIZE_FORMAT" dirty cards in cms gen) ",
5228           markFromDirtyCardsClosure.num_dirty_cards());
5229       }
5230     }
5231   }
5232   if (VerifyDuringGC &&
5233       GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
5234     HandleMark hm;  // Discard invalid handles created during verification
5235     Universe::verify();
5236   }
5237   {
5238     GCTraceTime t("root rescan", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
5239 
5240     verify_work_stacks_empty();
5241 
5242     gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
5243     GenCollectedHeap::StrongRootsScope srs(gch);
5244 
5245     gch->gen_process_roots(_cmsGen->level(),
5246                            true,  // younger gens as roots
5247                            false, // use the local StrongRootsScope
5248                            SharedHeap::ScanningOption(roots_scanning_options()),
5249                            should_unload_classes(),
5250                            &mrias_cl,
5251                            NULL,
5252                            NULL); // The dirty klasses will be handled below
5253 
5254     assert(should_unload_classes()
5255            || (roots_scanning_options() & SharedHeap::SO_AllCodeCache),
5256            "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
5257   }
5258 
5259   {
5260     GCTraceTime t("visit unhandled CLDs", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
5261 
5262     verify_work_stacks_empty();
5263 
5264     // Scan all class loader data objects that might have been introduced
5265     // during concurrent marking.
5266     ResourceMark rm;
5267     GrowableArray<ClassLoaderData*>* array = ClassLoaderDataGraph::new_clds();
5268     for (int i = 0; i < array->length(); i++) {
5269       mrias_cl.do_class_loader_data(array->at(i));
5270     }
5271 
5272     // We don't need to keep track of new CLDs anymore.
5273     ClassLoaderDataGraph::remember_new_clds(false);
5274 
5275     verify_work_stacks_empty();
5276   }
5277 
5278   {
5279     GCTraceTime t("dirty klass scan", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
5280 
5281     verify_work_stacks_empty();
5282 
5283     RemarkKlassClosure remark_klass_closure(&mrias_cl);
5284     ClassLoaderDataGraph::classes_do(&remark_klass_closure);
5285 
5286     verify_work_stacks_empty();
5287   }
5288 
5289   // We might have added oops to ClassLoaderData::_handles during the
5290   // concurrent marking phase. These oops point to newly allocated objects
5291   // that are guaranteed to be kept alive. Either by the direct allocation
5292   // code, or when the young collector processes the roots. Hence,
5293   // we don't have to revisit the _handles block during the remark phase.
5294 
5295   verify_work_stacks_empty();
5296   // Restore evacuated mark words, if any, used for overflow list links
5297   if (!CMSOverflowEarlyRestoration) {
5298     restore_preserved_marks_if_any();
5299   }
5300   verify_overflow_empty();
5301 }
5302 
5303 ////////////////////////////////////////////////////////
5304 // Parallel Reference Processing Task Proxy Class
5305 ////////////////////////////////////////////////////////
5306 class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues {
5307   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
5308   CMSCollector*          _collector;
5309   CMSBitMap*             _mark_bit_map;
5310   const MemRegion        _span;
5311   ProcessTask&           _task;
5312 
5313 public:
5314   CMSRefProcTaskProxy(ProcessTask&     task,
5315                       CMSCollector*    collector,
5316                       const MemRegion& span,
5317                       CMSBitMap*       mark_bit_map,
5318                       AbstractWorkGang* workers,
5319                       OopTaskQueueSet* task_queues):
5320     // XXX Should superclass AGTWOQ also know about AWG since it knows
5321     // about the task_queues used by the AWG? Then it could initialize
5322     // the terminator() object. See 6984287. The set_for_termination()
5323     // below is a temporary band-aid for the regression in 6984287.
5324     AbstractGangTaskWOopQueues("Process referents by policy in parallel",
5325       task_queues),
5326     _task(task),
5327     _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
5328   {
5329     assert(_collector->_span.equals(_span) && !_span.is_empty(),
5330            "Inconsistency in _span");
5331     set_for_termination(workers->active_workers());
5332   }
5333 
5334   OopTaskQueueSet* task_queues() { return queues(); }
5335 
5336   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
5337 
5338   void do_work_steal(int i,
5339                      CMSParDrainMarkingStackClosure* drain,
5340                      CMSParKeepAliveClosure* keep_alive,
5341                      int* seed);
5342 
5343   virtual void work(uint worker_id);
5344 };
5345 
5346 void CMSRefProcTaskProxy::work(uint worker_id) {
5347   ResourceMark rm;
5348   HandleMark hm;
5349   assert(_collector->_span.equals(_span), "Inconsistency in _span");
5350   CMSParKeepAliveClosure par_keep_alive(_collector, _span,
5351                                         _mark_bit_map,
5352                                         work_queue(worker_id));
5353   CMSParDrainMarkingStackClosure par_drain_stack(_collector, _span,
5354                                                  _mark_bit_map,
5355                                                  work_queue(worker_id));
5356   CMSIsAliveClosure is_alive_closure(_span, _mark_bit_map);
5357   _task.work(worker_id, is_alive_closure, par_keep_alive, par_drain_stack);
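       // Tasks that mark oops alive can generate transitive marking work;
       // help drain it and steal from other workers until all are done.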
5358   if (_task.marks_oops_alive()) {
5359     do_work_steal(worker_id, &par_drain_stack, &par_keep_alive,
5360                   _collector->hash_seed(worker_id));
5361   }
5362   assert(work_queue(worker_id)->size() == 0, "work_queue should be empty");
5363   assert(_collector->_overflow_list == NULL, "non-empty _overflow_list");
5364 }
5365 
5366 class CMSRefEnqueueTaskProxy: public AbstractGangTask {
5367   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
5368   EnqueueTask& _task;
5369 
5370 public:
5371   CMSRefEnqueueTaskProxy(EnqueueTask& task)
5372     : AbstractGangTask("Enqueue reference objects in parallel"),
5373       _task(task)
5374   { }
5375 
5376   virtual void work(uint worker_id)
5377   {
5378     _task.work(worker_id);
5379   }
5380 };
5381 
5382 CMSParKeepAliveClosure::CMSParKeepAliveClosure(CMSCollector* collector,
5383   MemRegion span, CMSBitMap* bit_map, OopTaskQueue* work_queue):
5384    _span(span),
5385    _bit_map(bit_map),
5386    _work_queue(work_queue),
5387    _mark_and_push(collector, span, bit_map, work_queue),
5388    _low_water_mark(MIN2((uint)(work_queue->max_elems()/4),
5389                         (uint)(CMSWorkQueueDrainThreshold * ParallelGCThreads)))
5390 { }
5391 
5392 // . see if we can share work_queues with ParNew? XXX
5393 void CMSRefProcTaskProxy::do_work_steal(int i,
5394   CMSParDrainMarkingStackClosure* drain,
5395   CMSParKeepAliveClosure* keep_alive,
5396   int* seed) {
5397   OopTaskQueue* work_q = work_queue(i);
5398   NOT_PRODUCT(int num_steals = 0;)
5399   oop obj_to_scan;
5400 
5401   while (true) {
5402     // Completely finish any left over work from (an) earlier round(s)
5403     drain->trim_queue(0);
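         // Refill from the global overflow list: take at most a quarter of the
         // free slots in our local queue, capped at ParGCDesiredObjsFromOverflowList.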
5404     size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
5405                                          (size_t)ParGCDesiredObjsFromOverflowList);
5406     // Now check if there's any work in the overflow list
5407     // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
5408     // only affects the number of attempts made to get work from the
5409     // overflow list and does not affect the number of workers.  Just
5410     // pass ParallelGCThreads so this behavior is unchanged.
5411     if (_collector->par_take_from_overflow_list(num_from_overflow_list,
5412                                                 work_q,
5413                                                 ParallelGCThreads)) {
5414       // Found something in global overflow list;
5415       // not yet ready to go stealing work from others.
5416       // We'd like to assert(work_q->size() != 0, ...)
5417       // because we just took work from the overflow list,
5418       // but of course we can't, since all of that might have
5419       // been already stolen from us.
5420       continue;
5421     }
5422     // Verify that we have no work before we resort to stealing
5423     assert(work_q->size() == 0, "Have work, shouldn't steal");
5424     // Try to steal from other queues that have work
5425     if (task_queues()->steal(i, seed, /* reference */ obj_to_scan)) {
5426       NOT_PRODUCT(num_steals++;)
5427       assert(obj_to_scan->is_oop(), "Oops, not an oop!");
5428       assert(_mark_bit_map->isMarked((HeapWord*)obj_to_scan), "Stole an unmarked oop?");
5429       // Do scanning work
5430       obj_to_scan->oop_iterate(keep_alive);
5431       // Loop around, finish this work, and try to steal some more
5432     } else if (terminator()->offer_termination()) {
5433       break;  // nirvana from the infinite cycle
5434     }
5435   }
5436   NOT_PRODUCT(
5437     if (PrintCMSStatistics != 0) {
5438       gclog_or_tty->print("\n\t(%d: stole %d oops)", i, num_steals);
5439     }
5440   )
5441 }
5442 
5443 void CMSRefProcTaskExecutor::execute(ProcessTask& task)
5444 {
5445   GenCollectedHeap* gch = GenCollectedHeap::heap();
5446   FlexibleWorkGang* workers = gch->workers();
5447   assert(workers != NULL, "Need parallel worker threads.");
5448   CMSRefProcTaskProxy rp_task(task, &_collector,
5449                               _collector.ref_processor()->span(),
5450                               _collector.markBitMap(),
5451                               workers, _collector.task_queues());
5452   workers->run_task(&rp_task);
5453 }
5454 
5455 void CMSRefProcTaskExecutor::execute(EnqueueTask& task)
5456 {
5458   GenCollectedHeap* gch = GenCollectedHeap::heap();
5459   FlexibleWorkGang* workers = gch->workers();
5460   assert(workers != NULL, "Need parallel worker threads.");
5461   CMSRefEnqueueTaskProxy enq_task(task);
5462   workers->run_task(&enq_task);
5463 }
5464 
5465 void CMSCollector::refProcessingWork() {
5466   ResourceMark rm;
5467   HandleMark   hm;
5468 
5469   ReferenceProcessor* rp = ref_processor();
5470   assert(rp->span().equals(_span), "Spans should be equal");
5471   assert(!rp->enqueuing_is_done(), "Enqueuing should not be complete");
5472   // Process weak references.
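       // setup_policy(false): do not unconditionally clear soft references;
       // let the currently installed soft-ref clearing policy decide.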
5473   rp->setup_policy(false);
5474   verify_work_stacks_empty();
5475 
5476   CMSKeepAliveClosure cmsKeepAliveClosure(this, _span, &_markBitMap,
5477                                           &_markStack, false /* !preclean */);
5478   CMSDrainMarkingStackClosure cmsDrainMarkingStackClosure(this,
5479                                 _span, &_markBitMap, &_markStack,
5480                                 &cmsKeepAliveClosure, false /* !preclean */);
5481   {
5482     GCTraceTime t("weak refs processing", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
5483 
5484     ReferenceProcessorStats stats;
5485     if (rp->processing_is_mt()) {
5486       // Set the degree of MT here.  If the discovery is done MT, there
5487       // may have been a different number of threads doing the discovery
5488       // and a different number of discovered lists may have Ref objects.
5489       // That is OK as long as the Reference lists are balanced (see
5490       // balance_all_queues() and balance_queues()).
5491       GenCollectedHeap* gch = GenCollectedHeap::heap();
5492       int active_workers = ParallelGCThreads;
5493       FlexibleWorkGang* workers = gch->workers();
5494       if (workers != NULL) {
5495         active_workers = workers->active_workers();
5496         // The expectation is that active_workers will have already
5497         // been set to a reasonable value.  If it has not been set,
5498         // investigate.
5499         assert(active_workers > 0, "Should have been set during scavenge");
5500       }
5501       rp->set_active_mt_degree(active_workers);
5502       CMSRefProcTaskExecutor task_executor(*this);
5503       stats = rp->process_discovered_references(&_is_alive_closure,
5504                                         &cmsKeepAliveClosure,
5505                                         &cmsDrainMarkingStackClosure,
5506                                         &task_executor,
5507                                         _gc_timer_cm,
5508                                         _gc_tracer_cm->gc_id());
5509     } else {
5510       stats = rp->process_discovered_references(&_is_alive_closure,
5511                                         &cmsKeepAliveClosure,
5512                                         &cmsDrainMarkingStackClosure,
5513                                         NULL,
5514                                         _gc_timer_cm,
5515                                         _gc_tracer_cm->gc_id());
5516     }
5517     _gc_tracer_cm->report_gc_reference_stats(stats);
5518 
5519   }
5520 
5521   // This is the point where the entire marking should have completed.
5522   verify_work_stacks_empty();
5523 
5524   if (should_unload_classes()) {
5525     {
5526       GCTraceTime t("class unloading", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
5527 
5528       // Unload classes and purge the SystemDictionary.
5529       bool purged_class = SystemDictionary::do_unloading(&_is_alive_closure);
5530 
5531       // Unload nmethods.
5532       CodeCache::do_unloading(&_is_alive_closure, purged_class);
5533 
5534       // Prune dead klasses from subklass/sibling/implementor lists.
5535       Klass::clean_weak_klass_links(&_is_alive_closure);
5536     }
5537 
5538     {
5539       GCTraceTime t("scrub symbol table", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
5540       // Clean up unreferenced symbols in symbol table.
5541       SymbolTable::unlink();
5542     }
5543 
5544     {
5545       GCTraceTime t("scrub string table", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
5546       // Delete entries for dead interned strings.
5547       StringTable::unlink(&_is_alive_closure);
5548     }
5549   }
5550 
5552   // Restore any preserved marks as a result of mark stack or
5553   // work queue overflow
5554   restore_preserved_marks_if_any();  // done single-threaded for now
5555 
5556   rp->set_enqueuing_is_done(true);
5557   if (rp->processing_is_mt()) {
5558     rp->balance_all_queues();
5559     CMSRefProcTaskExecutor task_executor(*this);
5560     rp->enqueue_discovered_references(&task_executor);
5561   } else {
5562     rp->enqueue_discovered_references(NULL);
5563   }
5564   rp->verify_no_references_recorded();
5565   assert(!rp->discovery_enabled(), "should have been disabled");
5566 }
5567 
5568 #ifndef PRODUCT
5569 void CMSCollector::check_correct_thread_executing() {
5570   Thread* t = Thread::current();
5571   // Only the VM thread or the CMS thread should be here.
5572   assert(t->is_ConcurrentGC_thread() || t->is_VM_thread(),
5573          "Unexpected thread type");
5574   // If this is the vm thread, the foreground process
5575   // should not be waiting.  Note that _foregroundGCIsActive is
5576   // true while the foreground collector is waiting.
5577   if (_foregroundGCShouldWait) {
5578     // We cannot be the VM thread
5579     assert(t->is_ConcurrentGC_thread(),
5580            "Should be CMS thread");
5581   } else {
5582     // We can be the CMS thread only if we are in a stop-world
5583     // phase of CMS collection.
5584     if (t->is_ConcurrentGC_thread()) {
5585       assert(_collectorState == InitialMarking ||
5586              _collectorState == FinalMarking,
5587              "Should be a stop-world phase");
5588       // The CMS thread should be holding the CMS_token.
5589       assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
5590              "Potential interference with concurrently "
5591              "executing VM thread");
5592     }
5593   }
5594 }
5595 #endif
5596 
5597 void CMSCollector::sweep() {
5598   assert(_collectorState == Sweeping, "just checking");
5599   check_correct_thread_executing();
5600   verify_work_stacks_empty();
5601   verify_overflow_empty();
5602   increment_sweep_count();
5603   TraceCMSMemoryManagerStats tms(_collectorState,GenCollectedHeap::heap()->gc_cause());
5604 
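       // The inter-sweep interval (time since the previous sweep finished) just
       // ended; sample it for the free list census (see sweepWork), then time
       // the sweep itself.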
5605   _inter_sweep_timer.stop();
5606   _inter_sweep_estimate.sample(_inter_sweep_timer.seconds());
5607 
5608   assert(!_intra_sweep_timer.is_active(), "Should not be active");
5609   _intra_sweep_timer.reset();
5610   _intra_sweep_timer.start();
5611   {
5612     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
5613     CMSPhaseAccounting pa(this, "sweep", _gc_tracer_cm->gc_id(), !PrintGCDetails);
5614     // First sweep the old gen
5615     {
5616       CMSTokenSyncWithLocks ts(true, _cmsGen->freelistLock(),
5617                                bitMapLock());
5618       sweepWork(_cmsGen);
5619     }
5620 
5621     // Update Universe::_heap_*_at_gc figures.
5622     // We need all the free list locks to make the abstract state
5623     // transition from Sweeping to Resetting. See detailed note
5624     // further below.
5625     {
5626       CMSTokenSyncWithLocks ts(true, _cmsGen->freelistLock());
5627       // Update heap occupancy information which is used as
5628       // input to soft ref clearing policy at the next gc.
5629       Universe::update_heap_info_at_gc();
5630       _collectorState = Resizing;
5631     }
5632   }
5633   verify_work_stacks_empty();
5634   verify_overflow_empty();
5635 
5636   if (should_unload_classes()) {
5637     // Delay purge to the beginning of the next safepoint.  Metaspace::contains
5638     // requires that the virtual spaces are stable and not deleted.
5639     ClassLoaderDataGraph::set_should_purge(true);
5640   }
5641 
5642   _intra_sweep_timer.stop();
5643   _intra_sweep_estimate.sample(_intra_sweep_timer.seconds());
5644 
5645   _inter_sweep_timer.reset();
5646   _inter_sweep_timer.start();
5647 
5648   // We need to use a monotonically non-decreasing time in ms,
5649   // or we will see time-warp warnings; os::javaTimeMillis()
5650   // does not guarantee monotonicity, so derive the time from javaTimeNanos().
5651   jlong now = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
5652   update_time_of_last_gc(now);
5653 
5654   // NOTE on abstract state transitions:
5655   // Mutators allocate-live and/or mark the mod-union table dirty
5656   // based on the state of the collection.  The former is done in
5657   // the interval [Marking, Sweeping] and the latter in the interval
5658   // [Marking, Sweeping).  Thus the transitions into the Marking state
5659   // and out of the Sweeping state must be synchronously visible
5660   // globally to the mutators.
5661   // The transition into the Marking state happens with the world
5662   // stopped so the mutators will globally see it.  Sweeping is
5663   // done asynchronously by the background collector so the transition
5664   // from the Sweeping state to the Resizing state must be done
5665   // under the freelistLock (as is the check for whether to
5666   // allocate-live and whether to dirty the mod-union table).
5667   assert(_collectorState == Resizing, "Change of collector state to"
5668     " Resizing must be done under the freelistLocks (plural)");
5669 
5670   // Now that sweeping has been completed, we clear
5671   // the incremental_collection_failed flag,
5672   // thus inviting a younger gen collection to promote into
5673   // this generation. If such a promotion may still fail,
5674   // the flag will be set again when a young collection is
5675   // attempted.
5676   GenCollectedHeap* gch = GenCollectedHeap::heap();
5677   gch->clear_incremental_collection_failed();  // Worth retrying as fresh space may have been freed up
5678   gch->update_full_collections_completed(_collection_count_start);
5679 }
5680 
5681 // FIX ME!!! Looks like this belongs in CFLSpace, with
5682 // CMSGen merely delegating to it.
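     // Record a point a little below the largest free block: during the sweep,
     // isNearLargestChunk() is consulted so that blocks at or beyond this point
     // are coalesced, giving the largest block a chance to grow (the proximity
     // is controlled by FLSLargestBlockCoalesceProximity).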
5683 void ConcurrentMarkSweepGeneration::setNearLargestChunk() {
5684   double nearLargestPercent = FLSLargestBlockCoalesceProximity;
5685   HeapWord*  minAddr        = _cmsSpace->bottom();
5686   HeapWord*  largestAddr    =
5687     (HeapWord*) _cmsSpace->dictionary()->find_largest_dict();
5688   if (largestAddr == NULL) {
5689     // The dictionary appears to be empty.  In this case
5690     // try to coalesce at the end of the heap.
5691     largestAddr = _cmsSpace->end();
5692   }
5693   size_t largestOffset     = pointer_delta(largestAddr, minAddr);
5694   size_t nearLargestOffset =
5695     (size_t)((double)largestOffset * nearLargestPercent) - MinChunkSize;
5696   if (PrintFLSStatistics != 0) {
5697     gclog_or_tty->print_cr(
5698       "CMS: Large Block: " PTR_FORMAT ";"
5699       " Proximity: " PTR_FORMAT " -> " PTR_FORMAT,
5700       largestAddr,
5701       _cmsSpace->nearLargestChunk(), minAddr + nearLargestOffset);
5702   }
5703   _cmsSpace->set_nearLargestChunk(minAddr + nearLargestOffset);
5704 }
5705 
5706 bool ConcurrentMarkSweepGeneration::isNearLargestChunk(HeapWord* addr) {
5707   return addr >= _cmsSpace->nearLargestChunk();
5708 }
5709 
5710 FreeChunk* ConcurrentMarkSweepGeneration::find_chunk_at_end() {
5711   return _cmsSpace->find_chunk_at_end();
5712 }
5713 
5714 void ConcurrentMarkSweepGeneration::update_gc_stats(int current_level,
5715                                                     bool full) {
5716   // The next lower level has been collected.  Gather any statistics
5717   // that are of interest at this point.
5718   if (!full && (current_level + 1) == level()) {
5719     // Gather statistics on the young generation collection.
5720     collector()->stats().record_gc0_end(used());
5721   }
5722 }
5723 
5724 void CMSCollector::sweepWork(ConcurrentMarkSweepGeneration* gen) {
5725   // We iterate over the space(s) underlying this generation,
5726   // checking the mark bit map to see if the bits corresponding
5727   // to specific blocks are marked or not. Blocks that are
5728   // marked are live and are not swept up. All remaining blocks
5729   // are swept up, with coalescing on-the-fly as we sweep up
5730   // contiguous free and/or garbage blocks:
5731   // We need to ensure that the sweeper synchronizes with allocators
5732   // and stop-the-world collectors. In particular, the following
5733   // locks are used:
5734   // . CMS token: if this is held, a stop the world collection cannot occur
5735   // . freelistLock: if this is held no allocation can occur from this
5736   //                 generation by another thread
5737   // . bitMapLock: if this is held, no other thread can access or update
5738   //               the marking bit map
5739 
5740   // Note that we need to hold the freelistLock if we use
5741   // block iterate below; else the iterator might go awry if
5742   // a mutator (or promotion) causes block contents to change
5743   // (for instance if the allocator divvies up a block).
5744   // If we hold the free list lock, for all practical purposes
5745   // young generation GC's can't occur (they'll usually need to
5746   // promote), so we might as well prevent all young generation
5747   // GC's while we do a sweeping step. For the same reason, we might
5748   // as well take the bit map lock for the entire duration of the sweep.
5749 
5750   // check that we hold the requisite locks
5751   assert(have_cms_token(), "Should hold cms token");
5752   assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(), "Should possess CMS token to sweep");
5753   assert_lock_strong(gen->freelistLock());
5754   assert_lock_strong(bitMapLock());
5755 
5756   assert(!_inter_sweep_timer.is_active(), "Was switched off in an outer context");
5757   assert(_intra_sweep_timer.is_active(),  "Was switched on  in an outer context");
5758   gen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
5759                                       _inter_sweep_estimate.padded_average(),
5760                                       _intra_sweep_estimate.padded_average());
5761   gen->setNearLargestChunk();
5762 
5763   {
5764     SweepClosure sweepClosure(this, gen, &_markBitMap, CMSYield);
5765     gen->cmsSpace()->blk_iterate_careful(&sweepClosure);
5766     // We need to free-up/coalesce garbage/blocks from a
5767     // co-terminal free run. This is done in the SweepClosure
5768     // destructor; so, do not remove this scope, else the
5769     // end-of-sweep-census below will be off by a little bit.
5770   }
5771   gen->cmsSpace()->sweep_completed();
5772   gen->cmsSpace()->endSweepFLCensus(sweep_count());
5773   if (should_unload_classes()) {                // unloaded classes this cycle,
5774     _concurrent_cycles_since_last_unload = 0;   // ... reset count
5775   } else {                                      // did not unload classes,
5776     _concurrent_cycles_since_last_unload++;     // ... increment count
5777   }
5778 }
5779 
5780 // Reset CMS data structures (for now just the marking bit map)
5781 // preparatory for the next cycle.
5782 void CMSCollector::reset(bool concurrent) {
5783   if (concurrent) {
5784     CMSTokenSyncWithLocks ts(true, bitMapLock());
5785 
5786     // If the state is not "Resetting", the foreground thread
5787     // has already done a collection and the resetting.
5788     if (_collectorState != Resetting) {
5789       assert(_collectorState == Idling, "The state should only change"
5790         " because the foreground collector has finished the collection");
5791       return;
5792     }
5793 
5794     // Clear the mark bitmap (no grey objects to start with)
5795     // for the next cycle.
5796     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
5797     CMSPhaseAccounting cmspa(this, "reset", _gc_tracer_cm->gc_id(), !PrintGCDetails);
5798 
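         // Clear the bit map in CMSBitMapYieldQuantum-sized chunks so that we
         // can periodically drop the bitMapLock and yield below.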
5799     HeapWord* curAddr = _markBitMap.startWord();
5800     while (curAddr < _markBitMap.endWord()) {
5801       size_t remaining  = pointer_delta(_markBitMap.endWord(), curAddr);
5802       MemRegion chunk(curAddr, MIN2(CMSBitMapYieldQuantum, remaining));
5803       _markBitMap.clear_large_range(chunk);
5804       if (ConcurrentMarkSweepThread::should_yield() &&
5805           !foregroundGCIsActive() &&
5806           CMSYield) {
5807         assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
5808                "CMS thread should hold CMS token");
5809         assert_lock_strong(bitMapLock());
5810         bitMapLock()->unlock();
5811         ConcurrentMarkSweepThread::desynchronize(true);
5812         stopTimer();
5813         if (PrintCMSStatistics != 0) {
5814           incrementYields();
5815         }
5816 
5817         // See the comment in coordinator_yield()
5818         for (unsigned i = 0; i < CMSYieldSleepCount &&
5819                          ConcurrentMarkSweepThread::should_yield() &&
5820                          !CMSCollector::foregroundGCIsActive(); ++i) {
5821           os::sleep(Thread::current(), 1, false);
5822         }
5823 
5824         ConcurrentMarkSweepThread::synchronize(true);
5825         bitMapLock()->lock_without_safepoint_check();
5826         startTimer();
5827       }
5828       curAddr = chunk.end();
5829     }
5830     // A successful mostly concurrent collection has been done.
5831     // Because only the full (i.e., concurrent mode failure) collections
5832     // are being measured for gc overhead limits, clean the "near" flag
5833     // and count.
5834     size_policy()->reset_gc_overhead_limit_count();
5835     _collectorState = Idling;
5836   } else {
5837     // already have the lock
5838     assert(_collectorState == Resetting, "just checking");
5839     assert_lock_strong(bitMapLock());
5840     _markBitMap.clear_all();
5841     _collectorState = Idling;
5842   }
5843 
5844   register_gc_end();
5845 }
5846 
5847 void CMSCollector::do_CMS_operation(CMS_op_type op, GCCause::Cause gc_cause) {
5848   TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
5849   GCTraceTime t(GCCauseString("GC", gc_cause), PrintGC, !PrintGCDetails, NULL, _gc_tracer_cm->gc_id());
5850   TraceCollectorStats tcs(counters());
5851 
5852   switch (op) {
5853     case CMS_op_checkpointRootsInitial: {
5854       SvcGCMarker sgcm(SvcGCMarker::OTHER);
5855       checkpointRootsInitial();
5856       if (PrintGC) {
5857         _cmsGen->printOccupancy("initial-mark");
5858       }
5859       break;
5860     }
5861     case CMS_op_checkpointRootsFinal: {
5862       SvcGCMarker sgcm(SvcGCMarker::OTHER);
5863       checkpointRootsFinal();
5864       if (PrintGC) {
5865         _cmsGen->printOccupancy("remark");
5866       }
5867       break;
5868     }
5869     default:
5870       fatal("No such CMS_op");
5871   }
5872 }
5873 
5874 #ifndef PRODUCT
5875 size_t const CMSCollector::skip_header_HeapWords() {
5876   return FreeChunk::header_size();
5877 }
5878 
5879 // Try and collect here conditions that should hold when
5880 // CMS thread is exiting. The idea is that the foreground GC
5881 // thread should not be blocked if it wants to terminate
5882 // the CMS thread and yet continue to run the VM for a while
5883 // after that.
5884 void CMSCollector::verify_ok_to_terminate() const {
5885   assert(Thread::current()->is_ConcurrentGC_thread(),
5886          "should be called by CMS thread");
5887   assert(!_foregroundGCShouldWait, "should be false");
5888   // We could check here that all the various low-level locks
5889   // are not held by the CMS thread, but that is overkill; see
5890   // also CMSThread::verify_ok_to_terminate() where the CGC_lock
5891   // is checked.
5892 }
5893 #endif
5894 
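     // Printezis marks ("P-bits"): when an object's header cannot yet be read,
     // CMS records the block's extent in the mark bit map by setting the bits at
     // addr and addr + 1 and a bit at the last word of the block; the size is
     // then (last marked word - addr + 1), as computed below.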
5895 size_t CMSCollector::block_size_using_printezis_bits(HeapWord* addr) const {
5896   assert(_markBitMap.isMarked(addr) && _markBitMap.isMarked(addr + 1),
5897          "missing Printezis mark?");
5898   HeapWord* nextOneAddr = _markBitMap.getNextMarkedWordAddress(addr + 2);
5899   size_t size = pointer_delta(nextOneAddr + 1, addr);
5900   assert(size == CompactibleFreeListSpace::adjustObjectSize(size),
5901          "alignment problem");
5902   assert(size >= 3, "Necessary for Printezis marks to work");
5903   return size;
5904 }
5905 
5906 // A variant of the above (block_size_using_printezis_bits()) except
5907 // that we return 0 if the P-bits are not yet set.
5908 size_t CMSCollector::block_size_if_printezis_bits(HeapWord* addr) const {
5909   if (_markBitMap.isMarked(addr + 1)) {
5910     assert(_markBitMap.isMarked(addr), "P-bit can be set only for marked objects");
5911     HeapWord* nextOneAddr = _markBitMap.getNextMarkedWordAddress(addr + 2);
5912     size_t size = pointer_delta(nextOneAddr + 1, addr);
5913     assert(size == CompactibleFreeListSpace::adjustObjectSize(size),
5914            "alignment problem");
5915     assert(size >= 3, "Necessary for Printezis marks to work");
5916     return size;
5917   }
5918   return 0;
5919 }
5920 
5921 HeapWord* CMSCollector::next_card_start_after_block(HeapWord* addr) const {
5922   size_t sz = 0;
5923   oop p = (oop)addr;
5924   if (p->klass_or_null() != NULL) {
5925     sz = CompactibleFreeListSpace::adjustObjectSize(p->size());
5926   } else {
5927     sz = block_size_using_printezis_bits(addr);
5928   }
5929   assert(sz > 0, "size must be nonzero");
5930   HeapWord* next_block = addr + sz;
5931   HeapWord* next_card  = (HeapWord*)round_to((uintptr_t)next_block,
5932                                              CardTableModRefBS::card_size);
5933   assert(round_down((uintptr_t)addr,      CardTableModRefBS::card_size) <
5934          round_down((uintptr_t)next_card, CardTableModRefBS::card_size),
5935          "must be different cards");
5936   return next_card;
5937 }
5938 
5939 
5940 // CMS Bit Map Wrapper /////////////////////////////////////////
5941 
5942 // Construct a CMS bit map infrastructure, but don't create the
5943 // bit vector itself. That is done by a separate call to CMSBitMap::allocate()
5944 // further below.
5945 CMSBitMap::CMSBitMap(int shifter, int mutex_rank, const char* mutex_name):
5946   _bm(),
5947   _shifter(shifter),
5948   _lock(mutex_rank >= 0 ? new Mutex(mutex_rank, mutex_name, true) : NULL)
5949 {
5950   _bmStartWord = 0;
5951   _bmWordSize  = 0;
5952 }
5953 
5954 bool CMSBitMap::allocate(MemRegion mr) {
5955   _bmStartWord = mr.start();
5956   _bmWordSize  = mr.word_size();
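       // One bit covers (1 << _shifter) heap words, so we need
       // (_bmWordSize >> _shifter) bits; shifting by LogBitsPerByte converts
       // that to bytes, and the '+ 1' conservatively rounds up.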
5957   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
5958                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
5959   if (!brs.is_reserved()) {
5960     warning("CMS bit map allocation failure");
5961     return false;
5962   }
5963   // For now we'll just commit all of the bit map up front.
5964   // Later on we'll try to be more parsimonious with swap.
5965   if (!_virtual_space.initialize(brs, brs.size())) {
5966     warning("CMS bit map backing store failure");
5967     return false;
5968   }
5969   assert(_virtual_space.committed_size() == brs.size(),
5970          "didn't reserve backing store for all of CMS bit map?");
5971   _bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
5972   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
5973          _bmWordSize, "inconsistency in bit map sizing");
5974   _bm.set_size(_bmWordSize >> _shifter);
5975 
5976   // bm.clear(); // can we rely on getting zero'd memory? verify below
5977   assert(isAllClear(),
5978          "Expected zero'd memory from ReservedSpace constructor");
5979   assert(_bm.size() == heapWordDiffToOffsetDiff(sizeInWords()),
5980          "consistency check");
5981   return true;
5982 }
5983 
5984 void CMSBitMap::dirty_range_iterate_clear(MemRegion mr, MemRegionClosure* cl) {
5985   HeapWord *next_addr, *end_addr, *last_addr;
5986   assert_locked();
5987   assert(covers(mr), "out-of-range error");
5988   // XXX assert that start and end are appropriately aligned
5989   for (next_addr = mr.start(), end_addr = mr.end();
5990        next_addr < end_addr; next_addr = last_addr) {
5991     MemRegion dirty_region = getAndClearMarkedRegion(next_addr, end_addr);
5992     last_addr = dirty_region.end();
5993     if (!dirty_region.is_empty()) {
5994       cl->do_MemRegion(dirty_region);
5995     } else {
5996       assert(last_addr == end_addr, "program logic");
5997       return;
5998     }
5999   }
6000 }
6001 
6002 void CMSBitMap::print_on_error(outputStream* st, const char* prefix) const {
6003   _bm.print_on_error(st, prefix);
6004 }
6005 
6006 #ifndef PRODUCT
6007 void CMSBitMap::assert_locked() const {
6008   CMSLockVerifier::assert_locked(lock());
6009 }
6010 
6011 bool CMSBitMap::covers(MemRegion mr) const {
6012   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
6013   assert((size_t)_bm.size() == (_bmWordSize >> _shifter),
6014          "size inconsistency");
6015   return (mr.start() >= _bmStartWord) &&
6016          (mr.end()   <= endWord());
6017 }
6018 
6019 bool CMSBitMap::covers(HeapWord* start, size_t size) const {
6020     return (start >= _bmStartWord && (start + size) <= endWord());
6021 }
6022 
6023 void CMSBitMap::verifyNoOneBitsInRange(HeapWord* left, HeapWord* right) {
6024   // verify that there are no 1 bits in the interval [left, right)
6025   FalseBitMapClosure falseBitMapClosure;
6026   iterate(&falseBitMapClosure, left, right);
6027 }
6028 
6029 void CMSBitMap::region_invariant(MemRegion mr)
6030 {
6031   assert_locked();
6032   // mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
6033   assert(!mr.is_empty(), "unexpected empty region");
6034   assert(covers(mr), "mr should be covered by bit map");
6035   // convert address range into offset range
6036   size_t start_ofs = heapWordToOffset(mr.start());
6037   // Make sure that end() is appropriately aligned
6038   assert(mr.end() == (HeapWord*)round_to((intptr_t)mr.end(),
6039                         (1 << (_shifter+LogHeapWordSize))),
6040          "Misaligned mr.end()");
6041   size_t end_ofs   = heapWordToOffset(mr.end());
6042   assert(end_ofs > start_ofs, "Should mark at least one bit");
6043 }
6044 
6045 #endif
6046 
6047 bool CMSMarkStack::allocate(size_t size) {
6048   // allocate a stack of the requisite depth
6049   ReservedSpace rs(ReservedSpace::allocation_align_size_up(
6050                    size * sizeof(oop)));
6051   if (!rs.is_reserved()) {
6052     warning("CMSMarkStack allocation failure");
6053     return false;
6054   }
6055   if (!_virtual_space.initialize(rs, rs.size())) {
6056     warning("CMSMarkStack backing store failure");
6057     return false;
6058   }
6059   assert(_virtual_space.committed_size() == rs.size(),
6060          "didn't reserve backing store for all of CMS stack?");
6061   _base = (oop*)(_virtual_space.low());
6062   _index = 0;
6063   _capacity = size;
6064   NOT_PRODUCT(_max_depth = 0);
6065   return true;
6066 }
6067 
6068 // XXX FIX ME !!! In the MT case we come in here holding a
6069 // leaf lock. For printing we need to take a further lock
6070 // which has lower rank. We need to recalibrate the two
6071 // lock-ranks involved in order to be able to print the
6072 // messages below. (Or defer the printing to the caller.
6073 // For now we take the expedient path of just disabling the
6074 // messages for the problematic case.)
6075 void CMSMarkStack::expand() {
6076   assert(_capacity <= MarkStackSizeMax, "stack bigger than permitted");
6077   if (_capacity == MarkStackSizeMax) {
6078     if (_hit_limit++ == 0 && !CMSConcurrentMTEnabled && PrintGCDetails) {
6079       // We print a warning message only once per CMS cycle.
6080       gclog_or_tty->print_cr(" (benign) Hit CMSMarkStack max size limit");
6081     }
6082     return;
6083   }
6084   // Double capacity if possible
6085   size_t new_capacity = MIN2(_capacity*2, MarkStackSizeMax);
6086   // Do not give up existing stack until we have managed to
6087   // get the double capacity that we desired.
6088   ReservedSpace rs(ReservedSpace::allocation_align_size_up(
6089                    new_capacity * sizeof(oop)));
6090   if (rs.is_reserved()) {
6091     // Release the backing store associated with old stack
6092     _virtual_space.release();
6093     // Reinitialize virtual space for new stack
6094     if (!_virtual_space.initialize(rs, rs.size())) {
6095       fatal("Not enough swap for expanded marking stack");
6096     }
6097     _base = (oop*)(_virtual_space.low());
6098     _index = 0;
6099     _capacity = new_capacity;
6100   } else if (_failed_double++ == 0 && !CMSConcurrentMTEnabled && PrintGCDetails) {
6101     // Failed to double the capacity; continue with the existing stack.
6102     // We print a detail message only once per CMS cycle.
6103     gclog_or_tty->print(" (benign) Failed to expand marking stack from "SIZE_FORMAT"K to "
6104             SIZE_FORMAT"K",
6105             _capacity / K, new_capacity / K);
6106   }
6107 }
6108 
6109 
6110 // Closures
6111 // XXX: there seems to be a lot of code duplication here;
6112 // should refactor and consolidate common code.
6113 
6114 // This closure is used to mark refs into the CMS generation in
6115 // the CMS bit map. Called at the first checkpoint. This closure
6116 // assumes that we do not need to re-mark dirty cards; if the CMS
6117 // generation on which this is used is not the oldest
6118 // generation, then this will lose younger_gen cards!
6119 
6120 MarkRefsIntoClosure::MarkRefsIntoClosure(
6121   MemRegion span, CMSBitMap* bitMap):
6122     _span(span),
6123     _bitMap(bitMap)
6124 {
6125     assert(_ref_processor == NULL, "deliberately left NULL");
6126     assert(_bitMap->covers(_span), "_bitMap/_span mismatch");
6127 }
6128 
6129 void MarkRefsIntoClosure::do_oop(oop obj) {
6130   // if p points into _span, then mark corresponding bit in _markBitMap
6131   assert(obj->is_oop(), "expected an oop");
6132   HeapWord* addr = (HeapWord*)obj;
6133   if (_span.contains(addr)) {
6134     // this should be made more efficient
6135     _bitMap->mark(addr);
6136   }
6137 }
6138 
6139 void MarkRefsIntoClosure::do_oop(oop* p)       { MarkRefsIntoClosure::do_oop_work(p); }
6140 void MarkRefsIntoClosure::do_oop(narrowOop* p) { MarkRefsIntoClosure::do_oop_work(p); }
6141 
6142 Par_MarkRefsIntoClosure::Par_MarkRefsIntoClosure(
6143   MemRegion span, CMSBitMap* bitMap):
6144     _span(span),
6145     _bitMap(bitMap)
6146 {
6147     assert(_ref_processor == NULL, "deliberately left NULL");
6148     assert(_bitMap->covers(_span), "_bitMap/_span mismatch");
6149 }
6150 
6151 void Par_MarkRefsIntoClosure::do_oop(oop obj) {
6152   // if p points into _span, then mark corresponding bit in _markBitMap
6153   assert(obj->is_oop(), "expected an oop");
6154   HeapWord* addr = (HeapWord*)obj;
6155   if (_span.contains(addr)) {
6156     // this should be made more efficient
6157     _bitMap->par_mark(addr);
6158   }
6159 }
6160 
6161 void Par_MarkRefsIntoClosure::do_oop(oop* p)       { Par_MarkRefsIntoClosure::do_oop_work(p); }
6162 void Par_MarkRefsIntoClosure::do_oop(narrowOop* p) { Par_MarkRefsIntoClosure::do_oop_work(p); }
6163 
6164 // A variant of the above, used for CMS marking verification.
6165 MarkRefsIntoVerifyClosure::MarkRefsIntoVerifyClosure(
6166   MemRegion span, CMSBitMap* verification_bm, CMSBitMap* cms_bm):
6167     _span(span),
6168     _verification_bm(verification_bm),
6169     _cms_bm(cms_bm)
6170 {
6171     assert(_ref_processor == NULL, "deliberately left NULL");
6172     assert(_verification_bm->covers(_span), "_verification_bm/_span mismatch");
6173 }
6174 
6175 void MarkRefsIntoVerifyClosure::do_oop(oop obj) {
6176   // if p points into _span, then mark corresponding bit in _markBitMap
6177   assert(obj->is_oop(), "expected an oop");
6178   HeapWord* addr = (HeapWord*)obj;
6179   if (_span.contains(addr)) {
6180     _verification_bm->mark(addr);
6181     if (!_cms_bm->isMarked(addr)) {
6182       oop(addr)->print();
6183       gclog_or_tty->print_cr(" (" INTPTR_FORMAT " should have been marked)", addr);
6184       fatal("... aborting");
6185     }
6186   }
6187 }
6188 
6189 void MarkRefsIntoVerifyClosure::do_oop(oop* p)       { MarkRefsIntoVerifyClosure::do_oop_work(p); }
6190 void MarkRefsIntoVerifyClosure::do_oop(narrowOop* p) { MarkRefsIntoVerifyClosure::do_oop_work(p); }
6191 
6192 //////////////////////////////////////////////////
6193 // MarkRefsIntoAndScanClosure
6194 //////////////////////////////////////////////////
6195 
6196 MarkRefsIntoAndScanClosure::MarkRefsIntoAndScanClosure(MemRegion span,
6197                                                        ReferenceProcessor* rp,
6198                                                        CMSBitMap* bit_map,
6199                                                        CMSBitMap* mod_union_table,
6200                                                        CMSMarkStack*  mark_stack,
6201                                                        CMSCollector* collector,
6202                                                        bool should_yield,
6203                                                        bool concurrent_precleaning):
6204   _collector(collector),
6205   _span(span),
6206   _bit_map(bit_map),
6207   _mark_stack(mark_stack),
6208   _pushAndMarkClosure(collector, span, rp, bit_map, mod_union_table,
6209                       mark_stack, concurrent_precleaning),
6210   _yield(should_yield),
6211   _concurrent_precleaning(concurrent_precleaning),
6212   _freelistLock(NULL)
6213 {
6214   _ref_processor = rp;
6215   assert(_ref_processor != NULL, "_ref_processor shouldn't be NULL");
6216 }
6217 
6218 // This closure is used to mark refs into the CMS generation at the
6219 // second (final) checkpoint, and to scan and transitively follow
6220 // the unmarked oops. It is also used during the concurrent precleaning
6221 // phase while scanning objects on dirty cards in the CMS generation.
6222 // The marks are made in the marking bit map and the marking stack is
6223 // used for keeping the (newly) grey objects during the scan.
6224 // The parallel version (Par_...) appears further below.
6225 void MarkRefsIntoAndScanClosure::do_oop(oop obj) {
6226   if (obj != NULL) {
6227     assert(obj->is_oop(), "expected an oop");
6228     HeapWord* addr = (HeapWord*)obj;
6229     assert(_mark_stack->isEmpty(), "pre-condition (eager drainage)");
6230     assert(_collector->overflow_list_is_empty(),
6231            "overflow list should be empty");
6232     if (_span.contains(addr) &&
6233         !_bit_map->isMarked(addr)) {
6234       // mark bit map (object is now grey)
6235       _bit_map->mark(addr);
6236       // push on marking stack (stack should be empty), and drain the
6237       // stack by applying this closure to the oops in the oops popped
6238       // from the stack (i.e. blacken the grey objects)
6239       bool res = _mark_stack->push(obj);
6240       assert(res, "Should have space to push on empty stack");
6241       do {
6242         oop new_oop = _mark_stack->pop();
6243         assert(new_oop != NULL && new_oop->is_oop(), "Expected an oop");
6244         assert(_bit_map->isMarked((HeapWord*)new_oop),
6245                "only grey objects on this stack");
6246         // iterate over the oops in this oop, marking and pushing
6247         // the ones in CMS heap (i.e. in _span).
6248         new_oop->oop_iterate(&_pushAndMarkClosure);
6249         // check if it's time to yield
6250         do_yield_check();
6251       } while (!_mark_stack->isEmpty() ||
6252                (!_concurrent_precleaning && take_from_overflow_list()));
6253         // if marking stack is empty, and we are not doing this
6254         // during precleaning, then check the overflow list
6255     }
6256     assert(_mark_stack->isEmpty(), "post-condition (eager drainage)");
6257     assert(_collector->overflow_list_is_empty(),
6258            "overflow list was drained above");
6259     // We could restore evacuated mark words, if any, used for
6260     // overflow list links here because the overflow list is
6261     // provably empty here. That would reduce the maximum
6262     // size requirements for preserved_{oop,mark}_stack.
6263     // But we'll just postpone it until we are all done
6264     // so we can just stream through.
6265     if (!_concurrent_precleaning && CMSOverflowEarlyRestoration) {
6266       _collector->restore_preserved_marks_if_any();
6267       assert(_collector->no_preserved_marks(), "No preserved marks");
6268     }
6269     assert(!CMSOverflowEarlyRestoration || _collector->no_preserved_marks(),
6270            "All preserved marks should have been restored above");
6271   }
6272 }
6273 
6274 void MarkRefsIntoAndScanClosure::do_oop(oop* p)       { MarkRefsIntoAndScanClosure::do_oop_work(p); }
6275 void MarkRefsIntoAndScanClosure::do_oop(narrowOop* p) { MarkRefsIntoAndScanClosure::do_oop_work(p); }
6276 
6277 void MarkRefsIntoAndScanClosure::do_yield_work() {
6278   assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
6279          "CMS thread should hold CMS token");
6280   assert_lock_strong(_freelistLock);
6281   assert_lock_strong(_bit_map->lock());
6282   // relinquish the free_list_lock and bitMaplock()
6283   _bit_map->lock()->unlock();
6284   _freelistLock->unlock();
6285   ConcurrentMarkSweepThread::desynchronize(true);
6286   _collector->stopTimer();
6287   if (PrintCMSStatistics != 0) {
6288     _collector->incrementYields();
6289   }
6290 
6291   // See the comment in coordinator_yield()
6292   for (unsigned i = 0;
6293        i < CMSYieldSleepCount &&
6294        ConcurrentMarkSweepThread::should_yield() &&
6295        !CMSCollector::foregroundGCIsActive();
6296        ++i) {
6297     os::sleep(Thread::current(), 1, false);
6298   }
6299 
6300   ConcurrentMarkSweepThread::synchronize(true);
6301   _freelistLock->lock_without_safepoint_check();
6302   _bit_map->lock()->lock_without_safepoint_check();
6303   _collector->startTimer();
6304 }
6305 
6306 ///////////////////////////////////////////////////////////
6307 // Par_MarkRefsIntoAndScanClosure: a parallel version of
6308 //                                 MarkRefsIntoAndScanClosure
6309 ///////////////////////////////////////////////////////////
6310 Par_MarkRefsIntoAndScanClosure::Par_MarkRefsIntoAndScanClosure(
6311   CMSCollector* collector, MemRegion span, ReferenceProcessor* rp,
6312   CMSBitMap* bit_map, OopTaskQueue* work_queue):
6313   _span(span),
6314   _bit_map(bit_map),
6315   _work_queue(work_queue),
6316   _low_water_mark(MIN2((uint)(work_queue->max_elems()/4),
6317                        (uint)(CMSWorkQueueDrainThreshold * ParallelGCThreads))),
6318   _par_pushAndMarkClosure(collector, span, rp, bit_map, work_queue)
6319 {
6320   _ref_processor = rp;
6321   assert(_ref_processor != NULL, "_ref_processor shouldn't be NULL");
6322 }
6323 
6324 // This closure is used to mark refs into the CMS generation at the
6325 // second (final) checkpoint, and to scan and transitively follow
6326 // the unmarked oops. The marks are made in the marking bit map and
6327 // the work_queue is used for keeping the (newly) grey objects during
6328 // the scan phase whence they are also available for stealing by parallel
6329 // threads. Since the marking bit map is shared, updates are
6330 // synchronized (via CAS).
6331 void Par_MarkRefsIntoAndScanClosure::do_oop(oop obj) {
6332   if (obj != NULL) {
6333     // Ignore mark word because this could be an already marked oop
6334     // that may be chained at the end of the overflow list.
6335     assert(obj->is_oop(true), "expected an oop");
6336     HeapWord* addr = (HeapWord*)obj;
6337     if (_span.contains(addr) &&
6338         !_bit_map->isMarked(addr)) {
6339       // mark bit map (object will become grey):
6340       // It is possible for several threads to be
6341       // trying to "claim" this object concurrently;
6342       // the unique thread that succeeds in marking the
6343       // object first will do the subsequent push on
6344       // to the work queue (or overflow list).
6345       if (_bit_map->par_mark(addr)) {
6346         // push on work_queue (which may not be empty), and trim the
6347         // queue to an appropriate length by applying this closure to
6348         // the oops in the oops popped from the stack (i.e. blacken the
6349         // grey objects)
6350         bool res = _work_queue->push(obj);
6351         assert(res, "Low water mark should be less than capacity?");
6352         trim_queue(_low_water_mark);
6353       } // Else, another thread claimed the object
6354     }
6355   }
6356 }
6357 
6358 void Par_MarkRefsIntoAndScanClosure::do_oop(oop* p)       { Par_MarkRefsIntoAndScanClosure::do_oop_work(p); }
6359 void Par_MarkRefsIntoAndScanClosure::do_oop(narrowOop* p) { Par_MarkRefsIntoAndScanClosure::do_oop_work(p); }
6360 
6361 // This closure is used to rescan the marked objects on the dirty cards
6362 // in the mod union table and the card table proper.
6363 size_t ScanMarkedObjectsAgainCarefullyClosure::do_object_careful_m(
6364   oop p, MemRegion mr) {
6365 
6366   size_t size = 0;
6367   HeapWord* addr = (HeapWord*)p;
6368   DEBUG_ONLY(_collector->verify_work_stacks_empty();)
6369   assert(_span.contains(addr), "we are scanning the CMS generation");
6370   // check if it's time to yield
6371   if (do_yield_check()) {
6372     // We yielded for some foreground stop-the-world work,
6373     // and we have been asked to abort this ongoing preclean cycle.
6374     return 0;
6375   }
6376   if (_bitMap->isMarked(addr)) {
6377     // it's marked; is it potentially uninitialized?
6378     if (p->klass_or_null() != NULL) {
6379         // an initialized object; ignore mark word in verification below
6380         // since we are running concurrent with mutators
6381         assert(p->is_oop(true), "should be an oop");
6382         if (p->is_objArray()) {
6383           // objArrays are precisely marked; restrict scanning
6384           // to dirty cards only.
6385           size = CompactibleFreeListSpace::adjustObjectSize(
6386                    p->oop_iterate(_scanningClosure, mr));
6387         } else {
6388           // A non-array may have been imprecisely marked; we need
6389           // to scan the object in its entirety.
6390           size = CompactibleFreeListSpace::adjustObjectSize(
6391                    p->oop_iterate(_scanningClosure));
6392         }
6393         #ifdef ASSERT
6394           size_t direct_size =
6395             CompactibleFreeListSpace::adjustObjectSize(p->size());
6396           assert(size == direct_size, "Inconsistency in size");
6397           assert(size >= 3, "Necessary for Printezis marks to work");
6398           if (!_bitMap->isMarked(addr+1)) {
6399             _bitMap->verifyNoOneBitsInRange(addr+2, addr+size);
6400           } else {
6401             _bitMap->verifyNoOneBitsInRange(addr+2, addr+size-1);
6402             assert(_bitMap->isMarked(addr+size-1),
6403                    "inconsistent Printezis mark");
6404           }
6405         #endif // ASSERT
6406     } else {
6407       // An uninitialized object.
6408       assert(_bitMap->isMarked(addr+1), "missing Printezis mark?");
6409       HeapWord* nextOneAddr = _bitMap->getNextMarkedWordAddress(addr + 2);
6410       size = pointer_delta(nextOneAddr + 1, addr);
6411       assert(size == CompactibleFreeListSpace::adjustObjectSize(size),
6412              "alignment problem");
6413       // Note that pre-cleaning needn't redirty the card. OopDesc::set_klass()
6414       // will dirty the card when the klass pointer is installed in the
6415       // object (signaling the completion of initialization).
6416     }
6417   } else {
6418     // Either a not yet marked object or an uninitialized object
6419     if (p->klass_or_null() == NULL) {
6420       // An uninitialized object, skip to the next card, since
6421       // we may not be able to read its P-bits yet.
6422       assert(size == 0, "Initial value");
6423     } else {
6424       // An object not (yet) reached by marking: we merely need to
6425       // compute its size so as to go look at the next block.
6426       assert(p->is_oop(true), "should be an oop");
6427       size = CompactibleFreeListSpace::adjustObjectSize(p->size());
6428     }
6429   }
6430   DEBUG_ONLY(_collector->verify_work_stacks_empty();)
6431   return size;
6432 }
6433 
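// Note: most of the do_yield_work() methods in this file follow the same
// protocol: drop the locks held across the concurrent work (here the free
// list lock and the bit map lock), give up the CMS token and stop the
// timer, sleep in 1ms slices for at most CMSYieldSleepCount iterations
// while a yield is still being requested and no foreground collection is
// active, then re-take the token, reacquire the locks and restart the
// timer.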
6434 void ScanMarkedObjectsAgainCarefullyClosure::do_yield_work() {
6435   assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
6436          "CMS thread should hold CMS token");
6437   assert_lock_strong(_freelistLock);
6438   assert_lock_strong(_bitMap->lock());
6439   // relinquish the freelistLock and the bitMap lock
6440   _bitMap->lock()->unlock();
6441   _freelistLock->unlock();
6442   ConcurrentMarkSweepThread::desynchronize(true);
6443   _collector->stopTimer();
6444   if (PrintCMSStatistics != 0) {
6445     _collector->incrementYields();
6446   }
6447 
6448   // See the comment in coordinator_yield()
6449   for (unsigned i = 0; i < CMSYieldSleepCount &&
6450                    ConcurrentMarkSweepThread::should_yield() &&
6451                    !CMSCollector::foregroundGCIsActive(); ++i) {
6452     os::sleep(Thread::current(), 1, false);
6453   }
6454 
6455   ConcurrentMarkSweepThread::synchronize(true);
6456   _freelistLock->lock_without_safepoint_check();
6457   _bitMap->lock()->lock_without_safepoint_check();
6458   _collector->startTimer();
6459 }
6460 
6461 
6462 //////////////////////////////////////////////////////////////////
6463 // SurvivorSpacePrecleanClosure
6464 //////////////////////////////////////////////////////////////////
6465 // This (single-threaded) closure is used to preclean the oops in
6466 // the survivor spaces.
6467 size_t SurvivorSpacePrecleanClosure::do_object_careful(oop p) {
6468 
6469   HeapWord* addr = (HeapWord*)p;
6470   DEBUG_ONLY(_collector->verify_work_stacks_empty();)
6471   assert(!_span.contains(addr), "we are scanning the survivor spaces");
6472   assert(p->klass_or_null() != NULL, "object should be initialized");
6473   // an initialized object; ignore mark word in verification below
6474   // since we are running concurrent with mutators
6475   assert(p->is_oop(true), "should be an oop");
6476   // Note that we do not yield while we iterate over
6477   // the interior oops of p, pushing the relevant ones
6478   // on our marking stack.
6479   size_t size = p->oop_iterate(_scanning_closure);
6480   do_yield_check();
6481   // Observe that below, we do not abandon the preclean
6482   // phase as soon as we should; rather we empty the
6483   // marking stack before returning. This is to satisfy
6484   // some existing assertions. In general, it may be a
6485   // good idea to abort immediately and complete the marking
6486   // from the grey objects at a later time.
6487   while (!_mark_stack->isEmpty()) {
6488     oop new_oop = _mark_stack->pop();
6489     assert(new_oop != NULL && new_oop->is_oop(), "Expected an oop");
6490     assert(_bit_map->isMarked((HeapWord*)new_oop),
6491            "only grey objects on this stack");
6492     // iterate over the oops in this oop, marking and pushing
6493     // the ones in CMS heap (i.e. in _span).
6494     new_oop->oop_iterate(_scanning_closure);
6495     // check if it's time to yield
6496     do_yield_check();
6497   }
6498   unsigned int after_count =
6499     GenCollectedHeap::heap()->total_collections();
6500   bool abort = (_before_count != after_count) ||
6501                _collector->should_abort_preclean();
6502   return abort ? 0 : size;
6503 }
6504 
6505 void SurvivorSpacePrecleanClosure::do_yield_work() {
6506   assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
6507          "CMS thread should hold CMS token");
6508   assert_lock_strong(_bit_map->lock());
6509   // Relinquish the bit map lock
6510   _bit_map->lock()->unlock();
6511   ConcurrentMarkSweepThread::desynchronize(true);
6512   _collector->stopTimer();
6513   if (PrintCMSStatistics != 0) {
6514     _collector->incrementYields();
6515   }
6516 
6517   // See the comment in coordinator_yield()
6518   for (unsigned i = 0; i < CMSYieldSleepCount &&
6519                        ConcurrentMarkSweepThread::should_yield() &&
6520                        !CMSCollector::foregroundGCIsActive(); ++i) {
6521     os::sleep(Thread::current(), 1, false);
6522   }
6523 
6524   ConcurrentMarkSweepThread::synchronize(true);
6525   _bit_map->lock()->lock_without_safepoint_check();
6526   _collector->startTimer();
6527 }
6528 
6529 // This closure is used to rescan the marked objects on the dirty cards
6530 // in the mod union table and the card table proper. In the parallel
6531 // case, although the bitMap is shared, we do a single read so the
6532 // isMarked() query is "safe".
6533 bool ScanMarkedObjectsAgainClosure::do_object_bm(oop p, MemRegion mr) {
6534   // Ignore mark word because we are running concurrent with mutators
6535   assert(p->is_oop_or_null(true), err_msg("Expected an oop or NULL at " PTR_FORMAT, p2i(p)));
6536   HeapWord* addr = (HeapWord*)p;
6537   assert(_span.contains(addr), "we are scanning the CMS generation");
6538   bool is_obj_array = false;
6539   #ifdef ASSERT
6540     if (!_parallel) {
6541       assert(_mark_stack->isEmpty(), "pre-condition (eager drainage)");
6542       assert(_collector->overflow_list_is_empty(),
6543              "overflow list should be empty");
6544 
6545     }
6546   #endif // ASSERT
6547   if (_bit_map->isMarked(addr)) {
6548     // Obj arrays are precisely marked, non-arrays are not;
6549     // so we scan objArrays precisely and non-arrays in their
6550     // entirety.
6551     if (p->is_objArray()) {
6552       is_obj_array = true;
6553       if (_parallel) {
6554         p->oop_iterate(_par_scan_closure, mr);
6555       } else {
6556         p->oop_iterate(_scan_closure, mr);
6557       }
6558     } else {
6559       if (_parallel) {
6560         p->oop_iterate(_par_scan_closure);
6561       } else {
6562         p->oop_iterate(_scan_closure);
6563       }
6564     }
6565   }
6566   #ifdef ASSERT
6567     if (!_parallel) {
6568       assert(_mark_stack->isEmpty(), "post-condition (eager drainage)");
6569       assert(_collector->overflow_list_is_empty(),
6570              "overflow list should be empty");
6571 
6572     }
6573   #endif // ASSERT
6574   return is_obj_array;
6575 }
6576 
6577 MarkFromRootsClosure::MarkFromRootsClosure(CMSCollector* collector,
6578                         MemRegion span,
6579                         CMSBitMap* bitMap, CMSMarkStack*  markStack,
6580                         bool should_yield, bool verifying):
6581   _collector(collector),
6582   _span(span),
6583   _bitMap(bitMap),
6584   _mut(&collector->_modUnionTable),
6585   _markStack(markStack),
6586   _yield(should_yield),
6587   _skipBits(0)
6588 {
6589   assert(_markStack->isEmpty(), "stack should be empty");
6590   _finger = _bitMap->startWord();
6591   _threshold = _finger;
6592   assert(_collector->_restart_addr == NULL, "Sanity check");
6593   assert(_span.contains(_finger), "Out of bounds _finger?");
6594   DEBUG_ONLY(_verifying = verifying;)
6595 }
6596 
6597 void MarkFromRootsClosure::reset(HeapWord* addr) {
6598   assert(_markStack->isEmpty(), "would cause duplicates on stack");
6599   assert(_span.contains(addr), "Out of bounds _finger?");
6600   _finger = addr;
6601   _threshold = (HeapWord*)round_to(
6602                  (intptr_t)_finger, CardTableModRefBS::card_size);
6603 }
6604 
6605 // Should revisit to see if this should be restructured for
6606 // greater efficiency.
6607 bool MarkFromRootsClosure::do_bit(size_t offset) {
6608   if (_skipBits > 0) {
6609     _skipBits--;
6610     return true;
6611   }
6612   // convert offset into a HeapWord*
6613   HeapWord* addr = _bitMap->startWord() + offset;
6614   assert(_bitMap->endWord() && addr < _bitMap->endWord(),
6615          "address out of range");
6616   assert(_bitMap->isMarked(addr), "tautology");
6617   if (_bitMap->isMarked(addr+1)) {
6618     // this is an allocated but not yet initialized object
6619     assert(_skipBits == 0, "tautology");
6620     _skipBits = 2;  // skip next two marked bits ("Printezis-marks")
6621     oop p = oop(addr);
6622     if (p->klass_or_null() == NULL) {
6623       DEBUG_ONLY(if (!_verifying) {)
6624         // We re-dirty the cards on which this object lies and increase
6625         // the _threshold so that we'll come back to scan this object
6626         // during the preclean or remark phase. (CMSCleanOnEnter)
6627         if (CMSCleanOnEnter) {
6628           size_t sz = _collector->block_size_using_printezis_bits(addr);
6629           HeapWord* end_card_addr   = (HeapWord*)round_to(
6630                                          (intptr_t)(addr+sz), CardTableModRefBS::card_size);
6631           MemRegion redirty_range = MemRegion(addr, end_card_addr);
6632           assert(!redirty_range.is_empty(), "Arithmetical tautology");
6633           // Bump _threshold to end_card_addr; note that
6634           // _threshold cannot possibly exceed end_card_addr, anyhow.
6635           // This prevents future clearing of the card as the scan proceeds
6636           // to the right.
6637           assert(_threshold <= end_card_addr,
6638                  "Because we are just scanning into this object");
6639           if (_threshold < end_card_addr) {
6640             _threshold = end_card_addr;
6641           }
6642           if (p->klass_or_null() != NULL) {
6643             // Redirty the range of cards...
6644             _mut->mark_range(redirty_range);
6645           } // ...else the setting of klass will dirty the card anyway.
6646         }
6647       DEBUG_ONLY(})
6648       return true;
6649     }
6650   }
6651   scanOopsInOop(addr);
6652   return true;
6653 }
6654 
6655 // We take a break if we've been at this for a while,
6656 // so as to avoid monopolizing the locks involved.
6657 void MarkFromRootsClosure::do_yield_work() {
6658   // First give up the locks, then yield, then re-lock
6659   // We should probably use a constructor/destructor idiom to
6660   // do this unlock/lock or modify the MutexUnlocker class to
6661   // serve our purpose. XXX
6662   assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
6663          "CMS thread should hold CMS token");
6664   assert_lock_strong(_bitMap->lock());
6665   _bitMap->lock()->unlock();
6666   ConcurrentMarkSweepThread::desynchronize(true);
6667   _collector->stopTimer();
6668   if (PrintCMSStatistics != 0) {
6669     _collector->incrementYields();
6670   }
6671 
6672   // See the comment in coordinator_yield()
6673   for (unsigned i = 0; i < CMSYieldSleepCount &&
6674                        ConcurrentMarkSweepThread::should_yield() &&
6675                        !CMSCollector::foregroundGCIsActive(); ++i) {
6676     os::sleep(Thread::current(), 1, false);
6677   }
6678 
6679   ConcurrentMarkSweepThread::synchronize(true);
6680   _bitMap->lock()->lock_without_safepoint_check();
6681   _collector->startTimer();
6682 }
6683 
6684 void MarkFromRootsClosure::scanOopsInOop(HeapWord* ptr) {
6685   assert(_bitMap->isMarked(ptr), "expected bit to be set");
6686   assert(_markStack->isEmpty(),
6687          "should drain stack to limit stack usage");
6688   // convert ptr to an oop preparatory to scanning
6689   oop obj = oop(ptr);
6690   // Ignore mark word in verification below, since we
6691   // may be running concurrent with mutators.
6692   assert(obj->is_oop(true), "should be an oop");
6693   assert(_finger <= ptr, "_finger runneth ahead");
6694   // advance the finger to right end of this object
6695   _finger = ptr + obj->size();
6696   assert(_finger > ptr, "we just incremented it above");
6697   // On large heaps, it may take us some time to get through
6698   // the marking phase. During
6699   // this time it's possible that a lot of mutations have
6700   // accumulated in the card table and the mod union table --
6701   // these mutation records are redundant until we have
6702   // actually traced into the corresponding card.
6703   // Here, we check whether advancing the finger would make
6704   // us cross into a new card, and if so clear corresponding
6705   // cards in the MUT (preclean them in the card-table in the
6706   // future).
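  // For instance (illustrative numbers only): on a typical 64-bit build
  // with 512-byte cards (64 HeapWords per card), if _threshold stands at
  // word offset 128 of the span and the finger advances to word offset
  // 200, the code below clears the MUT over [128, 256) and bumps
  // _threshold to 256, the next card boundary at or beyond the finger.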
6707 
6708   DEBUG_ONLY(if (!_verifying) {)
6709     // The clean-on-enter optimization is disabled by default,
6710     // until we fix 6178663.
6711     if (CMSCleanOnEnter && (_finger > _threshold)) {
6712       // [_threshold, _finger) represents the interval
6713       // of cards to be cleared  in MUT (or precleaned in card table).
6714       // The set of cards to be cleared is all those that overlap
6715       // with the interval [_threshold, _finger); note that
6716       // _threshold is always kept card-aligned but _finger isn't
6717       // always card-aligned.
6718       HeapWord* old_threshold = _threshold;
6719       assert(old_threshold == (HeapWord*)round_to(
6720               (intptr_t)old_threshold, CardTableModRefBS::card_size),
6721              "_threshold should always be card-aligned");
6722       _threshold = (HeapWord*)round_to(
6723                      (intptr_t)_finger, CardTableModRefBS::card_size);
6724       MemRegion mr(old_threshold, _threshold);
6725       assert(!mr.is_empty(), "Control point invariant");
6726       assert(_span.contains(mr), "Should clear within span");
6727       _mut->clear_range(mr);
6728     }
6729   DEBUG_ONLY(})
6730   // Note: the finger doesn't advance while we drain
6731   // the stack below.
6732   PushOrMarkClosure pushOrMarkClosure(_collector,
6733                                       _span, _bitMap, _markStack,
6734                                       _finger, this);
6735   bool res = _markStack->push(obj);
6736   assert(res, "Empty non-zero size stack should have space for single push");
6737   while (!_markStack->isEmpty()) {
6738     oop new_oop = _markStack->pop();
6739     // Skip verifying header mark word below because we are
6740     // running concurrent with mutators.
6741     assert(new_oop->is_oop(true), "Oops! expected to pop an oop");
6742     // now scan this oop's oops
6743     new_oop->oop_iterate(&pushOrMarkClosure);
6744     do_yield_check();
6745   }
6746   assert(_markStack->isEmpty(), "tautology, emphasizing post-condition");
6747 }
6748 
6749 Par_MarkFromRootsClosure::Par_MarkFromRootsClosure(CMSConcMarkingTask* task,
6750                        CMSCollector* collector, MemRegion span,
6751                        CMSBitMap* bit_map,
6752                        OopTaskQueue* work_queue,
6753                        CMSMarkStack*  overflow_stack):
6754   _collector(collector),
6755   _whole_span(collector->_span),
6756   _span(span),
6757   _bit_map(bit_map),
6758   _mut(&collector->_modUnionTable),
6759   _work_queue(work_queue),
6760   _overflow_stack(overflow_stack),
6761   _skip_bits(0),
6762   _task(task)
6763 {
6764   assert(_work_queue->size() == 0, "work_queue should be empty");
6765   _finger = span.start();
6766   _threshold = _finger;     // XXX Defer clear-on-enter optimization for now
6767   assert(_span.contains(_finger), "Out of bounds _finger?");
6768 }
6769 
6770 // Should revisit to see if this should be restructured for
6771 // greater efficiency.
6772 bool Par_MarkFromRootsClosure::do_bit(size_t offset) {
6773   if (_skip_bits > 0) {
6774     _skip_bits--;
6775     return true;
6776   }
6777   // convert offset into a HeapWord*
6778   HeapWord* addr = _bit_map->startWord() + offset;
6779   assert(_bit_map->endWord() && addr < _bit_map->endWord(),
6780          "address out of range");
6781   assert(_bit_map->isMarked(addr), "tautology");
6782   if (_bit_map->isMarked(addr+1)) {
6783     // this is an allocated object that might not yet be initialized
6784     assert(_skip_bits == 0, "tautology");
6785     _skip_bits = 2;  // skip next two marked bits ("Printezis-marks")
6786     oop p = oop(addr);
6787     if (p->klass_or_null() == NULL) {
6788       // in the case of the Clean-on-Enter optimization, redirty the card
6789       // and avoid clearing the card by increasing the threshold.
6790       return true;
6791     }
6792   }
6793   scan_oops_in_oop(addr);
6794   return true;
6795 }
6796 
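// Note: the drain loop in scan_oops_in_oop() below empties the local work
// queue first and only then tries to refill it from the shared overflow
// stack; it terminates once the local queue is empty and no further work
// can be obtained from the overflow stack, checking for yield requests
// between objects.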
6797 void Par_MarkFromRootsClosure::scan_oops_in_oop(HeapWord* ptr) {
6798   assert(_bit_map->isMarked(ptr), "expected bit to be set");
6799   // Should we assert that our work queue is empty or
6800   // below some drain limit?
6801   assert(_work_queue->size() == 0,
6802          "should drain stack to limit stack usage");
6803   // convert ptr to an oop preparatory to scanning
6804   oop obj = oop(ptr);
6805   // Ignore mark word in verification below, since we
6806   // may be running concurrent with mutators.
6807   assert(obj->is_oop(true), "should be an oop");
6808   assert(_finger <= ptr, "_finger runneth ahead");
6809   // advance the finger to right end of this object
6810   _finger = ptr + obj->size();
6811   assert(_finger > ptr, "we just incremented it above");
6812   // On large heaps, it may take us some time to get through
6813   // the marking phase. During
6814   // this time it's possible that a lot of mutations have
6815   // accumulated in the card table and the mod union table --
6816   // these mutation records are redundant until we have
6817   // actually traced into the corresponding card.
6818   // Here, we check whether advancing the finger would make
6819   // us cross into a new card, and if so clear corresponding
6820   // cards in the MUT (preclean them in the card-table in the
6821   // future).
6822 
6823   // The clean-on-enter optimization is disabled by default,
6824   // until we fix 6178663.
6825   if (CMSCleanOnEnter && (_finger > _threshold)) {
6826     // [_threshold, _finger) represents the interval
6827     // of cards to be cleared  in MUT (or precleaned in card table).
6828     // The set of cards to be cleared is all those that overlap
6829     // with the interval [_threshold, _finger); note that
6830     // _threshold is always kept card-aligned but _finger isn't
6831     // always card-aligned.
6832     HeapWord* old_threshold = _threshold;
6833     assert(old_threshold == (HeapWord*)round_to(
6834             (intptr_t)old_threshold, CardTableModRefBS::card_size),
6835            "_threshold should always be card-aligned");
6836     _threshold = (HeapWord*)round_to(
6837                    (intptr_t)_finger, CardTableModRefBS::card_size);
6838     MemRegion mr(old_threshold, _threshold);
6839     assert(!mr.is_empty(), "Control point invariant");
6840     assert(_span.contains(mr), "Should clear within span"); // _whole_span ??
6841     _mut->clear_range(mr);
6842   }
6843 
6844   // Note: the local finger doesn't advance while we drain
6845   // the stack below, but the global finger sure can and will.
6846   HeapWord** gfa = _task->global_finger_addr();
6847   Par_PushOrMarkClosure pushOrMarkClosure(_collector,
6848                                       _span, _bit_map,
6849                                       _work_queue,
6850                                       _overflow_stack,
6851                                       _finger,
6852                                       gfa, this);
6853   bool res = _work_queue->push(obj);   // overflow could occur here
6854   assert(res, "Will hold once we use workqueues");
6855   while (true) {
6856     oop new_oop;
6857     if (!_work_queue->pop_local(new_oop)) {
6858       // We emptied our work_queue; check if there's stuff that can
6859       // be gotten from the overflow stack.
6860       if (CMSConcMarkingTask::get_work_from_overflow_stack(
6861             _overflow_stack, _work_queue)) {
6862         do_yield_check();
6863         continue;
6864       } else {  // done
6865         break;
6866       }
6867     }
6868     // Skip verifying header mark word below because we are
6869     // running concurrent with mutators.
6870     assert(new_oop->is_oop(true), "Oops! expected to pop an oop");
6871     // now scan this oop's oops
6872     new_oop->oop_iterate(&pushOrMarkClosure);
6873     do_yield_check();
6874   }
6875   assert(_work_queue->size() == 0, "tautology, emphasizing post-condition");
6876 }
6877 
6878 // Yield in response to a request from VM Thread or
6879 // from mutators.
6880 void Par_MarkFromRootsClosure::do_yield_work() {
6881   assert(_task != NULL, "sanity");
6882   _task->yield();
6883 }
6884 
6885 // A variant of the above used for verifying CMS marking work.
6886 MarkFromRootsVerifyClosure::MarkFromRootsVerifyClosure(CMSCollector* collector,
6887                         MemRegion span,
6888                         CMSBitMap* verification_bm, CMSBitMap* cms_bm,
6889                         CMSMarkStack*  mark_stack):
6890   _collector(collector),
6891   _span(span),
6892   _verification_bm(verification_bm),
6893   _cms_bm(cms_bm),
6894   _mark_stack(mark_stack),
6895   _pam_verify_closure(collector, span, verification_bm, cms_bm,
6896                       mark_stack)
6897 {
6898   assert(_mark_stack->isEmpty(), "stack should be empty");
6899   _finger = _verification_bm->startWord();
6900   assert(_collector->_restart_addr == NULL, "Sanity check");
6901   assert(_span.contains(_finger), "Out of bounds _finger?");
6902 }
6903 
6904 void MarkFromRootsVerifyClosure::reset(HeapWord* addr) {
6905   assert(_mark_stack->isEmpty(), "would cause duplicates on stack");
6906   assert(_span.contains(addr), "Out of bounds _finger?");
6907   _finger = addr;
6908 }
6909 
6910 // Should revisit to see if this should be restructured for
6911 // greater efficiency.
6912 bool MarkFromRootsVerifyClosure::do_bit(size_t offset) {
6913   // convert offset into a HeapWord*
6914   HeapWord* addr = _verification_bm->startWord() + offset;
6915   assert(_verification_bm->endWord() && addr < _verification_bm->endWord(),
6916          "address out of range");
6917   assert(_verification_bm->isMarked(addr), "tautology");
6918   assert(_cms_bm->isMarked(addr), "tautology");
6919 
6920   assert(_mark_stack->isEmpty(),
6921          "should drain stack to limit stack usage");
6922   // convert addr to an oop preparatory to scanning
6923   oop obj = oop(addr);
6924   assert(obj->is_oop(), "should be an oop");
6925   assert(_finger <= addr, "_finger runneth ahead");
6926   // advance the finger to right end of this object
6927   _finger = addr + obj->size();
6928   assert(_finger > addr, "we just incremented it above");
6929   // Note: the finger doesn't advance while we drain
6930   // the stack below.
6931   bool res = _mark_stack->push(obj);
6932   assert(res, "Empty non-zero size stack should have space for single push");
6933   while (!_mark_stack->isEmpty()) {
6934     oop new_oop = _mark_stack->pop();
6935     assert(new_oop->is_oop(), "Oops! expected to pop an oop");
6936     // now scan this oop's oops
6937     new_oop->oop_iterate(&_pam_verify_closure);
6938   }
6939   assert(_mark_stack->isEmpty(), "tautology, emphasizing post-condition");
6940   return true;
6941 }
6942 
6943 PushAndMarkVerifyClosure::PushAndMarkVerifyClosure(
6944   CMSCollector* collector, MemRegion span,
6945   CMSBitMap* verification_bm, CMSBitMap* cms_bm,
6946   CMSMarkStack*  mark_stack):
6947   MetadataAwareOopClosure(collector->ref_processor()),
6948   _collector(collector),
6949   _span(span),
6950   _verification_bm(verification_bm),
6951   _cms_bm(cms_bm),
6952   _mark_stack(mark_stack)
6953 { }
6954 
6955 void PushAndMarkVerifyClosure::do_oop(oop* p)       { PushAndMarkVerifyClosure::do_oop_work(p); }
6956 void PushAndMarkVerifyClosure::do_oop(narrowOop* p) { PushAndMarkVerifyClosure::do_oop_work(p); }
6957 
6958 // Upon stack overflow, we discard (part of) the stack,
6959 // remembering the least address amongst those discarded
6960 // in CMSCollector's _restart_address.
6961 void PushAndMarkVerifyClosure::handle_stack_overflow(HeapWord* lost) {
6962   // Remember the least grey address discarded
6963   HeapWord* ra = (HeapWord*)_mark_stack->least_value(lost);
6964   _collector->lower_restart_addr(ra);
6965   _mark_stack->reset();  // discard stack contents
6966   _mark_stack->expand(); // expand the stack if possible
6967 }
6968 
6969 void PushAndMarkVerifyClosure::do_oop(oop obj) {
6970   assert(obj->is_oop_or_null(), err_msg("Expected an oop or NULL at " PTR_FORMAT, p2i(obj)));
6971   HeapWord* addr = (HeapWord*)obj;
6972   if (_span.contains(addr) && !_verification_bm->isMarked(addr)) {
6973     // Oop lies in _span and isn't yet grey or black
6974     _verification_bm->mark(addr);            // now grey
6975     if (!_cms_bm->isMarked(addr)) {
6976       oop(addr)->print();
6977       gclog_or_tty->print_cr(" (" INTPTR_FORMAT " should have been marked)",
6978                              addr);
6979       fatal("... aborting");
6980     }
6981 
6982     if (!_mark_stack->push(obj)) { // stack overflow
6983       if (PrintCMSStatistics != 0) {
6984         gclog_or_tty->print_cr("CMS marking stack overflow (benign) at "
6985                                SIZE_FORMAT, _mark_stack->capacity());
6986       }
6987       assert(_mark_stack->isFull(), "Else push should have succeeded");
6988       handle_stack_overflow(addr);
6989     }
6990     // anything including and to the right of _finger
6991     // will be scanned as we iterate over the remainder of the
6992     // bit map
6993   }
6994 }
6995 
6996 PushOrMarkClosure::PushOrMarkClosure(CMSCollector* collector,
6997                      MemRegion span,
6998                      CMSBitMap* bitMap, CMSMarkStack*  markStack,
6999                      HeapWord* finger, MarkFromRootsClosure* parent) :
7000   MetadataAwareOopClosure(collector->ref_processor()),
7001   _collector(collector),
7002   _span(span),
7003   _bitMap(bitMap),
7004   _markStack(markStack),
7005   _finger(finger),
7006   _parent(parent)
7007 { }
7008 
7009 Par_PushOrMarkClosure::Par_PushOrMarkClosure(CMSCollector* collector,
7010                      MemRegion span,
7011                      CMSBitMap* bit_map,
7012                      OopTaskQueue* work_queue,
7013                      CMSMarkStack*  overflow_stack,
7014                      HeapWord* finger,
7015                      HeapWord** global_finger_addr,
7016                      Par_MarkFromRootsClosure* parent) :
7017   MetadataAwareOopClosure(collector->ref_processor()),
7018   _collector(collector),
7019   _whole_span(collector->_span),
7020   _span(span),
7021   _bit_map(bit_map),
7022   _work_queue(work_queue),
7023   _overflow_stack(overflow_stack),
7024   _finger(finger),
7025   _global_finger_addr(global_finger_addr),
7026   _parent(parent)
7027 { }
7028 
7029 // Assumes thread-safe access by callers, who are
7030 // responsible for mutual exclusion.
7031 void CMSCollector::lower_restart_addr(HeapWord* low) {
7032   assert(_span.contains(low), "Out of bounds addr");
7033   if (_restart_addr == NULL) {
7034     _restart_addr = low;
7035   } else {
7036     _restart_addr = MIN2(_restart_addr, low);
7037   }
7038 }
7039 
7040 // Upon stack overflow, we discard (part of) the stack,
7041 // remembering the least address amongst those discarded
7042 // in CMSCollector's _restart_address.
7043 void PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) {
7044   // Remember the least grey address discarded
7045   HeapWord* ra = (HeapWord*)_markStack->least_value(lost);
7046   _collector->lower_restart_addr(ra);
7047   _markStack->reset();  // discard stack contents
7048   _markStack->expand(); // expand the stack if possible
7049 }
7050 
7051 // Upon stack overflow, we discard (part of) the stack,
7052 // remembering the least address amongst those discarded
7053 // in CMSCollector's _restart_address.
7054 void Par_PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) {
7055   // We need to do this under a mutex to prevent other
7056   // workers from interfering with the work done below.
7057   MutexLockerEx ml(_overflow_stack->par_lock(),
7058                    Mutex::_no_safepoint_check_flag);
7059   // Remember the least grey address discarded
7060   HeapWord* ra = (HeapWord*)_overflow_stack->least_value(lost);
7061   _collector->lower_restart_addr(ra);
7062   _overflow_stack->reset();  // discard stack contents
7063   _overflow_stack->expand(); // expand the stack if possible
7064 }
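// Marking later notices that _restart_addr has been set (see
// lower_restart_addr() above) and restarts the bit map scan from that
// address -- cf. MarkFromRootsClosure::reset() -- so grey objects whose
// stack entries were discarded here are revisited rather than lost.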
7065 
7066 void PushOrMarkClosure::do_oop(oop obj) {
7067   // Ignore mark word because we are running concurrent with mutators.
7068   assert(obj->is_oop_or_null(true), err_msg("Expected an oop or NULL at " PTR_FORMAT, p2i(obj)));
7069   HeapWord* addr = (HeapWord*)obj;
7070   if (_span.contains(addr) && !_bitMap->isMarked(addr)) {
7071     // Oop lies in _span and isn't yet grey or black
7072     _bitMap->mark(addr);            // now grey
7073     if (addr < _finger) {
7074       // the bit map iteration has already either passed, or
7075       // sampled, this bit in the bit map; we'll need to
7076       // use the marking stack to scan this oop's oops.
7077       bool simulate_overflow = false;
7078       NOT_PRODUCT(
7079         if (CMSMarkStackOverflowALot &&
7080             _collector->simulate_overflow()) {
7081           // simulate a stack overflow
7082           simulate_overflow = true;
7083         }
7084       )
7085       if (simulate_overflow || !_markStack->push(obj)) { // stack overflow
7086         if (PrintCMSStatistics != 0) {
7087           gclog_or_tty->print_cr("CMS marking stack overflow (benign) at "
7088                                  SIZE_FORMAT, _markStack->capacity());
7089         }
7090         assert(simulate_overflow || _markStack->isFull(), "Else push should have succeeded");
7091         handle_stack_overflow(addr);
7092       }
7093     }
7094     // anything including and to the right of _finger
7095     // will be scanned as we iterate over the remainder of the
7096     // bit map
7097     do_yield_check();
7098   }
7099 }
7100 
7101 void PushOrMarkClosure::do_oop(oop* p)       { PushOrMarkClosure::do_oop_work(p); }
7102 void PushOrMarkClosure::do_oop(narrowOop* p) { PushOrMarkClosure::do_oop_work(p); }
7103 
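// Note on the push decision below: a worker pushes a newly marked oop only
// if it won the race to mark it, the oop lies below the global finger, and
// the oop is either outside this worker's chunk or below the chunk-local
// finger; anything else is reached by an ongoing or later bit map scan, so
// pushing it would merely duplicate work.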
7104 void Par_PushOrMarkClosure::do_oop(oop obj) {
7105   // Ignore mark word because we are running concurrent with mutators.
7106   assert(obj->is_oop_or_null(true), err_msg("Expected an oop or NULL at " PTR_FORMAT, p2i(obj)));
7107   HeapWord* addr = (HeapWord*)obj;
7108   if (_whole_span.contains(addr) && !_bit_map->isMarked(addr)) {
7109     // Oop lies in _whole_span and isn't yet grey or black
7110     // We read the global_finger (volatile read) strictly after marking oop
7111     bool res = _bit_map->par_mark(addr);    // now grey
7112     volatile HeapWord** gfa = (volatile HeapWord**)_global_finger_addr;
7113     // Should we push this marked oop on our stack?
7114     // -- if someone else marked it, nothing to do
7115     // -- if target oop is above global finger nothing to do
7116     // -- if target oop is in chunk and above local finger
7117     //      then nothing to do
7118     // -- else push on work queue
7119     if (   !res       // someone else marked it, they will deal with it
7120         || (addr >= *gfa)  // will be scanned in a later task
7121         || (_span.contains(addr) && addr >= _finger)) { // later in this chunk
7122       return;
7123     }
7124     // the bit map iteration has already either passed, or
7125     // sampled, this bit in the bit map; we'll need to
7126     // use the marking stack to scan this oop's oops.
7127     bool simulate_overflow = false;
7128     NOT_PRODUCT(
7129       if (CMSMarkStackOverflowALot &&
7130           _collector->simulate_overflow()) {
7131         // simulate a stack overflow
7132         simulate_overflow = true;
7133       }
7134     )
7135     if (simulate_overflow ||
7136         !(_work_queue->push(obj) || _overflow_stack->par_push(obj))) {
7137       // stack overflow
7138       if (PrintCMSStatistics != 0) {
7139         gclog_or_tty->print_cr("CMS marking stack overflow (benign) at "
7140                                SIZE_FORMAT, _overflow_stack->capacity());
7141       }
7142       // We cannot assert that the overflow stack is full because
7143       // it may have been emptied since.
7144       assert(simulate_overflow ||
7145              _work_queue->size() == _work_queue->max_elems(),
7146             "Else push should have succeeded");
7147       handle_stack_overflow(addr);
7148     }
7149     do_yield_check();
7150   }
7151 }
7152 
7153 void Par_PushOrMarkClosure::do_oop(oop* p)       { Par_PushOrMarkClosure::do_oop_work(p); }
7154 void Par_PushOrMarkClosure::do_oop(narrowOop* p) { Par_PushOrMarkClosure::do_oop_work(p); }
7155 
7156 PushAndMarkClosure::PushAndMarkClosure(CMSCollector* collector,
7157                                        MemRegion span,
7158                                        ReferenceProcessor* rp,
7159                                        CMSBitMap* bit_map,
7160                                        CMSBitMap* mod_union_table,
7161                                        CMSMarkStack*  mark_stack,
7162                                        bool           concurrent_precleaning):
7163   MetadataAwareOopClosure(rp),
7164   _collector(collector),
7165   _span(span),
7166   _bit_map(bit_map),
7167   _mod_union_table(mod_union_table),
7168   _mark_stack(mark_stack),
7169   _concurrent_precleaning(concurrent_precleaning)
7170 {
7171   assert(_ref_processor != NULL, "_ref_processor shouldn't be NULL");
7172 }
7173 
7174 // Grey object rescan during pre-cleaning and second checkpoint phases --
7175 // the non-parallel version (the parallel version appears further below.)
7176 void PushAndMarkClosure::do_oop(oop obj) {
7177   // Ignore mark word verification. If during concurrent precleaning,
7178   // the object monitor may be locked. If during the checkpoint
7179   // phases, the object may already have been reached by a  different
7180   // path and may be at the end of the global overflow list (so
7181   // the mark word may be NULL).
7182   assert(obj->is_oop_or_null(true /* ignore mark word */),
7183          err_msg("Expected an oop or NULL at " PTR_FORMAT, p2i(obj)));
7184   HeapWord* addr = (HeapWord*)obj;
7185   // Check if oop points into the CMS generation
7186   // and is not marked
7187   if (_span.contains(addr) && !_bit_map->isMarked(addr)) {
7188     // a white object ...
7189     _bit_map->mark(addr);         // ... now grey
7190     // push on the marking stack (grey set)
7191     bool simulate_overflow = false;
7192     NOT_PRODUCT(
7193       if (CMSMarkStackOverflowALot &&
7194           _collector->simulate_overflow()) {
7195         // simulate a stack overflow
7196         simulate_overflow = true;
7197       }
7198     )
7199     if (simulate_overflow || !_mark_stack->push(obj)) {
7200       if (_concurrent_precleaning) {
7201          // During precleaning we can just dirty the appropriate card(s)
7202          // in the mod union table, thus ensuring that the object remains
7203          // in the grey set, and continue. In the case of object arrays
7204          // we need to dirty all of the cards that the object spans,
7205          // since the rescan of object arrays will be limited to the
7206          // dirty cards.
7207          // Note that no one can be interfering with us in this action
7208          // of dirtying the mod union table, so no locking or atomics
7209          // are required.
7210          if (obj->is_objArray()) {
7211            size_t sz = obj->size();
7212            HeapWord* end_card_addr = (HeapWord*)round_to(
7213                                         (intptr_t)(addr+sz), CardTableModRefBS::card_size);
7214            MemRegion redirty_range = MemRegion(addr, end_card_addr);
7215            assert(!redirty_range.is_empty(), "Arithmetical tautology");
7216            _mod_union_table->mark_range(redirty_range);
7217          } else {
7218            _mod_union_table->mark(addr);
7219          }
7220          _collector->_ser_pmc_preclean_ovflw++;
7221       } else {
7222          // During the remark phase, we need to remember this oop
7223          // in the overflow list.
7224          _collector->push_on_overflow_list(obj);
7225          _collector->_ser_pmc_remark_ovflw++;
7226       }
7227     }
7228   }
7229 }
7230 
7231 Par_PushAndMarkClosure::Par_PushAndMarkClosure(CMSCollector* collector,
7232                                                MemRegion span,
7233                                                ReferenceProcessor* rp,
7234                                                CMSBitMap* bit_map,
7235                                                OopTaskQueue* work_queue):
7236   MetadataAwareOopClosure(rp),
7237   _collector(collector),
7238   _span(span),
7239   _bit_map(bit_map),
7240   _work_queue(work_queue)
7241 {
7242   assert(_ref_processor != NULL, "_ref_processor shouldn't be NULL");
7243 }
7244 
7245 void PushAndMarkClosure::do_oop(oop* p)       { PushAndMarkClosure::do_oop_work(p); }
7246 void PushAndMarkClosure::do_oop(narrowOop* p) { PushAndMarkClosure::do_oop_work(p); }
7247 
7248 // Grey object rescan during second checkpoint phase --
7249 // the parallel version.
7250 void Par_PushAndMarkClosure::do_oop(oop obj) {
7251   // In the assert below, we ignore the mark word because
7252   // this oop may point to an already visited object that is
7253   // on the overflow stack (in which case the mark word has
7254   // been hijacked for chaining into the overflow stack --
7255   // if this is the last object in the overflow stack then
7256   // its mark word will be NULL). Because this object may
7257   // have been subsequently popped off the global overflow
7258   // stack, and the mark word possibly restored to the prototypical
7259   // value, by the time we get to examine this failing assert in
7260   // the debugger, is_oop_or_null(false) may subsequently start
7261   // to hold.
7262   assert(obj->is_oop_or_null(true),
7263          err_msg("Expected an oop or NULL at " PTR_FORMAT, p2i(obj)));
7264   HeapWord* addr = (HeapWord*)obj;
7265   // Check if oop points into the CMS generation
7266   // and is not marked
7267   if (_span.contains(addr) && !_bit_map->isMarked(addr)) {
7268     // a white object ...
7269     // If we manage to "claim" the object, by being the
7270     // first thread to mark it, then we push it on our
7271     // marking stack
7272     if (_bit_map->par_mark(addr)) {     // ... now grey
7273       // push on work queue (grey set)
7274       bool simulate_overflow = false;
7275       NOT_PRODUCT(
7276         if (CMSMarkStackOverflowALot &&
7277             _collector->par_simulate_overflow()) {
7278           // simulate a stack overflow
7279           simulate_overflow = true;
7280         }
7281       )
7282       if (simulate_overflow || !_work_queue->push(obj)) {
7283         _collector->par_push_on_overflow_list(obj);
7284         _collector->_par_pmc_remark_ovflw++; //  imprecise OK: no need to CAS
7285       }
7286     } // Else, some other thread got there first
7287   }
7288 }
7289 
7290 void Par_PushAndMarkClosure::do_oop(oop* p)       { Par_PushAndMarkClosure::do_oop_work(p); }
7291 void Par_PushAndMarkClosure::do_oop(narrowOop* p) { Par_PushAndMarkClosure::do_oop_work(p); }
7292 
7293 void CMSPrecleanRefsYieldClosure::do_yield_work() {
7294   Mutex* bml = _collector->bitMapLock();
7295   assert_lock_strong(bml);
7296   assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
7297          "CMS thread should hold CMS token");
7298 
7299   bml->unlock();
7300   ConcurrentMarkSweepThread::desynchronize(true);
7301 
7302   _collector->stopTimer();
7303   if (PrintCMSStatistics != 0) {
7304     _collector->incrementYields();
7305   }
7306 
7307   // See the comment in coordinator_yield()
7308   for (unsigned i = 0; i < CMSYieldSleepCount &&
7309                        ConcurrentMarkSweepThread::should_yield() &&
7310                        !CMSCollector::foregroundGCIsActive(); ++i) {
7311     os::sleep(Thread::current(), 1, false);
7312   }
7313 
7314   ConcurrentMarkSweepThread::synchronize(true);
7315   bml->lock();
7316 
7317   _collector->startTimer();
7318 }
7319 
7320 bool CMSPrecleanRefsYieldClosure::should_return() {
7321   if (ConcurrentMarkSweepThread::should_yield()) {
7322     do_yield_work();
7323   }
7324   return _collector->foregroundGCIsActive();
7325 }
7326 
7327 void MarkFromDirtyCardsClosure::do_MemRegion(MemRegion mr) {
7328   assert(((size_t)mr.start())%CardTableModRefBS::card_size_in_words == 0,
7329          "mr should be aligned to start at a card boundary");
7330   // We'd like to assert:
7331   // assert(mr.word_size()%CardTableModRefBS::card_size_in_words == 0,
7332   //        "mr should be a range of cards");
7333   // However, that would be too strong in one case -- the last
7334   // partition ends at _unallocated_block which, in general, can be
7335   // an arbitrary boundary, not necessarily card aligned.
7336   if (PrintCMSStatistics != 0) {
7337     _num_dirty_cards +=
7338          mr.word_size()/CardTableModRefBS::card_size_in_words;
7339   }
7340   _space->object_iterate_mem(mr, &_scan_cl);
7341 }
7342 
7343 SweepClosure::SweepClosure(CMSCollector* collector,
7344                            ConcurrentMarkSweepGeneration* g,
7345                            CMSBitMap* bitMap, bool should_yield) :
7346   _collector(collector),
7347   _g(g),
7348   _sp(g->cmsSpace()),
7349   _limit(_sp->sweep_limit()),
7350   _freelistLock(_sp->freelistLock()),
7351   _bitMap(bitMap),
7352   _yield(should_yield),
7353   _inFreeRange(false),           // No free range at beginning of sweep
7354   _freeRangeInFreeLists(false),  // No free range at beginning of sweep
7355   _lastFreeRangeCoalesced(false),
7356   _freeFinger(g->used_region().start())
7357 {
7358   NOT_PRODUCT(
7359     _numObjectsFreed = 0;
7360     _numWordsFreed   = 0;
7361     _numObjectsLive = 0;
7362     _numWordsLive = 0;
7363     _numObjectsAlreadyFree = 0;
7364     _numWordsAlreadyFree = 0;
7365     _last_fc = NULL;
7366 
7367     _sp->initializeIndexedFreeListArrayReturnedBytes();
7368     _sp->dictionary()->initialize_dict_returned_bytes();
7369   )
7370   assert(_limit >= _sp->bottom() && _limit <= _sp->end(),
7371          "sweep _limit out of bounds");
7372   if (CMSTraceSweeper) {
7373     gclog_or_tty->print_cr("\n====================\nStarting new sweep with limit " PTR_FORMAT,
7374                         _limit);
7375   }
7376 }
7377 
7378 void SweepClosure::print_on(outputStream* st) const {
7379   tty->print_cr("_sp = [" PTR_FORMAT "," PTR_FORMAT ")",
7380                 _sp->bottom(), _sp->end());
7381   tty->print_cr("_limit = " PTR_FORMAT, _limit);
7382   tty->print_cr("_freeFinger = " PTR_FORMAT, _freeFinger);
7383   NOT_PRODUCT(tty->print_cr("_last_fc = " PTR_FORMAT, _last_fc);)
7384   tty->print_cr("_inFreeRange = %d, _freeRangeInFreeLists = %d, _lastFreeRangeCoalesced = %d",
7385                 _inFreeRange, _freeRangeInFreeLists, _lastFreeRangeCoalesced);
7386 }
7387 
7388 #ifndef PRODUCT
7389 // Assertion checking only:  no useful work in product mode --
7390 // however, if any of the flags below become product flags,
7391 // you may need to review this code to see if it needs to be
7392 // enabled in product mode.
7393 SweepClosure::~SweepClosure() {
7394   assert_lock_strong(_freelistLock);
7395   assert(_limit >= _sp->bottom() && _limit <= _sp->end(),
7396          "sweep _limit out of bounds");
7397   if (inFreeRange()) {
7398     warning("inFreeRange() should have been reset; dumping state of SweepClosure");
7399     print();
7400     ShouldNotReachHere();
7401   }
7402   if (Verbose && PrintGC) {
7403     gclog_or_tty->print("Collected "SIZE_FORMAT" objects, " SIZE_FORMAT " bytes",
7404                         _numObjectsFreed, _numWordsFreed*sizeof(HeapWord));
7405     gclog_or_tty->print_cr("\nLive "SIZE_FORMAT" objects,  "
7406                            SIZE_FORMAT" bytes  "
7407       "Already free "SIZE_FORMAT" objects, "SIZE_FORMAT" bytes",
7408       _numObjectsLive, _numWordsLive*sizeof(HeapWord),
7409       _numObjectsAlreadyFree, _numWordsAlreadyFree*sizeof(HeapWord));
7410     size_t totalBytes = (_numWordsFreed + _numWordsLive + _numWordsAlreadyFree)
7411                         * sizeof(HeapWord);
7412     gclog_or_tty->print_cr("Total sweep: "SIZE_FORMAT" bytes", totalBytes);
7413 
7414     if (PrintCMSStatistics && CMSVerifyReturnedBytes) {
7415       size_t indexListReturnedBytes = _sp->sumIndexedFreeListArrayReturnedBytes();
7416       size_t dict_returned_bytes = _sp->dictionary()->sum_dict_returned_bytes();
7417       size_t returned_bytes = indexListReturnedBytes + dict_returned_bytes;
7418       gclog_or_tty->print("Returned "SIZE_FORMAT" bytes", returned_bytes);
7419       gclog_or_tty->print("   Indexed List Returned "SIZE_FORMAT" bytes",
7420         indexListReturnedBytes);
7421       gclog_or_tty->print_cr("        Dictionary Returned "SIZE_FORMAT" bytes",
7422         dict_returned_bytes);
7423     }
7424   }
7425   if (CMSTraceSweeper) {
7426     gclog_or_tty->print_cr("end of sweep with _limit = " PTR_FORMAT "\n================",
7427                            _limit);
7428   }
7429 }
7430 #endif  // PRODUCT
7431 
7432 void SweepClosure::initialize_free_range(HeapWord* freeFinger,
7433     bool freeRangeInFreeLists) {
7434   if (CMSTraceSweeper) {
7435     gclog_or_tty->print("---- Start free range at " PTR_FORMAT " with free block (%d)\n",
7436                freeFinger, freeRangeInFreeLists);
7437   }
7438   assert(!inFreeRange(), "Trampling existing free range");
7439   set_inFreeRange(true);
7440   set_lastFreeRangeCoalesced(false);
7441 
7442   set_freeFinger(freeFinger);
7443   set_freeRangeInFreeLists(freeRangeInFreeLists);
7444   if (CMSTestInFreeList) {
7445     if (freeRangeInFreeLists) {
7446       FreeChunk* fc = (FreeChunk*) freeFinger;
7447       assert(fc->is_free(), "A chunk on the free list should be free.");
7448       assert(fc->size() > 0, "Free range should have a size");
7449       assert(_sp->verify_chunk_in_free_list(fc), "Chunk is not in free lists");
7450     }
7451   }
7452 }
7453 
7454 // Note that the sweeper runs concurrently with mutators. Thus,
7455 // it is possible for direct allocation in this generation to happen
7456 // in the middle of the sweep. Note that the sweeper also coalesces
7457 // contiguous free blocks. Thus, unless the sweeper and the allocator
7458 // synchronize appropriately, freshly allocated blocks may get swept up.
7459 // This is accomplished by the sweeper locking the free lists while
7460 // it is sweeping. Thus blocks that are determined to be free are
7461 // indeed free. There is however one additional complication:
7462 // blocks that have been allocated since the final checkpoint and
7463 // mark, will not have been marked and so would be treated as
7464 // unreachable and swept up. To prevent this, the allocator marks
7465 // the bit map when allocating during the sweep phase. This leads,
7466 // however, to a further complication -- objects may have been allocated
7467 // but not yet initialized -- in the sense that the header isn't yet
7468 // installed. The sweeper cannot then determine the size of the block
7469 // in order to skip over it. To deal with this case, we use a technique
7470 // (due to Printezis) to encode such uninitialized block sizes in the
7471 // bit map. Since the bit map uses a bit per every HeapWord, but the
7472 // CMS generation has a minimum object size of 3 HeapWords, it follows
7473 // that "normal marks" won't be adjacent in the bit map (there will
7474 // always be at least two 0 bits between successive 1 bits). We make use
7475 // of these "unused" bits to represent uninitialized blocks -- the bit
7476 // corresponding to the start of the uninitialized object and the next
7477 // bit are both set. Finally, a 1 bit marks the end of the object that
7478 // started with the two consecutive 1 bits to indicate its potentially
7479 // uninitialized state.
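// Pictorially, for an uninitialized block of n >= 3 words starting at the
// word whose bit is b:
//
//   bit:    b   b+1   b+2 ... b+n-2   b+n-1
//   value:  1    1     0  ...   0       1
//
// whereas an ordinary live object has only bit b set. The isMarked(addr+1)
// tests earlier in this file and block_size_using_printezis_bits() rely on
// exactly this distinction.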
7480 
7481 size_t SweepClosure::do_blk_careful(HeapWord* addr) {
7482   FreeChunk* fc = (FreeChunk*)addr;
7483   size_t res;
7484 
7485   // Check if we are done sweeping. Below we check "addr >= _limit" rather
7486   // than "addr == _limit" because although _limit was a block boundary when
7487   // we started the sweep, it may no longer be one because heap expansion
7488   // may have caused us to coalesce the block ending at the address _limit
7489   // with a newly expanded chunk (this happens when _limit was set to the
7490   // previous _end of the space), so we may have stepped past _limit:
7491   // see the following Zeno-like trail of CRs 6977970, 7008136, 7042740.
7492   if (addr >= _limit) { // we have swept up to or past the limit: finish up
7493     assert(_limit >= _sp->bottom() && _limit <= _sp->end(),
7494            "sweep _limit out of bounds");
7495     assert(addr < _sp->end(), "addr out of bounds");
7496     // Flush any free range we might be holding as a single
7497     // coalesced chunk to the appropriate free list.
7498     if (inFreeRange()) {
7499       assert(freeFinger() >= _sp->bottom() && freeFinger() < _limit,
7500              err_msg("freeFinger() " PTR_FORMAT" is out-of-bounds", freeFinger()));
7501       flush_cur_free_chunk(freeFinger(),
7502                            pointer_delta(addr, freeFinger()));
7503       if (CMSTraceSweeper) {
7504         gclog_or_tty->print("Sweep: last chunk: ");
7505         gclog_or_tty->print("put_free_blk " PTR_FORMAT " ("SIZE_FORMAT") "
7506                    "[coalesced:%d]\n",
7507                    freeFinger(), pointer_delta(addr, freeFinger()),
7508                    lastFreeRangeCoalesced() ? 1 : 0);
7509       }
7510     }
7511 
7512     // help the iterator loop finish
7513     return pointer_delta(_sp->end(), addr);
7514   }
7515 
7516   assert(addr < _limit, "sweep invariant");
7517   // check if we should yield
7518   do_yield_check(addr);
7519   if (fc->is_free()) {
7520     // Chunk that is already free
7521     res = fc->size();
7522     do_already_free_chunk(fc);
7523     debug_only(_sp->verifyFreeLists());
7524     // If we flush the chunk at hand in lookahead_and_flush()
7525     // and it's coalesced with a preceding chunk, then the
7526     // process of "mangling" the payload of the coalesced block
7527     // will cause erasure of the size information from the
7528     // (erstwhile) header of all the coalesced blocks but the
7529     // first, so the first disjunct in the assert will not hold
7530     // in that specific case (in which case the second disjunct
7531     // will hold).
7532     assert(res == fc->size() || ((HeapWord*)fc) + res >= _limit,
7533            "Otherwise the size info doesn't change at this step");
7534     NOT_PRODUCT(
7535       _numObjectsAlreadyFree++;
7536       _numWordsAlreadyFree += res;
7537     )
7538     NOT_PRODUCT(_last_fc = fc;)
7539   } else if (!_bitMap->isMarked(addr)) {
7540     // Chunk is fresh garbage
7541     res = do_garbage_chunk(fc);
7542     debug_only(_sp->verifyFreeLists());
7543     NOT_PRODUCT(
7544       _numObjectsFreed++;
7545       _numWordsFreed += res;
7546     )
7547   } else {
7548     // Chunk that is alive.
7549     res = do_live_chunk(fc);
7550     debug_only(_sp->verifyFreeLists());
7551     NOT_PRODUCT(
7552         _numObjectsLive++;
7553         _numWordsLive += res;
7554     )
7555   }
7556   return res;
7557 }
7558 
7559 // For the smart allocation, record following
7560 //  split deaths - a free chunk is removed from its free list because
7561 //      it is being split into two or more chunks.
7562 //  split birth - a free chunk is being added to its free list because
7563 //      a larger free chunk has been split and resulted in this free chunk.
7564 //  coal death - a free chunk is being removed from its free list because
7565 //      it is being coalesced into a large free chunk.
7566 //  coal birth - a free chunk is being added to its free list because
7567 //      it was created when two or more free chunks were coalesced into
7568 //      this free chunk.
7569 //
7570 // These statistics are used to determine the desired number of free
7571 // chunks of a given size.  The desired number is chosen to be relative
7572 // to the end of a CMS sweep.  The desired number at the end of a sweep
7573 // is the
7574 //      count-at-end-of-previous-sweep (an amount that was enough)
7575 //              - count-at-beginning-of-current-sweep  (the excess)
7576 //              + split-births  (gains in this size during interval)
7577 //              - split-deaths  (demands on this size during interval)
7578 // where the interval is from the end of one sweep to the end of the
7579 // next.
7580 //
7581 // When sweeping the sweeper maintains an accumulated chunk which is
7582 // the chunk that is made up of chunks that have been coalesced.  That
7583 // will be termed the left-hand chunk.  A new chunk of garbage that
7584 // is being considered for coalescing will be referred to as the
7585 // right-hand chunk.
7586 //
7587 // When making a decision on whether to coalesce a right-hand chunk with
7588 // the current left-hand chunk, the current count vs. the desired count
7589 // of the left-hand chunk is considered.  Also if the right-hand chunk
7590 // is near the large chunk at the end of the heap (see
7591 // ConcurrentMarkSweepGeneration::isNearLargestChunk()), then the
7592 // left-hand chunk is coalesced.
7593 //
7594 // When making a decision about whether to split a chunk, the desired count
7595 // vs. the current count of the candidate to be split is also considered.
7596 // If the candidate is underpopulated (currently fewer chunks than desired)
7597 // a chunk of an overpopulated (currently more chunks than desired) size may
7598 // be chosen.  The "hint" associated with a free list, if non-null, points
7599 // to a free list which may be overpopulated.
7600 //
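     // For illustration only (hypothetical numbers): if 100 chunks of a given
     // size were present at the end of the previous sweep, 60 remain at the
     // beginning of the current sweep, and the interval saw 20 split-births
     // and 10 split-deaths, the desired count at the end of this sweep is
     //      100 - 60 + 20 - 10 = 50.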
7601 
7602 void SweepClosure::do_already_free_chunk(FreeChunk* fc) {
7603   const size_t size = fc->size();
7604   // Chunks that cannot be coalesced are not in the
7605   // free lists.
7606   if (CMSTestInFreeList && !fc->cantCoalesce()) {
7607     assert(_sp->verify_chunk_in_free_list(fc),
7608       "free chunk should be in free lists");
7609   }
7610   // A chunk that is already free should not have been
7611   // marked in the bit map.
7612   HeapWord* const addr = (HeapWord*) fc;
7613   assert(!_bitMap->isMarked(addr), "free chunk should be unmarked");
7614   // Verify that the bit map has no bits marked between
7615   // addr and purported end of this block.
7616   _bitMap->verifyNoOneBitsInRange(addr + 1, addr + size);
7617 
7618   // Some chunks cannot be coalesced under any circumstances.
7619   // See the definition of cantCoalesce().
7620   if (!fc->cantCoalesce()) {
7621     // This chunk can potentially be coalesced.
7622     if (_sp->adaptive_freelists()) {
7623       // All the work is done in do_post_free_or_garbage_chunk().
7624       do_post_free_or_garbage_chunk(fc, size);
7625     } else {  // Not adaptive free lists
7626       // This is a free chunk that can potentially be coalesced by the sweeper.
7627       if (!inFreeRange()) {
7628         // If the next chunk is a free block that can't be coalesced,
7629         // it doesn't make sense to remove this chunk from the free lists.
7630         FreeChunk* nextChunk = (FreeChunk*)(addr + size);
7631         assert((HeapWord*)nextChunk <= _sp->end(), "Chunk size out of bounds?");
7632         if ((HeapWord*)nextChunk < _sp->end() &&     // There is another free chunk to the right ...
7633             nextChunk->is_free()               &&     // ... which is free...
7634             nextChunk->cantCoalesce()) {             // ... but can't be coalesced
7635           // nothing to do
7636         } else {
7637           // Potentially the start of a new free range:
7638           // Don't eagerly remove it from the free lists.
7639           // No need to remove it if it will just be put
7640           // back again.  (Also, from a pragmatic point of view,
7641           // if it is a free block in a region that is beyond
7642           // any allocated blocks, an assertion will fail.)
7643           // Remember the start of a free run.
7644           initialize_free_range(addr, true);
7645           // end - can coalesce with next chunk
7646         }
7647       } else {
7648         // In the midst of a free range; we are coalescing.
7649         print_free_block_coalesced(fc);
7650         if (CMSTraceSweeper) {
7651           gclog_or_tty->print("  -- pick up free block " PTR_FORMAT " (" SIZE_FORMAT ")\n", fc, size);
7652         }
7653         // remove it from the free lists
7654         _sp->removeFreeChunkFromFreeLists(fc);
7655         set_lastFreeRangeCoalesced(true);
7656         // If the chunk is being coalesced and the current free range is
7657         // in the free lists, remove the current free range so that it
7658         // will be returned to the free lists in its entirety - all
7659         // the coalesced pieces included.
7660         if (freeRangeInFreeLists()) {
7661           FreeChunk* ffc = (FreeChunk*) freeFinger();
7662           assert(ffc->size() == pointer_delta(addr, freeFinger()),
7663             "Size of free range is inconsistent with chunk size.");
7664           if (CMSTestInFreeList) {
7665             assert(_sp->verify_chunk_in_free_list(ffc),
7666               "free range is not in free lists");
7667           }
7668           _sp->removeFreeChunkFromFreeLists(ffc);
7669           set_freeRangeInFreeLists(false);
7670         }
7671       }
7672     }
7673     // Note that if the chunk is not coalescable (the else arm
7674     // below), we flush unconditionally, without the "lookahead"
7675     // that we do here (just below).
7676     if (inFreeRange()) lookahead_and_flush(fc, size);
7677   } else {
7678     // Code path common to both original and adaptive free lists.
7679 
7680     // Can't coalesce with the previous block; this should be treated
7681     // as the end of a free run, if any.
7682     if (inFreeRange()) {
7683       // we kicked some butt; time to pick up the garbage
7684       assert(freeFinger() < addr, "freeFinger points too high");
7685       flush_cur_free_chunk(freeFinger(), pointer_delta(addr, freeFinger()));
7686     }
7687     // else, nothing to do, just continue
7688   }
7689 }
7690 
7691 size_t SweepClosure::do_garbage_chunk(FreeChunk* fc) {
7692   // This is a chunk of garbage.  It is not in any free list.
7693   // Add it to a free list or let it possibly be coalesced into
7694   // a larger chunk.
7695   HeapWord* const addr = (HeapWord*) fc;
7696   const size_t size = CompactibleFreeListSpace::adjustObjectSize(oop(addr)->size());
7697 
7698   if (_sp->adaptive_freelists()) {
7699     // Verify that the bit map has no bits marked between
7700     // addr and purported end of just dead object.
7701     _bitMap->verifyNoOneBitsInRange(addr + 1, addr + size);
7702 
7703     do_post_free_or_garbage_chunk(fc, size);
7704   } else {
7705     if (!inFreeRange()) {
7706       // start of a new free range
7707       assert(size > 0, "A free range should have a size");
7708       initialize_free_range(addr, false);
7709     } else {
7710       // this will be swept up when we hit the end of the
7711       // free range
7712       if (CMSTraceSweeper) {
7713         gclog_or_tty->print("  -- pick up garbage " PTR_FORMAT " (" SIZE_FORMAT ")\n", fc, size);
7714       }
7715       // If the chunk is being coalesced and the current free range is
7716       // in the free lists, remove the current free range so that it
7717       // will be returned to the free lists in its entirety - all
7718       // the coalesced pieces included.
7719       if (freeRangeInFreeLists()) {
7720         FreeChunk* ffc = (FreeChunk*)freeFinger();
7721         assert(ffc->size() == pointer_delta(addr, freeFinger()),
7722           "Size of free range is inconsistent with chunk size.");
7723         if (CMSTestInFreeList) {
7724           assert(_sp->verify_chunk_in_free_list(ffc),
7725             "free range is not in free lists");
7726         }
7727         _sp->removeFreeChunkFromFreeLists(ffc);
7728         set_freeRangeInFreeLists(false);
7729       }
7730       set_lastFreeRangeCoalesced(true);
7731     }
7732     // this will be swept up when we hit the end of the free range
7733 
7734     // Verify that the bit map has no bits marked between
7735     // addr and purported end of just dead object.
7736     _bitMap->verifyNoOneBitsInRange(addr + 1, addr + size);
7737   }
7738   assert(_limit >= addr + size,
7739          "A freshly garbage chunk can't possibly straddle over _limit");
7740   if (inFreeRange()) lookahead_and_flush(fc, size);
7741   return size;
7742 }
7743 
7744 size_t SweepClosure::do_live_chunk(FreeChunk* fc) {
7745   HeapWord* addr = (HeapWord*) fc;
7746   // The sweeper has just found a live object. Return any accumulated
7747   // left hand chunk to the free lists.
7748   if (inFreeRange()) {
7749     assert(freeFinger() < addr, "freeFinger points too high");
7750     flush_cur_free_chunk(freeFinger(), pointer_delta(addr, freeFinger()));
7751   }
7752 
7753   // This object is live: we'd normally expect this to be
7754   // an oop, and would like to assert the following:
7755   // assert(oop(addr)->is_oop(), "live block should be an oop");
7756   // However, as we commented above, this may be an object whose
7757   // header hasn't yet been initialized.
7758   size_t size;
7759   assert(_bitMap->isMarked(addr), "Tautology for this control point");
7760   if (_bitMap->isMarked(addr + 1)) {
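         // The bit at addr + 1 is a Printezis mark: the object's header may
         // not have been initialized yet, so the bit map records the block's
         // extent instead, with a second marked bit at the last word of the
         // block (which is what the size computation below relies on).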
7761     // Determine the size from the bit map, rather than trying to
7762     // compute it from the object header.
7763     HeapWord* nextOneAddr = _bitMap->getNextMarkedWordAddress(addr + 2);
7764     size = pointer_delta(nextOneAddr + 1, addr);
7765     assert(size == CompactibleFreeListSpace::adjustObjectSize(size),
7766            "alignment problem");
7767 
7768 #ifdef ASSERT
7769     if (oop(addr)->klass_or_null() != NULL) {
7770       // Ignore mark word because we are running concurrent with mutators
7771       assert(oop(addr)->is_oop(true), "live block should be an oop");
7772       assert(size ==
7773              CompactibleFreeListSpace::adjustObjectSize(oop(addr)->size()),
7774              "P-mark and computed size do not agree");
7775     }
7776 #endif
7777 
7778   } else {
7779     // This should be an initialized object that's alive.
7780     assert(oop(addr)->klass_or_null() != NULL,
7781            "Should be an initialized object");
7782     // Ignore mark word because we are running concurrent with mutators
7783     assert(oop(addr)->is_oop(true), "live block should be an oop");
7784     // Verify that the bit map has no bits marked between
7785     // addr and purported end of this block.
7786     size = CompactibleFreeListSpace::adjustObjectSize(oop(addr)->size());
7787     assert(size >= 3, "Necessary for Printezis marks to work");
7788     assert(!_bitMap->isMarked(addr+1), "Tautology for this control point");
7789     DEBUG_ONLY(_bitMap->verifyNoOneBitsInRange(addr+2, addr+size);)
7790   }
7791   return size;
7792 }
7793 
7794 void SweepClosure::do_post_free_or_garbage_chunk(FreeChunk* fc,
7795                                                  size_t chunkSize) {
7796   // do_post_free_or_garbage_chunk() should only be called in the case
7797   // of the adaptive free list allocator.
7798   const bool fcInFreeLists = fc->is_free();
7799   assert(_sp->adaptive_freelists(), "Should only be used in this case.");
7800   assert((HeapWord*)fc <= _limit, "sweep invariant");
7801   if (CMSTestInFreeList && fcInFreeLists) {
7802     assert(_sp->verify_chunk_in_free_list(fc), "free chunk is not in free lists");
7803   }
7804 
7805   if (CMSTraceSweeper) {
7806     gclog_or_tty->print_cr("  -- pick up another chunk at " PTR_FORMAT " (" SIZE_FORMAT ")", fc, chunkSize);
7807   }
7808 
7809   HeapWord* const fc_addr = (HeapWord*) fc;
7810 
7811   bool coalesce;
7812   const size_t left  = pointer_delta(fc_addr, freeFinger());
7813   const size_t right = chunkSize;
7814   switch (FLSCoalescePolicy) {
7815     // The numeric value forms a coalescing aggressiveness metric.
7816     case 0:  { // never coalesce
7817       coalesce = false;
7818       break;
7819     }
7820     case 1: { // coalesce if left & right chunks on overpopulated lists
7821       coalesce = _sp->coalOverPopulated(left) &&
7822                  _sp->coalOverPopulated(right);
7823       break;
7824     }
7825     case 2: { // coalesce if left chunk on overpopulated list (default)
7826       coalesce = _sp->coalOverPopulated(left);
7827       break;
7828     }
7829     case 3: { // coalesce if left OR right chunk on overpopulated list
7830       coalesce = _sp->coalOverPopulated(left) ||
7831                  _sp->coalOverPopulated(right);
7832       break;
7833     }
7834     case 4: { // always coalesce
7835       coalesce = true;
7836       break;
7837     }
7838     default:
7839      ShouldNotReachHere();
7840   }
7841 
7842   // Should the current free range be coalesced?
7843   // If the chunk is in a free range and either we decided to coalesce above
7844   // or the chunk is near the large block at the end of the heap
7845   // (isNearLargestChunk() returns true), then coalesce this chunk.
7846   const bool doCoalesce = inFreeRange()
7847                           && (coalesce || _g->isNearLargestChunk(fc_addr));
7848   if (doCoalesce) {
7849     // Coalesce the current free range on the left with the new
7850     // chunk on the right.  If either is on a free list,
7851     // it must be removed from the list and stashed in the closure.
7852     if (freeRangeInFreeLists()) {
7853       FreeChunk* const ffc = (FreeChunk*)freeFinger();
7854       assert(ffc->size() == pointer_delta(fc_addr, freeFinger()),
7855         "Size of free range is inconsistent with chunk size.");
7856       if (CMSTestInFreeList) {
7857         assert(_sp->verify_chunk_in_free_list(ffc),
7858           "Chunk is not in free lists");
7859       }
7860       _sp->coalDeath(ffc->size());
7861       _sp->removeFreeChunkFromFreeLists(ffc);
7862       set_freeRangeInFreeLists(false);
7863     }
7864     if (fcInFreeLists) {
7865       _sp->coalDeath(chunkSize);
7866       assert(fc->size() == chunkSize,
7867         "The chunk has the wrong size or is not in the free lists");
7868       _sp->removeFreeChunkFromFreeLists(fc);
7869     }
7870     set_lastFreeRangeCoalesced(true);
7871     print_free_block_coalesced(fc);
7872   } else {  // not in a free range and/or should not coalesce
7873     // Return the current free range and start a new one.
7874     if (inFreeRange()) {
7875       // In a free range but cannot coalesce with the right hand chunk.
7876       // Put the current free range into the free lists.
7877       flush_cur_free_chunk(freeFinger(),
7878                            pointer_delta(fc_addr, freeFinger()));
7879     }
7880     // Set up for new free range.  Pass along whether the right hand
7881     // chunk is in the free lists.
7882     initialize_free_range((HeapWord*)fc, fcInFreeLists);
7883   }
7884 }
7885 
7886 // Lookahead flush:
7887 // If we are tracking a free range, and this is the last chunk that
7888 // we'll look at because its end crosses past _limit, we'll preemptively
7889 // flush it along with any free range we may be holding on to. Note that
7890 // this can be the case only for an already free or freshly garbage
7891 // chunk. If this block is an object, it can never straddle
7892 // over _limit. The "straddling" occurs when _limit is set at
7893 // the previous end of the space when this cycle started, and
7894 // a subsequent heap expansion caused the previously co-terminal
7895 // free block to be coalesced with the newly expanded portion,
7896 // thus rendering _limit a non-block-boundary, making it dangerous
7897 // for the sweeper to step over and examine.
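     //
     // Illustration (hypothetical addresses): _limit was set to the space's
     // old end E0 when this cycle started; the space then expanded to a new
     // end E1, and the free block that used to end at E0 was coalesced with
     // the expansion [E0, E1). The resulting block now ends beyond
     // _limit == E0, so we flush as soon as we see a chunk whose end reaches
     // or crosses _limit.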
7898 void SweepClosure::lookahead_and_flush(FreeChunk* fc, size_t chunk_size) {
7899   assert(inFreeRange(), "Should only be called if currently in a free range.");
7900   HeapWord* const eob = ((HeapWord*)fc) + chunk_size;
7901   assert(_sp->used_region().contains(eob - 1),
7902          err_msg("eob = " PTR_FORMAT " eob-1 = " PTR_FORMAT " _limit = " PTR_FORMAT
7903                  " out of bounds wrt _sp = [" PTR_FORMAT "," PTR_FORMAT ")"
7904                  " when examining fc = " PTR_FORMAT "(" SIZE_FORMAT ")",
7905                  eob, eob-1, _limit, _sp->bottom(), _sp->end(), fc, chunk_size));
7906   if (eob >= _limit) {
7907     assert(eob == _limit || fc->is_free(), "Only a free chunk should allow us to cross over the limit");
7908     if (CMSTraceSweeper) {
7909       gclog_or_tty->print_cr("_limit " PTR_FORMAT " reached or crossed by block "
7910                              "[" PTR_FORMAT "," PTR_FORMAT ") in space "
7911                              "[" PTR_FORMAT "," PTR_FORMAT ")",
7912                              _limit, fc, eob, _sp->bottom(), _sp->end());
7913     }
7914     // Return the storage we are tracking back into the free lists.
7915     if (CMSTraceSweeper) {
7916       gclog_or_tty->print_cr("Flushing ... ");
7917     }
7918     assert(freeFinger() < eob, "Error");
7919     flush_cur_free_chunk(freeFinger(), pointer_delta(eob, freeFinger()));
7920   }
7921 }
7922 
7923 void SweepClosure::flush_cur_free_chunk(HeapWord* chunk, size_t size) {
7924   assert(inFreeRange(), "Should only be called if currently in a free range.");
7925   assert(size > 0,
7926     "A zero sized chunk cannot be added to the free lists.");
7927   if (!freeRangeInFreeLists()) {
7928     if (CMSTestInFreeList) {
7929       FreeChunk* fc = (FreeChunk*) chunk;
7930       fc->set_size(size);
7931       assert(!_sp->verify_chunk_in_free_list(fc),
7932         "chunk should not be in free lists yet");
7933     }
7934     if (CMSTraceSweeper) {
7935       gclog_or_tty->print_cr(" -- add free block " PTR_FORMAT " (" SIZE_FORMAT ") to free lists",
7936                     chunk, size);
7937     }
7938     // A new free range is about to start.  The current
7939     // free range has not been added to the free lists yet, or
7940     // was removed, so add it back.
7941     // If the current free range was coalesced, then the death
7942     // of the free range was recorded.  Record a birth now.
7943     if (lastFreeRangeCoalesced()) {
7944       _sp->coalBirth(size);
7945     }
7946     _sp->addChunkAndRepairOffsetTable(chunk, size,
7947             lastFreeRangeCoalesced());
7948   } else if (CMSTraceSweeper) {
7949     gclog_or_tty->print_cr("Already in free list: nothing to flush");
7950   }
7951   set_inFreeRange(false);
7952   set_freeRangeInFreeLists(false);
7953 }
7954 
7955 // We take a break if we've been at this for a while,
7956 // so as to avoid monopolizing the locks involved.
7957 void SweepClosure::do_yield_work(HeapWord* addr) {
7958   // Return current free chunk being used for coalescing (if any)
7959   // to the appropriate freelist.  After yielding, the next
7960   // free block encountered will start a coalescing range of
7961   // free blocks.  If the next free block is adjacent to the
7962   // chunk just flushed, they will need to wait for the next
7963   // sweep to be coalesced.
7964   if (inFreeRange()) {
7965     flush_cur_free_chunk(freeFinger(), pointer_delta(addr, freeFinger()));
7966   }
7967 
7968   // First give up the locks, then yield, then re-lock.
7969   // We should probably use a constructor/destructor idiom to
7970   // do this unlock/lock or modify the MutexUnlocker class to
7971   // serve our purpose. XXX
7972   assert_lock_strong(_bitMap->lock());
7973   assert_lock_strong(_freelistLock);
7974   assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
7975          "CMS thread should hold CMS token");
7976   _bitMap->lock()->unlock();
7977   _freelistLock->unlock();
7978   ConcurrentMarkSweepThread::desynchronize(true);
7979   _collector->stopTimer();
7980   if (PrintCMSStatistics != 0) {
7981     _collector->incrementYields();
7982   }
7983 
7984   // See the comment in coordinator_yield()
7985   for (unsigned i = 0; i < CMSYieldSleepCount &&
7986                        ConcurrentMarkSweepThread::should_yield() &&
7987                        !CMSCollector::foregroundGCIsActive(); ++i) {
7988     os::sleep(Thread::current(), 1, false);
7989   }
7990 
7991   ConcurrentMarkSweepThread::synchronize(true);
7992   _freelistLock->lock();
7993   _bitMap->lock()->lock_without_safepoint_check();
7994   _collector->startTimer();
7995 }
7996 
7997 #ifndef PRODUCT
7998 // This is actually very useful in a product build if it can
7999 // be called from the debugger.  Compile it into the product
8000 // as needed.
8001 bool debug_verify_chunk_in_free_list(FreeChunk* fc) {
8002   return debug_cms_space->verify_chunk_in_free_list(fc);
8003 }
8004 #endif
8005 
8006 void SweepClosure::print_free_block_coalesced(FreeChunk* fc) const {
8007   if (CMSTraceSweeper) {
8008     gclog_or_tty->print_cr("Sweep:coal_free_blk " PTR_FORMAT " (" SIZE_FORMAT ")",
8009                            fc, fc->size());
8010   }
8011 }
8012 
8013 // CMSIsAliveClosure
8014 bool CMSIsAliveClosure::do_object_b(oop obj) {
8015   HeapWord* addr = (HeapWord*)obj;
8016   return addr != NULL &&
8017          (!_span.contains(addr) || _bit_map->isMarked(addr));
8018 }
8019 
8020 
8021 CMSKeepAliveClosure::CMSKeepAliveClosure( CMSCollector* collector,
8022                       MemRegion span,
8023                       CMSBitMap* bit_map, CMSMarkStack* mark_stack,
8024                       bool cpc):
8025   _collector(collector),
8026   _span(span),
8027   _bit_map(bit_map),
8028   _mark_stack(mark_stack),
8029   _concurrent_precleaning(cpc) {
8030   assert(!_span.is_empty(), "Empty span could spell trouble");
8031 }
8032 
8033 
8034 // CMSKeepAliveClosure: the serial version
8035 void CMSKeepAliveClosure::do_oop(oop obj) {
8036   HeapWord* addr = (HeapWord*)obj;
8037   if (_span.contains(addr) &&
8038       !_bit_map->isMarked(addr)) {
8039     _bit_map->mark(addr);
8040     bool simulate_overflow = false;
8041     NOT_PRODUCT(
8042       if (CMSMarkStackOverflowALot &&
8043           _collector->simulate_overflow()) {
8044         // simulate a stack overflow
8045         simulate_overflow = true;
8046       }
8047     )
8048     if (simulate_overflow || !_mark_stack->push(obj)) {
8049       if (_concurrent_precleaning) {
8050         // We dirty the overflowed object and let the remark
8051         // phase deal with it.
8052         assert(_collector->overflow_list_is_empty(), "Error");
8053         // In the case of object arrays, we need to dirty all of
8054         // the cards that the object spans. No locking or atomics
8055         // are needed since no one else can be mutating the mod union
8056         // table.
8057         if (obj->is_objArray()) {
8058           size_t sz = obj->size();
8059           HeapWord* end_card_addr =
8060             (HeapWord*)round_to((intptr_t)(addr+sz), CardTableModRefBS::card_size);
8061           MemRegion redirty_range = MemRegion(addr, end_card_addr);
8062           assert(!redirty_range.is_empty(), "Arithmetical tautology");
8063           _collector->_modUnionTable.mark_range(redirty_range);
8064         } else {
8065           _collector->_modUnionTable.mark(addr);
8066         }
8067         _collector->_ser_kac_preclean_ovflw++;
8068       } else {
8069         _collector->push_on_overflow_list(obj);
8070         _collector->_ser_kac_ovflw++;
8071       }
8072     }
8073   }
8074 }
8075 
8076 void CMSKeepAliveClosure::do_oop(oop* p)       { CMSKeepAliveClosure::do_oop_work(p); }
8077 void CMSKeepAliveClosure::do_oop(narrowOop* p) { CMSKeepAliveClosure::do_oop_work(p); }
8078 
8079 // CMSParKeepAliveClosure: a parallel version of the above.
8080 // The work queues are private to each closure (thread),
8081 // but may be available for stealing by other threads.
8082 void CMSParKeepAliveClosure::do_oop(oop obj) {
8083   HeapWord* addr = (HeapWord*)obj;
8084   if (_span.contains(addr) &&
8085       !_bit_map->isMarked(addr)) {
8086     // In general, during recursive tracing, several threads
8087     // may be concurrently getting here; the first one to
8088     // "tag" it claims it.
8089     if (_bit_map->par_mark(addr)) {
8090       bool res = _work_queue->push(obj);
8091       assert(res, "Low water mark should be much less than capacity");
8092       // Do a recursive trim in the hope that this will keep
8093       // stack usage lower, but leave some oops for potential stealers
8094       trim_queue(_low_water_mark);
8095     } // Else, another thread got there first
8096   }
8097 }
8098 
8099 void CMSParKeepAliveClosure::do_oop(oop* p)       { CMSParKeepAliveClosure::do_oop_work(p); }
8100 void CMSParKeepAliveClosure::do_oop(narrowOop* p) { CMSParKeepAliveClosure::do_oop_work(p); }
8101 
8102 void CMSParKeepAliveClosure::trim_queue(uint max) {
8103   while (_work_queue->size() > max) {
8104     oop new_oop;
8105     if (_work_queue->pop_local(new_oop)) {
8106       assert(new_oop != NULL && new_oop->is_oop(), "Expected an oop");
8107       assert(_bit_map->isMarked((HeapWord*)new_oop),
8108              "no white objects on this stack!");
8109       assert(_span.contains((HeapWord*)new_oop), "Out of bounds oop");
8110       // iterate over the oops in this oop, marking and pushing
8111       // the ones in CMS heap (i.e. in _span).
8112       new_oop->oop_iterate(&_mark_and_push);
8113     }
8114   }
8115 }
8116 
8117 CMSInnerParMarkAndPushClosure::CMSInnerParMarkAndPushClosure(
8118                                 CMSCollector* collector,
8119                                 MemRegion span, CMSBitMap* bit_map,
8120                                 OopTaskQueue* work_queue):
8121   _collector(collector),
8122   _span(span),
8123   _bit_map(bit_map),
8124   _work_queue(work_queue) { }
8125 
8126 void CMSInnerParMarkAndPushClosure::do_oop(oop obj) {
8127   HeapWord* addr = (HeapWord*)obj;
8128   if (_span.contains(addr) &&
8129       !_bit_map->isMarked(addr)) {
8130     if (_bit_map->par_mark(addr)) {
8131       bool simulate_overflow = false;
8132       NOT_PRODUCT(
8133         if (CMSMarkStackOverflowALot &&
8134             _collector->par_simulate_overflow()) {
8135           // simulate a stack overflow
8136           simulate_overflow = true;
8137         }
8138       )
8139       if (simulate_overflow || !_work_queue->push(obj)) {
8140         _collector->par_push_on_overflow_list(obj);
8141         _collector->_par_kac_ovflw++;
8142       }
8143     } // Else another thread got there already
8144   }
8145 }
8146 
8147 void CMSInnerParMarkAndPushClosure::do_oop(oop* p)       { CMSInnerParMarkAndPushClosure::do_oop_work(p); }
8148 void CMSInnerParMarkAndPushClosure::do_oop(narrowOop* p) { CMSInnerParMarkAndPushClosure::do_oop_work(p); }
8149 
8150 //////////////////////////////////////////////////////////////////
8151 //  CMSExpansionCause                /////////////////////////////
8152 //////////////////////////////////////////////////////////////////
8153 const char* CMSExpansionCause::to_string(CMSExpansionCause::Cause cause) {
8154   switch (cause) {
8155     case _no_expansion:
8156       return "No expansion";
8157     case _satisfy_free_ratio:
8158       return "Free ratio";
8159     case _satisfy_promotion:
8160       return "Satisfy promotion";
8161     case _satisfy_allocation:
8162       return "allocation";
8163     case _allocate_par_lab:
8164       return "Par LAB";
8165     case _allocate_par_spooling_space:
8166       return "Par Spooling Space";
8167     case _adaptive_size_policy:
8168       return "Ergonomics";
8169     default:
8170       return "unknown";
8171   }
8172 }
8173 
8174 void CMSDrainMarkingStackClosure::do_void() {
8175   // the max number to take from overflow list at a time
8176   const size_t num = _mark_stack->capacity()/4;
8177   assert(!_concurrent_precleaning || _collector->overflow_list_is_empty(),
8178          "Overflow list should be NULL during concurrent phases");
8179   while (!_mark_stack->isEmpty() ||
8180          // if stack is empty, check the overflow list
8181          _collector->take_from_overflow_list(num, _mark_stack)) {
8182     oop obj = _mark_stack->pop();
8183     HeapWord* addr = (HeapWord*)obj;
8184     assert(_span.contains(addr), "Should be within span");
8185     assert(_bit_map->isMarked(addr), "Should be marked");
8186     assert(obj->is_oop(), "Should be an oop");
8187     obj->oop_iterate(_keep_alive);
8188   }
8189 }
8190 
8191 void CMSParDrainMarkingStackClosure::do_void() {
8192   // drain queue
8193   trim_queue(0);
8194 }
8195 
8196 // Trim our work_queue so its length is below max at return
8197 void CMSParDrainMarkingStackClosure::trim_queue(uint max) {
8198   while (_work_queue->size() > max) {
8199     oop new_oop;
8200     if (_work_queue->pop_local(new_oop)) {
8201       assert(new_oop->is_oop(), "Expected an oop");
8202       assert(_bit_map->isMarked((HeapWord*)new_oop),
8203              "no white objects on this stack!");
8204       assert(_span.contains((HeapWord*)new_oop), "Out of bounds oop");
8205       // iterate over the oops in this oop, marking and pushing
8206       // the ones in CMS heap (i.e. in _span).
8207       new_oop->oop_iterate(&_mark_and_push);
8208     }
8209   }
8210 }
8211 
8212 ////////////////////////////////////////////////////////////////////
8213 // Support for Marking Stack Overflow list handling and related code
8214 ////////////////////////////////////////////////////////////////////
8215 // Much of the following code is similar in shape and spirit to the
8216 // code used in ParNewGC. We should try to share that code
8217 // as much as possible in the future.
8218 
8219 #ifndef PRODUCT
8220 // Debugging support for CMSStackOverflowALot
8221 
8222 // It's OK to call this multi-threaded; the worst thing
8223 // that can happen is that we'll get a bunch of closely
8224 // spaced simulated overflows, but that's fine; in fact it's
8225 // probably good, as it exercises the overflow code
8226 // under contention.
8227 bool CMSCollector::simulate_overflow() {
8228   if (_overflow_counter-- <= 0) { // just being defensive
8229     _overflow_counter = CMSMarkStackOverflowInterval;
8230     return true;
8231   } else {
8232     return false;
8233   }
8234 }
8235 
8236 bool CMSCollector::par_simulate_overflow() {
8237   return simulate_overflow();
8238 }
8239 #endif
8240 
8241 // Single-threaded
8242 bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
8243   assert(stack->isEmpty(), "Expected precondition");
8244   assert(stack->capacity() > num, "Shouldn't bite off more than we can chew");
8245   size_t i = num;
8246   oop  cur = _overflow_list;
8247   const markOop proto = markOopDesc::prototype();
8248   NOT_PRODUCT(ssize_t n = 0;)
8249   for (oop next; i > 0 && cur != NULL; cur = next, i--) {
8250     next = oop(cur->mark());
8251     cur->set_mark(proto);   // until proven otherwise
8252     assert(cur->is_oop(), "Should be an oop");
8253     bool res = stack->push(cur);
8254     assert(res, "Bit off more than we can chew?");
8255     NOT_PRODUCT(n++;)
8256   }
8257   _overflow_list = cur;
8258 #ifndef PRODUCT
8259   assert(_num_par_pushes >= n, "Too many pops?");
8260   _num_par_pushes -= n;
8261 #endif
8262   return !stack->isEmpty();
8263 }
8264 
8265 #define BUSY  (cast_to_oop<intptr_t>(0x1aff1aff))
8266 // (MT-safe) Get a prefix of at most "num" from the list.
8267 // The overflow list is chained through the mark word of
8268 // each object in the list. We fetch the entire list,
8269 // break off a prefix of the right size and return the
8270 // remainder. If other threads try to take objects from
8271 // the overflow list at that time, they will wait for
8272 // some time to see if data becomes available. If (and
8273 // only if) another thread places one or more object(s)
8274 // on the global list before we have returned the suffix
8275 // to the global list, we will walk down our local list
8276 // to find its end and append the global list to
8277 // our suffix before returning it. This suffix walk can
8278 // prove to be expensive (quadratic in the amount of traffic)
8279 // when there are many objects in the overflow list and
8280 // there is much producer-consumer contention on the list.
8281 // *NOTE*: The overflow list manipulation code here and
8282 // in ParNewGeneration:: are very similar in shape,
8283 // except that in the ParNew case we use the old (from/eden)
8284 // copy of the object to thread the list via its klass word.
8285 // Because of the common code, if you make any changes in
8286 // the code below, please check the ParNew version to see if
8287 // similar changes might be needed.
8288 // CR 6797058 has been filed to consolidate the common code.
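     //
     // Illustration (hypothetical objects A..E): suppose _overflow_list is
     // A -> B -> C -> D, threaded through the mark words, and num == 2. We
     // xchg BUSY into the list head, keep the prefix A -> B for our local
     // work queue, and try to cmpxchg the suffix head C back into
     // _overflow_list. If another thread published E in the meantime, we
     // walk to the suffix tail D and splice, leaving C -> D -> E on the
     // global list.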
8289 bool CMSCollector::par_take_from_overflow_list(size_t num,
8290                                                OopTaskQueue* work_q,
8291                                                int no_of_gc_threads) {
8292   assert(work_q->size() == 0, "First empty local work queue");
8293   assert(num < work_q->max_elems(), "Can't bite more than we can chew");
8294   if (_overflow_list == NULL) {
8295     return false;
8296   }
8297   // Grab the entire list; we'll put back a suffix
8298   oop prefix = cast_to_oop(Atomic::xchg_ptr(BUSY, &_overflow_list));
8299   Thread* tid = Thread::current();
8300   // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
8301   // set to ParallelGCThreads.
8302   size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
8303   size_t sleep_time_millis = MAX2((size_t)1, num/100);
8304   // If the list is busy, we spin for a short while,
8305   // sleeping between attempts to get the list.
8306   for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
8307     os::sleep(tid, sleep_time_millis, false);
8308     if (_overflow_list == NULL) {
8309       // Nothing left to take
8310       return false;
8311     } else if (_overflow_list != BUSY) {
8312       // Try and grab the prefix
8313       prefix = cast_to_oop(Atomic::xchg_ptr(BUSY, &_overflow_list));
8314     }
8315   }
8316   // If the list was found to be empty, or we spun long
8317   // enough, we give up and return empty-handed. If we leave
8318   // the list in the BUSY state below, it must be the case that
8319   // some other thread holds the overflow list and will set it
8320   // to a non-BUSY state in the future.
8321   if (prefix == NULL || prefix == BUSY) {
8322      // Nothing to take or waited long enough
8323      if (prefix == NULL) {
8324        // Write back the NULL in case we overwrote it with BUSY above
8325        // and it is still the same value.
8326        (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
8327      }
8328      return false;
8329   }
8330   assert(prefix != NULL && prefix != BUSY, "Error");
8331   size_t i = num;
8332   oop cur = prefix;
8333   // Walk down the first "num" objects, unless we reach the end.
8334   for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
8335   if (cur->mark() == NULL) {
8336     // We have "num" or fewer elements in the list, so there
8337     // is nothing to return to the global list.
8338     // Write back the NULL in lieu of the BUSY we wrote
8339     // above, if it is still the same value.
8340     if (_overflow_list == BUSY) {
8341       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
8342     }
8343   } else {
8344     // Chop off the suffix and return it to the global list.
8345     assert(cur->mark() != BUSY, "Error");
8346     oop suffix_head = cur->mark(); // suffix will be put back on global list
8347     cur->set_mark(NULL);           // break off suffix
8348     // It's possible that the list is still in the empty (busy) state
8349     // we left it in a short while ago; in that case we may be
8350     // able to place back the suffix without incurring the cost
8351     // of a walk down the list.
8352     oop observed_overflow_list = _overflow_list;
8353     oop cur_overflow_list = observed_overflow_list;
8354     bool attached = false;
8355     while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
8356       observed_overflow_list =
8357         (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
8358       if (cur_overflow_list == observed_overflow_list) {
8359         attached = true;
8360         break;
8361       } else cur_overflow_list = observed_overflow_list;
8362     }
8363     if (!attached) {
8364       // Too bad, someone else sneaked in (at least) an element; we'll need
8365       // to do a splice. Find tail of suffix so we can prepend suffix to global
8366       // list.
8367       for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
8368       oop suffix_tail = cur;
8369       assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
8370              "Tautology");
8371       observed_overflow_list = _overflow_list;
8372       do {
8373         cur_overflow_list = observed_overflow_list;
8374         if (cur_overflow_list != BUSY) {
8375           // Do the splice ...
8376           suffix_tail->set_mark(markOop(cur_overflow_list));
8377         } else { // cur_overflow_list == BUSY
8378           suffix_tail->set_mark(NULL);
8379         }
8380         // ... and try to place spliced list back on overflow_list ...
8381         observed_overflow_list =
8382           (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
8383       } while (cur_overflow_list != observed_overflow_list);
8384       // ... until we have succeeded in doing so.
8385     }
8386   }
8387 
8388   // Push the prefix elements on work_q
8389   assert(prefix != NULL, "control point invariant");
8390   const markOop proto = markOopDesc::prototype();
8391   oop next;
8392   NOT_PRODUCT(ssize_t n = 0;)
8393   for (cur = prefix; cur != NULL; cur = next) {
8394     next = oop(cur->mark());
8395     cur->set_mark(proto);   // until proven otherwise
8396     assert(cur->is_oop(), "Should be an oop");
8397     bool res = work_q->push(cur);
8398     assert(res, "Bit off more than we can chew?");
8399     NOT_PRODUCT(n++;)
8400   }
8401 #ifndef PRODUCT
8402   assert(_num_par_pushes >= n, "Too many pops?");
8403   Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
8404 #endif
8405   return true;
8406 }
8407 
8408 // Single-threaded
8409 void CMSCollector::push_on_overflow_list(oop p) {
8410   NOT_PRODUCT(_num_par_pushes++;)
8411   assert(p->is_oop(), "Not an oop");
8412   preserve_mark_if_necessary(p);
8413   p->set_mark((markOop)_overflow_list);
8414   _overflow_list = p;
8415 }
8416 
8417 // Multi-threaded; use CAS to prepend to overflow list
8418 void CMSCollector::par_push_on_overflow_list(oop p) {
8419   NOT_PRODUCT(Atomic::inc_ptr(&_num_par_pushes);)
8420   assert(p->is_oop(), "Not an oop");
8421   par_preserve_mark_if_necessary(p);
8422   oop observed_overflow_list = _overflow_list;
8423   oop cur_overflow_list;
8424   do {
8425     cur_overflow_list = observed_overflow_list;
8426     if (cur_overflow_list != BUSY) {
8427       p->set_mark(markOop(cur_overflow_list));
8428     } else {
8429       p->set_mark(NULL);
8430     }
8431     observed_overflow_list =
8432       (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list);
8433   } while (cur_overflow_list != observed_overflow_list);
8434 }
8435 #undef BUSY
8436 
8437 // Single threaded
8438 // General Note on GrowableArray: pushes may silently fail
8439 // because we are (temporarily) out of C-heap for expanding
8440 // the stack. The problem is quite ubiquitous and affects
8441 // a lot of code in the JVM. The prudent thing for GrowableArray
8442 // to do (for now) is to exit with an error. However, that may
8443 // be too draconian in some cases because the caller may be
8444 // able to recover without much harm. For such cases, we
8445 // should probably introduce a "soft_push" method which returns
8446 // an indication of success or failure with the assumption that
8447 // the caller may be able to recover from a failure; code in
8448 // the VM can then be changed, incrementally, to deal with such
8449 // failures where possible, thus incrementally hardening the VM
8450 // in such low-resource situations.
8451 void CMSCollector::preserve_mark_work(oop p, markOop m) {
8452   _preserved_oop_stack.push(p);
8453   _preserved_mark_stack.push(m);
8454   assert(m == p->mark(), "Mark word changed");
8455   assert(_preserved_oop_stack.size() == _preserved_mark_stack.size(),
8456          "bijection");
8457 }
8458 
8459 // Single threaded
8460 void CMSCollector::preserve_mark_if_necessary(oop p) {
8461   markOop m = p->mark();
8462   if (m->must_be_preserved(p)) {
8463     preserve_mark_work(p, m);
8464   }
8465 }
8466 
8467 void CMSCollector::par_preserve_mark_if_necessary(oop p) {
8468   markOop m = p->mark();
8469   if (m->must_be_preserved(p)) {
8470     MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
8471     // Even though we read the mark word without holding
8472     // the lock, we are assured that it will not change
8473     // because we "own" this oop, so no other thread can
8474     // be trying to push it on the overflow list; see
8475     // the assertion in preserve_mark_work() that checks
8476     // that m == p->mark().
8477     preserve_mark_work(p, m);
8478   }
8479 }
8480 
8481 // We should be able to do this multi-threaded,
8482 // a chunk of stack being a task (this is
8483 // correct because each oop only ever appears
8484 // once in the overflow list). However, it's
8485 // not very easy to completely overlap this with
8486 // other operations, so it will generally not be done
8487 // until all work's been completed. Because we
8488 // expect the preserved oop stack (set) to be small,
8489 // it's probably fine to do this single-threaded.
8490 // We can explore cleverer concurrent/overlapped/parallel
8491 // processing of preserved marks if we feel the
8492 // need for this in the future. Stack overflow should
8493 // be so rare in practice and, when it happens, its
8494 // effect on performance so great that this will
8495 // likely just be in the noise anyway.
8496 void CMSCollector::restore_preserved_marks_if_any() {
8497   assert(SafepointSynchronize::is_at_safepoint(),
8498          "world should be stopped");
8499   assert(Thread::current()->is_ConcurrentGC_thread() ||
8500          Thread::current()->is_VM_thread(),
8501          "should be single-threaded");
8502   assert(_preserved_oop_stack.size() == _preserved_mark_stack.size(),
8503          "bijection");
8504 
8505   while (!_preserved_oop_stack.is_empty()) {
8506     oop p = _preserved_oop_stack.pop();
8507     assert(p->is_oop(), "Should be an oop");
8508     assert(_span.contains(p), "oop should be in _span");
8509     assert(p->mark() == markOopDesc::prototype(),
8510            "Set when taken from overflow list");
8511     markOop m = _preserved_mark_stack.pop();
8512     p->set_mark(m);
8513   }
8514   assert(_preserved_mark_stack.is_empty() && _preserved_oop_stack.is_empty(),
8515          "stacks were cleared above");
8516 }
8517 
8518 #ifndef PRODUCT
8519 bool CMSCollector::no_preserved_marks() const {
8520   return _preserved_mark_stack.is_empty() && _preserved_oop_stack.is_empty();
8521 }
8522 #endif
8523 
8524 // Transfer some number of overflowed objects to the usual marking
8525 // stack. Return true if some objects were transferred.
8526 bool MarkRefsIntoAndScanClosure::take_from_overflow_list() {
8527   size_t num = MIN2((size_t)(_mark_stack->capacity() - _mark_stack->length())/4,
8528                     (size_t)ParGCDesiredObjsFromOverflowList);
8529 
8530   bool res = _collector->take_from_overflow_list(num, _mark_stack);
8531   assert(_collector->overflow_list_is_empty() || res,
8532          "If list is not empty, we should have taken something");
8533   assert(!res || !_mark_stack->isEmpty(),
8534          "If we took something, it should now be on our stack");
8535   return res;
8536 }
8537 
8538 size_t MarkDeadObjectsClosure::do_blk(HeapWord* addr) {
8539   size_t res = _sp->block_size_no_stall(addr, _collector);
8540   if (_sp->block_is_obj(addr)) {
8541     if (_live_bit_map->isMarked(addr)) {
8542       // It can't have been dead in a previous cycle
8543       guarantee(!_dead_bit_map->isMarked(addr), "No resurrection!");
8544     } else {
8545       _dead_bit_map->mark(addr);      // mark the dead object
8546     }
8547   }
8548   // Could be 0, if the block size could not be computed without stalling.
8549   return res;
8550 }
8551 
8552 TraceCMSMemoryManagerStats::TraceCMSMemoryManagerStats(CMSCollector::CollectorState phase, GCCause::Cause cause): TraceMemoryManagerStats() {
8553 
8554   switch (phase) {
8555     case CMSCollector::InitialMarking:
8556       initialize(true  /* fullGC */ ,
8557                  cause /* cause of the GC */,
8558                  true  /* recordGCBeginTime */,
8559                  true  /* recordPreGCUsage */,
8560                  false /* recordPeakUsage */,
8561                  false /* recordPostGCusage */,
8562                  true  /* recordAccumulatedGCTime */,
8563                  false /* recordGCEndTime */,
8564                  false /* countCollection */  );
8565       break;
8566 
8567     case CMSCollector::FinalMarking:
8568       initialize(true  /* fullGC */ ,
8569                  cause /* cause of the GC */,
8570                  false /* recordGCBeginTime */,
8571                  false /* recordPreGCUsage */,
8572                  false /* recordPeakUsage */,
8573                  false /* recordPostGCusage */,
8574                  true  /* recordAccumulatedGCTime */,
8575                  false /* recordGCEndTime */,
8576                  false /* countCollection */  );
8577       break;
8578 
8579     case CMSCollector::Sweeping:
8580       initialize(true  /* fullGC */ ,
8581                  cause /* cause of the GC */,
8582                  false /* recordGCBeginTime */,
8583                  false /* recordPreGCUsage */,
8584                  true  /* recordPeakUsage */,
8585                  true  /* recordPostGCusage */,
8586                  false /* recordAccumulatedGCTime */,
8587                  true  /* recordGCEndTime */,
8588                  true  /* countCollection */  );
8589       break;
8590 
8591     default:
8592       ShouldNotReachHere();
8593   }
8594 }