--- old/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ new/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
... 284 lines elided ...
 285  285  }
 286  286  
 287  287  void ConcurrentMarkSweepGeneration::ref_processor_init() {
 288  288    assert(collector() != NULL, "no collector");
 289  289    collector()->ref_processor_init();
 290  290  }
 291  291  
 292  292  void CMSCollector::ref_processor_init() {
 293  293    if (_ref_processor == NULL) {
 294  294      // Allocate and initialize a reference processor
 295      -    _ref_processor = ReferenceProcessor::create_ref_processor(
 296      -        _span,                               // span
 297      -        _cmsGen->refs_discovery_is_atomic(), // atomic_discovery
 298      -        _cmsGen->refs_discovery_is_mt(),     // mt_discovery
 299      -        &_is_alive_closure,
 300      -        ParallelGCThreads,
 301      -        ParallelRefProcEnabled);
      295 +    _ref_processor =
      296 +      new ReferenceProcessor(_span,                               // span
      297 +                             (ParallelGCThreads > 1) && ParallelRefProcEnabled, // mt processing
      298 +                             ParallelGCThreads,                   // mt processing degree
      299 +                             _cmsGen->refs_discovery_is_mt(),     // mt discovery
      300 +                             MAX2(ConcGCThreads, ParallelGCThreads), // mt discovery degree
      301 +                             _cmsGen->refs_discovery_is_atomic(), // discovery is not atomic
      302 +                             &_is_alive_closure,                  // closure for liveness info
      303 +                             false);                              // next field updates do not need write barrier
 302  304      // Initialize the _ref_processor field of CMSGen
 303  305      _cmsGen->set_ref_processor(_ref_processor);
 304  306  
 305  307      // Allocate a dummy ref processor for perm gen.
 306  308      ReferenceProcessor* rp2 = new ReferenceProcessor();
 307  309      if (rp2 == NULL) {
 308  310        vm_exit_during_initialization("Could not allocate ReferenceProcessor object");
 309  311      }
 310  312      _permGen->set_ref_processor(rp2);
 311  313    }
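
Note on the hunk above: the ReferenceProcessor::create_ref_processor() factory call is replaced by a direct constructor call that passes MT processing and MT discovery independently, each with its own degree. A minimal sketch of how the two MT arguments are derived, as a reading of the new call rather than code from the patch (variable names here are illustrative):

    // MT *processing* is worthwhile only with more than one parallel GC
    // thread, and is still gated by -XX:+ParallelRefProcEnabled.
    bool mt_processing = (ParallelGCThreads > 1) && ParallelRefProcEnabled;
    // MT *discovery* can happen concurrently (ConcGCThreads workers) or in
    // a stop-the-world parallel phase (ParallelGCThreads workers), so the
    // number of discovered lists must cover the larger of the two.
    uintx mt_discovery_degree = MAX2(ConcGCThreads, ParallelGCThreads);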
... 322 lines elided ...
 634  636    if (!_markStack.allocate(MarkStackSize)) {
 635  637      warning("Failed to allocate CMS Marking Stack");
 636  638      return;
 637  639    }
 638  640    if (!_revisitStack.allocate(CMSRevisitStackSize)) {
 639  641      warning("Failed to allocate CMS Revisit Stack");
 640  642      return;
 641  643    }
 642  644  
 643  645    // Support for multi-threaded concurrent phases
 644      -  if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) {
      646 +  if (CMSConcurrentMTEnabled) {
 645  647      if (FLAG_IS_DEFAULT(ConcGCThreads)) {
 646  648        // just for now
 647  649        FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
 648  650      }
 649  651      if (ConcGCThreads > 1) {
 650  652        _conc_workers = new YieldingFlexibleWorkGang("Parallel CMS Threads",
 651  653                                   ConcGCThreads, true);
 652  654        if (_conc_workers == NULL) {
 653  655          warning("GC/CMS: _conc_workers allocation failure: "
 654  656                "forcing -CMSConcurrentMTEnabled");
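
A worked example for the ConcGCThreads default computed above (values illustrative):

    // FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4):
    //   ParallelGCThreads = 8  =>  ConcGCThreads = (8 + 3)/4 = 2
    //                              (ConcGCThreads > 1, so the gang is created)
    //   ParallelGCThreads = 4  =>  ConcGCThreads = (4 + 3)/4 = 1
    //                              (ConcGCThreads > 1 fails; no YieldingFlexibleWorkGang)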
... 1338 lines elided ...
1993 1995    // Temporarily, clear the "is_alive_non_header" field of the
1994 1996    // reference processor.
1995 1997    ReferenceProcessorIsAliveMutator y(ref_processor(), NULL);
1996 1998  
1997 1999    // Temporarily make reference _processing_ single threaded (non-MT).
1998 2000    ReferenceProcessorMTProcMutator z(ref_processor(), false);
1999 2001  
2000 2002    // Temporarily make refs discovery atomic
2001 2003    ReferenceProcessorAtomicMutator w(ref_processor(), true);
2002 2004  
     2005 +  // Temporarily make refs discovery single-threaded (non-MT)
     2006 +  ReferenceProcessorMTDiscoveryMutator v(ref_processor(), false);
     2007 +
2003 2008    ref_processor()->set_enqueuing_is_done(false);
2004 2009    ref_processor()->enable_discovery();
2005 2010    ref_processor()->setup_policy(clear_all_soft_refs);
2006 2011    // If an asynchronous collection finishes, the _modUnionTable is
2007 2012    // all clear.  If we are assuming the collection from an asynchronous
2008 2013    // collection, clear the _modUnionTable.
2009 2014    assert(_collectorState != Idling || _modUnionTable.isAllClear(),
2010 2015      "_modUnionTable should be clear if the baton was not passed");
2011 2016    _modUnionTable.clear_all();
2012 2017  
... 2243 lines elided ...
4256 4261  
4257 4262    // Since the actual number of workers we get may be different
4258 4263    // from the number we requested above, do we need to do anything different
 4259 4264    // below? In particular, maybe we need to subclass the SequentialSubTasksDone
 4260 4265    // class?? XXX
4261 4266    cms_space ->initialize_sequential_subtasks_for_marking(num_workers);
4262 4267    perm_space->initialize_sequential_subtasks_for_marking(num_workers);
4263 4268  
4264 4269    // Refs discovery is already non-atomic.
4265 4270    assert(!ref_processor()->discovery_is_atomic(), "Should be non-atomic");
4266      -  // Mutate the Refs discovery so it is MT during the
4267      -  // multi-threaded marking phase.
4268      -  ReferenceProcessorMTMutator mt(ref_processor(), num_workers > 1);
     4271 +  assert(num_workers <= 1 || ref_processor()->discovery_is_mt(), "Discovery should be MT");
4269 4272    DEBUG_ONLY(RememberKlassesChecker cmx(should_unload_classes());)
4270 4273    conc_workers()->start_task(&tsk);
4271 4274    while (tsk.yielded()) {
4272 4275      tsk.coordinator_yield();
4273 4276      conc_workers()->continue_task(&tsk);
4274 4277    }
4275 4278    // If the task was aborted, _restart_addr will be non-NULL
4276 4279    assert(tsk.completed() || _restart_addr != NULL, "Inconsistency");
4277 4280    while (_restart_addr != NULL) {
4278 4281      // XXX For now we do not make use of ABORTED state and have not
... 1290 lines elided ...
5569 5572    // of parallel tasks (per constituent space) that are dynamically
5570 5573    // claimed by the parallel threads.
5571 5574    cms_space->initialize_sequential_subtasks_for_rescan(n_workers);
5572 5575    perm_space->initialize_sequential_subtasks_for_rescan(n_workers);
5573 5576  
5574 5577    // It turns out that even when we're using 1 thread, doing the work in a
5575 5578    // separate thread causes wide variance in run times.  We can't help this
5576 5579    // in the multi-threaded case, but we special-case n=1 here to get
5577 5580    // repeatable measurements of the 1-thread overhead of the parallel code.
5578 5581    if (n_workers > 1) {
5579      -    // Make refs discovery MT-safe
5580      -    ReferenceProcessorMTMutator mt(ref_processor(), true);
     5582 +    // Make refs discovery MT-safe, if it isn't already
     5583 +    ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), true);
5581 5584      GenCollectedHeap::StrongRootsScope srs(gch);
5582 5585      workers->run_task(&tsk);
5583 5586    } else {
5584 5587      GenCollectedHeap::StrongRootsScope srs(gch);
5585 5588      tsk.work(0);
5586 5589    }
5587 5590    gch->set_par_threads(0);  // 0 ==> non-parallel.
5588 5591    // restore, single-threaded for now, any preserved marks
5589 5592    // as a result of work_q overflow
5590 5593    restore_preserved_marks_if_any();
... 105 lines elided ...
5696 5699    const MemRegion        _span;
5697 5700    ProcessTask&           _task;
5698 5701  
5699 5702  public:
5700 5703    CMSRefProcTaskProxy(ProcessTask&     task,
5701 5704                        CMSCollector*    collector,
5702 5705                        const MemRegion& span,
5703 5706                        CMSBitMap*       mark_bit_map,
5704 5707                        AbstractWorkGang* workers,
5705 5708                        OopTaskQueueSet* task_queues):
     5709 +    // XXX Should superclass AGTWOQ also know about AWG since it knows
     5710 +    // about the task_queues used by the AWG? Then it could initialize
     5711 +    // the terminator() object. See 6984287. The set_for_termination()
     5712 +    // below is a temporary band-aid for the regression in 6984287.
5706 5713      AbstractGangTaskWOopQueues("Process referents by policy in parallel",
5707 5714        task_queues),
5708 5715      _task(task),
5709 5716      _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
5710      -    {
5711      -      assert(_collector->_span.equals(_span) && !_span.is_empty(),
5712      -             "Inconsistency in _span");
5713      -    }
     5717 +  {
     5718 +    assert(_collector->_span.equals(_span) && !_span.is_empty(),
     5719 +           "Inconsistency in _span");
     5720 +    set_for_termination(workers->active_workers());
     5721 +  }
5714 5722  
5715 5723    OopTaskQueueSet* task_queues() { return queues(); }
5716 5724  
5717 5725    OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
5718 5726  
5719 5727    void do_work_steal(int i,
5720 5728                       CMSParDrainMarkingStackClosure* drain,
5721 5729                       CMSParKeepAliveClosure* keep_alive,
5722 5730                       int* seed);
5723 5731  
... 141 lines elided ...
5865 5873                                  &cmsKeepAliveClosure, false /* !preclean */);
5866 5874    {
5867 5875      TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty);
5868 5876      if (rp->processing_is_mt()) {
5869 5877        // Set the degree of MT here.  If the discovery is done MT, there
5870 5878        // may have been a different number of threads doing the discovery
5871 5879        // and a different number of discovered lists may have Ref objects.
5872 5880        // That is OK as long as the Reference lists are balanced (see
5873 5881        // balance_all_queues() and balance_queues()).
5874 5882  
5875      -
5876      -      rp->set_mt_degree(ParallelGCThreads);
     5883 +      rp->set_active_mt_degree(ParallelGCThreads);
5877 5884        CMSRefProcTaskExecutor task_executor(*this);
5878 5885        rp->process_discovered_references(&_is_alive_closure,
5879 5886                                          &cmsKeepAliveClosure,
5880 5887                                          &cmsDrainMarkingStackClosure,
5881 5888                                          &task_executor);
5882 5889      } else {
5883 5890        rp->process_discovered_references(&_is_alive_closure,
5884 5891                                          &cmsKeepAliveClosure,
5885 5892                                          &cmsDrainMarkingStackClosure,
5886 5893                                          NULL);
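
Note on set_active_mt_degree() above: discovery and processing may run at different degrees, so the number of discovered lists need not match the number of processing threads, and balancing repairs any mismatch. An illustrative walk-through (the thread counts are examples, not values from the patch):

    // From ref_processor_init():
    //   mt discovery degree  = MAX2(ConcGCThreads, ParallelGCThreads)
    //   mt processing degree = ParallelGCThreads
    // E.g. with ConcGCThreads = 12 and ParallelGCThreads = 8, discovery may
    // have filled 12 lists, while 8 threads do the processing; balancing
    // (balance_all_queues()/balance_queues()) redistributes the discovered
    // Ref objects across the 8 active threads before
    // process_discovered_references(...) runs with the task_executor.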
... 3390 lines elided ...