rev 2691 : [mq]: g1-reference-processing
--- old/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ new/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
1 1 /*
2 2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
3 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 4 *
5 5 * This code is free software; you can redistribute it and/or modify it
6 6 * under the terms of the GNU General Public License version 2 only, as
7 7 * published by the Free Software Foundation.
8 8 *
9 9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 12 * version 2 for more details (a copy is included in the LICENSE file that
13 13 * accompanied this code).
14 14 *
15 15 * You should have received a copy of the GNU General Public License version
16 16 * 2 along with this work; if not, write to the Free Software Foundation,
17 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 18 *
19 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 20 * or visit www.oracle.com if you need additional information or have any
21 21 * questions.
22 22 *
23 23 */
24 24
25 25 #include "precompiled.hpp"
26 26 #include "classfile/symbolTable.hpp"
27 27 #include "classfile/systemDictionary.hpp"
28 28 #include "code/codeCache.hpp"
29 29 #include "gc_implementation/concurrentMarkSweep/cmsAdaptiveSizePolicy.hpp"
30 30 #include "gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.hpp"
31 31 #include "gc_implementation/concurrentMarkSweep/cmsGCAdaptivePolicyCounters.hpp"
32 32 #include "gc_implementation/concurrentMarkSweep/cmsOopClosures.inline.hpp"
33 33 #include "gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp"
34 34 #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.inline.hpp"
35 35 #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepThread.hpp"
36 36 #include "gc_implementation/concurrentMarkSweep/vmCMSOperations.hpp"
37 37 #include "gc_implementation/parNew/parNewGeneration.hpp"
38 38 #include "gc_implementation/shared/collectorCounters.hpp"
39 39 #include "gc_implementation/shared/isGCActiveMark.hpp"
40 40 #include "gc_interface/collectedHeap.inline.hpp"
41 41 #include "memory/cardTableRS.hpp"
42 42 #include "memory/collectorPolicy.hpp"
43 43 #include "memory/gcLocker.inline.hpp"
44 44 #include "memory/genCollectedHeap.hpp"
45 45 #include "memory/genMarkSweep.hpp"
46 46 #include "memory/genOopClosures.inline.hpp"
47 47 #include "memory/iterator.hpp"
48 48 #include "memory/referencePolicy.hpp"
49 49 #include "memory/resourceArea.hpp"
50 50 #include "oops/oop.inline.hpp"
51 51 #include "prims/jvmtiExport.hpp"
52 52 #include "runtime/globals_extension.hpp"
53 53 #include "runtime/handles.inline.hpp"
54 54 #include "runtime/java.hpp"
55 55 #include "runtime/vmThread.hpp"
56 56 #include "services/memoryService.hpp"
57 57 #include "services/runtimeService.hpp"
58 58
59 59 // statics
60 60 CMSCollector* ConcurrentMarkSweepGeneration::_collector = NULL;
61 61 bool CMSCollector::_full_gc_requested = false;
62 62
63 63 //////////////////////////////////////////////////////////////////
64 64 // In support of CMS/VM thread synchronization
65 65 //////////////////////////////////////////////////////////////////
66 66 // We split use of the CGC_lock into 2 "levels".
67 67 // The low-level locking is of the usual CGC_lock monitor. We introduce
68 68 // a higher level "token" (hereafter "CMS token") built on top of the
69 69 // low level monitor (hereafter "CGC lock").
70 70 // The token-passing protocol gives priority to the VM thread. The
71 71 // CMS-lock doesn't provide any fairness guarantees, but clients
72 72 // should ensure that it is only held for very short, bounded
73 73 // durations.
74 74 //
75 75 // When either of the CMS thread or the VM thread is involved in
76 76 // collection operations during which it does not want the other
77 77 // thread to interfere, it obtains the CMS token.
78 78 //
79 79 // If either thread tries to get the token while the other has
80 80 // it, that thread waits. However, if the VM thread and CMS thread
81 81 // both want the token, then the VM thread gets priority while the
82 82 // CMS thread waits. This ensures, for instance, that the "concurrent"
83 83 // phases of the CMS thread's work do not block out the VM thread
84 84 // for long periods of time as the CMS thread continues to hog
85 85 // the token. (See bug 4616232).
86 86 //
87 87 // The baton-passing functions are, however, controlled by the
88 88 // flags _foregroundGCShouldWait and _foregroundGCIsActive,
89 89 // and here the low-level CMS lock, not the high level token,
90 90 // ensures mutual exclusion.
91 91 //
92 92 // Two important conditions that we have to satisfy:
93 93 // 1. if a thread does a low-level wait on the CMS lock, then it
94 94 // relinquishes the CMS token if it were holding that token
95 95 // when it acquired the low-level CMS lock.
96 96 // 2. any low-level notifications on the low-level lock
97 97 // should only be sent when a thread has relinquished the token.
98 98 //
99 99 // In the absence of either property, we'd have potential deadlock.
100 100 //
101 101 // We protect each of the CMS (concurrent and sequential) phases
102 102 // with the CMS _token_, not the CMS _lock_.
103 103 //
104 104 // The only code protected by CMS lock is the token acquisition code
105 105 // itself, see ConcurrentMarkSweepThread::[de]synchronize(), and the
106 106 // baton-passing code.
107 107 //
108 108 // Unfortunately, I couldn't come up with a good abstraction to factor and
109 109 // hide the naked CGC_lock manipulation in the baton-passing code
110 110 // further below. That's something we should try to do. Also, the proof
111 111 // of correctness of this 2-level locking scheme is far from obvious,
112 112 // and potentially quite slippery. We have an uneasy suspicion, for instance,
113 113 // that there may be a theoretical possibility of delay/starvation in the
114 114 // low-level lock/wait/notify scheme used for the baton-passing because of
115 115 // potential interference with the priority scheme embodied in the
116 116 // CMS-token-passing protocol. See related comments at a CGC_lock->wait()
117 117 // invocation further below and marked with "XXX 20011219YSR".
118 118 // Indeed, as we note elsewhere, this may become yet more slippery
119 119 // in the presence of multiple CMS and/or multiple VM threads. XXX
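//
// A minimal sketch of the baton-passing wait pattern that the two conditions
// above imply (an illustrative sketch only, not actual collector code; names
// simplified):
//
//   // Give up the CMS token before doing a low-level wait on the CGC lock ...
//   ConcurrentMarkSweepThread::desynchronize(true /* is_cms_thread */);
//   {
//     MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
//     while (_foregroundGCShouldWait) {
//       // condition 1: we only wait after relinquishing the token
//       CGC_lock->wait(Mutex::_no_safepoint_check_flag);
//     }
//     // condition 2: notifications are sent only while the token is given up
//     CGC_lock->notify_all();
//   }
//   // ... and re-acquire the token afterwards.
//   ConcurrentMarkSweepThread::synchronize(true /* is_cms_thread */);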
120 120
121 121 class CMSTokenSync: public StackObj {
122 122 private:
123 123 bool _is_cms_thread;
124 124 public:
125 125 CMSTokenSync(bool is_cms_thread):
126 126 _is_cms_thread(is_cms_thread) {
127 127 assert(is_cms_thread == Thread::current()->is_ConcurrentGC_thread(),
128 128 "Incorrect argument to constructor");
129 129 ConcurrentMarkSweepThread::synchronize(_is_cms_thread);
130 130 }
131 131
132 132 ~CMSTokenSync() {
133 133 assert(_is_cms_thread ?
134 134 ConcurrentMarkSweepThread::cms_thread_has_cms_token() :
135 135 ConcurrentMarkSweepThread::vm_thread_has_cms_token(),
136 136 "Incorrect state");
137 137 ConcurrentMarkSweepThread::desynchronize(_is_cms_thread);
138 138 }
139 139 };
140 140
141 141 // Convenience class that does a CMSTokenSync, and then acquires
142 142 // up to three locks.
143 143 class CMSTokenSyncWithLocks: public CMSTokenSync {
144 144 private:
145 145 // Note: locks are acquired in textual declaration order
146 146 // and released in the opposite order
147 147 MutexLockerEx _locker1, _locker2, _locker3;
148 148 public:
149 149 CMSTokenSyncWithLocks(bool is_cms_thread, Mutex* mutex1,
150 150 Mutex* mutex2 = NULL, Mutex* mutex3 = NULL):
151 151 CMSTokenSync(is_cms_thread),
152 152 _locker1(mutex1, Mutex::_no_safepoint_check_flag),
153 153 _locker2(mutex2, Mutex::_no_safepoint_check_flag),
154 154 _locker3(mutex3, Mutex::_no_safepoint_check_flag)
155 155 { }
156 156 };
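
// A minimal usage sketch for the two helpers above (illustrative only; the
// enclosing method name is hypothetical):
//
//   void CMSCollector::some_checkpoint_step() {
//     // Acquire the CMS token first, then the bitmap lock, with no safepoint
//     // checks; both are released in reverse order when the scope exits.
//     CMSTokenSyncWithLocks ts(true /* is_cms_thread */, _markBitMap.lock());
//     // ... work that must exclude the VM thread and bitmap readers ...
//   }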
157 157
158 158
159 159 // Wrapper class to temporarily disable icms during a foreground cms collection.
160 160 class ICMSDisabler: public StackObj {
161 161 public:
162 162 // The ctor disables icms and wakes up the thread so it notices the change;
163 163 // the dtor re-enables icms. Note that the CMSCollector methods will check
164 164 // CMSIncrementalMode.
165 165 ICMSDisabler() { CMSCollector::disable_icms(); CMSCollector::start_icms(); }
166 166 ~ICMSDisabler() { CMSCollector::enable_icms(); }
167 167 };
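
// A minimal usage sketch (illustrative only; the called function is
// hypothetical):
//
//   {
//     ICMSDisabler icms_off;          // ctor: disable icms, wake the thread
//     do_foreground_collection();     // foreground work runs with icms off
//   }                                 // dtor: re-enable icms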
168 168
169 169 //////////////////////////////////////////////////////////////////
170 170 // Concurrent Mark-Sweep Generation /////////////////////////////
171 171 //////////////////////////////////////////////////////////////////
172 172
173 173 NOT_PRODUCT(CompactibleFreeListSpace* debug_cms_space;)
174 174
175 175 // This struct contains per-thread things necessary to support parallel
176 176 // young-gen collection.
177 177 class CMSParGCThreadState: public CHeapObj {
178 178 public:
179 179 CFLS_LAB lab;
180 180 PromotionInfo promo;
181 181
182 182 // Constructor.
183 183 CMSParGCThreadState(CompactibleFreeListSpace* cfls) : lab(cfls) {
184 184 promo.setSpace(cfls);
185 185 }
186 186 };
187 187
188 188 ConcurrentMarkSweepGeneration::ConcurrentMarkSweepGeneration(
189 189 ReservedSpace rs, size_t initial_byte_size, int level,
190 190 CardTableRS* ct, bool use_adaptive_freelists,
191 191 FreeBlockDictionary::DictionaryChoice dictionaryChoice) :
192 192 CardGeneration(rs, initial_byte_size, level, ct),
193 193 _dilatation_factor(((double)MinChunkSize)/((double)(CollectedHeap::min_fill_size()))),
194 194 _debug_collection_type(Concurrent_collection_type)
195 195 {
196 196 HeapWord* bottom = (HeapWord*) _virtual_space.low();
197 197 HeapWord* end = (HeapWord*) _virtual_space.high();
198 198
199 199 _direct_allocated_words = 0;
200 200 NOT_PRODUCT(
201 201 _numObjectsPromoted = 0;
202 202 _numWordsPromoted = 0;
203 203 _numObjectsAllocated = 0;
204 204 _numWordsAllocated = 0;
205 205 )
206 206
207 207 _cmsSpace = new CompactibleFreeListSpace(_bts, MemRegion(bottom, end),
208 208 use_adaptive_freelists,
209 209 dictionaryChoice);
210 210 NOT_PRODUCT(debug_cms_space = _cmsSpace;)
211 211 if (_cmsSpace == NULL) {
212 212 vm_exit_during_initialization(
213 213 "CompactibleFreeListSpace allocation failure");
214 214 }
215 215 _cmsSpace->_gen = this;
216 216
217 217 _gc_stats = new CMSGCStats();
218 218
219 219 // Verify the assumption that FreeChunk::_prev and OopDesc::_klass
220 220 // offsets match. The ability to tell free chunks from objects
221 221 // depends on this property.
222 222 debug_only(
223 223 FreeChunk* junk = NULL;
224 224 assert(UseCompressedOops ||
225 225 junk->prev_addr() == (void*)(oop(junk)->klass_addr()),
226 226 "Offset of FreeChunk::_prev within FreeChunk must match"
227 227 " that of OopDesc::_klass within OopDesc");
228 228 )
229 229 if (CollectedHeap::use_parallel_gc_threads()) {
230 230 typedef CMSParGCThreadState* CMSParGCThreadStatePtr;
231 231 _par_gc_thread_states =
232 232 NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads);
233 233 if (_par_gc_thread_states == NULL) {
234 234 vm_exit_during_initialization("Could not allocate par gc structs");
235 235 }
236 236 for (uint i = 0; i < ParallelGCThreads; i++) {
237 237 _par_gc_thread_states[i] = new CMSParGCThreadState(cmsSpace());
238 238 if (_par_gc_thread_states[i] == NULL) {
239 239 vm_exit_during_initialization("Could not allocate par gc structs");
240 240 }
241 241 }
242 242 } else {
243 243 _par_gc_thread_states = NULL;
244 244 }
245 245 _incremental_collection_failed = false;
246 246 // The "dilatation_factor" is the expansion that can occur on
247 247 // account of the fact that the minimum object size in the CMS
248 248 // generation may be larger than that in, say, a contiguous young
249 249 // generation.
250 250 // Ideally, in the calculation below, we'd compute the dilatation
251 251 // factor as: MinChunkSize/(promoting_gen's min object size)
252 252 // Since we do not have such a general query interface for the
253 253 // promoting generation, we'll instead just use the minimum
254 254 // object size (which today is a header's worth of space);
255 255 // note that all arithmetic is in units of HeapWords.
256 256 assert(MinChunkSize >= CollectedHeap::min_fill_size(), "just checking");
257 257 assert(_dilatation_factor >= 1.0, "from previous assert");
258 258 }
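
// Worked example for the dilatation factor above (illustrative values, not
// the actual platform constants): if MinChunkSize were 4 HeapWords and the
// promoting generation's minimum object size, approximated here by
// CollectedHeap::min_fill_size(), were 2 HeapWords, then
//   _dilatation_factor = 4 / 2 = 2.0
// i.e. a minimally sized young-gen object may need up to twice as many
// HeapWords once promoted into this generation.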
259 259
260 260
261 261 // The field "_initiating_occupancy" represents the occupancy percentage
262 262 // at which we trigger a new collection cycle. Unless explicitly specified
263 263 // via CMSInitiating[Perm]OccupancyFraction (argument "io" below), it
264 264 // is calculated by:
265 265 //
266 266 // Let "f" be MinHeapFreeRatio in
267 267 //
268 268 // _initiating_occupancy = 100-f +
269 269 // f * (CMSTrigger[Perm]Ratio/100)
270 270 // where CMSTrigger[Perm]Ratio is the argument "tr" below.
271 271 //
272 272 // That is, if we assume the heap is at its desired maximum occupancy at the
273 273 // end of a collection, we let CMSTrigger[Perm]Ratio of the (purported) free
274 274 // space be allocated before initiating a new collection cycle.
275 275 //
276 276 void ConcurrentMarkSweepGeneration::init_initiating_occupancy(intx io, intx tr) {
277 277 assert(io <= 100 && tr >= 0 && tr <= 100, "Check the arguments");
278 278 if (io >= 0) {
279 279 _initiating_occupancy = (double)io / 100.0;
280 280 } else {
281 281 _initiating_occupancy = ((100 - MinHeapFreeRatio) +
282 282 (double)(tr * MinHeapFreeRatio) / 100.0)
283 283 / 100.0;
284 284 }
285 285 }
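
// Worked example for the formula above (illustrative values, not necessarily
// the defaults): with io < 0, MinHeapFreeRatio (f) = 40 and
// CMSTrigger[Perm]Ratio (tr) = 80:
//   _initiating_occupancy = ((100 - 40) + 40 * 80 / 100) / 100
//                         = (60 + 32) / 100
//                         = 0.92
// i.e. a new collection cycle is initiated once the generation is 92% full.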
286 286
287 287 void ConcurrentMarkSweepGeneration::ref_processor_init() {
288 288 assert(collector() != NULL, "no collector");
289 289 collector()->ref_processor_init();
290 290 }
291 291
292 292 void CMSCollector::ref_processor_init() {
293 293 if (_ref_processor == NULL) {
294 294 // Allocate and initialize a reference processor
295 295 _ref_processor =
296 296 new ReferenceProcessor(_span, // span
297 297 (ParallelGCThreads > 1) && ParallelRefProcEnabled, // mt processing
298 298 (int) ParallelGCThreads, // mt processing degree
299 299 _cmsGen->refs_discovery_is_mt(), // mt discovery
300 300 (int) MAX2(ConcGCThreads, ParallelGCThreads), // mt discovery degree
301 301 _cmsGen->refs_discovery_is_atomic(), // discovery is not atomic
302 302 &_is_alive_closure, // closure for liveness info
303 303 false); // next field updates do not need write barrier
304 304 // Initialize the _ref_processor field of CMSGen
305 305 _cmsGen->set_ref_processor(_ref_processor);
306 306
307 307 // Allocate a dummy ref processor for perm gen.
308 308 ReferenceProcessor* rp2 = new ReferenceProcessor();
309 309 if (rp2 == NULL) {
310 310 vm_exit_during_initialization("Could not allocate ReferenceProcessor object");
311 311 }
312 312 _permGen->set_ref_processor(rp2);
313 313 }
314 314 }
315 315
316 316 CMSAdaptiveSizePolicy* CMSCollector::size_policy() {
317 317 GenCollectedHeap* gch = GenCollectedHeap::heap();
318 318 assert(gch->kind() == CollectedHeap::GenCollectedHeap,
319 319 "Wrong type of heap");
320 320 CMSAdaptiveSizePolicy* sp = (CMSAdaptiveSizePolicy*)
321 321 gch->gen_policy()->size_policy();
322 322 assert(sp->is_gc_cms_adaptive_size_policy(),
323 323 "Wrong type of size policy");
324 324 return sp;
325 325 }
326 326
327 327 CMSGCAdaptivePolicyCounters* CMSCollector::gc_adaptive_policy_counters() {
328 328 CMSGCAdaptivePolicyCounters* results =
329 329 (CMSGCAdaptivePolicyCounters*) collector_policy()->counters();
330 330 assert(
331 331 results->kind() == GCPolicyCounters::CMSGCAdaptivePolicyCountersKind,
332 332 "Wrong gc policy counter kind");
333 333 return results;
334 334 }
335 335
336 336
337 337 void ConcurrentMarkSweepGeneration::initialize_performance_counters() {
338 338
339 339 const char* gen_name = "old";
340 340
341 341 // Generation Counters - generation 1, 1 subspace
342 342 _gen_counters = new GenerationCounters(gen_name, 1, 1, &_virtual_space);
343 343
344 344 _space_counters = new GSpaceCounters(gen_name, 0,
345 345 _virtual_space.reserved_size(),
346 346 this, _gen_counters);
347 347 }
348 348
349 349 CMSStats::CMSStats(ConcurrentMarkSweepGeneration* cms_gen, unsigned int alpha):
350 350 _cms_gen(cms_gen)
351 351 {
352 352 assert(alpha <= 100, "bad value");
353 353 _saved_alpha = alpha;
354 354
355 355 // Initialize the alphas to the bootstrap value of 100.
356 356 _gc0_alpha = _cms_alpha = 100;
357 357
358 358 _cms_begin_time.update();
359 359 _cms_end_time.update();
360 360
361 361 _gc0_duration = 0.0;
362 362 _gc0_period = 0.0;
363 363 _gc0_promoted = 0;
364 364
365 365 _cms_duration = 0.0;
366 366 _cms_period = 0.0;
367 367 _cms_allocated = 0;
368 368
369 369 _cms_used_at_gc0_begin = 0;
370 370 _cms_used_at_gc0_end = 0;
371 371 _allow_duty_cycle_reduction = false;
372 372 _valid_bits = 0;
373 373 _icms_duty_cycle = CMSIncrementalDutyCycle;
374 374 }
375 375
376 376 double CMSStats::cms_free_adjustment_factor(size_t free) const {
377 377 // TBD: CR 6909490
378 378 return 1.0;
379 379 }
380 380
381 381 void CMSStats::adjust_cms_free_adjustment_factor(bool fail, size_t free) {
382 382 }
383 383
384 384 // If promotion failure handling is on, use
385 385 // the padded average size of the promotion for each
386 386 // young generation collection.
387 387 double CMSStats::time_until_cms_gen_full() const {
388 388 size_t cms_free = _cms_gen->cmsSpace()->free();
389 389 GenCollectedHeap* gch = GenCollectedHeap::heap();
390 390 size_t expected_promotion = MIN2(gch->get_gen(0)->capacity(),
391 391 (size_t) _cms_gen->gc_stats()->avg_promoted()->padded_average());
392 392 if (cms_free > expected_promotion) {
393 393 // Start a cms collection if there isn't enough space to promote
394 394 // for the next minor collection. Use the padded average as
395 395 // a safety factor.
396 396 cms_free -= expected_promotion;
397 397
398 398 // Adjust by the safety factor.
399 399 double cms_free_dbl = (double)cms_free;
400 400 double cms_adjustment = (100.0 - CMSIncrementalSafetyFactor)/100.0;
401 401 // Apply a further correction factor which tries to adjust
402 402 // for recent occurrence of concurrent mode failures.
403 403 cms_adjustment = cms_adjustment * cms_free_adjustment_factor(cms_free);
404 404 cms_free_dbl = cms_free_dbl * cms_adjustment;
405 405
406 406 if (PrintGCDetails && Verbose) {
407 407 gclog_or_tty->print_cr("CMSStats::time_until_cms_gen_full: cms_free "
408 408 SIZE_FORMAT " expected_promotion " SIZE_FORMAT,
409 409 cms_free, expected_promotion);
410 410 gclog_or_tty->print_cr(" cms_free_dbl %f cms_consumption_rate %f",
411 411 cms_free_dbl, cms_consumption_rate() + 1.0);
412 412 }
413 413 // Add 1 in case the consumption rate goes to zero.
414 414 return cms_free_dbl / (cms_consumption_rate() + 1.0);
415 415 }
416 416 return 0.0;
417 417 }
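
// Worked example (illustrative numbers): suppose cms_free is 600M bytes, the
// padded average promotion is 100M and CMSIncrementalSafetyFactor is 10.
// Then cms_free becomes 600M - 100M = 500M, the safety adjustment is
// (100 - 10) / 100 = 0.9, so cms_free_dbl is 450M.  With a cms consumption
// rate of 50M bytes/sec the estimate returned is roughly
// 450M / (50M + 1) ~= 9 seconds until the cms generation is full.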
418 418
419 419 // Compare the duration of the cms collection to the
420 420 // time remaining before the cms generation is empty.
421 421 // Note that the time from the start of the cms collection
422 422 // to the start of the cms sweep (less than the total
423 423 // duration of the cms collection) can be used. This
424 424 // has been tried and some applications experienced
425 425 // promotion failures early in execution. This was
426 426 // possibly because the averages were not accurate
427 427 // enough at the beginning.
428 428 double CMSStats::time_until_cms_start() const {
429 429 // We add "gc0_period" to the "work" calculation
430 430 // below because this query is done (mostly) at the
431 431 // end of a scavenge, so we need to conservatively
432 432 // account for that much possible delay
433 433 // in the query so as to avoid concurrent mode failures
434 434 // due to starting the collection just a wee bit too
435 435 // late.
436 436 double work = cms_duration() + gc0_period();
437 437 double deadline = time_until_cms_gen_full();
438 438 // If a concurrent mode failure occurred recently, we want to be
439 439 // more conservative and halve our expected time_until_cms_gen_full()
440 440 if (work > deadline) {
441 441 if (Verbose && PrintGCDetails) {
442 442 gclog_or_tty->print(
443 443 " CMSCollector: collect because of anticipated promotion "
444 444 "before full %3.7f + %3.7f > %3.7f ", cms_duration(),
445 445 gc0_period(), time_until_cms_gen_full());
446 446 }
447 447 return 0.0;
448 448 }
449 449 return work - deadline;
450 450 }
451 451
452 452 // Return a duty cycle based on old_duty_cycle and new_duty_cycle, limiting the
453 453 // amount of change to prevent wild oscillation.
454 454 unsigned int CMSStats::icms_damped_duty_cycle(unsigned int old_duty_cycle,
455 455 unsigned int new_duty_cycle) {
456 456 assert(old_duty_cycle <= 100, "bad input value");
457 457 assert(new_duty_cycle <= 100, "bad input value");
458 458
459 459 // Note: use subtraction with caution since it may underflow (values are
460 460 // unsigned). Addition is safe since we're in the range 0-100.
461 461 unsigned int damped_duty_cycle = new_duty_cycle;
462 462 if (new_duty_cycle < old_duty_cycle) {
463 463 const unsigned int largest_delta = MAX2(old_duty_cycle / 4, 5U);
464 464 if (new_duty_cycle + largest_delta < old_duty_cycle) {
465 465 damped_duty_cycle = old_duty_cycle - largest_delta;
466 466 }
467 467 } else if (new_duty_cycle > old_duty_cycle) {
468 468 const unsigned int largest_delta = MAX2(old_duty_cycle / 4, 15U);
469 469 if (new_duty_cycle > old_duty_cycle + largest_delta) {
470 470 damped_duty_cycle = MIN2(old_duty_cycle + largest_delta, 100U);
471 471 }
472 472 }
473 473 assert(damped_duty_cycle <= 100, "invalid duty cycle computed");
474 474
475 475 if (CMSTraceIncrementalPacing) {
476 476 gclog_or_tty->print(" [icms_damped_duty_cycle(%d,%d) = %d] ",
477 477 old_duty_cycle, new_duty_cycle, damped_duty_cycle);
478 478 }
479 479 return damped_duty_cycle;
480 480 }
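
// Worked examples for the damping above (illustrative): reducing from
// old_duty_cycle = 60 to new_duty_cycle = 10 gives
// largest_delta = MAX2(60/4, 5) = 15, and since 10 + 15 < 60 the result is
// damped to 60 - 15 = 45.  Increasing from 20 to 80 gives
// largest_delta = MAX2(20/4, 15) = 15, and since 80 > 20 + 15 the result is
// damped to MIN2(20 + 15, 100) = 35.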
481 481
482 482 unsigned int CMSStats::icms_update_duty_cycle_impl() {
483 483 assert(CMSIncrementalPacing && valid(),
484 484 "should be handled in icms_update_duty_cycle()");
485 485
486 486 double cms_time_so_far = cms_timer().seconds();
487 487 double scaled_duration = cms_duration_per_mb() * _cms_used_at_gc0_end / M;
488 488 double scaled_duration_remaining = fabsd(scaled_duration - cms_time_so_far);
489 489
490 490 // Avoid division by 0.
491 491 double time_until_full = MAX2(time_until_cms_gen_full(), 0.01);
492 492 double duty_cycle_dbl = 100.0 * scaled_duration_remaining / time_until_full;
493 493
494 494 unsigned int new_duty_cycle = MIN2((unsigned int)duty_cycle_dbl, 100U);
495 495 if (new_duty_cycle > _icms_duty_cycle) {
496 496 // Avoid very small duty cycles (1 or 2); 0 is allowed.
497 497 if (new_duty_cycle > 2) {
498 498 _icms_duty_cycle = icms_damped_duty_cycle(_icms_duty_cycle,
499 499 new_duty_cycle);
500 500 }
501 501 } else if (_allow_duty_cycle_reduction) {
502 502 // The duty cycle is reduced only once per cms cycle (see record_cms_end()).
503 503 new_duty_cycle = icms_damped_duty_cycle(_icms_duty_cycle, new_duty_cycle);
504 504 // Respect the minimum duty cycle.
505 505 unsigned int min_duty_cycle = (unsigned int)CMSIncrementalDutyCycleMin;
506 506 _icms_duty_cycle = MAX2(new_duty_cycle, min_duty_cycle);
507 507 }
508 508
509 509 if (PrintGCDetails || CMSTraceIncrementalPacing) {
510 510 gclog_or_tty->print(" icms_dc=%d ", _icms_duty_cycle);
511 511 }
512 512
513 513 _allow_duty_cycle_reduction = false;
514 514 return _icms_duty_cycle;
515 515 }
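
// Worked example (illustrative): with cms_duration_per_mb() = 0.02 sec/MB and
// _cms_used_at_gc0_end = 512M bytes, scaled_duration = 0.02 * 512 = 10.24 sec.
// If 4 sec of the cycle have elapsed, scaled_duration_remaining ~= 6.24 sec;
// with time_until_full = 20 sec the raw duty cycle is
// 100 * 6.24 / 20 ~= 31%, which is then damped against the current value.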
516 516
517 517 #ifndef PRODUCT
518 518 void CMSStats::print_on(outputStream *st) const {
519 519 st->print(" gc0_alpha=%d,cms_alpha=%d", _gc0_alpha, _cms_alpha);
520 520 st->print(",gc0_dur=%g,gc0_per=%g,gc0_promo=" SIZE_FORMAT,
521 521 gc0_duration(), gc0_period(), gc0_promoted());
522 522 st->print(",cms_dur=%g,cms_dur_per_mb=%g,cms_per=%g,cms_alloc=" SIZE_FORMAT,
523 523 cms_duration(), cms_duration_per_mb(),
524 524 cms_period(), cms_allocated());
525 525 st->print(",cms_since_beg=%g,cms_since_end=%g",
526 526 cms_time_since_begin(), cms_time_since_end());
527 527 st->print(",cms_used_beg=" SIZE_FORMAT ",cms_used_end=" SIZE_FORMAT,
528 528 _cms_used_at_gc0_begin, _cms_used_at_gc0_end);
529 529 if (CMSIncrementalMode) {
530 530 st->print(",dc=%d", icms_duty_cycle());
531 531 }
532 532
533 533 if (valid()) {
534 534 st->print(",promo_rate=%g,cms_alloc_rate=%g",
535 535 promotion_rate(), cms_allocation_rate());
536 536 st->print(",cms_consumption_rate=%g,time_until_full=%g",
537 537 cms_consumption_rate(), time_until_cms_gen_full());
538 538 }
539 539 st->print(" ");
540 540 }
541 541 #endif // #ifndef PRODUCT
542 542
543 543 CMSCollector::CollectorState CMSCollector::_collectorState =
544 544 CMSCollector::Idling;
545 545 bool CMSCollector::_foregroundGCIsActive = false;
546 546 bool CMSCollector::_foregroundGCShouldWait = false;
547 547
548 548 CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
549 549 ConcurrentMarkSweepGeneration* permGen,
550 550 CardTableRS* ct,
551 551 ConcurrentMarkSweepPolicy* cp):
552 552 _cmsGen(cmsGen),
553 553 _permGen(permGen),
554 554 _ct(ct),
555 555 _ref_processor(NULL), // will be set later
556 556 _conc_workers(NULL), // may be set later
557 557 _abort_preclean(false),
558 558 _start_sampling(false),
559 559 _between_prologue_and_epilogue(false),
560 560 _markBitMap(0, Mutex::leaf + 1, "CMS_markBitMap_lock"),
561 561 _perm_gen_verify_bit_map(0, -1 /* no mutex */, "No_lock"),
562 562 _modUnionTable((CardTableModRefBS::card_shift - LogHeapWordSize),
563 563 -1 /* lock-free */, "No_lock" /* dummy */),
564 564 _modUnionClosure(&_modUnionTable),
565 565 _modUnionClosurePar(&_modUnionTable),
566 566 // Adjust my span to cover old (cms) gen and perm gen
567 567 _span(cmsGen->reserved()._union(permGen->reserved())),
568 568 // Construct the is_alive_closure with _span & markBitMap
569 569 _is_alive_closure(_span, &_markBitMap),
570 570 _restart_addr(NULL),
571 571 _overflow_list(NULL),
572 572 _stats(cmsGen),
573 573 _eden_chunk_array(NULL), // may be set in ctor body
574 574 _eden_chunk_capacity(0), // -- ditto --
575 575 _eden_chunk_index(0), // -- ditto --
576 576 _survivor_plab_array(NULL), // -- ditto --
577 577 _survivor_chunk_array(NULL), // -- ditto --
578 578 _survivor_chunk_capacity(0), // -- ditto --
579 579 _survivor_chunk_index(0), // -- ditto --
580 580 _ser_pmc_preclean_ovflw(0),
581 581 _ser_kac_preclean_ovflw(0),
582 582 _ser_pmc_remark_ovflw(0),
583 583 _par_pmc_remark_ovflw(0),
584 584 _ser_kac_ovflw(0),
585 585 _par_kac_ovflw(0),
586 586 #ifndef PRODUCT
587 587 _num_par_pushes(0),
588 588 #endif
589 589 _collection_count_start(0),
590 590 _verifying(false),
591 591 _icms_start_limit(NULL),
592 592 _icms_stop_limit(NULL),
593 593 _verification_mark_bm(0, Mutex::leaf + 1, "CMS_verification_mark_bm_lock"),
594 594 _completed_initialization(false),
595 595 _collector_policy(cp),
596 596 _should_unload_classes(false),
597 597 _concurrent_cycles_since_last_unload(0),
598 598 _roots_scanning_options(0),
599 599 _inter_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding),
600 600 _intra_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding)
601 601 {
602 602 if (ExplicitGCInvokesConcurrentAndUnloadsClasses) {
603 603 ExplicitGCInvokesConcurrent = true;
604 604 }
605 605 // Now expand the span and allocate the collection support structures
606 606 // (MUT, marking bit map etc.) to cover both generations subject to
607 607 // collection.
608 608
609 609 // First check that _permGen is adjacent to _cmsGen and above it.
610 610 assert( _cmsGen->reserved().word_size() > 0
611 611 && _permGen->reserved().word_size() > 0,
612 612 "generations should not be of zero size");
613 613 assert(_cmsGen->reserved().intersection(_permGen->reserved()).is_empty(),
614 614 "_cmsGen and _permGen should not overlap");
615 615 assert(_cmsGen->reserved().end() == _permGen->reserved().start(),
616 616 "_cmsGen->end() different from _permGen->start()");
617 617
618 618 // For use by dirty card to oop closures.
619 619 _cmsGen->cmsSpace()->set_collector(this);
620 620 _permGen->cmsSpace()->set_collector(this);
621 621
622 622 // Allocate MUT and marking bit map
623 623 {
624 624 MutexLockerEx x(_markBitMap.lock(), Mutex::_no_safepoint_check_flag);
625 625 if (!_markBitMap.allocate(_span)) {
626 626 warning("Failed to allocate CMS Bit Map");
627 627 return;
628 628 }
629 629 assert(_markBitMap.covers(_span), "_markBitMap inconsistency?");
630 630 }
631 631 {
632 632 _modUnionTable.allocate(_span);
633 633 assert(_modUnionTable.covers(_span), "_modUnionTable inconsistency?");
634 634 }
635 635
636 636 if (!_markStack.allocate(MarkStackSize)) {
637 637 warning("Failed to allocate CMS Marking Stack");
638 638 return;
639 639 }
640 640 if (!_revisitStack.allocate(CMSRevisitStackSize)) {
641 641 warning("Failed to allocate CMS Revisit Stack");
642 642 return;
643 643 }
644 644
645 645 // Support for multi-threaded concurrent phases
646 646 if (CMSConcurrentMTEnabled) {
647 647 if (FLAG_IS_DEFAULT(ConcGCThreads)) {
648 648 // just for now
649 649 FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
650 650 }
651 651 if (ConcGCThreads > 1) {
652 652 _conc_workers = new YieldingFlexibleWorkGang("Parallel CMS Threads",
653 653 ConcGCThreads, true);
654 654 if (_conc_workers == NULL) {
655 655 warning("GC/CMS: _conc_workers allocation failure: "
656 656 "forcing -CMSConcurrentMTEnabled");
657 657 CMSConcurrentMTEnabled = false;
658 658 } else {
659 659 _conc_workers->initialize_workers();
660 660 }
661 661 } else {
662 662 CMSConcurrentMTEnabled = false;
663 663 }
664 664 }
665 665 if (!CMSConcurrentMTEnabled) {
666 666 ConcGCThreads = 0;
667 667 } else {
668 668 // Turn off CMSCleanOnEnter optimization temporarily for
669 669 // the MT case where it's not fixed yet; see 6178663.
670 670 CMSCleanOnEnter = false;
671 671 }
672 672 assert((_conc_workers != NULL) == (ConcGCThreads > 1),
673 673 "Inconsistency");
674 674
675 675 // Parallel task queues; these are shared for the
676 676 // concurrent and stop-world phases of CMS, but
677 677 // are not shared with parallel scavenge (ParNew).
678 678 {
679 679 uint i;
680 680 uint num_queues = (uint) MAX2(ParallelGCThreads, ConcGCThreads);
681 681
682 682 if ((CMSParallelRemarkEnabled || CMSConcurrentMTEnabled
683 683 || ParallelRefProcEnabled)
684 684 && num_queues > 0) {
685 685 _task_queues = new OopTaskQueueSet(num_queues);
686 686 if (_task_queues == NULL) {
687 687 warning("task_queues allocation failure.");
688 688 return;
689 689 }
690 690 _hash_seed = NEW_C_HEAP_ARRAY(int, num_queues);
691 691 if (_hash_seed == NULL) {
692 692 warning("_hash_seed array allocation failure");
693 693 return;
694 694 }
695 695
696 696 typedef Padded<OopTaskQueue> PaddedOopTaskQueue;
697 697 for (i = 0; i < num_queues; i++) {
698 698 PaddedOopTaskQueue *q = new PaddedOopTaskQueue();
699 699 if (q == NULL) {
700 700 warning("work_queue allocation failure.");
701 701 return;
702 702 }
703 703 _task_queues->register_queue(i, q);
704 704 }
705 705 for (i = 0; i < num_queues; i++) {
706 706 _task_queues->queue(i)->initialize();
707 707 _hash_seed[i] = 17; // copied from ParNew
708 708 }
709 709 }
710 710 }
711 711
712 712 _cmsGen ->init_initiating_occupancy(CMSInitiatingOccupancyFraction, CMSTriggerRatio);
713 713 _permGen->init_initiating_occupancy(CMSInitiatingPermOccupancyFraction, CMSTriggerPermRatio);
714 714
715 715 // Clip CMSBootstrapOccupancy between 0 and 100.
716 716 _bootstrap_occupancy = ((double)MIN2((uintx)100, MAX2((uintx)0, CMSBootstrapOccupancy)))
717 717 /(double)100;
718 718
719 719 _full_gcs_since_conc_gc = 0;
720 720
721 721 // Now tell CMS generations the identity of their collector
722 722 ConcurrentMarkSweepGeneration::set_collector(this);
723 723
724 724 // Create & start a CMS thread for this CMS collector
725 725 _cmsThread = ConcurrentMarkSweepThread::start(this);
726 726 assert(cmsThread() != NULL, "CMS Thread should have been created");
727 727 assert(cmsThread()->collector() == this,
728 728 "CMS Thread should refer to this gen");
729 729 assert(CGC_lock != NULL, "Where's the CGC_lock?");
730 730
731 731 // Support for parallelizing young gen rescan
732 732 GenCollectedHeap* gch = GenCollectedHeap::heap();
733 733 _young_gen = gch->prev_gen(_cmsGen);
734 734 if (gch->supports_inline_contig_alloc()) {
735 735 _top_addr = gch->top_addr();
736 736 _end_addr = gch->end_addr();
737 737 assert(_young_gen != NULL, "no _young_gen");
738 738 _eden_chunk_index = 0;
739 739 _eden_chunk_capacity = (_young_gen->max_capacity()+CMSSamplingGrain)/CMSSamplingGrain;
740 740 _eden_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, _eden_chunk_capacity);
741 741 if (_eden_chunk_array == NULL) {
742 742 _eden_chunk_capacity = 0;
743 743 warning("GC/CMS: _eden_chunk_array allocation failure");
744 744 }
745 745 }
746 746 assert(_eden_chunk_array != NULL || _eden_chunk_capacity == 0, "Error");
747 747
748 748 // Support for parallelizing survivor space rescan
749 749 if (CMSParallelRemarkEnabled && CMSParallelSurvivorRemarkEnabled) {
750 750 const size_t max_plab_samples =
751 751 ((DefNewGeneration*)_young_gen)->max_survivor_size()/MinTLABSize;
752 752
753 753 _survivor_plab_array = NEW_C_HEAP_ARRAY(ChunkArray, ParallelGCThreads);
754 754 _survivor_chunk_array = NEW_C_HEAP_ARRAY(HeapWord*, 2*max_plab_samples);
755 755 _cursor = NEW_C_HEAP_ARRAY(size_t, ParallelGCThreads);
756 756 if (_survivor_plab_array == NULL || _survivor_chunk_array == NULL
757 757 || _cursor == NULL) {
758 758 warning("Failed to allocate survivor plab/chunk array");
759 759 if (_survivor_plab_array != NULL) {
760 760 FREE_C_HEAP_ARRAY(ChunkArray, _survivor_plab_array);
761 761 _survivor_plab_array = NULL;
762 762 }
763 763 if (_survivor_chunk_array != NULL) {
764 764 FREE_C_HEAP_ARRAY(HeapWord*, _survivor_chunk_array);
765 765 _survivor_chunk_array = NULL;
766 766 }
767 767 if (_cursor != NULL) {
768 768 FREE_C_HEAP_ARRAY(size_t, _cursor);
769 769 _cursor = NULL;
770 770 }
771 771 } else {
772 772 _survivor_chunk_capacity = 2*max_plab_samples;
773 773 for (uint i = 0; i < ParallelGCThreads; i++) {
774 774 HeapWord** vec = NEW_C_HEAP_ARRAY(HeapWord*, max_plab_samples);
775 775 if (vec == NULL) {
776 776 warning("Failed to allocate survivor plab array");
777 777 for (int j = i; j > 0; j--) {
778 778 FREE_C_HEAP_ARRAY(HeapWord*, _survivor_plab_array[j-1].array());
779 779 }
780 780 FREE_C_HEAP_ARRAY(ChunkArray, _survivor_plab_array);
781 781 FREE_C_HEAP_ARRAY(HeapWord*, _survivor_chunk_array);
782 782 _survivor_plab_array = NULL;
783 783 _survivor_chunk_array = NULL;
784 784 _survivor_chunk_capacity = 0;
785 785 break;
786 786 } else {
787 787 ChunkArray* cur =
788 788 ::new (&_survivor_plab_array[i]) ChunkArray(vec,
789 789 max_plab_samples);
790 790 assert(cur->end() == 0, "Should be 0");
791 791 assert(cur->array() == vec, "Should be vec");
792 792 assert(cur->capacity() == max_plab_samples, "Error");
793 793 }
794 794 }
795 795 }
796 796 }
797 797 assert( ( _survivor_plab_array != NULL
798 798 && _survivor_chunk_array != NULL)
799 799 || ( _survivor_chunk_capacity == 0
800 800 && _survivor_chunk_index == 0),
801 801 "Error");
802 802
803 803 // Choose what strong roots should be scanned depending on verification options
804 804 // and perm gen collection mode.
805 805 if (!CMSClassUnloadingEnabled) {
806 806 // If class unloading is disabled we want to include all classes into the root set.
807 807 add_root_scanning_option(SharedHeap::SO_AllClasses);
808 808 } else {
809 809 add_root_scanning_option(SharedHeap::SO_SystemClasses);
810 810 }
811 811
812 812 NOT_PRODUCT(_overflow_counter = CMSMarkStackOverflowInterval;)
813 813 _gc_counters = new CollectorCounters("CMS", 1);
814 814 _completed_initialization = true;
815 815 _inter_sweep_timer.start(); // start of time
816 816 #ifdef SPARC
817 817 // Issue a stern warning, but allow use for experimentation and debugging.
818 818 if (VM_Version::is_sun4v() && UseMemSetInBOT) {
819 819 assert(!FLAG_IS_DEFAULT(UseMemSetInBOT), "Error");
820 820 warning("Experimental flag -XX:+UseMemSetInBOT is known to cause instability"
821 821 " on sun4v; please understand that you are using at your own risk!");
822 822 }
823 823 #endif
824 824 }
825 825
826 826 const char* ConcurrentMarkSweepGeneration::name() const {
827 827 return "concurrent mark-sweep generation";
828 828 }
829 829 void ConcurrentMarkSweepGeneration::update_counters() {
830 830 if (UsePerfData) {
831 831 _space_counters->update_all();
832 832 _gen_counters->update_all();
833 833 }
834 834 }
835 835
836 836 // This is an optimized version of update_counters(). It takes the
837 837 // used value as a parameter rather than computing it.
838 838 //
839 839 void ConcurrentMarkSweepGeneration::update_counters(size_t used) {
840 840 if (UsePerfData) {
841 841 _space_counters->update_used(used);
842 842 _space_counters->update_capacity();
843 843 _gen_counters->update_all();
844 844 }
845 845 }
846 846
847 847 void ConcurrentMarkSweepGeneration::print() const {
848 848 Generation::print();
849 849 cmsSpace()->print();
850 850 }
851 851
852 852 #ifndef PRODUCT
853 853 void ConcurrentMarkSweepGeneration::print_statistics() {
854 854 cmsSpace()->printFLCensus(0);
855 855 }
856 856 #endif
857 857
858 858 void ConcurrentMarkSweepGeneration::printOccupancy(const char *s) {
859 859 GenCollectedHeap* gch = GenCollectedHeap::heap();
860 860 if (PrintGCDetails) {
861 861 if (Verbose) {
862 862 gclog_or_tty->print(" [%d %s-%s: "SIZE_FORMAT"("SIZE_FORMAT")]",
863 863 level(), short_name(), s, used(), capacity());
864 864 } else {
865 865 gclog_or_tty->print(" [%d %s-%s: "SIZE_FORMAT"K("SIZE_FORMAT"K)]",
866 866 level(), short_name(), s, used() / K, capacity() / K);
867 867 }
868 868 }
869 869 if (Verbose) {
870 870 gclog_or_tty->print(" "SIZE_FORMAT"("SIZE_FORMAT")",
871 871 gch->used(), gch->capacity());
872 872 } else {
873 873 gclog_or_tty->print(" "SIZE_FORMAT"K("SIZE_FORMAT"K)",
874 874 gch->used() / K, gch->capacity() / K);
875 875 }
876 876 }
877 877
878 878 size_t
879 879 ConcurrentMarkSweepGeneration::contiguous_available() const {
880 880 // dld proposes an improvement in precision here. If the committed
881 881 // part of the space ends in a free block we should add that to
882 882 // uncommitted size in the calculation below. Will make this
883 883 // change later, staying with the approximation below for the
884 884 // time being. -- ysr.
885 885 return MAX2(_virtual_space.uncommitted_size(), unsafe_max_alloc_nogc());
886 886 }
887 887
888 888 size_t
889 889 ConcurrentMarkSweepGeneration::unsafe_max_alloc_nogc() const {
890 890 return _cmsSpace->max_alloc_in_words() * HeapWordSize;
891 891 }
892 892
893 893 size_t ConcurrentMarkSweepGeneration::max_available() const {
894 894 return free() + _virtual_space.uncommitted_size();
895 895 }
896 896
897 897 bool ConcurrentMarkSweepGeneration::promotion_attempt_is_safe(size_t max_promotion_in_bytes) const {
898 898 size_t available = max_available();
899 899 size_t av_promo = (size_t)gc_stats()->avg_promoted()->padded_average();
900 900 bool res = (available >= av_promo) || (available >= max_promotion_in_bytes);
901 901 if (Verbose && PrintGCDetails) {
902 902 gclog_or_tty->print_cr(
903 903 "CMS: promo attempt is%s safe: available("SIZE_FORMAT") %s av_promo("SIZE_FORMAT"),"
904 904 "max_promo("SIZE_FORMAT")",
905 905 res? "":" not", available, res? ">=":"<",
906 906 av_promo, max_promotion_in_bytes);
907 907 }
908 908 return res;
909 909 }
910 910
911 911 // At a promotion failure dump information on block layout in heap
912 912 // (cms old generation).
913 913 void ConcurrentMarkSweepGeneration::promotion_failure_occurred() {
914 914 if (CMSDumpAtPromotionFailure) {
915 915 cmsSpace()->dump_at_safepoint_with_locks(collector(), gclog_or_tty);
916 916 }
917 917 }
918 918
919 919 CompactibleSpace*
920 920 ConcurrentMarkSweepGeneration::first_compaction_space() const {
921 921 return _cmsSpace;
922 922 }
923 923
924 924 void ConcurrentMarkSweepGeneration::reset_after_compaction() {
925 925 // Clear the promotion information. These pointers can be adjusted
926 926 // along with all the other pointers into the heap but
927 927 // compaction is expected to be a rare event with
928 928 // a heap using cms so don't do it without seeing the need.
929 929 if (CollectedHeap::use_parallel_gc_threads()) {
930 930 for (uint i = 0; i < ParallelGCThreads; i++) {
931 931 _par_gc_thread_states[i]->promo.reset();
932 932 }
933 933 }
934 934 }
935 935
936 936 void ConcurrentMarkSweepGeneration::space_iterate(SpaceClosure* blk, bool usedOnly) {
937 937 blk->do_space(_cmsSpace);
938 938 }
939 939
940 940 void ConcurrentMarkSweepGeneration::compute_new_size() {
941 941 assert_locked_or_safepoint(Heap_lock);
942 942
943 943 // If incremental collection failed, we just want to expand
944 944 // to the limit.
945 945 if (incremental_collection_failed()) {
946 946 clear_incremental_collection_failed();
947 947 grow_to_reserved();
948 948 return;
949 949 }
950 950
951 951 size_t expand_bytes = 0;
952 952 double free_percentage = ((double) free()) / capacity();
953 953 double desired_free_percentage = (double) MinHeapFreeRatio / 100;
954 954 double maximum_free_percentage = (double) MaxHeapFreeRatio / 100;
955 955
956 956 // compute expansion delta needed for reaching desired free percentage
957 957 if (free_percentage < desired_free_percentage) {
958 958 size_t desired_capacity = (size_t)(used() / ((double) 1 - desired_free_percentage));
959 959 assert(desired_capacity >= capacity(), "invalid expansion size");
960 960 expand_bytes = MAX2(desired_capacity - capacity(), MinHeapDeltaBytes);
961 961 }
962 962 if (expand_bytes > 0) {
963 963 if (PrintGCDetails && Verbose) {
964 964 size_t desired_capacity = (size_t)(used() / ((double) 1 - desired_free_percentage));
965 965 gclog_or_tty->print_cr("\nFrom compute_new_size: ");
966 966 gclog_or_tty->print_cr(" Free fraction %f", free_percentage);
967 967 gclog_or_tty->print_cr(" Desired free fraction %f",
968 968 desired_free_percentage);
969 969 gclog_or_tty->print_cr(" Maximum free fraction %f",
970 970 maximum_free_percentage);
971 971 gclog_or_tty->print_cr(" Capacity "SIZE_FORMAT, capacity()/1000);
972 972 gclog_or_tty->print_cr(" Desired capacity "SIZE_FORMAT,
973 973 desired_capacity/1000);
974 974 int prev_level = level() - 1;
975 975 if (prev_level >= 0) {
976 976 size_t prev_size = 0;
977 977 GenCollectedHeap* gch = GenCollectedHeap::heap();
978 978 Generation* prev_gen = gch->_gens[prev_level];
979 979 prev_size = prev_gen->capacity();
980 980 gclog_or_tty->print_cr(" Younger gen size "SIZE_FORMAT,
981 981 prev_size/1000);
982 982 }
983 983 gclog_or_tty->print_cr(" unsafe_max_alloc_nogc "SIZE_FORMAT,
984 984 unsafe_max_alloc_nogc()/1000);
985 985 gclog_or_tty->print_cr(" contiguous available "SIZE_FORMAT,
986 986 contiguous_available()/1000);
987 987 gclog_or_tty->print_cr(" Expand by "SIZE_FORMAT" (bytes)",
988 988 expand_bytes);
989 989 }
990 990 // safe if expansion fails
991 991 expand(expand_bytes, 0, CMSExpansionCause::_satisfy_free_ratio);
992 992 if (PrintGCDetails && Verbose) {
993 993 gclog_or_tty->print_cr(" Expanded free fraction %f",
994 994 ((double) free()) / capacity());
995 995 }
996 996 }
997 997 }
998 998
999 999 Mutex* ConcurrentMarkSweepGeneration::freelistLock() const {
1000 1000 return cmsSpace()->freelistLock();
1001 1001 }
1002 1002
1003 1003 HeapWord* ConcurrentMarkSweepGeneration::allocate(size_t size,
1004 1004 bool tlab) {
1005 1005 CMSSynchronousYieldRequest yr;
1006 1006 MutexLockerEx x(freelistLock(),
1007 1007 Mutex::_no_safepoint_check_flag);
1008 1008 return have_lock_and_allocate(size, tlab);
1009 1009 }
1010 1010
1011 1011 HeapWord* ConcurrentMarkSweepGeneration::have_lock_and_allocate(size_t size,
1012 1012 bool tlab /* ignored */) {
1013 1013 assert_lock_strong(freelistLock());
1014 1014 size_t adjustedSize = CompactibleFreeListSpace::adjustObjectSize(size);
1015 1015 HeapWord* res = cmsSpace()->allocate(adjustedSize);
1016 1016 // Allocate the object live (grey) if the background collector has
1017 1017 // started marking. This is necessary because the marker may
1018 1018 // have passed this address and consequently this object will
1019 1019 // not otherwise be greyed and would be incorrectly swept up.
1020 1020 // Note that if this object contains references, the writing
1021 1021 // of those references will dirty the card containing this object
1022 1022 // allowing the object to be blackened (and its references scanned)
1023 1023 // either during a preclean phase or at the final checkpoint.
1024 1024 if (res != NULL) {
1025 1025 // We may block here with an uninitialized object with
1026 1026 // its mark-bit or P-bits not yet set. Such objects need
1027 1027 // to be safely navigable by block_start().
1028 1028 assert(oop(res)->klass_or_null() == NULL, "Object should be uninitialized here.");
1029 1029 assert(!((FreeChunk*)res)->isFree(), "Error, block will look free but show wrong size");
1030 1030 collector()->direct_allocated(res, adjustedSize);
1031 1031 _direct_allocated_words += adjustedSize;
1032 1032 // allocation counters
1033 1033 NOT_PRODUCT(
1034 1034 _numObjectsAllocated++;
1035 1035 _numWordsAllocated += (int)adjustedSize;
1036 1036 )
1037 1037 }
1038 1038 return res;
1039 1039 }
1040 1040
1041 1041 // In the case of direct allocation by mutators in a generation that
1042 1042 // is being concurrently collected, the object must be allocated
1043 1043 // live (grey) if the background collector has started marking.
1044 1044 // This is necessary because the marker may
1045 1045 // have passed this address and consequently this object will
1046 1046 // not otherwise be greyed and would be incorrectly swept up.
1047 1047 // Note that if this object contains references, the writing
1048 1048 // of those references will dirty the card containing this object
1049 1049 // allowing the object to be blackened (and its references scanned)
1050 1050 // either during a preclean phase or at the final checkpoint.
1051 1051 void CMSCollector::direct_allocated(HeapWord* start, size_t size) {
1052 1052 assert(_markBitMap.covers(start, size), "Out of bounds");
1053 1053 if (_collectorState >= Marking) {
1054 1054 MutexLockerEx y(_markBitMap.lock(),
1055 1055 Mutex::_no_safepoint_check_flag);
1056 1056 // [see comments preceding SweepClosure::do_blk() below for details]
1057 1057 // 1. need to mark the object as live so it isn't collected
1058 1058 // 2. need to mark the 2nd bit to indicate the object may be uninitialized
1059 1059 // 3. need to mark the end of the object so marking, precleaning or sweeping
1060 1060 // can skip over uninitialized or unparsable objects. An allocated
1061 1061 // object is considered uninitialized for our purposes as long as
1062 1062 // its klass word is NULL. (Unparsable objects are those which are
1063 1063 // initialized in the sense just described, but whose sizes can still
1064 1064 // not be correctly determined. Note that the class of unparsable objects
1065 1065 // can only occur in the perm gen. All old gen objects are parsable
1066 1066 // as soon as they are initialized.)
1067 1067 _markBitMap.mark(start); // object is live
1068 1068 _markBitMap.mark(start + 1); // object is potentially uninitialized?
1069 1069 _markBitMap.mark(start + size - 1);
1070 1070 // mark end of object
1071 1071 }
1072 1072 // check that oop looks uninitialized
1073 1073 assert(oop(start)->klass_or_null() == NULL, "_klass should be NULL");
1074 1074 }
1075 1075
1076 1076 void CMSCollector::promoted(bool par, HeapWord* start,
1077 1077 bool is_obj_array, size_t obj_size) {
1078 1078 assert(_markBitMap.covers(start), "Out of bounds");
1079 1079 // See comment in direct_allocated() about when objects should
1080 1080 // be allocated live.
1081 1081 if (_collectorState >= Marking) {
1082 1082 // we already hold the marking bit map lock, taken in
1083 1083 // the prologue
1084 1084 if (par) {
1085 1085 _markBitMap.par_mark(start);
1086 1086 } else {
1087 1087 _markBitMap.mark(start);
1088 1088 }
1089 1089 // We don't need to mark the object as uninitialized (as
1090 1090 // in direct_allocated above) because this is being done with the
1091 1091 // world stopped and the object will be initialized by the
1092 1092 // time the marking, precleaning or sweeping get to look at it.
1093 1093 // But see the code for copying objects into the CMS generation,
1094 1094 // where we need to ensure that concurrent readers of the
1095 1095 // block offset table are able to safely navigate a block that
1096 1096 // is in flux from being free to being allocated (and in
1097 1097 // transition while being copied into) and subsequently
1098 1098 // becoming a bona-fide object when the copy/promotion is complete.
1099 1099 assert(SafepointSynchronize::is_at_safepoint(),
1100 1100 "expect promotion only at safepoints");
1101 1101
1102 1102 if (_collectorState < Sweeping) {
1103 1103 // Mark the appropriate cards in the modUnionTable, so that
1104 1104 // this object gets scanned before the sweep. If this is
1105 1105 // not done, CMS generation references in the object might
1106 1106 // not get marked.
1107 1107 // For the case of arrays, which are otherwise precisely
1108 1108 // marked, we need to dirty the entire array, not just its head.
1109 1109 if (is_obj_array) {
1110 1110 // The [par_]mark_range() method expects mr.end() below to
1111 1111 // be aligned to the granularity of a bit's representation
1112 1112 // in the heap. In the case of the MUT below, that's a
1113 1113 // card size.
1114 1114 MemRegion mr(start,
1115 1115 (HeapWord*)round_to((intptr_t)(start + obj_size),
1116 1116 CardTableModRefBS::card_size /* bytes */));
1117 1117 if (par) {
1118 1118 _modUnionTable.par_mark_range(mr);
1119 1119 } else {
1120 1120 _modUnionTable.mark_range(mr);
1121 1121 }
1122 1122 } else { // not an obj array; we can just mark the head
1123 1123 if (par) {
1124 1124 _modUnionTable.par_mark(start);
1125 1125 } else {
1126 1126 _modUnionTable.mark(start);
1127 1127 }
1128 1128 }
1129 1129 }
1130 1130 }
1131 1131 }
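
// Worked example for the object-array case above (illustrative addresses):
// with a card size of 512 bytes, an object array starting at 0x1000 whose
// last word ends at 0x1234 is dirtied over the MemRegion [0x1000, 0x1400),
// i.e. the end is rounded up to the next card boundary so that the entire
// array is rescanned before the sweep.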
1132 1132
1133 1133 static inline size_t percent_of_space(Space* space, HeapWord* addr)
1134 1134 {
1135 1135 size_t delta = pointer_delta(addr, space->bottom());
1136 1136 return (size_t)(delta * 100.0 / (space->capacity() / HeapWordSize));
1137 1137 }
1138 1138
1139 1139 void CMSCollector::icms_update_allocation_limits()
1140 1140 {
1141 1141 Generation* gen0 = GenCollectedHeap::heap()->get_gen(0);
1142 1142 EdenSpace* eden = gen0->as_DefNewGeneration()->eden();
1143 1143
1144 1144 const unsigned int duty_cycle = stats().icms_update_duty_cycle();
1145 1145 if (CMSTraceIncrementalPacing) {
1146 1146 stats().print();
1147 1147 }
1148 1148
1149 1149 assert(duty_cycle <= 100, "invalid duty cycle");
1150 1150 if (duty_cycle != 0) {
1151 1151 // The duty_cycle is a percentage between 0 and 100; convert to words and
1152 1152 // then compute the offset from the endpoints of the space.
1153 1153 size_t free_words = eden->free() / HeapWordSize;
1154 1154 double free_words_dbl = (double)free_words;
1155 1155 size_t duty_cycle_words = (size_t)(free_words_dbl * duty_cycle / 100.0);
1156 1156 size_t offset_words = (free_words - duty_cycle_words) / 2;
1157 1157
1158 1158 _icms_start_limit = eden->top() + offset_words;
1159 1159 _icms_stop_limit = eden->end() - offset_words;
1160 1160
1161 1161 // The limits may be adjusted (shifted to the right) by
1162 1162 // CMSIncrementalOffset, to allow the application more mutator time after a
1163 1163 // young gen gc (when all mutators were stopped) and before CMS starts and
1164 1164 // takes away one or more cpus.
1165 1165 if (CMSIncrementalOffset != 0) {
1166 1166 double adjustment_dbl = free_words_dbl * CMSIncrementalOffset / 100.0;
1167 1167 size_t adjustment = (size_t)adjustment_dbl;
1168 1168 HeapWord* tmp_stop = _icms_stop_limit + adjustment;
1169 1169 if (tmp_stop > _icms_stop_limit && tmp_stop < eden->end()) {
1170 1170 _icms_start_limit += adjustment;
1171 1171 _icms_stop_limit = tmp_stop;
1172 1172 }
1173 1173 }
1174 1174 }
1175 1175 if (duty_cycle == 0 || (_icms_start_limit == _icms_stop_limit)) {
1176 1176 _icms_start_limit = _icms_stop_limit = eden->end();
1177 1177 }
1178 1178
1179 1179 // Install the new start limit.
1180 1180 eden->set_soft_end(_icms_start_limit);
1181 1181
1182 1182 if (CMSTraceIncrementalMode) {
1183 1183 gclog_or_tty->print(" icms alloc limits: "
1184 1184 PTR_FORMAT "," PTR_FORMAT
1185 1185 " (" SIZE_FORMAT "%%," SIZE_FORMAT "%%) ",
1186 1186 _icms_start_limit, _icms_stop_limit,
1187 1187 percent_of_space(eden, _icms_start_limit),
1188 1188 percent_of_space(eden, _icms_stop_limit));
1189 1189 if (Verbose) {
1190 1190 gclog_or_tty->print("eden: ");
1191 1191 eden->print_on(gclog_or_tty);
1192 1192 }
1193 1193 }
1194 1194 }
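
// Worked example (illustrative): with 1000 free words in eden and a duty
// cycle of 20%, duty_cycle_words = 200 and offset_words = (1000 - 200) / 2
// = 400, so the incremental-mode burst is bounded by top() + 400 words and
// end() - 400 words (before any CMSIncrementalOffset shift to the right).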
1195 1195
1196 1196 // Any changes here should try to maintain the invariant
1197 1197 // that if this method is called with _icms_start_limit
1198 1198 // and _icms_stop_limit both NULL, then it should return NULL
1199 1199 // and not notify the icms thread.
1200 1200 HeapWord*
1201 1201 CMSCollector::allocation_limit_reached(Space* space, HeapWord* top,
1202 1202 size_t word_size)
1203 1203 {
1204 1204 // A start_limit equal to end() means the duty cycle is 0, so treat that as a
1205 1205 // nop.
1206 1206 if (CMSIncrementalMode && _icms_start_limit != space->end()) {
1207 1207 if (top <= _icms_start_limit) {
1208 1208 if (CMSTraceIncrementalMode) {
1209 1209 space->print_on(gclog_or_tty);
1210 1210 gclog_or_tty->stamp();
1211 1211 gclog_or_tty->print_cr(" start limit top=" PTR_FORMAT
1212 1212 ", new limit=" PTR_FORMAT
1213 1213 " (" SIZE_FORMAT "%%)",
1214 1214 top, _icms_stop_limit,
1215 1215 percent_of_space(space, _icms_stop_limit));
1216 1216 }
1217 1217 ConcurrentMarkSweepThread::start_icms();
1218 1218 assert(top < _icms_stop_limit, "Tautology");
1219 1219 if (word_size < pointer_delta(_icms_stop_limit, top)) {
1220 1220 return _icms_stop_limit;
1221 1221 }
1222 1222
1223 1223 // The allocation will cross both the _start and _stop limits, so do the
1224 1224 // stop notification also and return end().
1225 1225 if (CMSTraceIncrementalMode) {
1226 1226 space->print_on(gclog_or_tty);
1227 1227 gclog_or_tty->stamp();
1228 1228 gclog_or_tty->print_cr(" +stop limit top=" PTR_FORMAT
1229 1229 ", new limit=" PTR_FORMAT
1230 1230 " (" SIZE_FORMAT "%%)",
1231 1231 top, space->end(),
1232 1232 percent_of_space(space, space->end()));
1233 1233 }
1234 1234 ConcurrentMarkSweepThread::stop_icms();
1235 1235 return space->end();
1236 1236 }
1237 1237
1238 1238 if (top <= _icms_stop_limit) {
1239 1239 if (CMSTraceIncrementalMode) {
1240 1240 space->print_on(gclog_or_tty);
1241 1241 gclog_or_tty->stamp();
1242 1242 gclog_or_tty->print_cr(" stop limit top=" PTR_FORMAT
1243 1243 ", new limit=" PTR_FORMAT
1244 1244 " (" SIZE_FORMAT "%%)",
1245 1245 top, space->end(),
1246 1246 percent_of_space(space, space->end()));
1247 1247 }
1248 1248 ConcurrentMarkSweepThread::stop_icms();
1249 1249 return space->end();
1250 1250 }
1251 1251
1252 1252 if (CMSTraceIncrementalMode) {
1253 1253 space->print_on(gclog_or_tty);
1254 1254 gclog_or_tty->stamp();
1255 1255 gclog_or_tty->print_cr(" end limit top=" PTR_FORMAT
1256 1256 ", new limit=" PTR_FORMAT,
1257 1257 top, NULL);
1258 1258 }
1259 1259 }
1260 1260
1261 1261 return NULL;
1262 1262 }
1263 1263
1264 1264 oop ConcurrentMarkSweepGeneration::promote(oop obj, size_t obj_size) {
1265 1265 assert(obj_size == (size_t)obj->size(), "bad obj_size passed in");
1266 1266 // allocate, copy and if necessary update promoinfo --
1267 1267 // delegate to underlying space.
1268 1268 assert_lock_strong(freelistLock());
1269 1269
1270 1270 #ifndef PRODUCT
1271 1271 if (Universe::heap()->promotion_should_fail()) {
1272 1272 return NULL;
1273 1273 }
1274 1274 #endif // #ifndef PRODUCT
1275 1275
1276 1276 oop res = _cmsSpace->promote(obj, obj_size);
1277 1277 if (res == NULL) {
1278 1278 // expand and retry
1279 1279 size_t s = _cmsSpace->expansionSpaceRequired(obj_size); // HeapWords
1280 1280 expand(s*HeapWordSize, MinHeapDeltaBytes,
1281 1281 CMSExpansionCause::_satisfy_promotion);
1282 1282 // Since there's currently no next generation, we don't try to promote
1283 1283 // into a more senior generation.
1284 1284 assert(next_gen() == NULL, "assumption, based upon which no attempt "
1285 1285 "is made to pass on a possibly failing "
1286 1286 "promotion to next generation");
1287 1287 res = _cmsSpace->promote(obj, obj_size);
1288 1288 }
1289 1289 if (res != NULL) {
1290 1290 // See comment in allocate() about when objects should
1291 1291 // be allocated live.
1292 1292 assert(obj->is_oop(), "Will dereference klass pointer below");
1293 1293 collector()->promoted(false, // Not parallel
1294 1294 (HeapWord*)res, obj->is_objArray(), obj_size);
1295 1295 // promotion counters
1296 1296 NOT_PRODUCT(
1297 1297 _numObjectsPromoted++;
1298 1298 _numWordsPromoted +=
1299 1299 (int)(CompactibleFreeListSpace::adjustObjectSize(obj->size()));
1300 1300 )
1301 1301 }
1302 1302 return res;
1303 1303 }
1304 1304
1305 1305
1306 1306 HeapWord*
1307 1307 ConcurrentMarkSweepGeneration::allocation_limit_reached(Space* space,
1308 1308 HeapWord* top,
1309 1309 size_t word_sz)
1310 1310 {
1311 1311 return collector()->allocation_limit_reached(space, top, word_sz);
1312 1312 }
1313 1313
1314 1314 // IMPORTANT: Notes on object size recognition in CMS.
1315 1315 // ---------------------------------------------------
1316 1316 // A block of storage in the CMS generation is always in
1317 1317 // one of three states. A free block (FREE), an allocated
1318 1318 // object (OBJECT) whose size() method reports the correct size,
1319 1319 // and an intermediate state (TRANSIENT) in which its size cannot
1320 1320 // be accurately determined.
1321 1321 // STATE IDENTIFICATION: (32 bit and 64 bit w/o COOPS)
1322 1322 // -----------------------------------------------------
1323 1323 // FREE: klass_word & 1 == 1; mark_word holds block size
1324 1324 //
1325 1325 // OBJECT: klass_word installed; klass_word != 0 && klass_word & 1 == 0;
1326 1326 // obj->size() computes correct size
1327 1327 //            [Perm Gen objects need to be "parsable" before they can be navigated]
1328 1328 //
1329 1329 // TRANSIENT: klass_word == 0; size is indeterminate until we become an OBJECT
1330 1330 //
1331 1331 // STATE IDENTIFICATION: (64 bit+COOPS)
1332 1332 // ------------------------------------
1333 1333 // FREE: mark_word & CMS_FREE_BIT == 1; mark_word & ~CMS_FREE_BIT gives block_size
1334 1334 //
1335 1335 // OBJECT: klass_word installed; klass_word != 0;
1336 1336 // obj->size() computes correct size
1337 1337 // [Perm Gen comment above continues to hold]
1338 1338 //
1339 1339 // TRANSIENT: klass_word == 0; size is indeterminate until we become an OBJECT
1340 1340 //
1341 1341 //
1342 1342 // STATE TRANSITION DIAGRAM
1343 1343 //
1344 1344 // mut / parnew mut / parnew
1345 1345 // FREE --------------------> TRANSIENT ---------------------> OBJECT --|
1346 1346 // ^ |
1347 1347 // |------------------------ DEAD <------------------------------------|
1348 1348 // sweep mut
1349 1349 //
1350 1350 // While a block is in TRANSIENT state its size cannot be determined
1351 1351 // so readers will either need to come back later or stall until
1352 1352 // the size can be determined. Note that for the case of direct
1353 1353 // allocation, P-bits, when available, may be used to determine the
1354 1354 // size of an object that may not yet have been initialized.
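//
// To make the three states above concrete, here is a minimal sketch (not
// part of this source) of how a reader might branch on them under the
// 32-bit / 64-bit-without-COOPS encoding; the helper name
// block_size_or_zero() is hypothetical, and the real readers in this file
// handle additional cases (including the use of P-bits noted above):
//
//   size_t block_size_or_zero(HeapWord* addr) {
//     oop p = oop(addr);
//     if ((intptr_t)p->klass_or_null() & 1) {     // FREE: low bit of klass word set
//       return ((FreeChunk*)addr)->size();        //   block size kept in the chunk
//     } else if (p->klass_or_null() != NULL) {    // OBJECT: klass pointer installed
//       return p->size();                         //   obj->size() is now reliable
//     } else {                                    // TRANSIENT: size indeterminate
//       return 0;                                 //   caller must retry or stall
//     }
//   }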
1355 1355
1356 1356 // Things to support parallel young-gen collection.
1357 1357 oop
1358 1358 ConcurrentMarkSweepGeneration::par_promote(int thread_num,
1359 1359 oop old, markOop m,
1360 1360 size_t word_sz) {
1361 1361 #ifndef PRODUCT
1362 1362 if (Universe::heap()->promotion_should_fail()) {
1363 1363 return NULL;
1364 1364 }
1365 1365 #endif // #ifndef PRODUCT
1366 1366
1367 1367 CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1368 1368 PromotionInfo* promoInfo = &ps->promo;
1369 1369 // if we are tracking promotions, then first ensure space for
1370 1370 // promotion (including spooling space for saving header if necessary).
1371 1371 // then allocate and copy, then track promoted info if needed.
1372 1372 // When tracking (see PromotionInfo::track()), the mark word may
1373 1373 // be displaced and in this case restoration of the mark word
1374 1374 // occurs in the (oop_since_save_marks_)iterate phase.
1375 1375 if (promoInfo->tracking() && !promoInfo->ensure_spooling_space()) {
1376 1376 // Out of space for allocating spooling buffers;
1377 1377 // try expanding and allocating spooling buffers.
1378 1378 if (!expand_and_ensure_spooling_space(promoInfo)) {
1379 1379 return NULL;
1380 1380 }
1381 1381 }
1382 1382 assert(promoInfo->has_spooling_space(), "Control point invariant");
1383 1383 const size_t alloc_sz = CompactibleFreeListSpace::adjustObjectSize(word_sz);
1384 1384 HeapWord* obj_ptr = ps->lab.alloc(alloc_sz);
1385 1385 if (obj_ptr == NULL) {
1386 1386 obj_ptr = expand_and_par_lab_allocate(ps, alloc_sz);
1387 1387 if (obj_ptr == NULL) {
1388 1388 return NULL;
1389 1389 }
1390 1390 }
1391 1391 oop obj = oop(obj_ptr);
1392 1392 OrderAccess::storestore();
1393 1393 assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
1394 1394 assert(!((FreeChunk*)obj_ptr)->isFree(), "Error, block will look free but show wrong size");
1395 1395 // IMPORTANT: See note on object initialization for CMS above.
1396 1396 // Otherwise, copy the object. Here we must be careful to insert the
1397 1397 // klass pointer last, since this marks the block as an allocated object.
1398 1398 // Except with compressed oops it's the mark word.
1399 1399 HeapWord* old_ptr = (HeapWord*)old;
1400 1400 // Restore the mark word copied above.
1401 1401 obj->set_mark(m);
1402 1402 assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
1403 1403 assert(!((FreeChunk*)obj_ptr)->isFree(), "Error, block will look free but show wrong size");
1404 1404 OrderAccess::storestore();
1405 1405
1406 1406 if (UseCompressedOops) {
1407 1407 // Copy gap missed by (aligned) header size calculation below
1408 1408 obj->set_klass_gap(old->klass_gap());
1409 1409 }
1410 1410 if (word_sz > (size_t)oopDesc::header_size()) {
1411 1411 Copy::aligned_disjoint_words(old_ptr + oopDesc::header_size(),
1412 1412 obj_ptr + oopDesc::header_size(),
1413 1413 word_sz - oopDesc::header_size());
1414 1414 }
1415 1415
1416 1416 // Now we can track the promoted object, if necessary. We take care
1417 1417 // to delay the transition from uninitialized to full object
1418 1418 // (i.e., insertion of klass pointer) until after, so that it
1419 1419 // atomically becomes a promoted object.
1420 1420 if (promoInfo->tracking()) {
1421 1421 promoInfo->track((PromotedObject*)obj, old->klass());
1422 1422 }
1423 1423 assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
1424 1424 assert(!((FreeChunk*)obj_ptr)->isFree(), "Error, block will look free but show wrong size");
1425 1425 assert(old->is_oop(), "Will use and dereference old klass ptr below");
1426 1426
1427 1427 // Finally, install the klass pointer (this should be volatile).
1428 1428 OrderAccess::storestore();
1429 1429 obj->set_klass(old->klass());
1430 1430 // We should now be able to calculate the right size for this object
1431 1431 assert(obj->is_oop() && obj->size() == (int)word_sz, "Error, incorrect size computed for promoted object");
1432 1432
1433 1433 collector()->promoted(true, // parallel
1434 1434 obj_ptr, old->is_objArray(), word_sz);
1435 1435
1436 1436 NOT_PRODUCT(
1437 1437 Atomic::inc_ptr(&_numObjectsPromoted);
1438 1438 Atomic::add_ptr(alloc_sz, &_numWordsPromoted);
1439 1439 )
1440 1440
1441 1441 return obj;
1442 1442 }
1443 1443
1444 1444 void
1445 1445 ConcurrentMarkSweepGeneration::
1446 1446 par_promote_alloc_undo(int thread_num,
1447 1447 HeapWord* obj, size_t word_sz) {
1448 1448 // CMS does not support promotion undo.
1449 1449 ShouldNotReachHere();
1450 1450 }
1451 1451
1452 1452 void
1453 1453 ConcurrentMarkSweepGeneration::
1454 1454 par_promote_alloc_done(int thread_num) {
1455 1455 CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1456 1456 ps->lab.retire(thread_num);
1457 1457 }
1458 1458
1459 1459 void
1460 1460 ConcurrentMarkSweepGeneration::
1461 1461 par_oop_since_save_marks_iterate_done(int thread_num) {
1462 1462 CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1463 1463 ParScanWithoutBarrierClosure* dummy_cl = NULL;
1464 1464 ps->promo.promoted_oops_iterate_nv(dummy_cl);
1465 1465 }
1466 1466
1467 1467 // XXXPERM
1468 1468 bool ConcurrentMarkSweepGeneration::should_collect(bool full,
1469 1469 size_t size,
1470 1470 bool tlab)
1471 1471 {
1472 1472 // We allow a STW collection only if a full
1473 1473 // collection was requested.
1474 1474 return full || should_allocate(size, tlab); // FIX ME !!!
1475 1475 // This and promotion failure handling are connected at the
1476 1476 // hip and should be fixed by untying them.
1477 1477 }
1478 1478
1479 1479 bool CMSCollector::shouldConcurrentCollect() {
1480 1480 if (_full_gc_requested) {
1481 1481 if (Verbose && PrintGCDetails) {
1482 1482 gclog_or_tty->print_cr("CMSCollector: collect because of explicit "
1483 1483       "gc request (or gc_locker)");
1484 1484 }
1485 1485 return true;
1486 1486 }
1487 1487
1488 1488 // For debugging purposes, change the type of collection.
1489 1489 // If the rotation is not on the concurrent collection
1490 1490 // type, don't start a concurrent collection.
1491 1491 NOT_PRODUCT(
1492 1492 if (RotateCMSCollectionTypes &&
1493 1493 (_cmsGen->debug_collection_type() !=
1494 1494 ConcurrentMarkSweepGeneration::Concurrent_collection_type)) {
1495 1495 assert(_cmsGen->debug_collection_type() !=
1496 1496 ConcurrentMarkSweepGeneration::Unknown_collection_type,
1497 1497 "Bad cms collection type");
1498 1498 return false;
1499 1499 }
1500 1500 )
1501 1501
1502 1502 FreelistLocker x(this);
1503 1503 // ------------------------------------------------------------------
1504 1504 // Print out lots of information which affects the initiation of
1505 1505 // a collection.
1506 1506 if (PrintCMSInitiationStatistics && stats().valid()) {
1507 1507 gclog_or_tty->print("CMSCollector shouldConcurrentCollect: ");
1508 1508 gclog_or_tty->stamp();
1509 1509 gclog_or_tty->print_cr("");
1510 1510 stats().print_on(gclog_or_tty);
1511 1511 gclog_or_tty->print_cr("time_until_cms_gen_full %3.7f",
1512 1512 stats().time_until_cms_gen_full());
1513 1513 gclog_or_tty->print_cr("free="SIZE_FORMAT, _cmsGen->free());
1514 1514 gclog_or_tty->print_cr("contiguous_available="SIZE_FORMAT,
1515 1515 _cmsGen->contiguous_available());
1516 1516 gclog_or_tty->print_cr("promotion_rate=%g", stats().promotion_rate());
1517 1517 gclog_or_tty->print_cr("cms_allocation_rate=%g", stats().cms_allocation_rate());
1518 1518 gclog_or_tty->print_cr("occupancy=%3.7f", _cmsGen->occupancy());
1519 1519 gclog_or_tty->print_cr("initiatingOccupancy=%3.7f", _cmsGen->initiating_occupancy());
1520 1520 gclog_or_tty->print_cr("initiatingPermOccupancy=%3.7f", _permGen->initiating_occupancy());
1521 1521 }
1522 1522 // ------------------------------------------------------------------
1523 1523
1524 1524 // If the estimated time to complete a cms collection (cms_duration())
1525 1525 // is less than the estimated time remaining until the cms generation
1526 1526 // is full, start a collection.
1527 1527 if (!UseCMSInitiatingOccupancyOnly) {
1528 1528 if (stats().valid()) {
1529 1529 if (stats().time_until_cms_start() == 0.0) {
1530 1530 return true;
1531 1531 }
1532 1532 } else {
1533 1533 // We want to conservatively collect somewhat early in order
1534 1534 // to try and "bootstrap" our CMS/promotion statistics;
1535 1535 // this branch will not fire after the first successful CMS
1536 1536 // collection because the stats should then be valid.
1537 1537 if (_cmsGen->occupancy() >= _bootstrap_occupancy) {
1538 1538 if (Verbose && PrintGCDetails) {
1539 1539 gclog_or_tty->print_cr(
1540 1540 " CMSCollector: collect for bootstrapping statistics:"
1541 1541 " occupancy = %f, boot occupancy = %f", _cmsGen->occupancy(),
1542 1542 _bootstrap_occupancy);
1543 1543 }
1544 1544 return true;
1545 1545 }
1546 1546 }
1547 1547 }
1548 1548
1549 1549 // Otherwise, we start a collection cycle if either the perm gen or
1550 1550 // old gen want a collection cycle started. Each may use
1551 1551 // an appropriate criterion for making this decision.
1552 1552 // XXX We need to make sure that the gen expansion
1553 1553 // criterion dovetails well with this. XXX NEED TO FIX THIS
1554 1554 if (_cmsGen->should_concurrent_collect()) {
1555 1555 if (Verbose && PrintGCDetails) {
1556 1556 gclog_or_tty->print_cr("CMS old gen initiated");
1557 1557 }
1558 1558 return true;
1559 1559 }
1560 1560
1561 1561 // We start a collection if we believe an incremental collection may fail;
1562 1562 // this is not likely to be productive in practice because it's probably too
1563 1563 // late anyway.
1564 1564 GenCollectedHeap* gch = GenCollectedHeap::heap();
1565 1565 assert(gch->collector_policy()->is_two_generation_policy(),
1566 1566 "You may want to check the correctness of the following");
1567 1567 if (gch->incremental_collection_will_fail(true /* consult_young */)) {
1568 1568 if (Verbose && PrintGCDetails) {
1569 1569 gclog_or_tty->print("CMSCollector: collect because incremental collection will fail ");
1570 1570 }
1571 1571 return true;
1572 1572 }
1573 1573
1574 1574 if (CMSClassUnloadingEnabled && _permGen->should_concurrent_collect()) {
1575 1575 bool res = update_should_unload_classes();
1576 1576 if (res) {
1577 1577 if (Verbose && PrintGCDetails) {
1578 1578 gclog_or_tty->print_cr("CMS perm gen initiated");
1579 1579 }
1580 1580 return true;
1581 1581 }
1582 1582 }
1583 1583 return false;
1584 1584 }
1585 1585
1586 1586 // Clear _expansion_cause fields of constituent generations
1587 1587 void CMSCollector::clear_expansion_cause() {
1588 1588 _cmsGen->clear_expansion_cause();
1589 1589 _permGen->clear_expansion_cause();
1590 1590 }
1591 1591
1592 1592 // We should be conservative in starting a collection cycle. To
1593 1593 // start too eagerly runs the risk of collecting too often in the
1594 1594 // extreme. To collect too rarely falls back on full collections,
1595 1595 // which works, even if not optimum in terms of concurrent work.
1596 1596 // As a workaround for collecting too eagerly, use the flag
1597 1597 // UseCMSInitiatingOccupancyOnly. This also has the advantage of
1598 1598 // giving the user an easily understandable way of controlling the
1599 1599 // collections.
1600 1600 // We want to start a new collection cycle if any of the following
1601 1601 // conditions hold:
1602 1602 // . our current occupancy exceeds the configured initiating occupancy
1603 1603 // for this generation, or
1604 1604 // . we recently needed to expand this space and have not, since that
1605 1605 // expansion, done a collection of this generation, or
1606 1606 // . the underlying space believes that it may be a good idea to initiate
1607 1607 // a concurrent collection (this may be based on criteria such as the
1608 1608 // following: the space uses linear allocation and linear allocation is
1609 1609 // going to fail, or there is believed to be excessive fragmentation in
1610 1610 // the generation, etc... or ...
1611 1611 // [.(currently done by CMSCollector::shouldConcurrentCollect() only for
1612 1612 // the case of the old generation, not the perm generation; see CR 6543076):
1613 1613 // we may be approaching a point at which allocation requests may fail because
1614 1614 // we will be out of sufficient free space given allocation rate estimates.]
1615 1615 bool ConcurrentMarkSweepGeneration::should_concurrent_collect() const {
1616 1616
1617 1617 assert_lock_strong(freelistLock());
1618 1618 if (occupancy() > initiating_occupancy()) {
1619 1619 if (PrintGCDetails && Verbose) {
1620 1620 gclog_or_tty->print(" %s: collect because of occupancy %f / %f ",
1621 1621 short_name(), occupancy(), initiating_occupancy());
1622 1622 }
1623 1623 return true;
1624 1624 }
1625 1625 if (UseCMSInitiatingOccupancyOnly) {
1626 1626 return false;
1627 1627 }
1628 1628 if (expansion_cause() == CMSExpansionCause::_satisfy_allocation) {
1629 1629 if (PrintGCDetails && Verbose) {
1630 1630 gclog_or_tty->print(" %s: collect because expanded for allocation ",
1631 1631 short_name());
1632 1632 }
1633 1633 return true;
1634 1634 }
1635 1635 if (_cmsSpace->should_concurrent_collect()) {
1636 1636 if (PrintGCDetails && Verbose) {
1637 1637 gclog_or_tty->print(" %s: collect because cmsSpace says so ",
1638 1638 short_name());
1639 1639 }
1640 1640 return true;
1641 1641 }
1642 1642 return false;
1643 1643 }
1644 1644
1645 1645 void ConcurrentMarkSweepGeneration::collect(bool full,
1646 1646 bool clear_all_soft_refs,
1647 1647 size_t size,
1648 1648 bool tlab)
1649 1649 {
1650 1650 collector()->collect(full, clear_all_soft_refs, size, tlab);
1651 1651 }
1652 1652
1653 1653 void CMSCollector::collect(bool full,
1654 1654 bool clear_all_soft_refs,
1655 1655 size_t size,
1656 1656 bool tlab)
1657 1657 {
1658 1658 if (!UseCMSCollectionPassing && _collectorState > Idling) {
1659 1659 // For debugging purposes skip the collection if the state
1660 1660 // is not currently idle
1661 1661 if (TraceCMSState) {
1662 1662 gclog_or_tty->print_cr("Thread " INTPTR_FORMAT " skipped full:%d CMS state %d",
1663 1663 Thread::current(), full, _collectorState);
1664 1664 }
1665 1665 return;
1666 1666 }
1667 1667
1668 1668 // The following "if" branch is present for defensive reasons.
1669 1669 // In the current uses of this interface, it can be replaced with:
1670 1670   // assert(!GC_locker::is_active(), "Can't be called otherwise");
1671 1671 // But I am not placing that assert here to allow future
1672 1672 // generality in invoking this interface.
1673 1673 if (GC_locker::is_active()) {
1674 1674 // A consistency test for GC_locker
1675 1675 assert(GC_locker::needs_gc(), "Should have been set already");
1676 1676 // Skip this foreground collection, instead
1677 1677 // expanding the heap if necessary.
1678 1678 // Need the free list locks for the call to free() in compute_new_size()
1679 1679 compute_new_size();
1680 1680 return;
1681 1681 }
1682 1682 acquire_control_and_collect(full, clear_all_soft_refs);
1683 1683 _full_gcs_since_conc_gc++;
1684 1684
1685 1685 }
1686 1686
1687 1687 void CMSCollector::request_full_gc(unsigned int full_gc_count) {
1688 1688 GenCollectedHeap* gch = GenCollectedHeap::heap();
1689 1689 unsigned int gc_count = gch->total_full_collections();
1690 1690 if (gc_count == full_gc_count) {
1691 1691 MutexLockerEx y(CGC_lock, Mutex::_no_safepoint_check_flag);
1692 1692 _full_gc_requested = true;
1693 1693 CGC_lock->notify(); // nudge CMS thread
1694 1694 } else {
1695 1695 assert(gc_count > full_gc_count, "Error: causal loop");
1696 1696 }
1697 1697 }
1698 1698
1699 1699
1700 1700 // The foreground and background collectors need to coordinate in order
1701 1701 // to make sure that they do not mutually interfere with CMS collections.
1702 1702 // When a background collection is active,
1703 1703 // the foreground collector may need to take over (preempt) and
1704 1704 // synchronously complete an ongoing collection. Depending on the
1705 1705 // frequency of the background collections and the heap usage
1706 1706 // of the application, this preemption can be seldom or frequent.
1707 1707 // There are only certain
1708 1708 // points in the background collection that the "collection-baton"
1709 1709 // can be passed to the foreground collector.
1710 1710 //
1711 1711 // The foreground collector will wait for the baton before
1712 1712 // starting any part of the collection. The foreground collector
1713 1713 // will only wait at one location.
1714 1714 //
1715 1715 // The background collector will yield the baton before starting a new
1716 1716 // phase of the collection (e.g., before initial marking, marking from roots,
1717 1717 // precleaning, final re-mark, sweep etc.) This is normally done at the head
1718 1718 // of the loop which switches the phases. The background collector does some
1719 1719 // of the phases (initial mark, final re-mark) with the world stopped.
1720 1720 // Because of locking involved in stopping the world,
1721 1721 // the foreground collector should not block waiting for the background
1722 1722 // collector when it is doing a stop-the-world phase. The background
1723 1723 // collector will yield the baton at an additional point just before
1724 1724 // it enters a stop-the-world phase. Once the world is stopped, the
1725 1725 // background collector checks the phase of the collection. If the
1726 1726 // phase has not changed, it proceeds with the collection. If the
1727 1727 // phase has changed, it skips that phase of the collection. See
1728 1728 // the comments on the use of the Heap_lock in collect_in_background().
1729 1729 //
1730 1730 // Variables used in baton passing.
1731 1731 // _foregroundGCIsActive - Set to true by the foreground collector when
1732 1732 // it wants the baton. The foreground clears it when it has finished
1733 1733 // the collection.
1734 1734 // _foregroundGCShouldWait - Set to true by the background collector
1735 1735 // when it is running. The foreground collector waits while
1736 1736 // _foregroundGCShouldWait is true.
1737 1737 // CGC_lock - monitor used to protect access to the above variables
1738 1738 // and to notify the foreground and background collectors.
1739 1739 // _collectorState - current state of the CMS collection.
1740 1740 //
1741 1741 // The foreground collector
1742 1742 // acquires the CGC_lock
1743 1743 // sets _foregroundGCIsActive
1744 1744 // waits on the CGC_lock for _foregroundGCShouldWait to be false
1745 1745 // various locks acquired in preparation for the collection
1746 1746 // are released so as not to block the background collector
1747 1747 // that is in the midst of a collection
1748 1748 // proceeds with the collection
1749 1749 // clears _foregroundGCIsActive
1750 1750 // returns
1751 1751 //
1752 1752 // The background collector in a loop iterating on the phases of the
1753 1753 // collection
1754 1754 // acquires the CGC_lock
1755 1755 // sets _foregroundGCShouldWait
1756 1756 // if _foregroundGCIsActive is set
1757 1757 //       clears _foregroundGCShouldWait, notifies CGC_lock
1758 1758 //       waits on CGC_lock for _foregroundGCIsActive to become false
1759 1759 // and exits the loop.
1760 1760 // otherwise
1761 1761 // proceed with that phase of the collection
1762 1762 // if the phase is a stop-the-world phase,
1763 1763 // yield the baton once more just before enqueueing
1764 1764 // the stop-world CMS operation (executed by the VM thread).
1765 1765 // returns after all phases of the collection are done
1766 1766 //
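//
// A minimal sketch of the CGC_lock handshake implied by the protocol above
// (illustrative only; the actual code in acquire_control_and_collect() and
// waitForForegroundGC() below additionally manages the CMS token and the
// free-list/bitmap locks):
//
//   // foreground (VM thread)
//   _foregroundGCIsActive = true;
//   { MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
//     CGC_lock->notify();                    // wake a possibly blocked CMS thread
//     while (_foregroundGCShouldWait) {
//       CGC_lock->wait(Mutex::_no_safepoint_check_flag);
//     }
//   }
//   // ... do the (remainder of the) collection ...
//   _foregroundGCIsActive = false;
//
//   // background (CMS thread), at each baton-passing point
//   { MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
//     _foregroundGCShouldWait = true;
//     if (_foregroundGCIsActive) {
//       _foregroundGCShouldWait = false;     // let the foreground proceed
//       CGC_lock->notify();
//       while (_foregroundGCIsActive) {
//         CGC_lock->wait(Mutex::_no_safepoint_check_flag);
//       }
//     }
//   }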
1767 1767
1768 1768 void CMSCollector::acquire_control_and_collect(bool full,
1769 1769 bool clear_all_soft_refs) {
1770 1770 assert(SafepointSynchronize::is_at_safepoint(), "should be at safepoint");
1771 1771 assert(!Thread::current()->is_ConcurrentGC_thread(),
1772 1772 "shouldn't try to acquire control from self!");
1773 1773
1774 1774 // Start the protocol for acquiring control of the
1775 1775 // collection from the background collector (aka CMS thread).
1776 1776 assert(ConcurrentMarkSweepThread::vm_thread_has_cms_token(),
1777 1777 "VM thread should have CMS token");
1778 1778 // Remember the possibly interrupted state of an ongoing
1779 1779 // concurrent collection
1780 1780 CollectorState first_state = _collectorState;
1781 1781
1782 1782 // Signal to a possibly ongoing concurrent collection that
1783 1783 // we want to do a foreground collection.
1784 1784 _foregroundGCIsActive = true;
1785 1785
1786 1786 // Disable incremental mode during a foreground collection.
1787 1787 ICMSDisabler icms_disabler;
1788 1788
1789 1789 // release locks and wait for a notify from the background collector
1790 1790   // releasing the locks is only necessary for phases which
1791 1791   // do yield, to improve the granularity of the collection.
1792 1792 assert_lock_strong(bitMapLock());
1793 1793 // We need to lock the Free list lock for the space that we are
1794 1794 // currently collecting.
1795 1795 assert(haveFreelistLocks(), "Must be holding free list locks");
1796 1796 bitMapLock()->unlock();
1797 1797 releaseFreelistLocks();
1798 1798 {
1799 1799 MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
1800 1800 if (_foregroundGCShouldWait) {
1801 1801 // We are going to be waiting for action for the CMS thread;
1802 1802 // it had better not be gone (for instance at shutdown)!
1803 1803 assert(ConcurrentMarkSweepThread::cmst() != NULL,
1804 1804 "CMS thread must be running");
1805 1805 // Wait here until the background collector gives us the go-ahead
1806 1806 ConcurrentMarkSweepThread::clear_CMS_flag(
1807 1807 ConcurrentMarkSweepThread::CMS_vm_has_token); // release token
1808 1808 // Get a possibly blocked CMS thread going:
1809 1809 // Note that we set _foregroundGCIsActive true above,
1810 1810 // without protection of the CGC_lock.
1811 1811 CGC_lock->notify();
1812 1812 assert(!ConcurrentMarkSweepThread::vm_thread_wants_cms_token(),
1813 1813 "Possible deadlock");
1814 1814 while (_foregroundGCShouldWait) {
1815 1815 // wait for notification
1816 1816 CGC_lock->wait(Mutex::_no_safepoint_check_flag);
1817 1817 // Possibility of delay/starvation here, since CMS token does
1818 1818         // not know to give priority to VM thread? Actually, I think
1819 1819 // there wouldn't be any delay/starvation, but the proof of
1820 1820 // that "fact" (?) appears non-trivial. XXX 20011219YSR
1821 1821 }
1822 1822 ConcurrentMarkSweepThread::set_CMS_flag(
1823 1823 ConcurrentMarkSweepThread::CMS_vm_has_token);
1824 1824 }
1825 1825 }
1826 1826 // The CMS_token is already held. Get back the other locks.
1827 1827 assert(ConcurrentMarkSweepThread::vm_thread_has_cms_token(),
1828 1828 "VM thread should have CMS token");
1829 1829 getFreelistLocks();
1830 1830 bitMapLock()->lock_without_safepoint_check();
1831 1831 if (TraceCMSState) {
1832 1832 gclog_or_tty->print_cr("CMS foreground collector has asked for control "
1833 1833 INTPTR_FORMAT " with first state %d", Thread::current(), first_state);
1834 1834 gclog_or_tty->print_cr(" gets control with state %d", _collectorState);
1835 1835 }
1836 1836
1837 1837 // Check if we need to do a compaction, or if not, whether
1838 1838 // we need to start the mark-sweep from scratch.
1839 1839 bool should_compact = false;
1840 1840 bool should_start_over = false;
1841 1841 decide_foreground_collection_type(clear_all_soft_refs,
1842 1842 &should_compact, &should_start_over);
1843 1843
1844 1844 NOT_PRODUCT(
1845 1845 if (RotateCMSCollectionTypes) {
1846 1846 if (_cmsGen->debug_collection_type() ==
1847 1847 ConcurrentMarkSweepGeneration::MSC_foreground_collection_type) {
1848 1848 should_compact = true;
1849 1849 } else if (_cmsGen->debug_collection_type() ==
1850 1850 ConcurrentMarkSweepGeneration::MS_foreground_collection_type) {
1851 1851 should_compact = false;
1852 1852 }
1853 1853 }
1854 1854 )
1855 1855
1856 1856 if (PrintGCDetails && first_state > Idling) {
1857 1857 GCCause::Cause cause = GenCollectedHeap::heap()->gc_cause();
1858 1858 if (GCCause::is_user_requested_gc(cause) ||
1859 1859 GCCause::is_serviceability_requested_gc(cause)) {
1860 1860 gclog_or_tty->print(" (concurrent mode interrupted)");
1861 1861 } else {
1862 1862 gclog_or_tty->print(" (concurrent mode failure)");
1863 1863 }
1864 1864 }
1865 1865
1866 1866 if (should_compact) {
1867 1867 // If the collection is being acquired from the background
1868 1868 // collector, there may be references on the discovered
1869 1869 // references lists that have NULL referents (being those
1870 1870 // that were concurrently cleared by a mutator) or
1871 1871 // that are no longer active (having been enqueued concurrently
1872 1872 // by the mutator).
1873 1873 // Scrub the list of those references because Mark-Sweep-Compact
1874 1874 // code assumes referents are not NULL and that all discovered
1875 1875 // Reference objects are active.
1876 1876 ref_processor()->clean_up_discovered_references();
1877 1877
1878 1878 do_compaction_work(clear_all_soft_refs);
1879 1879
1880 1880 // Has the GC time limit been exceeded?
1881 1881 DefNewGeneration* young_gen = _young_gen->as_DefNewGeneration();
1882 1882 size_t max_eden_size = young_gen->max_capacity() -
1883 1883 young_gen->to()->capacity() -
1884 1884 young_gen->from()->capacity();
1885 1885 GenCollectedHeap* gch = GenCollectedHeap::heap();
1886 1886 GCCause::Cause gc_cause = gch->gc_cause();
1887 1887 size_policy()->check_gc_overhead_limit(_young_gen->used(),
1888 1888 young_gen->eden()->used(),
1889 1889 _cmsGen->max_capacity(),
1890 1890 max_eden_size,
1891 1891 full,
1892 1892 gc_cause,
1893 1893 gch->collector_policy());
1894 1894 } else {
1895 1895 do_mark_sweep_work(clear_all_soft_refs, first_state,
1896 1896 should_start_over);
1897 1897 }
1898 1898 // Reset the expansion cause, now that we just completed
1899 1899 // a collection cycle.
1900 1900 clear_expansion_cause();
1901 1901 _foregroundGCIsActive = false;
1902 1902 return;
1903 1903 }
1904 1904
1905 1905 // Resize the perm generation and the tenured generation
1906 1906 // after obtaining the free list locks for the
1907 1907 // two generations.
1908 1908 void CMSCollector::compute_new_size() {
1909 1909 assert_locked_or_safepoint(Heap_lock);
1910 1910 FreelistLocker z(this);
1911 1911 _permGen->compute_new_size();
1912 1912 _cmsGen->compute_new_size();
1913 1913 }
1914 1914
1915 1915 // A work method used by foreground collection to determine
1916 1916 // what type of collection (compacting or not, continuing or fresh)
1917 1917 // it should do.
1918 1918 // NOTE: the intent is to make UseCMSCompactAtFullCollection
1919 1919 // and CMSCompactWhenClearAllSoftRefs the default in the future
1920 1920 // and do away with the flags after a suitable period.
1921 1921 void CMSCollector::decide_foreground_collection_type(
1922 1922 bool clear_all_soft_refs, bool* should_compact,
1923 1923 bool* should_start_over) {
1924 1924 // Normally, we'll compact only if the UseCMSCompactAtFullCollection
1925 1925 // flag is set, and we have either requested a System.gc() or
1926 1926 // the number of full gc's since the last concurrent cycle
1927 1927 // has exceeded the threshold set by CMSFullGCsBeforeCompaction,
1928 1928 // or if an incremental collection has failed
1929 1929 GenCollectedHeap* gch = GenCollectedHeap::heap();
1930 1930 assert(gch->collector_policy()->is_two_generation_policy(),
1931 1931 "You may want to check the correctness of the following");
1932 1932 // Inform cms gen if this was due to partial collection failing.
1933 1933 // The CMS gen may use this fact to determine its expansion policy.
1934 1934 if (gch->incremental_collection_will_fail(false /* don't consult_young */)) {
1935 1935 assert(!_cmsGen->incremental_collection_failed(),
1936 1936 "Should have been noticed, reacted to and cleared");
1937 1937 _cmsGen->set_incremental_collection_failed();
1938 1938 }
1939 1939 *should_compact =
1940 1940 UseCMSCompactAtFullCollection &&
1941 1941 ((_full_gcs_since_conc_gc >= CMSFullGCsBeforeCompaction) ||
1942 1942 GCCause::is_user_requested_gc(gch->gc_cause()) ||
1943 1943 gch->incremental_collection_will_fail(true /* consult_young */));
1944 1944 *should_start_over = false;
1945 1945 if (clear_all_soft_refs && !*should_compact) {
1946 1946 // We are about to do a last ditch collection attempt
1947 1947 // so it would normally make sense to do a compaction
1948 1948 // to reclaim as much space as possible.
1949 1949 if (CMSCompactWhenClearAllSoftRefs) {
1950 1950 // Default: The rationale is that in this case either
1951 1951 // we are past the final marking phase, in which case
1952 1952 // we'd have to start over, or so little has been done
1953 1953 // that there's little point in saving that work. Compaction
1954 1954 // appears to be the sensible choice in either case.
1955 1955 *should_compact = true;
1956 1956 } else {
1957 1957 // We have been asked to clear all soft refs, but not to
1958 1958 // compact. Make sure that we aren't past the final checkpoint
1959 1959 // phase, for that is where we process soft refs. If we are already
1960 1960 // past that phase, we'll need to redo the refs discovery phase and
1961 1961 // if necessary clear soft refs that weren't previously
1962 1962 // cleared. We do so by remembering the phase in which
1963 1963 // we came in, and if we are past the refs processing
1964 1964 // phase, we'll choose to just redo the mark-sweep
1965 1965 // collection from scratch.
1966 1966 if (_collectorState > FinalMarking) {
1967 1967 // We are past the refs processing phase;
1968 1968 // start over and do a fresh synchronous CMS cycle
1969 1969 _collectorState = Resetting; // skip to reset to start new cycle
1970 1970 reset(false /* == !asynch */);
1971 1971 *should_start_over = true;
1972 1972 } // else we can continue a possibly ongoing current cycle
1973 1973 }
1974 1974 }
1975 1975 }
1976 1976
1977 1977 // A work method used by the foreground collector to do
1978 1978 // a mark-sweep-compact.
1979 1979 void CMSCollector::do_compaction_work(bool clear_all_soft_refs) {
1980 1980 GenCollectedHeap* gch = GenCollectedHeap::heap();
1981 1981 TraceTime t("CMS:MSC ", PrintGCDetails && Verbose, true, gclog_or_tty);
1982 1982 if (PrintGC && Verbose && !(GCCause::is_user_requested_gc(gch->gc_cause()))) {
1983 1983 gclog_or_tty->print_cr("Compact ConcurrentMarkSweepGeneration after %d "
1984 1984 "collections passed to foreground collector", _full_gcs_since_conc_gc);
1985 1985 }
1986 1986
1987 1987 // Sample collection interval time and reset for collection pause.
1988 1988 if (UseAdaptiveSizePolicy) {
1989 1989 size_policy()->msc_collection_begin();
1990 1990 }
1991 1991
1992 1992 // Temporarily widen the span of the weak reference processing to
1993 1993 // the entire heap.
1994 1994 MemRegion new_span(GenCollectedHeap::heap()->reserved_region());
1995 1995 ReferenceProcessorSpanMutator rp_mut_span(ref_processor(), new_span);
1996 1996 // Temporarily, clear the "is_alive_non_header" field of the
1997 1997 // reference processor.
1998 1998 ReferenceProcessorIsAliveMutator rp_mut_closure(ref_processor(), NULL);
1999 1999 // Temporarily make reference _processing_ single threaded (non-MT).
2000 2000 ReferenceProcessorMTProcMutator rp_mut_mt_processing(ref_processor(), false);
2001 2001 // Temporarily make refs discovery atomic
2002 2002 ReferenceProcessorAtomicMutator rp_mut_atomic(ref_processor(), true);
2003 2003 // Temporarily make reference _discovery_ single threaded (non-MT)
2004 2004 ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false);
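  // (Illustrative note: each ReferenceProcessor*Mutator above is a
  // stack-allocated helper that saves the previous value of the
  // corresponding reference-processor setting in its constructor and
  // restores it in its destructor, so all of these temporary adjustments
  // are undone automatically when do_compaction_work() returns.)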
2005 2005
2006 2006 ref_processor()->set_enqueuing_is_done(false);
2007 - ref_processor()->enable_discovery();
2007 + ref_processor()->enable_discovery(false /*verify_disabled*/, false /*check_no_refs*/);
2008 2008 ref_processor()->setup_policy(clear_all_soft_refs);
2009 2009 // If an asynchronous collection finishes, the _modUnionTable is
2010 2010 // all clear. If we are assuming the collection from an asynchronous
2011 2011 // collection, clear the _modUnionTable.
2012 2012 assert(_collectorState != Idling || _modUnionTable.isAllClear(),
2013 2013 "_modUnionTable should be clear if the baton was not passed");
2014 2014 _modUnionTable.clear_all();
2015 2015
2016 2016 // We must adjust the allocation statistics being maintained
2017 2017 // in the free list space. We do so by reading and clearing
2018 2018 // the sweep timer and updating the block flux rate estimates below.
2019 2019 assert(!_intra_sweep_timer.is_active(), "_intra_sweep_timer should be inactive");
2020 2020 if (_inter_sweep_timer.is_active()) {
2021 2021 _inter_sweep_timer.stop();
2022 2022 // Note that we do not use this sample to update the _inter_sweep_estimate.
2023 2023 _cmsGen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
2024 2024 _inter_sweep_estimate.padded_average(),
2025 2025 _intra_sweep_estimate.padded_average());
2026 2026 }
2027 2027
2028 2028 GenMarkSweep::invoke_at_safepoint(_cmsGen->level(),
2029 2029 ref_processor(), clear_all_soft_refs);
2030 2030 #ifdef ASSERT
2031 2031 CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace();
2032 2032 size_t free_size = cms_space->free();
2033 2033 assert(free_size ==
2034 2034 pointer_delta(cms_space->end(), cms_space->compaction_top())
2035 2035 * HeapWordSize,
2036 2036 "All the free space should be compacted into one chunk at top");
2037 2037 assert(cms_space->dictionary()->totalChunkSize(
2038 2038 debug_only(cms_space->freelistLock())) == 0 ||
2039 2039 cms_space->totalSizeInIndexedFreeLists() == 0,
2040 2040 "All the free space should be in a single chunk");
2041 2041 size_t num = cms_space->totalCount();
2042 2042 assert((free_size == 0 && num == 0) ||
2043 2043 (free_size > 0 && (num == 1 || num == 2)),
2044 2044 "There should be at most 2 free chunks after compaction");
2045 2045 #endif // ASSERT
2046 2046 _collectorState = Resetting;
2047 2047 assert(_restart_addr == NULL,
2048 2048 "Should have been NULL'd before baton was passed");
2049 2049 reset(false /* == !asynch */);
2050 2050 _cmsGen->reset_after_compaction();
2051 2051 _concurrent_cycles_since_last_unload = 0;
2052 2052
2053 2053 if (verifying() && !should_unload_classes()) {
2054 2054 perm_gen_verify_bit_map()->clear_all();
2055 2055 }
2056 2056
2057 2057 // Clear any data recorded in the PLAB chunk arrays.
2058 2058 if (_survivor_plab_array != NULL) {
2059 2059 reset_survivor_plab_arrays();
2060 2060 }
2061 2061
2062 2062 // Adjust the per-size allocation stats for the next epoch.
2063 2063 _cmsGen->cmsSpace()->endSweepFLCensus(sweep_count() /* fake */);
2064 2064 // Restart the "inter sweep timer" for the next epoch.
2065 2065 _inter_sweep_timer.reset();
2066 2066 _inter_sweep_timer.start();
2067 2067
2068 2068 // Sample collection pause time and reset for collection interval.
2069 2069 if (UseAdaptiveSizePolicy) {
2070 2070 size_policy()->msc_collection_end(gch->gc_cause());
2071 2071 }
2072 2072
2073 2073 // For a mark-sweep-compact, compute_new_size() will be called
2074 2074 // in the heap's do_collection() method.
2075 2075 }
2076 2076
2077 2077 // A work method used by the foreground collector to do
2078 2078 // a mark-sweep, after taking over from a possibly on-going
2079 2079 // concurrent mark-sweep collection.
2080 2080 void CMSCollector::do_mark_sweep_work(bool clear_all_soft_refs,
2081 2081 CollectorState first_state, bool should_start_over) {
2082 2082 if (PrintGC && Verbose) {
2083 2083 gclog_or_tty->print_cr("Pass concurrent collection to foreground "
2084 2084 "collector with count %d",
2085 2085 _full_gcs_since_conc_gc);
2086 2086 }
2087 2087 switch (_collectorState) {
2088 2088 case Idling:
2089 2089 if (first_state == Idling || should_start_over) {
2090 2090         // The background GC was not active, or should be
2091 2091         // restarted from scratch; start the cycle.
2092 2092 _collectorState = InitialMarking;
2093 2093 }
2094 2094 // If first_state was not Idling, then a background GC
2095 2095 // was in progress and has now finished. No need to do it
2096 2096 // again. Leave the state as Idling.
2097 2097 break;
2098 2098 case Precleaning:
2099 2099 // In the foreground case don't do the precleaning since
2100 2100 // it is not done concurrently and there is extra work
2101 2101 // required.
2102 2102 _collectorState = FinalMarking;
2103 2103 }
2104 2104 if (PrintGCDetails &&
2105 2105 (_collectorState > Idling ||
2106 2106 !GCCause::is_user_requested_gc(GenCollectedHeap::heap()->gc_cause()))) {
2107 2107 gclog_or_tty->print(" (concurrent mode failure)");
2108 2108 }
2109 2109 collect_in_foreground(clear_all_soft_refs);
2110 2110
2111 2111 // For a mark-sweep, compute_new_size() will be called
2112 2112 // in the heap's do_collection() method.
2113 2113 }
2114 2114
2115 2115
2116 2116 void CMSCollector::getFreelistLocks() const {
2117 2117 // Get locks for all free lists in all generations that this
2118 2118 // collector is responsible for
2119 2119 _cmsGen->freelistLock()->lock_without_safepoint_check();
2120 2120 _permGen->freelistLock()->lock_without_safepoint_check();
2121 2121 }
2122 2122
2123 2123 void CMSCollector::releaseFreelistLocks() const {
2124 2124 // Release locks for all free lists in all generations that this
2125 2125 // collector is responsible for
2126 2126 _cmsGen->freelistLock()->unlock();
2127 2127 _permGen->freelistLock()->unlock();
2128 2128 }
2129 2129
2130 2130 bool CMSCollector::haveFreelistLocks() const {
2131 2131 // Check locks for all free lists in all generations that this
2132 2132 // collector is responsible for
2133 2133 assert_lock_strong(_cmsGen->freelistLock());
2134 2134 assert_lock_strong(_permGen->freelistLock());
2135 2135 PRODUCT_ONLY(ShouldNotReachHere());
2136 2136 return true;
2137 2137 }
2138 2138
2139 2139 // A utility class that is used by the CMS collector to
2140 2140 // temporarily "release" the foreground collector from its
2141 2141 // usual obligation to wait for the background collector to
2142 2142 // complete an ongoing phase before proceeding.
2143 2143 class ReleaseForegroundGC: public StackObj {
2144 2144 private:
2145 2145 CMSCollector* _c;
2146 2146 public:
2147 2147 ReleaseForegroundGC(CMSCollector* c) : _c(c) {
2148 2148 assert(_c->_foregroundGCShouldWait, "Else should not need to call");
2149 2149 MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
2150 2150 // allow a potentially blocked foreground collector to proceed
2151 2151 _c->_foregroundGCShouldWait = false;
2152 2152 if (_c->_foregroundGCIsActive) {
2153 2153 CGC_lock->notify();
2154 2154 }
2155 2155 assert(!ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
2156 2156 "Possible deadlock");
2157 2157 }
2158 2158
2159 2159 ~ReleaseForegroundGC() {
2160 2160 assert(!_c->_foregroundGCShouldWait, "Usage protocol violation?");
2161 2161 MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
2162 2162 _c->_foregroundGCShouldWait = true;
2163 2163 }
2164 2164 };
2165 2165
2166 2166 // There are separate collect_in_background and collect_in_foreground because of
2167 2167 // the different locking requirements of the background collector and the
2168 2168 // foreground collector. There was originally an attempt to share
2169 2169 // one "collect" method between the background collector and the foreground
2170 2170 // collector but the if-then-else required made it cleaner to have
2171 2171 // separate methods.
2172 2172 void CMSCollector::collect_in_background(bool clear_all_soft_refs) {
2173 2173 assert(Thread::current()->is_ConcurrentGC_thread(),
2174 2174 "A CMS asynchronous collection is only allowed on a CMS thread.");
2175 2175
2176 2176 GenCollectedHeap* gch = GenCollectedHeap::heap();
2177 2177 {
2178 2178 bool safepoint_check = Mutex::_no_safepoint_check_flag;
2179 2179 MutexLockerEx hl(Heap_lock, safepoint_check);
2180 2180 FreelistLocker fll(this);
2181 2181 MutexLockerEx x(CGC_lock, safepoint_check);
2182 2182 if (_foregroundGCIsActive || !UseAsyncConcMarkSweepGC) {
2183 2183 // The foreground collector is active or we're
2184 2184 // not using asynchronous collections. Skip this
2185 2185 // background collection.
2186 2186 assert(!_foregroundGCShouldWait, "Should be clear");
2187 2187 return;
2188 2188 } else {
2189 2189 assert(_collectorState == Idling, "Should be idling before start.");
2190 2190 _collectorState = InitialMarking;
2191 2191 // Reset the expansion cause, now that we are about to begin
2192 2192 // a new cycle.
2193 2193 clear_expansion_cause();
2194 2194 }
2195 2195 // Decide if we want to enable class unloading as part of the
2196 2196 // ensuing concurrent GC cycle.
2197 2197 update_should_unload_classes();
2198 2198 _full_gc_requested = false; // acks all outstanding full gc requests
2199 2199 // Signal that we are about to start a collection
2200 2200 gch->increment_total_full_collections(); // ... starting a collection cycle
2201 2201 _collection_count_start = gch->total_full_collections();
2202 2202 }
2203 2203
2204 2204 // Used for PrintGC
2205 2205 size_t prev_used;
2206 2206 if (PrintGC && Verbose) {
2207 2207 prev_used = _cmsGen->used(); // XXXPERM
2208 2208 }
2209 2209
2210 2210 // The change of the collection state is normally done at this level;
2211 2211 // the exceptions are phases that are executed while the world is
2212 2212 // stopped. For those phases the change of state is done while the
2213 2213 // world is stopped. For baton passing purposes this allows the
2214 2214 // background collector to finish the phase and change state atomically.
2215 2215 // The foreground collector cannot wait on a phase that is done
2216 2216 // while the world is stopped because the foreground collector already
2217 2217 // has the world stopped and would deadlock.
2218 2218 while (_collectorState != Idling) {
2219 2219 if (TraceCMSState) {
2220 2220 gclog_or_tty->print_cr("Thread " INTPTR_FORMAT " in CMS state %d",
2221 2221 Thread::current(), _collectorState);
2222 2222 }
2223 2223 // The foreground collector
2224 2224 // holds the Heap_lock throughout its collection.
2225 2225 // holds the CMS token (but not the lock)
2226 2226 // except while it is waiting for the background collector to yield.
2227 2227 //
2228 2228 // The foreground collector should be blocked (not for long)
2229 2229 // if the background collector is about to start a phase
2230 2230 // executed with world stopped. If the background
2231 2231 // collector has already started such a phase, the
2232 2232 // foreground collector is blocked waiting for the
2233 2233 // Heap_lock. The stop-world phases (InitialMarking and FinalMarking)
2234 2234 // are executed in the VM thread.
2235 2235 //
2236 2236 // The locking order is
2237 2237 // PendingListLock (PLL) -- if applicable (FinalMarking)
2238 2238 // Heap_lock (both this & PLL locked in VM_CMS_Operation::prologue())
2239 2239 // CMS token (claimed in
2240 2240 // stop_world_and_do() -->
2241 2241 // safepoint_synchronize() -->
2242 2242 // CMSThread::synchronize())
2243 2243
2244 2244 {
2245 2245 // Check if the FG collector wants us to yield.
2246 2246 CMSTokenSync x(true); // is cms thread
2247 2247 if (waitForForegroundGC()) {
2248 2248 // We yielded to a foreground GC, nothing more to be
2249 2249 // done this round.
2250 2250 assert(_foregroundGCShouldWait == false, "We set it to false in "
2251 2251 "waitForForegroundGC()");
2252 2252 if (TraceCMSState) {
2253 2253 gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT
2254 2254 " exiting collection CMS state %d",
2255 2255 Thread::current(), _collectorState);
2256 2256 }
2257 2257 return;
2258 2258 } else {
2259 2259 // The background collector can run but check to see if the
2260 2260 // foreground collector has done a collection while the
2261 2261 // background collector was waiting to get the CGC_lock
2262 2262 // above. If yes, break so that _foregroundGCShouldWait
2263 2263 // is cleared before returning.
2264 2264 if (_collectorState == Idling) {
2265 2265 break;
2266 2266 }
2267 2267 }
2268 2268 }
2269 2269
2270 2270 assert(_foregroundGCShouldWait, "Foreground collector, if active, "
2271 2271 "should be waiting");
2272 2272
2273 2273 switch (_collectorState) {
2274 2274 case InitialMarking:
2275 2275 {
2276 2276 ReleaseForegroundGC x(this);
2277 2277 stats().record_cms_begin();
2278 2278
2279 2279 VM_CMS_Initial_Mark initial_mark_op(this);
2280 2280 VMThread::execute(&initial_mark_op);
2281 2281 }
2282 2282 // The collector state may be any legal state at this point
2283 2283 // since the background collector may have yielded to the
2284 2284 // foreground collector.
2285 2285 break;
2286 2286 case Marking:
2287 2287 // initial marking in checkpointRootsInitialWork has been completed
2288 2288 if (markFromRoots(true)) { // we were successful
2289 2289 assert(_collectorState == Precleaning, "Collector state should "
2290 2290 "have changed");
2291 2291 } else {
2292 2292 assert(_foregroundGCIsActive, "Internal state inconsistency");
2293 2293 }
2294 2294 break;
2295 2295 case Precleaning:
2296 2296 if (UseAdaptiveSizePolicy) {
2297 2297 size_policy()->concurrent_precleaning_begin();
2298 2298 }
2299 2299 // marking from roots in markFromRoots has been completed
2300 2300 preclean();
2301 2301 if (UseAdaptiveSizePolicy) {
2302 2302 size_policy()->concurrent_precleaning_end();
2303 2303 }
2304 2304 assert(_collectorState == AbortablePreclean ||
2305 2305 _collectorState == FinalMarking,
2306 2306 "Collector state should have changed");
2307 2307 break;
2308 2308 case AbortablePreclean:
2309 2309 if (UseAdaptiveSizePolicy) {
2310 2310 size_policy()->concurrent_phases_resume();
2311 2311 }
2312 2312 abortable_preclean();
2313 2313 if (UseAdaptiveSizePolicy) {
2314 2314 size_policy()->concurrent_precleaning_end();
2315 2315 }
2316 2316 assert(_collectorState == FinalMarking, "Collector state should "
2317 2317 "have changed");
2318 2318 break;
2319 2319 case FinalMarking:
2320 2320 {
2321 2321 ReleaseForegroundGC x(this);
2322 2322
2323 2323 VM_CMS_Final_Remark final_remark_op(this);
2324 2324 VMThread::execute(&final_remark_op);
2325 2325 }
2326 2326 assert(_foregroundGCShouldWait, "block post-condition");
2327 2327 break;
2328 2328 case Sweeping:
2329 2329 if (UseAdaptiveSizePolicy) {
2330 2330 size_policy()->concurrent_sweeping_begin();
2331 2331 }
2332 2332 // final marking in checkpointRootsFinal has been completed
2333 2333 sweep(true);
2334 2334 assert(_collectorState == Resizing, "Collector state change "
2335 2335 "to Resizing must be done under the free_list_lock");
2336 2336 _full_gcs_since_conc_gc = 0;
2337 2337
2338 2338 // Stop the timers for adaptive size policy for the concurrent phases
2339 2339 if (UseAdaptiveSizePolicy) {
2340 2340 size_policy()->concurrent_sweeping_end();
2341 2341 size_policy()->concurrent_phases_end(gch->gc_cause(),
2342 2342 gch->prev_gen(_cmsGen)->capacity(),
2343 2343 _cmsGen->free());
2344 2344 }
2345 2345
2346 2346 case Resizing: {
2347 2347 // Sweeping has been completed...
2348 2348 // At this point the background collection has completed.
2349 2349 // Don't move the call to compute_new_size() down
2350 2350 // into code that might be executed if the background
2351 2351 // collection was preempted.
2352 2352 {
2353 2353 ReleaseForegroundGC x(this); // unblock FG collection
2354 2354 MutexLockerEx y(Heap_lock, Mutex::_no_safepoint_check_flag);
2355 2355 CMSTokenSync z(true); // not strictly needed.
2356 2356 if (_collectorState == Resizing) {
2357 2357 compute_new_size();
2358 2358 _collectorState = Resetting;
2359 2359 } else {
2360 2360 assert(_collectorState == Idling, "The state should only change"
2361 2361 " because the foreground collector has finished the collection");
2362 2362 }
2363 2363 }
2364 2364 break;
2365 2365 }
2366 2366 case Resetting:
2367 2367 // CMS heap resizing has been completed
2368 2368 reset(true);
2369 2369 assert(_collectorState == Idling, "Collector state should "
2370 2370 "have changed");
2371 2371 stats().record_cms_end();
2372 2372 // Don't move the concurrent_phases_end() and compute_new_size()
2373 2373 // calls to here because a preempted background collection
2374 2374         // has its state set to "Resetting".
2375 2375 break;
2376 2376 case Idling:
2377 2377 default:
2378 2378 ShouldNotReachHere();
2379 2379 break;
2380 2380 }
2381 2381 if (TraceCMSState) {
2382 2382 gclog_or_tty->print_cr(" Thread " INTPTR_FORMAT " done - next CMS state %d",
2383 2383 Thread::current(), _collectorState);
2384 2384 }
2385 2385 assert(_foregroundGCShouldWait, "block post-condition");
2386 2386 }
2387 2387
2388 2388 // Should this be in gc_epilogue?
2389 2389 collector_policy()->counters()->update_counters();
2390 2390
2391 2391 {
2392 2392 // Clear _foregroundGCShouldWait and, in the event that the
2393 2393 // foreground collector is waiting, notify it, before
2394 2394 // returning.
2395 2395 MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
2396 2396 _foregroundGCShouldWait = false;
2397 2397 if (_foregroundGCIsActive) {
2398 2398 CGC_lock->notify();
2399 2399 }
2400 2400 assert(!ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
2401 2401 "Possible deadlock");
2402 2402 }
2403 2403 if (TraceCMSState) {
2404 2404 gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT
2405 2405 " exiting collection CMS state %d",
2406 2406 Thread::current(), _collectorState);
2407 2407 }
2408 2408 if (PrintGC && Verbose) {
2409 2409 _cmsGen->print_heap_change(prev_used);
2410 2410 }
2411 2411 }
2412 2412
2413 2413 void CMSCollector::collect_in_foreground(bool clear_all_soft_refs) {
2414 2414 assert(_foregroundGCIsActive && !_foregroundGCShouldWait,
2415 2415 "Foreground collector should be waiting, not executing");
2416 2416   assert(Thread::current()->is_VM_thread(), "A foreground collection "
2417 2417 "may only be done by the VM Thread with the world stopped");
2418 2418 assert(ConcurrentMarkSweepThread::vm_thread_has_cms_token(),
2419 2419 "VM thread should have CMS token");
2420 2420
2421 2421 NOT_PRODUCT(TraceTime t("CMS:MS (foreground) ", PrintGCDetails && Verbose,
2422 2422 true, gclog_or_tty);)
2423 2423 if (UseAdaptiveSizePolicy) {
2424 2424 size_policy()->ms_collection_begin();
2425 2425 }
2426 2426 COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact);
2427 2427
2428 2428 HandleMark hm; // Discard invalid handles created during verification
2429 2429
2430 2430 if (VerifyBeforeGC &&
2431 2431 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
2432 2432 Universe::verify(true);
2433 2433 }
2434 2434
2435 2435 // Snapshot the soft reference policy to be used in this collection cycle.
2436 2436 ref_processor()->setup_policy(clear_all_soft_refs);
2437 2437
2438 2438 bool init_mark_was_synchronous = false; // until proven otherwise
2439 2439 while (_collectorState != Idling) {
2440 2440 if (TraceCMSState) {
2441 2441 gclog_or_tty->print_cr("Thread " INTPTR_FORMAT " in CMS state %d",
2442 2442 Thread::current(), _collectorState);
2443 2443 }
2444 2444 switch (_collectorState) {
2445 2445 case InitialMarking:
2446 2446 init_mark_was_synchronous = true; // fact to be exploited in re-mark
2447 2447 checkpointRootsInitial(false);
2448 2448 assert(_collectorState == Marking, "Collector state should have changed"
2449 2449 " within checkpointRootsInitial()");
2450 2450 break;
2451 2451 case Marking:
2452 2452 // initial marking in checkpointRootsInitialWork has been completed
2453 2453 if (VerifyDuringGC &&
2454 2454 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
2455 2455 gclog_or_tty->print("Verify before initial mark: ");
2456 2456 Universe::verify(true);
2457 2457 }
2458 2458 {
2459 2459 bool res = markFromRoots(false);
2460 2460 assert(res && _collectorState == FinalMarking, "Collector state should "
2461 2461 "have changed");
2462 2462 break;
2463 2463 }
2464 2464 case FinalMarking:
2465 2465 if (VerifyDuringGC &&
2466 2466 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
2467 2467 gclog_or_tty->print("Verify before re-mark: ");
2468 2468 Universe::verify(true);
2469 2469 }
2470 2470 checkpointRootsFinal(false, clear_all_soft_refs,
2471 2471 init_mark_was_synchronous);
2472 2472 assert(_collectorState == Sweeping, "Collector state should not "
2473 2473 "have changed within checkpointRootsFinal()");
2474 2474 break;
2475 2475 case Sweeping:
2476 2476 // final marking in checkpointRootsFinal has been completed
2477 2477 if (VerifyDuringGC &&
2478 2478 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
2479 2479 gclog_or_tty->print("Verify before sweep: ");
2480 2480 Universe::verify(true);
2481 2481 }
2482 2482 sweep(false);
2483 2483 assert(_collectorState == Resizing, "Incorrect state");
2484 2484 break;
2485 2485 case Resizing: {
2486 2486 // Sweeping has been completed; the actual resize in this case
2487 2487 // is done separately; nothing to be done in this state.
2488 2488 _collectorState = Resetting;
2489 2489 break;
2490 2490 }
2491 2491 case Resetting:
2492 2492 // The heap has been resized.
2493 2493 if (VerifyDuringGC &&
2494 2494 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
2495 2495 gclog_or_tty->print("Verify before reset: ");
2496 2496 Universe::verify(true);
2497 2497 }
2498 2498 reset(false);
2499 2499 assert(_collectorState == Idling, "Collector state should "
2500 2500 "have changed");
2501 2501 break;
2502 2502 case Precleaning:
2503 2503 case AbortablePreclean:
2504 2504 // Elide the preclean phase
2505 2505 _collectorState = FinalMarking;
2506 2506 break;
2507 2507 default:
2508 2508 ShouldNotReachHere();
2509 2509 }
2510 2510 if (TraceCMSState) {
2511 2511 gclog_or_tty->print_cr(" Thread " INTPTR_FORMAT " done - next CMS state %d",
2512 2512 Thread::current(), _collectorState);
2513 2513 }
2514 2514 }
2515 2515
2516 2516 if (UseAdaptiveSizePolicy) {
2517 2517 GenCollectedHeap* gch = GenCollectedHeap::heap();
2518 2518 size_policy()->ms_collection_end(gch->gc_cause());
2519 2519 }
2520 2520
2521 2521 if (VerifyAfterGC &&
2522 2522 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
2523 2523 Universe::verify(true);
2524 2524 }
2525 2525 if (TraceCMSState) {
2526 2526 gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT
2527 2527 " exiting collection CMS state %d",
2528 2528 Thread::current(), _collectorState);
2529 2529 }
2530 2530 }
2531 2531
2532 2532 bool CMSCollector::waitForForegroundGC() {
2533 2533 bool res = false;
2534 2534 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
2535 2535 "CMS thread should have CMS token");
2536 2536 // Block the foreground collector until the
2537 2537 // background collector decides whether to
2538 2538 // yield.
2539 2539 MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
2540 2540 _foregroundGCShouldWait = true;
2541 2541 if (_foregroundGCIsActive) {
2542 2542 // The background collector yields to the
2543 2543 // foreground collector and returns a value
2544 2544 // indicating that it has yielded. The foreground
2545 2545 // collector can proceed.
2546 2546 res = true;
2547 2547 _foregroundGCShouldWait = false;
2548 2548 ConcurrentMarkSweepThread::clear_CMS_flag(
2549 2549 ConcurrentMarkSweepThread::CMS_cms_has_token);
2550 2550 ConcurrentMarkSweepThread::set_CMS_flag(
2551 2551 ConcurrentMarkSweepThread::CMS_cms_wants_token);
2552 2552 // Get a possibly blocked foreground thread going
2553 2553 CGC_lock->notify();
2554 2554 if (TraceCMSState) {
2555 2555 gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT " waiting at CMS state %d",
2556 2556 Thread::current(), _collectorState);
2557 2557 }
2558 2558 while (_foregroundGCIsActive) {
2559 2559 CGC_lock->wait(Mutex::_no_safepoint_check_flag);
2560 2560 }
2561 2561 ConcurrentMarkSweepThread::set_CMS_flag(
2562 2562 ConcurrentMarkSweepThread::CMS_cms_has_token);
2563 2563 ConcurrentMarkSweepThread::clear_CMS_flag(
2564 2564 ConcurrentMarkSweepThread::CMS_cms_wants_token);
2565 2565 }
2566 2566 if (TraceCMSState) {
2567 2567 gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT " continuing at CMS state %d",
2568 2568 Thread::current(), _collectorState);
2569 2569 }
2570 2570 return res;
2571 2571 }
2572 2572
2573 2573 // Because of the need to lock the free lists and other structures in
2574 2574 // the collector, common to all the generations that the collector is
2575 2575 // collecting, we need the gc_prologues of individual CMS generations
2576 2576 // to delegate to their collector. It may have been simpler had the
2577 2577 // current infrastructure allowed one to call a prologue on a
2578 2578 // collector. In the absence of that we have the generation's
2579 2579 // prologue delegate to the collector, which delegates back
2580 2580 // some "local" work to a worker method in the individual generations
2581 2581 // that it's responsible for collecting, while itself doing any
2582 2582 // work common to all generations it's responsible for. A similar
2583 2583 // comment applies to the gc_epilogue()'s.
2584 2584 // The role of the variable _between_prologue_and_epilogue is to
2585 2585 // enforce the invocation protocol.
2586 2586 void CMSCollector::gc_prologue(bool full) {
2587 2587 // Call gc_prologue_work() for each CMSGen and PermGen that
2588 2588 // we are responsible for.
2589 2589
2590 2590 // The following locking discipline assumes that we are only called
2591 2591 // when the world is stopped.
2592 2592 assert(SafepointSynchronize::is_at_safepoint(), "world is stopped assumption");
2593 2593
2594 2594 // The CMSCollector prologue must call the gc_prologues for the
2595 2595 // "generations" (including PermGen if any) that it's responsible
2596 2596 // for.
2597 2597
2598 2598 assert( Thread::current()->is_VM_thread()
2599 2599 || ( CMSScavengeBeforeRemark
2600 2600 && Thread::current()->is_ConcurrentGC_thread()),
2601 2601 "Incorrect thread type for prologue execution");
2602 2602
2603 2603 if (_between_prologue_and_epilogue) {
2604 2604 // We have already been invoked; this is a gc_prologue delegation
2605 2605 // from yet another CMS generation that we are responsible for, just
2606 2606 // ignore it since all relevant work has already been done.
2607 2607 return;
2608 2608 }
2609 2609
2610 2610 // set a bit saying prologue has been called; cleared in epilogue
2611 2611 _between_prologue_and_epilogue = true;
2612 2612 // Claim locks for common data structures, then call gc_prologue_work()
2613 2613 // for each CMSGen and PermGen that we are responsible for.
2614 2614
2615 2615 getFreelistLocks(); // gets free list locks on constituent spaces
2616 2616 bitMapLock()->lock_without_safepoint_check();
2617 2617
2618 2618 // Should call gc_prologue_work() for all cms gens we are responsible for
2619 2619 bool registerClosure = _collectorState >= Marking
2620 2620 && _collectorState < Sweeping;
2621 2621 ModUnionClosure* muc = CollectedHeap::use_parallel_gc_threads() ?
2622 2622 &_modUnionClosurePar
2623 2623 : &_modUnionClosure;
2624 2624 _cmsGen->gc_prologue_work(full, registerClosure, muc);
2625 2625 _permGen->gc_prologue_work(full, registerClosure, muc);
2626 2626
2627 2627 if (!full) {
2628 2628 stats().record_gc0_begin();
2629 2629 }
2630 2630 }
2631 2631
2632 2632 void ConcurrentMarkSweepGeneration::gc_prologue(bool full) {
2633 2633 // Delegate to CMScollector which knows how to coordinate between
2634 2634 // this and any other CMS generations that it is responsible for
2635 2635 // collecting.
2636 2636 collector()->gc_prologue(full);
2637 2637 }
2638 2638
2639 2639 // This is a "private" interface for use by this generation's CMSCollector.
2640 2640 // Not to be called directly by any other entity (for instance,
2641 2641 // GenCollectedHeap, which calls the "public" gc_prologue method above).
2642 2642 void ConcurrentMarkSweepGeneration::gc_prologue_work(bool full,
2643 2643 bool registerClosure, ModUnionClosure* modUnionClosure) {
2644 2644 assert(!incremental_collection_failed(), "Shouldn't be set yet");
2645 2645 assert(cmsSpace()->preconsumptionDirtyCardClosure() == NULL,
2646 2646 "Should be NULL");
2647 2647 if (registerClosure) {
2648 2648 cmsSpace()->setPreconsumptionDirtyCardClosure(modUnionClosure);
2649 2649 }
2650 2650 cmsSpace()->gc_prologue();
2651 2651 // Clear stat counters
2652 2652 NOT_PRODUCT(
2653 2653 assert(_numObjectsPromoted == 0, "check");
2654 2654 assert(_numWordsPromoted == 0, "check");
2655 2655 if (Verbose && PrintGC) {
2656 2656 gclog_or_tty->print("Allocated "SIZE_FORMAT" objects, "
2657 2657 SIZE_FORMAT" bytes concurrently",
2658 2658 _numObjectsAllocated, _numWordsAllocated*sizeof(HeapWord));
2659 2659 }
2660 2660 _numObjectsAllocated = 0;
2661 2661 _numWordsAllocated = 0;
2662 2662 )
2663 2663 }
2664 2664
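// Illustrative sketch (hypothetical, not part of this patch or of the file):
// a minimal, self-contained version of the delegation protocol described in
// the comment above gc_prologue() -- each generation forwards its prologue
// and epilogue to the shared collector, which uses a flag so the common work
// runs exactly once per cycle and then calls back into per-generation worker
// methods. The types below are stand-ins, not HotSpot classes.

#include <vector>

struct Gen;
struct Collector {
  bool _between_prologue_and_epilogue = false;
  std::vector<Gen*> _gens;             // generations this collector owns
  void gc_prologue(bool full);
  void gc_epilogue(bool full);
};

struct Gen {
  Collector* _collector;
  void gc_prologue(bool full)      { _collector->gc_prologue(full); }  // delegate up
  void gc_prologue_work(bool full) { /* per-generation local work */ }
  void gc_epilogue(bool full)      { _collector->gc_epilogue(full); }
  void gc_epilogue_work(bool full) { /* per-generation local work */ }
};

void Collector::gc_prologue(bool full) {
  if (_between_prologue_and_epilogue) return;   // later delegations are no-ops
  _between_prologue_and_epilogue = true;        // cleared again in gc_epilogue
  // ... claim locks on the shared data structures here ...
  for (Gen* g : _gens) g->gc_prologue_work(full);
}

void Collector::gc_epilogue(bool full) {
  if (!_between_prologue_and_epilogue) return;  // later delegations are no-ops
  for (Gen* g : _gens) g->gc_epilogue_work(full);
  // ... release the shared locks here ...
  _between_prologue_and_epilogue = false;       // ready for the next cycle
}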
2665 2665 void CMSCollector::gc_epilogue(bool full) {
2666 2666 // The following locking discipline assumes that we are only called
2667 2667 // when the world is stopped.
2668 2668 assert(SafepointSynchronize::is_at_safepoint(),
2669 2669 "world is stopped assumption");
2670 2670
2671 2671 // Currently the CMS epilogue (see CompactibleFreeListSpace) merely checks
2672 2672 // if linear allocation blocks need to be appropriately marked to allow
2673 2673 // the blocks to be parsable. We also check here whether we need to nudge the
2674 2674 // CMS collector thread to start a new cycle (if it's not already active).
2675 2675 assert( Thread::current()->is_VM_thread()
2676 2676 || ( CMSScavengeBeforeRemark
2677 2677 && Thread::current()->is_ConcurrentGC_thread()),
2678 2678 "Incorrect thread type for epilogue execution");
2679 2679
2680 2680 if (!_between_prologue_and_epilogue) {
2681 2681 // We have already been invoked; this is a gc_epilogue delegation
2682 2682 // from yet another CMS generation that we are responsible for, just
2683 2683 // ignore it since all relevant work has already been done.
2684 2684 return;
2685 2685 }
2686 2686 assert(haveFreelistLocks(), "must have freelist locks");
2687 2687 assert_lock_strong(bitMapLock());
2688 2688
2689 2689 _cmsGen->gc_epilogue_work(full);
2690 2690 _permGen->gc_epilogue_work(full);
2691 2691
2692 2692 if (_collectorState == AbortablePreclean || _collectorState == Precleaning) {
2693 2693 // in case sampling was not already enabled, enable it
2694 2694 _start_sampling = true;
2695 2695 }
2696 2696 // reset _eden_chunk_array so sampling starts afresh
2697 2697 _eden_chunk_index = 0;
2698 2698
2699 2699 size_t cms_used = _cmsGen->cmsSpace()->used();
2700 2700 size_t perm_used = _permGen->cmsSpace()->used();
2701 2701
2702 2702 // update performance counters - this uses a special version of
2703 2703 // update_counters() that allows the utilization to be passed as a
2704 2704 // parameter, avoiding multiple calls to used().
2705 2705 //
2706 2706 _cmsGen->update_counters(cms_used);
2707 2707 _permGen->update_counters(perm_used);
2708 2708
2709 2709 if (CMSIncrementalMode) {
2710 2710 icms_update_allocation_limits();
2711 2711 }
2712 2712
2713 2713 bitMapLock()->unlock();
2714 2714 releaseFreelistLocks();
2715 2715
2716 2716 if (!CleanChunkPoolAsync) {
2717 2717 Chunk::clean_chunk_pool();
2718 2718 }
2719 2719
2720 2720 _between_prologue_and_epilogue = false; // ready for next cycle
2721 2721 }
2722 2722
2723 2723 void ConcurrentMarkSweepGeneration::gc_epilogue(bool full) {
2724 2724 collector()->gc_epilogue(full);
2725 2725
2726 2726 // Also reset promotion tracking in par gc thread states.
2727 2727 if (CollectedHeap::use_parallel_gc_threads()) {
2728 2728 for (uint i = 0; i < ParallelGCThreads; i++) {
2729 2729 _par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
2730 2730 }
2731 2731 }
2732 2732 }
2733 2733
2734 2734 void ConcurrentMarkSweepGeneration::gc_epilogue_work(bool full) {
2735 2735 assert(!incremental_collection_failed(), "Should have been cleared");
2736 2736 cmsSpace()->setPreconsumptionDirtyCardClosure(NULL);
2737 2737 cmsSpace()->gc_epilogue();
2738 2738 // Print stat counters
2739 2739 NOT_PRODUCT(
2740 2740 assert(_numObjectsAllocated == 0, "check");
2741 2741 assert(_numWordsAllocated == 0, "check");
2742 2742 if (Verbose && PrintGC) {
2743 2743 gclog_or_tty->print("Promoted "SIZE_FORMAT" objects, "
2744 2744 SIZE_FORMAT" bytes",
2745 2745 _numObjectsPromoted, _numWordsPromoted*sizeof(HeapWord));
2746 2746 }
2747 2747 _numObjectsPromoted = 0;
2748 2748 _numWordsPromoted = 0;
2749 2749 )
2750 2750
2751 2751 if (PrintGC && Verbose) {
2752 2752 // The call down the chain in contiguous_available() needs the freelistLock,
2753 2753 // so print this out before releasing the freelistLock.
2754 2754 gclog_or_tty->print(" Contiguous available "SIZE_FORMAT" bytes ",
2755 2755 contiguous_available());
2756 2756 }
2757 2757 }
2758 2758
2759 2759 #ifndef PRODUCT
2760 2760 bool CMSCollector::have_cms_token() {
2761 2761 Thread* thr = Thread::current();
2762 2762 if (thr->is_VM_thread()) {
2763 2763 return ConcurrentMarkSweepThread::vm_thread_has_cms_token();
2764 2764 } else if (thr->is_ConcurrentGC_thread()) {
2765 2765 return ConcurrentMarkSweepThread::cms_thread_has_cms_token();
2766 2766 } else if (thr->is_GC_task_thread()) {
2767 2767 return ConcurrentMarkSweepThread::vm_thread_has_cms_token() &&
2768 2768 ParGCRareEvent_lock->owned_by_self();
2769 2769 }
2770 2770 return false;
2771 2771 }
2772 2772 #endif
2773 2773
2774 2774 // Check reachability of the given heap address in CMS generation,
2775 2775 // treating all other generations as roots.
2776 2776 bool CMSCollector::is_cms_reachable(HeapWord* addr) {
2777 2777 // We could "guarantee" below, rather than assert, but I'll
2778 2778 // leave these as "asserts" so that an adventurous debugger
2779 2779 // could try this in the product build provided some subset of
2780 2780 // the conditions were met, assuming they were interested in the
2781 2781 // results and knew that the computation below wouldn't interfere
2782 2782 // with other concurrent computations mutating the structures
2783 2783 // being read or written.
2784 2784 assert(SafepointSynchronize::is_at_safepoint(),
2785 2785 "Else mutations in object graph will make answer suspect");
2786 2786 assert(have_cms_token(), "Should hold cms token");
2787 2787 assert(haveFreelistLocks(), "must hold free list locks");
2788 2788 assert_lock_strong(bitMapLock());
2789 2789
2790 2790 // Clear the marking bit map array before starting, but, just
2791 2791 // for kicks, first report if the given address is already marked
2792 2792 gclog_or_tty->print_cr("Start: Address 0x%x is%s marked", addr,
2793 2793 _markBitMap.isMarked(addr) ? "" : " not");
2794 2794
2795 2795 if (verify_after_remark()) {
2796 2796 MutexLockerEx x(verification_mark_bm()->lock(), Mutex::_no_safepoint_check_flag);
2797 2797 bool result = verification_mark_bm()->isMarked(addr);
2798 2798 gclog_or_tty->print_cr("TransitiveMark: Address 0x%x %s marked", addr,
2799 2799 result ? "IS" : "is NOT");
2800 2800 return result;
2801 2801 } else {
2802 2802 gclog_or_tty->print_cr("Could not compute result");
2803 2803 return false;
2804 2804 }
2805 2805 }
2806 2806
2807 2807 ////////////////////////////////////////////////////////
2808 2808 // CMS Verification Support
2809 2809 ////////////////////////////////////////////////////////
2810 2810 // Following the remark phase, the following invariant
2811 2811 // should hold -- each object in the CMS heap which is
2812 2812 // marked in the verification_mark_bm() should also be marked in markBitMap().
2813 2813
2814 2814 class VerifyMarkedClosure: public BitMapClosure {
2815 2815 CMSBitMap* _marks;
2816 2816 bool _failed;
2817 2817
2818 2818 public:
2819 2819 VerifyMarkedClosure(CMSBitMap* bm): _marks(bm), _failed(false) {}
2820 2820
2821 2821 bool do_bit(size_t offset) {
2822 2822 HeapWord* addr = _marks->offsetToHeapWord(offset);
2823 2823 if (!_marks->isMarked(addr)) {
2824 2824 oop(addr)->print_on(gclog_or_tty);
2825 2825 gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr);
2826 2826 _failed = true;
2827 2827 }
2828 2828 return true;
2829 2829 }
2830 2830
2831 2831 bool failed() { return _failed; }
2832 2832 };
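// For illustration only (not part of this patch): a standalone analogue of
// the subset check VerifyMarkedClosure performs, using std::bitset in place
// of CMSBitMap -- every bit set in the verification map must also be set in
// the CMS mark bit map.

#include <bitset>
#include <cstdio>

// Returns true iff 'verification' is a subset of 'cms_marks', reporting each
// violation much as VerifyMarkedClosure does for a missing mark.
template <size_t N>
bool verification_is_subset(const std::bitset<N>& verification,
                            const std::bitset<N>& cms_marks) {
  bool failed = false;
  for (size_t i = 0; i < N; i++) {
    if (verification.test(i) && !cms_marks.test(i)) {
      std::printf("bit %zu should have been marked\n", i);
      failed = true;
    }
  }
  return !failed;
}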
2833 2833
2834 2834 bool CMSCollector::verify_after_remark() {
2835 2835 gclog_or_tty->print(" [Verifying CMS Marking... ");
2836 2836 MutexLockerEx ml(verification_mark_bm()->lock(), Mutex::_no_safepoint_check_flag);
2837 2837 static bool init = false;
2838 2838
2839 2839 assert(SafepointSynchronize::is_at_safepoint(),
2840 2840 "Else mutations in object graph will make answer suspect");
2841 2841 assert(have_cms_token(),
2842 2842 "Else there may be mutual interference in use of "
2843 2843 " verification data structures");
2844 2844 assert(_collectorState > Marking && _collectorState <= Sweeping,
2845 2845 "Else marking info checked here may be obsolete");
2846 2846 assert(haveFreelistLocks(), "must hold free list locks");
2847 2847 assert_lock_strong(bitMapLock());
2848 2848
2849 2849
2850 2850 // Allocate marking bit map if not already allocated
2851 2851 if (!init) { // first time
2852 2852 if (!verification_mark_bm()->allocate(_span)) {
2853 2853 return false;
2854 2854 }
2855 2855 init = true;
2856 2856 }
2857 2857
2858 2858 assert(verification_mark_stack()->isEmpty(), "Should be empty");
2859 2859
2860 2860 // Turn off refs discovery -- so we will be tracing through refs.
2861 2861 // This is as intended, because by this time
2862 2862 // GC must already have cleared any refs that need to be cleared,
2863 2863 // and traced those that need to be marked; moreover,
2864 2864 // the marking done here is not going to interfere in any
2865 2865 // way with the marking information used by GC.
2866 2866 NoRefDiscovery no_discovery(ref_processor());
2867 2867
2868 2868 COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;)
2869 2869
2870 2870 // Clear any marks from a previous round
2871 2871 verification_mark_bm()->clear_all();
2872 2872 assert(verification_mark_stack()->isEmpty(), "markStack should be empty");
2873 2873 verify_work_stacks_empty();
2874 2874
2875 2875 GenCollectedHeap* gch = GenCollectedHeap::heap();
2876 2876 gch->ensure_parsability(false); // fill TLABs, but no need to retire them
2877 2877 // Update the saved marks which may affect the root scans.
2878 2878 gch->save_marks();
2879 2879
2880 2880 if (CMSRemarkVerifyVariant == 1) {
2881 2881 // In this first variant of verification, we complete
2882 2882 // all marking, then check if the new marks-vector is
2883 2883 // a subset of the CMS marks-vector.
2884 2884 verify_after_remark_work_1();
2885 2885 } else if (CMSRemarkVerifyVariant == 2) {
2886 2886 // In this second variant of verification, we flag an error
2887 2887 // (i.e. an object reachable in the new marks-vector not reachable
2888 2888 // in the CMS marks-vector) immediately, also indicating the
2889 2889 // identity of an object (A) that references the unmarked object (B) --
2890 2890 // presumably, a mutation to A failed to be picked up by preclean/remark?
2891 2891 verify_after_remark_work_2();
2892 2892 } else {
2893 2893 warning("Unrecognized value %d for CMSRemarkVerifyVariant",
2894 2894 CMSRemarkVerifyVariant);
2895 2895 }
2896 2896 gclog_or_tty->print(" done] ");
2897 2897 return true;
2898 2898 }
2899 2899
2900 2900 void CMSCollector::verify_after_remark_work_1() {
2901 2901 ResourceMark rm;
2902 2902 HandleMark hm;
2903 2903 GenCollectedHeap* gch = GenCollectedHeap::heap();
2904 2904
2905 2905 // Mark from roots one level into CMS
2906 2906 MarkRefsIntoClosure notOlder(_span, verification_mark_bm());
2907 2907 gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
2908 2908
2909 2909 gch->gen_process_strong_roots(_cmsGen->level(),
2910 2910 true, // younger gens are roots
2911 2911 true, // activate StrongRootsScope
2912 2912 true, // collecting perm gen
2913 2913 SharedHeap::ScanningOption(roots_scanning_options()),
2914 2914 &notOlder,
2915 2915 true, // walk code active on stacks
2916 2916 NULL);
2917 2917
2918 2918 // Now mark from the roots
2919 2919 assert(_revisitStack.isEmpty(), "Should be empty");
2920 2920 MarkFromRootsClosure markFromRootsClosure(this, _span,
2921 2921 verification_mark_bm(), verification_mark_stack(), &_revisitStack,
2922 2922 false /* don't yield */, true /* verifying */);
2923 2923 assert(_restart_addr == NULL, "Expected pre-condition");
2924 2924 verification_mark_bm()->iterate(&markFromRootsClosure);
2925 2925 while (_restart_addr != NULL) {
2926 2926 // Deal with stack overflow: by restarting at the indicated
2927 2927 // address.
2928 2928 HeapWord* ra = _restart_addr;
2929 2929 markFromRootsClosure.reset(ra);
2930 2930 _restart_addr = NULL;
2931 2931 verification_mark_bm()->iterate(&markFromRootsClosure, ra, _span.end());
2932 2932 }
2933 2933 assert(verification_mark_stack()->isEmpty(), "Should have been drained");
2934 2934 verify_work_stacks_empty();
2935 2935 // Should reset the revisit stack above, since no class tree
2936 2936 // surgery is forthcoming.
2937 2937 _revisitStack.reset(); // throwing away all contents
2938 2938
2939 2939 // Marking completed -- now verify that each bit marked in
2940 2940 // verification_mark_bm() is also marked in markBitMap(); flag all
2941 2941 // errors by printing corresponding objects.
2942 2942 VerifyMarkedClosure vcl(markBitMap());
2943 2943 verification_mark_bm()->iterate(&vcl);
2944 2944 if (vcl.failed()) {
2945 2945 gclog_or_tty->print("Verification failed");
2946 2946 Universe::heap()->print_on(gclog_or_tty);
2947 2947 fatal("CMS: failed marking verification after remark");
2948 2948 }
2949 2949 }
2950 2950
2951 2951 void CMSCollector::verify_after_remark_work_2() {
2952 2952 ResourceMark rm;
2953 2953 HandleMark hm;
2954 2954 GenCollectedHeap* gch = GenCollectedHeap::heap();
2955 2955
2956 2956 // Mark from roots one level into CMS
2957 2957 MarkRefsIntoVerifyClosure notOlder(_span, verification_mark_bm(),
2958 2958 markBitMap());
2959 2959 gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
2960 2960 gch->gen_process_strong_roots(_cmsGen->level(),
2961 2961 true, // younger gens are roots
2962 2962 true, // activate StrongRootsScope
2963 2963 true, // collecting perm gen
2964 2964 SharedHeap::ScanningOption(roots_scanning_options()),
2965 2965 &notOlder,
2966 2966 true, // walk code active on stacks
2967 2967 NULL);
2968 2968
2969 2969 // Now mark from the roots
2970 2970 assert(_revisitStack.isEmpty(), "Should be empty");
2971 2971 MarkFromRootsVerifyClosure markFromRootsClosure(this, _span,
2972 2972 verification_mark_bm(), markBitMap(), verification_mark_stack());
2973 2973 assert(_restart_addr == NULL, "Expected pre-condition");
2974 2974 verification_mark_bm()->iterate(&markFromRootsClosure);
2975 2975 while (_restart_addr != NULL) {
2976 2976 // Deal with stack overflow: by restarting at the indicated
2977 2977 // address.
2978 2978 HeapWord* ra = _restart_addr;
2979 2979 markFromRootsClosure.reset(ra);
2980 2980 _restart_addr = NULL;
2981 2981 verification_mark_bm()->iterate(&markFromRootsClosure, ra, _span.end());
2982 2982 }
2983 2983 assert(verification_mark_stack()->isEmpty(), "Should have been drained");
2984 2984 verify_work_stacks_empty();
2985 2985 // Should reset the revisit stack above, since no class tree
2986 2986 // surgery is forthcoming.
2987 2987 _revisitStack.reset(); // throwing away all contents
2988 2988
2989 2989 // Marking completed -- now verify that each bit marked in
2990 2990 // verification_mark_bm() is also marked in markBitMap(); flag all
2991 2991 // errors by printing corresponding objects.
2992 2992 VerifyMarkedClosure vcl(markBitMap());
2993 2993 verification_mark_bm()->iterate(&vcl);
2994 2994 assert(!vcl.failed(), "Else verification above should not have succeeded");
2995 2995 }
2996 2996
2997 2997 void ConcurrentMarkSweepGeneration::save_marks() {
2998 2998 // delegate to CMS space
2999 2999 cmsSpace()->save_marks();
3000 3000 for (uint i = 0; i < ParallelGCThreads; i++) {
3001 3001 _par_gc_thread_states[i]->promo.startTrackingPromotions();
3002 3002 }
3003 3003 }
3004 3004
3005 3005 bool ConcurrentMarkSweepGeneration::no_allocs_since_save_marks() {
3006 3006 return cmsSpace()->no_allocs_since_save_marks();
3007 3007 }
3008 3008
3009 3009 #define CMS_SINCE_SAVE_MARKS_DEFN(OopClosureType, nv_suffix) \
3010 3010 \
3011 3011 void ConcurrentMarkSweepGeneration:: \
3012 3012 oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl) { \
3013 3013 cl->set_generation(this); \
3014 3014 cmsSpace()->oop_since_save_marks_iterate##nv_suffix(cl); \
3015 3015 cl->reset_generation(); \
3016 3016 save_marks(); \
3017 3017 }
3018 3018
3019 3019 ALL_SINCE_SAVE_MARKS_CLOSURES(CMS_SINCE_SAVE_MARKS_DEFN)
3020 3020
3021 3021 void
3022 3022 ConcurrentMarkSweepGeneration::object_iterate_since_last_GC(ObjectClosure* blk)
3023 3023 {
3024 3024 // Not currently implemented; need to do the following. -- ysr.
3025 3025 // dld -- I think that is used for some sort of allocation profiler. So it
3026 3026 // really means the objects allocated by the mutator since the last
3027 3027 // GC. We could potentially implement this cheaply by recording only
3028 3028 // the direct allocations in a side data structure.
3029 3029 //
3030 3030 // I think we probably ought not to be required to support these
3031 3031 // iterations at any arbitrary point; I think there ought to be some
3032 3032 // call to enable/disable allocation profiling in a generation/space,
3033 3033 // and the iterator ought to return the objects allocated in the
3034 3034 // gen/space since the enable call, or the last iterator call (which
3035 3035 // will probably be at a GC.) That way, for gens like CM&S that would
3036 3036 // require some extra data structure to support this, we only pay the
3037 3037 // cost when it's in use...
3038 3038 cmsSpace()->object_iterate_since_last_GC(blk);
3039 3039 }
3040 3040
3041 3041 void
3042 3042 ConcurrentMarkSweepGeneration::younger_refs_iterate(OopsInGenClosure* cl) {
3043 3043 cl->set_generation(this);
3044 3044 younger_refs_in_space_iterate(_cmsSpace, cl);
3045 3045 cl->reset_generation();
3046 3046 }
3047 3047
3048 3048 void
3049 3049 ConcurrentMarkSweepGeneration::oop_iterate(MemRegion mr, OopClosure* cl) {
3050 3050 if (freelistLock()->owned_by_self()) {
3051 3051 Generation::oop_iterate(mr, cl);
3052 3052 } else {
3053 3053 MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
3054 3054 Generation::oop_iterate(mr, cl);
3055 3055 }
3056 3056 }
3057 3057
3058 3058 void
3059 3059 ConcurrentMarkSweepGeneration::oop_iterate(OopClosure* cl) {
3060 3060 if (freelistLock()->owned_by_self()) {
3061 3061 Generation::oop_iterate(cl);
3062 3062 } else {
3063 3063 MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
3064 3064 Generation::oop_iterate(cl);
3065 3065 }
3066 3066 }
3067 3067
3068 3068 void
3069 3069 ConcurrentMarkSweepGeneration::object_iterate(ObjectClosure* cl) {
3070 3070 if (freelistLock()->owned_by_self()) {
3071 3071 Generation::object_iterate(cl);
3072 3072 } else {
3073 3073 MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
3074 3074 Generation::object_iterate(cl);
3075 3075 }
3076 3076 }
3077 3077
3078 3078 void
3079 3079 ConcurrentMarkSweepGeneration::safe_object_iterate(ObjectClosure* cl) {
3080 3080 if (freelistLock()->owned_by_self()) {
3081 3081 Generation::safe_object_iterate(cl);
3082 3082 } else {
3083 3083 MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
3084 3084 Generation::safe_object_iterate(cl);
3085 3085 }
3086 3086 }
3087 3087
3088 3088 void
3089 3089 ConcurrentMarkSweepGeneration::pre_adjust_pointers() {
3090 3090 }
3091 3091
3092 3092 void
3093 3093 ConcurrentMarkSweepGeneration::post_compact() {
3094 3094 }
3095 3095
3096 3096 void
3097 3097 ConcurrentMarkSweepGeneration::prepare_for_verify() {
3098 3098 // Fix the linear allocation blocks to look like free blocks.
3099 3099
3100 3100 // Locks are normally acquired/released in gc_prologue/gc_epilogue, but those
3101 3101 // are not called when the heap is verified during universe initialization and
3102 3102 // at vm shutdown.
3103 3103 if (freelistLock()->owned_by_self()) {
3104 3104 cmsSpace()->prepare_for_verify();
3105 3105 } else {
3106 3106 MutexLockerEx fll(freelistLock(), Mutex::_no_safepoint_check_flag);
3107 3107 cmsSpace()->prepare_for_verify();
3108 3108 }
3109 3109 }
3110 3110
3111 3111 void
3112 3112 ConcurrentMarkSweepGeneration::verify(bool allow_dirty /* ignored */) {
3113 3113 // Locks are normally acquired/released in gc_prologue/gc_epilogue, but those
3114 3114 // are not called when the heap is verified during universe initialization and
3115 3115 // at vm shutdown.
3116 3116 if (freelistLock()->owned_by_self()) {
3117 3117 cmsSpace()->verify(false /* ignored */);
3118 3118 } else {
3119 3119 MutexLockerEx fll(freelistLock(), Mutex::_no_safepoint_check_flag);
3120 3120 cmsSpace()->verify(false /* ignored */);
3121 3121 }
3122 3122 }
3123 3123
3124 3124 void CMSCollector::verify(bool allow_dirty /* ignored */) {
3125 3125 _cmsGen->verify(allow_dirty);
3126 3126 _permGen->verify(allow_dirty);
3127 3127 }
3128 3128
3129 3129 #ifndef PRODUCT
3130 3130 bool CMSCollector::overflow_list_is_empty() const {
3131 3131 assert(_num_par_pushes >= 0, "Inconsistency");
3132 3132 if (_overflow_list == NULL) {
3133 3133 assert(_num_par_pushes == 0, "Inconsistency");
3134 3134 }
3135 3135 return _overflow_list == NULL;
3136 3136 }
3137 3137
3138 3138 // The methods verify_work_stacks_empty() and verify_overflow_empty()
3139 3139 // merely consolidate assertion checks that appear to occur together frequently.
3140 3140 void CMSCollector::verify_work_stacks_empty() const {
3141 3141 assert(_markStack.isEmpty(), "Marking stack should be empty");
3142 3142 assert(overflow_list_is_empty(), "Overflow list should be empty");
3143 3143 }
3144 3144
3145 3145 void CMSCollector::verify_overflow_empty() const {
3146 3146 assert(overflow_list_is_empty(), "Overflow list should be empty");
3147 3147 assert(no_preserved_marks(), "No preserved marks");
3148 3148 }
3149 3149 #endif // PRODUCT
3150 3150
3151 3151 // Decide if we want to enable class unloading as part of the
3152 3152 // ensuing concurrent GC cycle. We will collect the perm gen and
3153 3153 // unload classes if it's the case that:
3154 3154 // (1) an explicit gc request has been made and the flag
3155 3155 // ExplicitGCInvokesConcurrentAndUnloadsClasses is set, OR
3156 3156 // (2) (a) class unloading is enabled at the command line, and
3157 3157 // (b) (i) perm gen threshold has been crossed, or
3158 3158 // (ii) old gen is getting really full, or
3159 3159 // (iii) the previous N CMS collections did not collect the
3160 3160 // perm gen
3161 3161 // NOTE: Provided there is no change in the state of the heap between
3162 3162 // calls to this method, it should have idempotent results. Moreover,
3163 3163 // its results should be monotonically increasing (i.e. going from 0 to 1,
3164 3164 // but not 1 to 0) between successive calls between which the heap was
3165 3165 // not collected. For the implementation below, it must thus rely on
3166 3166 // the property that concurrent_cycles_since_last_unload()
3167 3167 // will not decrease unless a collection cycle happened and that
3168 3168 // _permGen->should_concurrent_collect() and _cmsGen->is_too_full() are
3169 3169 // themselves also monotonic in that sense. See check_monotonicity()
3170 3170 // below.
3171 3171 bool CMSCollector::update_should_unload_classes() {
3172 3172 _should_unload_classes = false;
3173 3173 // Condition 1 above
3174 3174 if (_full_gc_requested && ExplicitGCInvokesConcurrentAndUnloadsClasses) {
3175 3175 _should_unload_classes = true;
3176 3176 } else if (CMSClassUnloadingEnabled) { // Condition 2.a above
3177 3177 // Disjuncts 2.b.(i,ii,iii) above
3178 3178 _should_unload_classes = (concurrent_cycles_since_last_unload() >=
3179 3179 CMSClassUnloadingMaxInterval)
3180 3180 || _permGen->should_concurrent_collect()
3181 3181 || _cmsGen->is_too_full();
3182 3182 }
3183 3183 return _should_unload_classes;
3184 3184 }
3185 3185
3186 3186 bool ConcurrentMarkSweepGeneration::is_too_full() const {
3187 3187 bool res = should_concurrent_collect();
3188 3188 res = res && (occupancy() > (double)CMSIsTooFullPercentage/100.0);
3189 3189 return res;
3190 3190 }
3191 3191
3192 3192 void CMSCollector::setup_cms_unloading_and_verification_state() {
3193 3193 const bool should_verify = VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
3194 3194 || VerifyBeforeExit;
3195 3195 const int rso = SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
3196 3196
3197 3197 if (should_unload_classes()) { // Should unload classes this cycle
3198 3198 remove_root_scanning_option(rso); // Shrink the root set appropriately
3199 3199 set_verifying(should_verify); // Set verification state for this cycle
3200 3200 return; // Nothing else needs to be done at this time
3201 3201 }
3202 3202
3203 3203 // Not unloading classes this cycle
3204 3204 assert(!should_unload_classes(), "Inconsistency!");
3205 3205 if ((!verifying() || unloaded_classes_last_cycle()) && should_verify) {
3206 3206 // We were not verifying, or we _were_ unloading classes in the last cycle,
3207 3207 // AND some verification options are enabled this cycle; in this case,
3208 3208 // we must make sure that the deadness map is allocated if not already so,
3209 3209 // and cleared (if already allocated previously --
3210 3210 // CMSBitMap::sizeInBits() is used to determine if it's allocated).
3211 3211 if (perm_gen_verify_bit_map()->sizeInBits() == 0) {
3212 3212 if (!perm_gen_verify_bit_map()->allocate(_permGen->reserved())) {
3213 3213 warning("Failed to allocate permanent generation verification CMS Bit Map;\n"
3214 3214 "permanent generation verification disabled");
3215 3215 return; // Note that we leave verification disabled, so we'll retry this
3216 3216 // allocation next cycle. We _could_ remember this failure
3217 3217 // and skip further attempts and permanently disable verification
3218 3218 // attempts if that is considered more desirable.
3219 3219 }
3220 3220 assert(perm_gen_verify_bit_map()->covers(_permGen->reserved()),
3221 3221 "_perm_gen_ver_bit_map inconsistency?");
3222 3222 } else {
3223 3223 perm_gen_verify_bit_map()->clear_all();
3224 3224 }
3225 3225 // Include symbols, strings and code cache elements to prevent their resurrection.
3226 3226 add_root_scanning_option(rso);
3227 3227 set_verifying(true);
3228 3228 } else if (verifying() && !should_verify) {
3229 3229 // We were verifying, but some verification flags got disabled.
3230 3230 set_verifying(false);
3231 3231 // Exclude symbols, strings and code cache elements from root scanning to
3232 3232 // reduce IM and RM pauses.
3233 3233 remove_root_scanning_option(rso);
3234 3234 }
3235 3235 }
3236 3236
3237 3237
3238 3238 #ifndef PRODUCT
3239 3239 HeapWord* CMSCollector::block_start(const void* p) const {
3240 3240 const HeapWord* addr = (HeapWord*)p;
3241 3241 if (_span.contains(p)) {
3242 3242 if (_cmsGen->cmsSpace()->is_in_reserved(addr)) {
3243 3243 return _cmsGen->cmsSpace()->block_start(p);
3244 3244 } else {
3245 3245 assert(_permGen->cmsSpace()->is_in_reserved(addr),
3246 3246 "Inconsistent _span?");
3247 3247 return _permGen->cmsSpace()->block_start(p);
3248 3248 }
3249 3249 }
3250 3250 return NULL;
3251 3251 }
3252 3252 #endif
3253 3253
3254 3254 HeapWord*
3255 3255 ConcurrentMarkSweepGeneration::expand_and_allocate(size_t word_size,
3256 3256 bool tlab,
3257 3257 bool parallel) {
3258 3258 CMSSynchronousYieldRequest yr;
3259 3259 assert(!tlab, "Can't deal with TLAB allocation");
3260 3260 MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
3261 3261 expand(word_size*HeapWordSize, MinHeapDeltaBytes,
3262 3262 CMSExpansionCause::_satisfy_allocation);
3263 3263 if (GCExpandToAllocateDelayMillis > 0) {
3264 3264 os::sleep(Thread::current(), GCExpandToAllocateDelayMillis, false);
3265 3265 }
3266 3266 return have_lock_and_allocate(word_size, tlab);
3267 3267 }
3268 3268
3269 3269 // YSR: All of this generation expansion/shrinking stuff is an exact copy of
3270 3270 // OneContigSpaceCardGeneration, which makes me wonder if we should move this
3271 3271 // to CardGeneration and share it...
3272 3272 bool ConcurrentMarkSweepGeneration::expand(size_t bytes, size_t expand_bytes) {
3273 3273 return CardGeneration::expand(bytes, expand_bytes);
3274 3274 }
3275 3275
3276 3276 void ConcurrentMarkSweepGeneration::expand(size_t bytes, size_t expand_bytes,
3277 3277 CMSExpansionCause::Cause cause)
3278 3278 {
3279 3279
3280 3280 bool success = expand(bytes, expand_bytes);
3281 3281
3282 3282 // remember why we expanded; this information is used
3283 3283 // by shouldConcurrentCollect() when making decisions on whether to start
3284 3284 // a new CMS cycle.
3285 3285 if (success) {
3286 3286 set_expansion_cause(cause);
3287 3287 if (PrintGCDetails && Verbose) {
3288 3288 gclog_or_tty->print_cr("Expanded CMS gen for %s",
3289 3289 CMSExpansionCause::to_string(cause));
3290 3290 }
3291 3291 }
3292 3292 }
3293 3293
3294 3294 HeapWord* ConcurrentMarkSweepGeneration::expand_and_par_lab_allocate(CMSParGCThreadState* ps, size_t word_sz) {
3295 3295 HeapWord* res = NULL;
3296 3296 MutexLocker x(ParGCRareEvent_lock);
3297 3297 while (true) {
3298 3298 // Expansion by some other thread might make alloc OK now:
3299 3299 res = ps->lab.alloc(word_sz);
3300 3300 if (res != NULL) return res;
3301 3301 // If there's not enough expansion space available, give up.
3302 3302 if (_virtual_space.uncommitted_size() < (word_sz * HeapWordSize)) {
3303 3303 return NULL;
3304 3304 }
3305 3305 // Otherwise, we try expansion.
3306 3306 expand(word_sz*HeapWordSize, MinHeapDeltaBytes,
3307 3307 CMSExpansionCause::_allocate_par_lab);
3308 3308 // Now go around the loop and try alloc again;
3309 3309 // A competing par_promote might beat us to the expansion space,
3310 3310 // so we may go around the loop again if promotion fails again.
3311 3311 if (GCExpandToAllocateDelayMillis > 0) {
3312 3312 os::sleep(Thread::current(), GCExpandToAllocateDelayMillis, false);
3313 3313 }
3314 3314 }
3315 3315 }
3316 3316
3317 3317
3318 3318 bool ConcurrentMarkSweepGeneration::expand_and_ensure_spooling_space(
3319 3319 PromotionInfo* promo) {
3320 3320 MutexLocker x(ParGCRareEvent_lock);
3321 3321 size_t refill_size_bytes = promo->refillSize() * HeapWordSize;
3322 3322 while (true) {
3323 3323 // Expansion by some other thread might make alloc OK now:
3324 3324 if (promo->ensure_spooling_space()) {
3325 3325 assert(promo->has_spooling_space(),
3326 3326 "Post-condition of successful ensure_spooling_space()");
3327 3327 return true;
3328 3328 }
3329 3329 // If there's not enough expansion space available, give up.
3330 3330 if (_virtual_space.uncommitted_size() < refill_size_bytes) {
3331 3331 return false;
3332 3332 }
3333 3333 // Otherwise, we try expansion.
3334 3334 expand(refill_size_bytes, MinHeapDeltaBytes,
3335 3335 CMSExpansionCause::_allocate_par_spooling_space);
3336 3336 // Now go around the loop and try alloc again;
3337 3337 // A competing allocation might beat us to the expansion space,
3338 3338 // so we may go around the loop again if allocation fails again.
3339 3339 if (GCExpandToAllocateDelayMillis > 0) {
3340 3340 os::sleep(Thread::current(), GCExpandToAllocateDelayMillis, false);
3341 3341 }
3342 3342 }
3343 3343 }
3344 3344
3345 3345
3346 3346
3347 3347 void ConcurrentMarkSweepGeneration::shrink(size_t bytes) {
3348 3348 assert_locked_or_safepoint(Heap_lock);
3349 3349 size_t size = ReservedSpace::page_align_size_down(bytes);
3350 3350 if (size > 0) {
3351 3351 shrink_by(size);
3352 3352 }
3353 3353 }
3354 3354
3355 3355 bool ConcurrentMarkSweepGeneration::grow_by(size_t bytes) {
3356 3356 assert_locked_or_safepoint(Heap_lock);
3357 3357 bool result = _virtual_space.expand_by(bytes);
3358 3358 if (result) {
3359 3359 HeapWord* old_end = _cmsSpace->end();
3360 3360 size_t new_word_size =
3361 3361 heap_word_size(_virtual_space.committed_size());
3362 3362 MemRegion mr(_cmsSpace->bottom(), new_word_size);
3363 3363 _bts->resize(new_word_size); // resize the block offset shared array
3364 3364 Universe::heap()->barrier_set()->resize_covered_region(mr);
3365 3365 // Hmmmm... why doesn't CFLS::set_end verify locking?
3366 3366 // This is quite ugly; FIX ME XXX
3367 3367 _cmsSpace->assert_locked(freelistLock());
3368 3368 _cmsSpace->set_end((HeapWord*)_virtual_space.high());
3369 3369
3370 3370 // update the space and generation capacity counters
3371 3371 if (UsePerfData) {
3372 3372 _space_counters->update_capacity();
3373 3373 _gen_counters->update_all();
3374 3374 }
3375 3375
3376 3376 if (Verbose && PrintGC) {
3377 3377 size_t new_mem_size = _virtual_space.committed_size();
3378 3378 size_t old_mem_size = new_mem_size - bytes;
3379 3379 gclog_or_tty->print_cr("Expanding %s from %ldK by %ldK to %ldK",
3380 3380 name(), old_mem_size/K, bytes/K, new_mem_size/K);
3381 3381 }
3382 3382 }
3383 3383 return result;
3384 3384 }
3385 3385
3386 3386 bool ConcurrentMarkSweepGeneration::grow_to_reserved() {
3387 3387 assert_locked_or_safepoint(Heap_lock);
3388 3388 bool success = true;
3389 3389 const size_t remaining_bytes = _virtual_space.uncommitted_size();
3390 3390 if (remaining_bytes > 0) {
3391 3391 success = grow_by(remaining_bytes);
3392 3392 DEBUG_ONLY(if (!success) warning("grow to reserved failed");)
3393 3393 }
3394 3394 return success;
3395 3395 }
3396 3396
3397 3397 void ConcurrentMarkSweepGeneration::shrink_by(size_t bytes) {
3398 3398 assert_locked_or_safepoint(Heap_lock);
3399 3399 assert_lock_strong(freelistLock());
3400 3400 // XXX Fix when compaction is implemented.
3401 3401 warning("Shrinking of CMS not yet implemented");
3402 3402 return;
3403 3403 }
3404 3404
3405 3405
3406 3406 // Simple ctor/dtor wrapper for accounting & timer chores around concurrent
3407 3407 // phases.
3408 3408 class CMSPhaseAccounting: public StackObj {
3409 3409 public:
3410 3410 CMSPhaseAccounting(CMSCollector *collector,
3411 3411 const char *phase,
3412 3412 bool print_cr = true);
3413 3413 ~CMSPhaseAccounting();
3414 3414
3415 3415 private:
3416 3416 CMSCollector *_collector;
3417 3417 const char *_phase;
3418 3418 elapsedTimer _wallclock;
3419 3419 bool _print_cr;
3420 3420
3421 3421 public:
3422 3422 // Not MT-safe; so do not pass around these StackObj's
3423 3423 // where they may be accessed by other threads.
3424 3424 jlong wallclock_millis() {
3425 3425 assert(_wallclock.is_active(), "Wall clock should not stop");
3426 3426 _wallclock.stop(); // to record time
3427 3427 jlong ret = _wallclock.milliseconds();
3428 3428 _wallclock.start(); // restart
3429 3429 return ret;
3430 3430 }
3431 3431 };
3432 3432
3433 3433 CMSPhaseAccounting::CMSPhaseAccounting(CMSCollector *collector,
3434 3434 const char *phase,
3435 3435 bool print_cr) :
3436 3436 _collector(collector), _phase(phase), _print_cr(print_cr) {
3437 3437
3438 3438 if (PrintCMSStatistics != 0) {
3439 3439 _collector->resetYields();
3440 3440 }
3441 3441 if (PrintGCDetails && PrintGCTimeStamps) {
3442 3442 gclog_or_tty->date_stamp(PrintGCDateStamps);
3443 3443 gclog_or_tty->stamp();
3444 3444 gclog_or_tty->print_cr(": [%s-concurrent-%s-start]",
3445 3445 _collector->cmsGen()->short_name(), _phase);
3446 3446 }
3447 3447 _collector->resetTimer();
3448 3448 _wallclock.start();
3449 3449 _collector->startTimer();
3450 3450 }
3451 3451
3452 3452 CMSPhaseAccounting::~CMSPhaseAccounting() {
3453 3453 assert(_wallclock.is_active(), "Wall clock should not have stopped");
3454 3454 _collector->stopTimer();
3455 3455 _wallclock.stop();
3456 3456 if (PrintGCDetails) {
3457 3457 gclog_or_tty->date_stamp(PrintGCDateStamps);
3458 3458 if (PrintGCTimeStamps) {
3459 3459 gclog_or_tty->stamp();
3460 3460 gclog_or_tty->print(": ");
3461 3461 }
3462 3462 gclog_or_tty->print("[%s-concurrent-%s: %3.3f/%3.3f secs]",
3463 3463 _collector->cmsGen()->short_name(),
3464 3464 _phase, _collector->timerValue(), _wallclock.seconds());
3465 3465 if (_print_cr) {
3466 3466 gclog_or_tty->print_cr("");
3467 3467 }
3468 3468 if (PrintCMSStatistics != 0) {
3469 3469 gclog_or_tty->print_cr(" (CMS-concurrent-%s yielded %d times)", _phase,
3470 3470 _collector->yields());
3471 3471 }
3472 3472 }
3473 3473 }
3474 3474
3475 3475 // CMS work
3476 3476
3477 3477 // Checkpoint the roots into this generation from outside
3478 3478 // this generation. [Note this initial checkpoint need only
3479 3479 // be approximate -- we'll do a catch up phase subsequently.]
3480 3480 void CMSCollector::checkpointRootsInitial(bool asynch) {
3481 3481 assert(_collectorState == InitialMarking, "Wrong collector state");
3482 3482 check_correct_thread_executing();
3483 3483 TraceCMSMemoryManagerStats tms(_collectorState,GenCollectedHeap::heap()->gc_cause());
3484 3484
3485 3485 ReferenceProcessor* rp = ref_processor();
3486 3486 SpecializationStats::clear();
3487 3487 assert(_restart_addr == NULL, "Control point invariant");
3488 3488 if (asynch) {
3489 3489 // acquire locks for subsequent manipulations
3490 3490 MutexLockerEx x(bitMapLock(),
3491 3491 Mutex::_no_safepoint_check_flag);
3492 3492 checkpointRootsInitialWork(asynch);
3493 - rp->verify_no_references_recorded();
3494 - rp->enable_discovery(); // enable ("weak") refs discovery
3493 + // enable ("weak") refs discovery
3494 + rp->enable_discovery(true /*verify_disabled*/, true /*check_no_refs*/);
3495 3495 _collectorState = Marking;
3496 3496 } else {
3497 3497 // (Weak) Refs discovery: this is controlled from genCollectedHeap::do_collection
3498 3498 // which recognizes if we are a CMS generation, and doesn't try to turn on
3499 3499 // discovery; verify that they aren't meddling.
3500 3500 assert(!rp->discovery_is_atomic(),
3501 3501 "incorrect setting of discovery predicate");
3502 3502 assert(!rp->discovery_enabled(), "genCollectedHeap shouldn't control "
3503 3503 "ref discovery for this generation kind");
3504 3504 // already have locks
3505 3505 checkpointRootsInitialWork(asynch);
3506 - rp->enable_discovery(); // now enable ("weak") refs discovery
3506 + // now enable ("weak") refs discovery
3507 + rp->enable_discovery(true /*verify_disabled*/, false /*verify_no_refs*/);
3507 3508 _collectorState = Marking;
3508 3509 }
3509 3510 SpecializationStats::print();
3510 3511 }
3511 3512
3512 3513 void CMSCollector::checkpointRootsInitialWork(bool asynch) {
3513 3514 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
3514 3515 assert(_collectorState == InitialMarking, "just checking");
3515 3516
3516 3517 // If there has not been a GC[n-1] since last GC[n] cycle completed,
3517 3518 // precede our marking with a collection of all
3518 3519 // younger generations to keep floating garbage to a minimum.
3519 3520 // XXX: we won't do this for now -- it's an optimization to be done later.
3520 3521
3521 3522 // already have locks
3522 3523 assert_lock_strong(bitMapLock());
3523 3524 assert(_markBitMap.isAllClear(), "was reset at end of previous cycle");
3524 3525
3525 3526 // Setup the verification and class unloading state for this
3526 3527 // CMS collection cycle.
3527 3528 setup_cms_unloading_and_verification_state();
3528 3529
3529 3530 NOT_PRODUCT(TraceTime t("\ncheckpointRootsInitialWork",
3530 3531 PrintGCDetails && Verbose, true, gclog_or_tty);)
3531 3532 if (UseAdaptiveSizePolicy) {
3532 3533 size_policy()->checkpoint_roots_initial_begin();
3533 3534 }
3534 3535
3535 3536 // Reset all the PLAB chunk arrays if necessary.
3536 3537 if (_survivor_plab_array != NULL && !CMSPLABRecordAlways) {
3537 3538 reset_survivor_plab_arrays();
3538 3539 }
3539 3540
3540 3541 ResourceMark rm;
3541 3542 HandleMark hm;
3542 3543
3543 3544 FalseClosure falseClosure;
3544 3545 // In the case of a synchronous collection, we will elide the
3545 3546 // remark step, so it's important to catch all the nmethod oops
3546 3547 // in this step.
3547 3548 // The final 'true' flag to gen_process_strong_roots will ensure this.
3548 3549 // If 'async' is true, we can relax the nmethod tracing.
3549 3550 MarkRefsIntoClosure notOlder(_span, &_markBitMap);
3550 3551 GenCollectedHeap* gch = GenCollectedHeap::heap();
3551 3552
3552 3553 verify_work_stacks_empty();
3553 3554 verify_overflow_empty();
3554 3555
3555 3556 gch->ensure_parsability(false); // fill TLABs, but no need to retire them
3556 3557 // Update the saved marks which may affect the root scans.
3557 3558 gch->save_marks();
3558 3559
3559 3560 // weak reference processing has not started yet.
3560 3561 ref_processor()->set_enqueuing_is_done(false);
3561 3562
3562 3563 {
3563 3564 // This is not needed. DEBUG_ONLY(RememberKlassesChecker imx(true);)
3564 3565 COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;)
3565 3566 gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
3566 3567 gch->gen_process_strong_roots(_cmsGen->level(),
3567 3568 true, // younger gens are roots
3568 3569 true, // activate StrongRootsScope
3569 3570 true, // collecting perm gen
3570 3571 SharedHeap::ScanningOption(roots_scanning_options()),
3571 3572 &notOlder,
3572 3573 true, // walk all of code cache if (so & SO_CodeCache)
3573 3574 NULL);
3574 3575 }
3575 3576
3576 3577 // Clear mod-union table; it will be dirtied in the prologue of
3577 3578 // CMS generation per each younger generation collection.
3578 3579
3579 3580 assert(_modUnionTable.isAllClear(),
3580 3581 "Was cleared in most recent final checkpoint phase"
3581 3582 " or no bits are set in the gc_prologue before the start of the next "
3582 3583 "subsequent marking phase.");
3583 3584
3584 3585 // Temporarily disabled, since pre/post-consumption closures don't
3585 3586 // care about precleaned cards
3586 3587 #if 0
3587 3588 {
3588 3589 MemRegion mr = MemRegion((HeapWord*)_virtual_space.low(),
3589 3590 (HeapWord*)_virtual_space.high());
3590 3591 _ct->ct_bs()->preclean_dirty_cards(mr);
3591 3592 }
3592 3593 #endif
3593 3594
3594 3595 // Save the end of the used_region of the constituent generations
3595 3596 // to be used to limit the extent of sweep in each generation.
3596 3597 save_sweep_limits();
3597 3598 if (UseAdaptiveSizePolicy) {
3598 3599 size_policy()->checkpoint_roots_initial_end(gch->gc_cause());
3599 3600 }
3600 3601 verify_overflow_empty();
3601 3602 }
3602 3603
3603 3604 bool CMSCollector::markFromRoots(bool asynch) {
3604 3605 // we might be tempted to assert that:
3605 3606 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
3606 3607 // "inconsistent argument?");
3607 3608 // However that wouldn't be right, because it's possible that
3608 3609 // a safepoint is indeed in progress as a younger generation
3609 3610 // stop-the-world GC happens even as we mark in this generation.
3610 3611 assert(_collectorState == Marking, "inconsistent state?");
3611 3612 check_correct_thread_executing();
3612 3613 verify_overflow_empty();
3613 3614
3614 3615 bool res;
3615 3616 if (asynch) {
3616 3617
3617 3618 // Start the timers for adaptive size policy for the concurrent phases
3618 3619 // Do it here so that the foreground MS can use the concurrent
3619 3620 // timer since a foreground MS might have the sweep done concurrently
3620 3621 // or STW.
3621 3622 if (UseAdaptiveSizePolicy) {
3622 3623 size_policy()->concurrent_marking_begin();
3623 3624 }
3624 3625
3625 3626 // Weak ref discovery note: We may be discovering weak
3626 3627 // refs in this generation concurrent (but interleaved) with
3627 3628 // weak ref discovery by a younger generation collector.
3628 3629
3629 3630 CMSTokenSyncWithLocks ts(true, bitMapLock());
3630 3631 TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
3631 3632 CMSPhaseAccounting pa(this, "mark", !PrintGCDetails);
3632 3633 res = markFromRootsWork(asynch);
3633 3634 if (res) {
3634 3635 _collectorState = Precleaning;
3635 3636 } else { // We failed and a foreground collection wants to take over
3636 3637 assert(_foregroundGCIsActive, "internal state inconsistency");
3637 3638 assert(_restart_addr == NULL, "foreground will restart from scratch");
3638 3639 if (PrintGCDetails) {
3639 3640 gclog_or_tty->print_cr("bailing out to foreground collection");
3640 3641 }
3641 3642 }
3642 3643 if (UseAdaptiveSizePolicy) {
3643 3644 size_policy()->concurrent_marking_end();
3644 3645 }
3645 3646 } else {
3646 3647 assert(SafepointSynchronize::is_at_safepoint(),
3647 3648 "inconsistent with asynch == false");
3648 3649 if (UseAdaptiveSizePolicy) {
3649 3650 size_policy()->ms_collection_marking_begin();
3650 3651 }
3651 3652 // already have locks
3652 3653 res = markFromRootsWork(asynch);
3653 3654 _collectorState = FinalMarking;
3654 3655 if (UseAdaptiveSizePolicy) {
3655 3656 GenCollectedHeap* gch = GenCollectedHeap::heap();
3656 3657 size_policy()->ms_collection_marking_end(gch->gc_cause());
3657 3658 }
3658 3659 }
3659 3660 verify_overflow_empty();
3660 3661 return res;
3661 3662 }
3662 3663
3663 3664 bool CMSCollector::markFromRootsWork(bool asynch) {
3664 3665 // iterate over marked bits in bit map, doing a full scan and mark
3665 3666 // from these roots using the following algorithm:
3666 3667 // . if oop is to the right of the current scan pointer,
3667 3668 // mark corresponding bit (we'll process it later)
3668 3669 // . else (oop is to left of current scan pointer)
3669 3670 // push oop on marking stack
3670 3671 // . drain the marking stack
3671 3672
3672 3673 // Note that when we do a marking step we need to hold the
3673 3674 // bit map lock -- recall that direct allocation (by mutators)
3674 3675 // and promotion (by younger generation collectors) is also
3675 3676 // marking the bit map. [the so-called allocate live policy.]
3676 3677 // Because the implementation of bit map marking is not
3677 3678 // robust wrt simultaneous marking of bits in the same word,
3678 3679 // we need to make sure that there is no such interference
3679 3680 // between concurrent such updates.
3680 3681
3681 3682 // already have locks
3682 3683 assert_lock_strong(bitMapLock());
3683 3684
3684 3685 // Clear the revisit stack, just in case there are any
3685 3686 // obsolete contents from a short-circuited previous CMS cycle.
3686 3687 _revisitStack.reset();
3687 3688 verify_work_stacks_empty();
3688 3689 verify_overflow_empty();
3689 3690 assert(_revisitStack.isEmpty(), "tabula rasa");
3690 3691 DEBUG_ONLY(RememberKlassesChecker cmx(should_unload_classes());)
3691 3692 bool result = false;
3692 3693 if (CMSConcurrentMTEnabled && ConcGCThreads > 0) {
3693 3694 result = do_marking_mt(asynch);
3694 3695 } else {
3695 3696 result = do_marking_st(asynch);
3696 3697 }
3697 3698 return result;
3698 3699 }
3699 3700
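// Editorial sketch (hypothetical, not HotSpot code) of the scan rule described
// at the top of markFromRootsWork() above: a reference found at or beyond the
// scan finger only needs its bit set (the ongoing bit-map sweep will reach it),
// while a reference behind the finger must also be pushed on the mark stack and
// traced when the stack is drained.

#include <set>
#include <stack>

// Addresses are modelled as plain integers, the mark bit map as a set of
// marked addresses, and 'finger' as the current scan position.
struct MarkingState {
  std::set<long>   marked;      // stands in for the CMS mark bit map
  std::stack<long> mark_stack;  // stands in for the marking stack
  long             finger;      // current position of the bit-map scan
};

// Called for every reference 'addr' discovered while scanning a marked object.
void note_reference(MarkingState& s, long addr) {
  if (!s.marked.insert(addr).second) {
    return;                     // already marked: nothing more to do
  }
  if (addr < s.finger) {
    // Behind the scan pointer: the sweep has already passed this address, so
    // remember it explicitly and trace it when the stack is drained.
    s.mark_stack.push(addr);
  }
  // At or beyond the finger: the bit alone suffices; the bit-map scan will
  // reach the object and trace it later.
}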
3700 3701 // Forward decl
3701 3702 class CMSConcMarkingTask;
3702 3703
3703 3704 class CMSConcMarkingTerminator: public ParallelTaskTerminator {
3704 3705 CMSCollector* _collector;
3705 3706 CMSConcMarkingTask* _task;
3706 3707 public:
3707 3708 virtual void yield();
3708 3709
3709 3710 // "n_threads" is the number of threads to be terminated.
3710 3711 // "queue_set" is a set of work queues of other threads.
3711 3712 // "collector" is the CMS collector associated with this task terminator.
3712 3713 // "yield" indicates whether we need the gang as a whole to yield.
3713 3714 CMSConcMarkingTerminator(int n_threads, TaskQueueSetSuper* queue_set, CMSCollector* collector) :
3714 3715 ParallelTaskTerminator(n_threads, queue_set),
3715 3716 _collector(collector) { }
3716 3717
3717 3718 void set_task(CMSConcMarkingTask* task) {
3718 3719 _task = task;
3719 3720 }
3720 3721 };
3721 3722
3722 3723 class CMSConcMarkingTerminatorTerminator: public TerminatorTerminator {
3723 3724 CMSConcMarkingTask* _task;
3724 3725 public:
3725 3726 bool should_exit_termination();
3726 3727 void set_task(CMSConcMarkingTask* task) {
3727 3728 _task = task;
3728 3729 }
3729 3730 };
3730 3731
3731 3732 // MT Concurrent Marking Task
3732 3733 class CMSConcMarkingTask: public YieldingFlexibleGangTask {
3733 3734 CMSCollector* _collector;
3734 3735 int _n_workers; // requested/desired # workers
3735 3736 bool _asynch;
3736 3737 bool _result;
3737 3738 CompactibleFreeListSpace* _cms_space;
3738 3739 CompactibleFreeListSpace* _perm_space;
3739 3740 char _pad_front[64]; // padding to ...
3740 3741 HeapWord* _global_finger; // ... avoid sharing cache line
3741 3742 char _pad_back[64];
3742 3743 HeapWord* _restart_addr;
3743 3744
3744 3745 // Exposed here for yielding support
3745 3746 Mutex* const _bit_map_lock;
3746 3747
3747 3748 // The per thread work queues, available here for stealing
3748 3749 OopTaskQueueSet* _task_queues;
3749 3750
3750 3751 // Termination (and yielding) support
3751 3752 CMSConcMarkingTerminator _term;
3752 3753 CMSConcMarkingTerminatorTerminator _term_term;
3753 3754
3754 3755 public:
3755 3756 CMSConcMarkingTask(CMSCollector* collector,
3756 3757 CompactibleFreeListSpace* cms_space,
3757 3758 CompactibleFreeListSpace* perm_space,
3758 3759 bool asynch,
3759 3760 YieldingFlexibleWorkGang* workers,
3760 3761 OopTaskQueueSet* task_queues):
3761 3762 YieldingFlexibleGangTask("Concurrent marking done multi-threaded"),
3762 3763 _collector(collector),
3763 3764 _cms_space(cms_space),
3764 3765 _perm_space(perm_space),
3765 3766 _asynch(asynch), _n_workers(0), _result(true),
3766 3767 _task_queues(task_queues),
3767 3768 _term(_n_workers, task_queues, _collector),
3768 3769 _bit_map_lock(collector->bitMapLock())
3769 3770 {
3770 3771 _requested_size = _n_workers;
3771 3772 _term.set_task(this);
3772 3773 _term_term.set_task(this);
3773 3774 assert(_cms_space->bottom() < _perm_space->bottom(),
3774 3775 "Finger incorrectly initialized below");
3775 3776 _restart_addr = _global_finger = _cms_space->bottom();
3776 3777 }
3777 3778
3778 3779
3779 3780 OopTaskQueueSet* task_queues() { return _task_queues; }
3780 3781
3781 3782 OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
3782 3783
3783 3784 HeapWord** global_finger_addr() { return &_global_finger; }
3784 3785
3785 3786 CMSConcMarkingTerminator* terminator() { return &_term; }
3786 3787
3787 3788 virtual void set_for_termination(int active_workers) {
3788 3789 terminator()->reset_for_reuse(active_workers);
3789 3790 }
3790 3791
3791 3792 void work(int i);
3792 3793 bool should_yield() {
3793 3794 return ConcurrentMarkSweepThread::should_yield()
3794 3795 && !_collector->foregroundGCIsActive()
3795 3796 && _asynch;
3796 3797 }
3797 3798
3798 3799 virtual void coordinator_yield(); // stuff done by coordinator
3799 3800 bool result() { return _result; }
3800 3801
3801 3802 void reset(HeapWord* ra) {
3802 3803 assert(_global_finger >= _cms_space->end(), "Postcondition of ::work(i)");
3803 3804 assert(_global_finger >= _perm_space->end(), "Postcondition of ::work(i)");
3804 3805 assert(ra < _perm_space->end(), "ra too large");
3805 3806 _restart_addr = _global_finger = ra;
3806 3807 _term.reset_for_reuse();
3807 3808 }
3808 3809
3809 3810 static bool get_work_from_overflow_stack(CMSMarkStack* ovflw_stk,
3810 3811 OopTaskQueue* work_q);
3811 3812
3812 3813 private:
3813 3814 void do_scan_and_mark(int i, CompactibleFreeListSpace* sp);
3814 3815 void do_work_steal(int i);
3815 3816 void bump_global_finger(HeapWord* f);
3816 3817 };
3817 3818
3818 3819 bool CMSConcMarkingTerminatorTerminator::should_exit_termination() {
3819 3820 assert(_task != NULL, "Error");
3820 3821 return _task->yielding();
3821 3822 // Note that we do not need the disjunct || _task->should_yield() above
3822 3823 // because we want terminating threads to yield only if the task
3823 3824 // is already in the midst of yielding, which happens only after at least one
3824 3825 // thread has yielded.
3825 3826 }
3826 3827
3827 3828 void CMSConcMarkingTerminator::yield() {
3828 3829 if (_task->should_yield()) {
3829 3830 _task->yield();
3830 3831 } else {
3831 3832 ParallelTaskTerminator::yield();
3832 3833 }
3833 3834 }
3834 3835
3835 3836 ////////////////////////////////////////////////////////////////
3836 3837 // Concurrent Marking Algorithm Sketch
3837 3838 ////////////////////////////////////////////////////////////////
3838 3839 // Until all tasks exhausted (both spaces):
3839 3840 // -- claim next available chunk
3840 3841 // -- bump global finger via CAS
3841 3842 // -- find first object that starts in this chunk
3842 3843 // and start scanning bitmap from that position
3843 3844 // -- scan marked objects for oops
3844 3845 // -- CAS-mark target, and if successful:
3845 3846 // . if target oop is above global finger (volatile read)
3846 3847 // nothing to do
3847 3848 // . if target oop is in chunk and above local finger
3848 3849 // then nothing to do
3849 3850 // . else push on work-queue
3850 3851 // -- Deal with possible overflow issues:
3851 3852 // . local work-queue overflow causes stuff to be pushed on
3852 3853 // global (common) overflow queue
3853 3854 // . always first empty local work queue
3854 3855 // . then get a batch of oops from global work queue if any
3855 3856 // . then do work stealing
3856 3857 // -- When all tasks claimed (both spaces)
3857 3858 // and local work queue empty,
3858 3859 // then in a loop do:
3859 3860 // . check global overflow stack; steal a batch of oops and trace
3860 3861 //      . try to steal from other threads if GOS is empty
3861 3862 // . if neither is available, offer termination
3862 3863 // -- Terminate and return result
3863 3864 //
3864 3865 void CMSConcMarkingTask::work(int i) {
3865 3866 elapsedTimer _timer;
3866 3867 ResourceMark rm;
3867 3868 HandleMark hm;
3868 3869
3869 3870 DEBUG_ONLY(_collector->verify_overflow_empty();)
3870 3871
3871 3872 // Before we begin work, our work queue should be empty
3872 3873 assert(work_queue(i)->size() == 0, "Expected to be empty");
3873 3874 // Scan the bitmap covering _cms_space, tracing through grey objects.
3874 3875 _timer.start();
3875 3876 do_scan_and_mark(i, _cms_space);
3876 3877 _timer.stop();
3877 3878 if (PrintCMSStatistics != 0) {
3878 3879 gclog_or_tty->print_cr("Finished cms space scanning in %dth thread: %3.3f sec",
3879 3880 i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers
3880 3881 }
3881 3882
3882 3883 // ... do the same for the _perm_space
3883 3884 _timer.reset();
3884 3885 _timer.start();
3885 3886 do_scan_and_mark(i, _perm_space);
3886 3887 _timer.stop();
3887 3888 if (PrintCMSStatistics != 0) {
3888 3889 gclog_or_tty->print_cr("Finished perm space scanning in %dth thread: %3.3f sec",
3889 3890 i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers
3890 3891 }
3891 3892
3892 3893 // ... do work stealing
3893 3894 _timer.reset();
3894 3895 _timer.start();
3895 3896 do_work_steal(i);
3896 3897 _timer.stop();
3897 3898 if (PrintCMSStatistics != 0) {
3898 3899 gclog_or_tty->print_cr("Finished work stealing in %dth thread: %3.3f sec",
3899 3900 i, _timer.seconds()); // XXX: need xxx/xxx type of notation, two timers
3900 3901 }
3901 3902 assert(_collector->_markStack.isEmpty(), "Should have been emptied");
3902 3903 assert(work_queue(i)->size() == 0, "Should have been emptied");
3903 3904 // Note that under the current task protocol, the
3904 3905   // following assertion is true even if the spaces
3905 3906   // have expanded since the completion of the concurrent
3906 3907 // marking. XXX This will likely change under a strict
3907 3908 // ABORT semantics.
3908 3909 assert(_global_finger > _cms_space->end() &&
3909 3910 _global_finger >= _perm_space->end(),
3910 3911 "All tasks have been completed");
3911 3912 DEBUG_ONLY(_collector->verify_overflow_empty();)
3912 3913 }
3913 3914
3914 3915 void CMSConcMarkingTask::bump_global_finger(HeapWord* f) {
3915 3916 HeapWord* read = _global_finger;
3916 3917 HeapWord* cur = read;
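  // Classic CAS retry loop: repeatedly attempt to install f as the new value
  // of _global_finger. cmpxchg returns the value it observed, so the loop
  // exits either when our CAS succeeds (observed value == expected value) or
  // when another thread has already advanced the finger to f or beyond.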
3917 3918 while (f > read) {
3918 3919 cur = read;
3919 3920 read = (HeapWord*) Atomic::cmpxchg_ptr(f, &_global_finger, cur);
3920 3921 if (cur == read) {
3921 3922 // our cas succeeded
3922 3923 assert(_global_finger >= f, "protocol consistency");
3923 3924 break;
3924 3925 }
3925 3926 }
3926 3927 }
3927 3928
3928 3929 // This is really inefficient, and should be redone by
3929 3930 // using (not yet available) block-read and -write interfaces to the
3930 3931 // stack and the work_queue. XXX FIX ME !!!
3931 3932 bool CMSConcMarkingTask::get_work_from_overflow_stack(CMSMarkStack* ovflw_stk,
3932 3933 OopTaskQueue* work_q) {
3933 3934 // Fast lock-free check
3934 3935 if (ovflw_stk->length() == 0) {
3935 3936 return false;
3936 3937 }
3937 3938 assert(work_q->size() == 0, "Shouldn't steal");
3938 3939 MutexLockerEx ml(ovflw_stk->par_lock(),
3939 3940 Mutex::_no_safepoint_check_flag);
3940 3941 // Grab up to 1/4 the size of the work queue
3941 3942 size_t num = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
3942 3943 (size_t)ParGCDesiredObjsFromOverflowList);
3943 3944 num = MIN2(num, ovflw_stk->length());
3944 3945 for (int i = (int) num; i > 0; i--) {
3945 3946 oop cur = ovflw_stk->pop();
3946 3947 assert(cur != NULL, "Counted wrong?");
3947 3948 work_q->push(cur);
3948 3949 }
3949 3950 return num > 0;
3950 3951 }
3951 3952
3952 3953 void CMSConcMarkingTask::do_scan_and_mark(int i, CompactibleFreeListSpace* sp) {
3953 3954 SequentialSubTasksDone* pst = sp->conc_par_seq_tasks();
3954 3955 int n_tasks = pst->n_tasks();
3955 3956 // We allow that there may be no tasks to do here because
3956 3957 // we are restarting after a stack overflow.
3957 3958 assert(pst->valid() || n_tasks == 0, "Uninitialized use?");
3958 3959 int nth_task = 0;
3959 3960
3960 3961 HeapWord* aligned_start = sp->bottom();
3961 3962 if (sp->used_region().contains(_restart_addr)) {
3962 3963 // Align down to a card boundary for the start of 0th task
3963 3964 // for this space.
3964 3965 aligned_start =
3965 3966 (HeapWord*)align_size_down((uintptr_t)_restart_addr,
3966 3967 CardTableModRefBS::card_size);
3967 3968 }
3968 3969
3969 3970 size_t chunk_size = sp->marking_task_size();
3970 3971 while (!pst->is_task_claimed(/* reference */ nth_task)) {
3971 3972 // Having claimed the nth task in this space,
3972 3973 // compute the chunk that it corresponds to:
3973 3974 MemRegion span = MemRegion(aligned_start + nth_task*chunk_size,
3974 3975 aligned_start + (nth_task+1)*chunk_size);
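      // Illustrative example (hypothetical numbers): with chunk_size == 1024
      // HeapWords, task 0 covers [aligned_start, aligned_start + 1024),
      // task 1 covers [aligned_start + 1024, aligned_start + 2048), and so on;
      // the half-open intervals tile the space without overlap.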
3975 3976 // Try and bump the global finger via a CAS;
3976 3977 // note that we need to do the global finger bump
3977 3978 // _before_ taking the intersection below, because
3978 3979 // the task corresponding to that region will be
3979 3980 // deemed done even if the used_region() expands
3980 3981 // because of allocation -- as it almost certainly will
3981 3982 // during start-up while the threads yield in the
3982 3983 // closure below.
3983 3984 HeapWord* finger = span.end();
3984 3985 bump_global_finger(finger); // atomically
3985 3986 // There are null tasks here corresponding to chunks
3986 3987 // beyond the "top" address of the space.
3987 3988 span = span.intersection(sp->used_region());
3988 3989 if (!span.is_empty()) { // Non-null task
3989 3990 HeapWord* prev_obj;
3990 3991 assert(!span.contains(_restart_addr) || nth_task == 0,
3991 3992 "Inconsistency");
3992 3993 if (nth_task == 0) {
3993 3994 // For the 0th task, we'll not need to compute a block_start.
3994 3995 if (span.contains(_restart_addr)) {
3995 3996 // In the case of a restart because of stack overflow,
3996 3997 // we might additionally skip a chunk prefix.
3997 3998 prev_obj = _restart_addr;
3998 3999 } else {
3999 4000 prev_obj = span.start();
4000 4001 }
4001 4002 } else {
4002 4003 // We want to skip the first object because
4003 4004 // the protocol is to scan any object in its entirety
4004 4005 // that _starts_ in this span; a fortiori, any
4005 4006 // object starting in an earlier span is scanned
4006 4007 // as part of an earlier claimed task.
4007 4008 // Below we use the "careful" version of block_start
4008 4009 // so we do not try to navigate uninitialized objects.
4009 4010 prev_obj = sp->block_start_careful(span.start());
4010 4011 // Below we use a variant of block_size that uses the
4011 4012 // Printezis bits to avoid waiting for allocated
4012 4013 // objects to become initialized/parsable.
4013 4014 while (prev_obj < span.start()) {
4014 4015 size_t sz = sp->block_size_no_stall(prev_obj, _collector);
4015 4016 if (sz > 0) {
4016 4017 prev_obj += sz;
4017 4018 } else {
4018 4019 // In this case we may end up doing a bit of redundant
4019 4020 // scanning, but that appears unavoidable, short of
4020 4021 // locking the free list locks; see bug 6324141.
4021 4022 break;
4022 4023 }
4023 4024 }
4024 4025 }
4025 4026 if (prev_obj < span.end()) {
4026 4027 MemRegion my_span = MemRegion(prev_obj, span.end());
4027 4028 // Do the marking work within a non-empty span --
4028 4029 // the last argument to the constructor indicates whether the
4029 4030 // iteration should be incremental with periodic yields.
4030 4031 Par_MarkFromRootsClosure cl(this, _collector, my_span,
4031 4032 &_collector->_markBitMap,
4032 4033 work_queue(i),
4033 4034 &_collector->_markStack,
4034 4035 &_collector->_revisitStack,
4035 4036 _asynch);
4036 4037 _collector->_markBitMap.iterate(&cl, my_span.start(), my_span.end());
4037 4038 } // else nothing to do for this task
4038 4039 } // else nothing to do for this task
4039 4040 }
4040 4041 // We'd be tempted to assert here that since there are no
4041 4042 // more tasks left to claim in this space, the global_finger
4042 4043 // must exceed space->top() and a fortiori space->end(). However,
4043 4044 // that would not quite be correct because the bumping of
4044 4045 // global_finger occurs strictly after the claiming of a task,
4045 4046 // so by the time we reach here the global finger may not yet
4046 4047 // have been bumped up by the thread that claimed the last
4047 4048 // task.
4048 4049 pst->all_tasks_completed();
4049 4050 }
4050 4051
4051 4052 class Par_ConcMarkingClosure: public Par_KlassRememberingOopClosure {
4052 4053 private:
4053 4054 CMSConcMarkingTask* _task;
4054 4055 MemRegion _span;
4055 4056 CMSBitMap* _bit_map;
4056 4057 CMSMarkStack* _overflow_stack;
4057 4058 OopTaskQueue* _work_queue;
4058 4059 protected:
4059 4060 DO_OOP_WORK_DEFN
4060 4061 public:
4061 4062 Par_ConcMarkingClosure(CMSCollector* collector, CMSConcMarkingTask* task, OopTaskQueue* work_queue,
4062 4063 CMSBitMap* bit_map, CMSMarkStack* overflow_stack,
4063 4064 CMSMarkStack* revisit_stack):
4064 4065 Par_KlassRememberingOopClosure(collector, NULL, revisit_stack),
4065 4066 _task(task),
4066 4067 _span(collector->_span),
4067 4068 _work_queue(work_queue),
4068 4069 _bit_map(bit_map),
4069 4070 _overflow_stack(overflow_stack)
4070 4071 { }
4071 4072 virtual void do_oop(oop* p);
4072 4073 virtual void do_oop(narrowOop* p);
4073 4074 void trim_queue(size_t max);
4074 4075 void handle_stack_overflow(HeapWord* lost);
4075 4076 void do_yield_check() {
4076 4077 if (_task->should_yield()) {
4077 4078 _task->yield();
4078 4079 }
4079 4080 }
4080 4081 };
4081 4082
4082 4083 // Grey object scanning during work stealing phase --
4083 4084 // the salient assumption here is that any references
4084 4085 // that are in these stolen objects being scanned must
4085 4086 // already have been initialized (else they would not have
4086 4087 // been published), so we do not need to check for
4087 4088 // uninitialized objects before pushing here.
4088 4089 void Par_ConcMarkingClosure::do_oop(oop obj) {
4089 4090 assert(obj->is_oop_or_null(true), "expected an oop or NULL");
4090 4091 HeapWord* addr = (HeapWord*)obj;
4091 4092 // Check if oop points into the CMS generation
4092 4093 // and is not marked
4093 4094 if (_span.contains(addr) && !_bit_map->isMarked(addr)) {
4094 4095 // a white object ...
4095 4096 // If we manage to "claim" the object, by being the
4096 4097 // first thread to mark it, then we push it on our
4097 4098 // marking stack
4098 4099 if (_bit_map->par_mark(addr)) { // ... now grey
4099 4100 // push on work queue (grey set)
4100 4101 bool simulate_overflow = false;
4101 4102 NOT_PRODUCT(
4102 4103 if (CMSMarkStackOverflowALot &&
4103 4104 _collector->simulate_overflow()) {
4104 4105 // simulate a stack overflow
4105 4106 simulate_overflow = true;
4106 4107 }
4107 4108 )
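      // Push order below: try the thread-local work queue first; if that is
      // full, fall back to the shared overflow stack; only if both fail (or
      // when overflow is being simulated) do we take the stack-overflow path.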
4108 4109 if (simulate_overflow ||
4109 4110 !(_work_queue->push(obj) || _overflow_stack->par_push(obj))) {
4110 4111 // stack overflow
4111 4112 if (PrintCMSStatistics != 0) {
4112 4113 gclog_or_tty->print_cr("CMS marking stack overflow (benign) at "
4113 4114 SIZE_FORMAT, _overflow_stack->capacity());
4114 4115 }
4115 4116 // We cannot assert that the overflow stack is full because
4116 4117 // it may have been emptied since.
4117 4118 assert(simulate_overflow ||
4118 4119 _work_queue->size() == _work_queue->max_elems(),
4119 4120 "Else push should have succeeded");
4120 4121 handle_stack_overflow(addr);
4121 4122 }
4122 4123 } // Else, some other thread got there first
4123 4124 do_yield_check();
4124 4125 }
4125 4126 }
4126 4127
4127 4128 void Par_ConcMarkingClosure::do_oop(oop* p) { Par_ConcMarkingClosure::do_oop_work(p); }
4128 4129 void Par_ConcMarkingClosure::do_oop(narrowOop* p) { Par_ConcMarkingClosure::do_oop_work(p); }
4129 4130
4130 4131 void Par_ConcMarkingClosure::trim_queue(size_t max) {
4131 4132 while (_work_queue->size() > max) {
4132 4133 oop new_oop;
4133 4134 if (_work_queue->pop_local(new_oop)) {
4134 4135 assert(new_oop->is_oop(), "Should be an oop");
4135 4136 assert(_bit_map->isMarked((HeapWord*)new_oop), "Grey object");
4136 4137 assert(_span.contains((HeapWord*)new_oop), "Not in span");
4137 4138 assert(new_oop->is_parsable(), "Should be parsable");
4138 4139 new_oop->oop_iterate(this); // do_oop() above
4139 4140 do_yield_check();
4140 4141 }
4141 4142 }
4142 4143 }
4143 4144
4144 4145 // Upon stack overflow, we discard (part of) the stack,
4145 4146 // remembering the least address amongst those discarded
4146 4147 // in CMSCollector's _restart_addr.
4147 4148 void Par_ConcMarkingClosure::handle_stack_overflow(HeapWord* lost) {
4148 4149 // We need to do this under a mutex to prevent other
4149 4150 // workers from interfering with the work done below.
4150 4151 MutexLockerEx ml(_overflow_stack->par_lock(),
4151 4152 Mutex::_no_safepoint_check_flag);
4152 4153 // Remember the least grey address discarded
4153 4154 HeapWord* ra = (HeapWord*)_overflow_stack->least_value(lost);
4154 4155 _collector->lower_restart_addr(ra);
4155 4156 _overflow_stack->reset(); // discard stack contents
4156 4157 _overflow_stack->expand(); // expand the stack if possible
4157 4158 }
4158 4159
4159 4160
4160 4161 void CMSConcMarkingTask::do_work_steal(int i) {
4161 4162 OopTaskQueue* work_q = work_queue(i);
4162 4163 oop obj_to_scan;
4163 4164 CMSBitMap* bm = &(_collector->_markBitMap);
4164 4165 CMSMarkStack* ovflw = &(_collector->_markStack);
4165 4166 CMSMarkStack* revisit = &(_collector->_revisitStack);
4166 4167 int* seed = _collector->hash_seed(i);
4167 4168 Par_ConcMarkingClosure cl(_collector, this, work_q, bm, ovflw, revisit);
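  // Steady-state loop: fully drain the local queue (trim to 0), then try to
  // refill it from the shared overflow stack, then try to steal from other
  // workers' queues; only when all of these come up empty do we offer
  // termination, yielding along the way if a yield has been requested.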
4168 4169 while (true) {
4169 4170 cl.trim_queue(0);
4170 4171 assert(work_q->size() == 0, "Should have been emptied above");
4171 4172 if (get_work_from_overflow_stack(ovflw, work_q)) {
4172 4173 // Can't assert below because the work obtained from the
4173 4174 // overflow stack may already have been stolen from us.
4174 4175 // assert(work_q->size() > 0, "Work from overflow stack");
4175 4176 continue;
4176 4177 } else if (task_queues()->steal(i, seed, /* reference */ obj_to_scan)) {
4177 4178 assert(obj_to_scan->is_oop(), "Should be an oop");
4178 4179 assert(bm->isMarked((HeapWord*)obj_to_scan), "Grey object");
4179 4180 obj_to_scan->oop_iterate(&cl);
4180 4181 } else if (terminator()->offer_termination(&_term_term)) {
4181 4182 assert(work_q->size() == 0, "Impossible!");
4182 4183 break;
4183 4184 } else if (yielding() || should_yield()) {
4184 4185 yield();
4185 4186 }
4186 4187 }
4187 4188 }
4188 4189
4189 4190 // This is run by the CMS (coordinator) thread.
4190 4191 void CMSConcMarkingTask::coordinator_yield() {
4191 4192 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
4192 4193 "CMS thread should hold CMS token");
4193 4194 DEBUG_ONLY(RememberKlassesChecker mux(false);)
4194 4195 // First give up the locks, then yield, then re-lock
4195 4196 // We should probably use a constructor/destructor idiom to
4196 4197 // do this unlock/lock or modify the MutexUnlocker class to
4197 4198 // serve our purpose. XXX
4198 4199 assert_lock_strong(_bit_map_lock);
4199 4200 _bit_map_lock->unlock();
4200 4201 ConcurrentMarkSweepThread::desynchronize(true);
4201 4202 ConcurrentMarkSweepThread::acknowledge_yield_request();
4202 4203 _collector->stopTimer();
4203 4204 if (PrintCMSStatistics != 0) {
4204 4205 _collector->incrementYields();
4205 4206 }
4206 4207 _collector->icms_wait();
4207 4208
4208 4209 // It is possible for whichever thread initiated the yield request
4209 4210 // not to get a chance to wake up and take the bitmap lock between
4210 4211 // this thread releasing it and reacquiring it. So, while the
4211 4212 // should_yield() flag is on, let's sleep for a bit to give the
4212 4213 // other thread a chance to wake up. The limit imposed on the number
4213 4214   // of iterations is defensive, to avoid any unforeseen circumstances
4214 4215 // putting us into an infinite loop. Since it's always been this
4215 4216 // (coordinator_yield()) method that was observed to cause the
4216 4217 // problem, we are using a parameter (CMSCoordinatorYieldSleepCount)
4217 4218 // which is by default non-zero. For the other seven methods that
4218 4219   // also perform the yield operation, we are using a different
4219 4220   // parameter (CMSYieldSleepCount), which is by default zero. This way we
4220 4221 // can enable the sleeping for those methods too, if necessary.
4221 4222 // See 6442774.
4222 4223 //
4223 4224 // We really need to reconsider the synchronization between the GC
4224 4225 // thread and the yield-requesting threads in the future and we
4225 4226 // should really use wait/notify, which is the recommended
4226 4227 // way of doing this type of interaction. Additionally, we should
4227 4228   // consolidate the eight methods that do the yield operation, which
4228 4229   // are almost identical, into one for better maintainability and
4229 4230 // readability. See 6445193.
4230 4231 //
4231 4232 // Tony 2006.06.29
4232 4233 for (unsigned i = 0; i < CMSCoordinatorYieldSleepCount &&
4233 4234 ConcurrentMarkSweepThread::should_yield() &&
4234 4235 !CMSCollector::foregroundGCIsActive(); ++i) {
4235 4236 os::sleep(Thread::current(), 1, false);
4236 4237 ConcurrentMarkSweepThread::acknowledge_yield_request();
4237 4238 }
4238 4239
4239 4240 ConcurrentMarkSweepThread::synchronize(true);
4240 4241 _bit_map_lock->lock_without_safepoint_check();
4241 4242 _collector->startTimer();
4242 4243 }
4243 4244
4244 4245 bool CMSCollector::do_marking_mt(bool asynch) {
4245 4246 assert(ConcGCThreads > 0 && conc_workers() != NULL, "precondition");
4246 4247 // In the future this would be determined ergonomically, based
4247 4248 // on #cpu's, # active mutator threads (and load), and mutation rate.
4248 4249 int num_workers = ConcGCThreads;
4249 4250
4250 4251 CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace();
4251 4252 CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();
4252 4253
4253 4254 CMSConcMarkingTask tsk(this,
4254 4255 cms_space,
4255 4256 perm_space,
4256 4257 asynch,
4257 4258 conc_workers(),
4258 4259 task_queues());
4259 4260
4260 4261 // Since the actual number of workers we get may be different
4261 4262 // from the number we requested above, do we need to do anything different
4262 4263   // below? In particular, maybe we need to subclass the SequentialSubTasksDone
4263 4264 // class?? XXX
4264 4265 cms_space ->initialize_sequential_subtasks_for_marking(num_workers);
4265 4266 perm_space->initialize_sequential_subtasks_for_marking(num_workers);
4266 4267
4267 4268 // Refs discovery is already non-atomic.
4268 4269 assert(!ref_processor()->discovery_is_atomic(), "Should be non-atomic");
4269 4270 assert(ref_processor()->discovery_is_mt(), "Discovery should be MT");
4270 4271 DEBUG_ONLY(RememberKlassesChecker cmx(should_unload_classes());)
4271 4272 conc_workers()->start_task(&tsk);
4272 4273 while (tsk.yielded()) {
4273 4274 tsk.coordinator_yield();
4274 4275 conc_workers()->continue_task(&tsk);
4275 4276 }
4276 4277 // If the task was aborted, _restart_addr will be non-NULL
4277 4278 assert(tsk.completed() || _restart_addr != NULL, "Inconsistency");
4278 4279 while (_restart_addr != NULL) {
4279 4280 // XXX For now we do not make use of ABORTED state and have not
4280 4281 // yet implemented the right abort semantics (even in the original
4281 4282 // single-threaded CMS case). That needs some more investigation
4282 4283 // and is deferred for now; see CR# TBF. 07252005YSR. XXX
4283 4284 assert(!CMSAbortSemantics || tsk.aborted(), "Inconsistency");
4284 4285 // If _restart_addr is non-NULL, a marking stack overflow
4285 4286 // occurred; we need to do a fresh marking iteration from the
4286 4287 // indicated restart address.
4287 4288 if (_foregroundGCIsActive && asynch) {
4288 4289 // We may be running into repeated stack overflows, having
4289 4290 // reached the limit of the stack size, while making very
4290 4291 // slow forward progress. It may be best to bail out and
4291 4292 // let the foreground collector do its job.
4292 4293 // Clear _restart_addr, so that foreground GC
4293 4294 // works from scratch. This avoids the headache of
4294 4295 // a "rescan" which would otherwise be needed because
4295 4296 // of the dirty mod union table & card table.
4296 4297 _restart_addr = NULL;
4297 4298 return false;
4298 4299 }
4299 4300 // Adjust the task to restart from _restart_addr
4300 4301 tsk.reset(_restart_addr);
4301 4302 cms_space ->initialize_sequential_subtasks_for_marking(num_workers,
4302 4303 _restart_addr);
4303 4304 perm_space->initialize_sequential_subtasks_for_marking(num_workers,
4304 4305 _restart_addr);
4305 4306 _restart_addr = NULL;
4306 4307 // Get the workers going again
4307 4308 conc_workers()->start_task(&tsk);
4308 4309 while (tsk.yielded()) {
4309 4310 tsk.coordinator_yield();
4310 4311 conc_workers()->continue_task(&tsk);
4311 4312 }
4312 4313 }
4313 4314 assert(tsk.completed(), "Inconsistency");
4314 4315 assert(tsk.result() == true, "Inconsistency");
4315 4316 return true;
4316 4317 }
4317 4318
4318 4319 bool CMSCollector::do_marking_st(bool asynch) {
4319 4320 ResourceMark rm;
4320 4321 HandleMark hm;
4321 4322
4322 4323 // Temporarily make refs discovery single threaded (non-MT)
4323 4324 ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false);
4324 4325 MarkFromRootsClosure markFromRootsClosure(this, _span, &_markBitMap,
4325 4326 &_markStack, &_revisitStack, CMSYield && asynch);
4326 4327 // the last argument to iterate indicates whether the iteration
4327 4328 // should be incremental with periodic yields.
4328 4329 _markBitMap.iterate(&markFromRootsClosure);
4329 4330 // If _restart_addr is non-NULL, a marking stack overflow
4330 4331 // occurred; we need to do a fresh iteration from the
4331 4332 // indicated restart address.
4332 4333 while (_restart_addr != NULL) {
4333 4334 if (_foregroundGCIsActive && asynch) {
4334 4335 // We may be running into repeated stack overflows, having
4335 4336 // reached the limit of the stack size, while making very
4336 4337 // slow forward progress. It may be best to bail out and
4337 4338 // let the foreground collector do its job.
4338 4339 // Clear _restart_addr, so that foreground GC
4339 4340 // works from scratch. This avoids the headache of
4340 4341 // a "rescan" which would otherwise be needed because
4341 4342 // of the dirty mod union table & card table.
4342 4343 _restart_addr = NULL;
4343 4344 return false; // indicating failure to complete marking
4344 4345 }
4345 4346 // Deal with stack overflow:
4346 4347 // we restart marking from _restart_addr
4347 4348 HeapWord* ra = _restart_addr;
4348 4349 markFromRootsClosure.reset(ra);
4349 4350 _restart_addr = NULL;
4350 4351 _markBitMap.iterate(&markFromRootsClosure, ra, _span.end());
4351 4352 }
4352 4353 return true;
4353 4354 }
4354 4355
4355 4356 void CMSCollector::preclean() {
4356 4357 check_correct_thread_executing();
4357 4358 assert(Thread::current()->is_ConcurrentGC_thread(), "Wrong thread");
4358 4359 verify_work_stacks_empty();
4359 4360 verify_overflow_empty();
4360 4361 _abort_preclean = false;
4361 4362 if (CMSPrecleaningEnabled) {
4362 4363 _eden_chunk_index = 0;
4363 4364 size_t used = get_eden_used();
4364 4365 size_t capacity = get_eden_capacity();
4365 4366 // Don't start sampling unless we will get sufficiently
4366 4367 // many samples.
4367 4368 if (used < (capacity/(CMSScheduleRemarkSamplingRatio * 100)
4368 4369 * CMSScheduleRemarkEdenPenetration)) {
4369 4370 _start_sampling = true;
4370 4371 } else {
4371 4372 _start_sampling = false;
4372 4373 }
4373 4374 TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
4374 4375 CMSPhaseAccounting pa(this, "preclean", !PrintGCDetails);
4375 4376 preclean_work(CMSPrecleanRefLists1, CMSPrecleanSurvivors1);
4376 4377 }
4377 4378 CMSTokenSync x(true); // is cms thread
4378 4379 if (CMSPrecleaningEnabled) {
4379 4380 sample_eden();
4380 4381 _collectorState = AbortablePreclean;
4381 4382 } else {
4382 4383 _collectorState = FinalMarking;
4383 4384 }
4384 4385 verify_work_stacks_empty();
4385 4386 verify_overflow_empty();
4386 4387 }
4387 4388
4388 4389 // Try and schedule the remark such that young gen
4389 4390 // occupancy is CMSScheduleRemarkEdenPenetration %.
4390 4391 void CMSCollector::abortable_preclean() {
4391 4392 check_correct_thread_executing();
4392 4393 assert(CMSPrecleaningEnabled, "Inconsistent control state");
4393 4394 assert(_collectorState == AbortablePreclean, "Inconsistent control state");
4394 4395
4395 4396 // If Eden's current occupancy is below this threshold,
4396 4397 // immediately schedule the remark; else preclean
4397 4398 // past the next scavenge in an effort to
4398 4399   // schedule the pause as described above. By choosing
4399 4400 // CMSScheduleRemarkEdenSizeThreshold >= max eden size
4400 4401 // we will never do an actual abortable preclean cycle.
4401 4402 if (get_eden_used() > CMSScheduleRemarkEdenSizeThreshold) {
4402 4403 TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
4403 4404 CMSPhaseAccounting pa(this, "abortable-preclean", !PrintGCDetails);
4404 4405 // We need more smarts in the abortable preclean
4405 4406 // loop below to deal with cases where allocation
4406 4407 // in young gen is very very slow, and our precleaning
4407 4408 // is running a losing race against a horde of
4408 4409 // mutators intent on flooding us with CMS updates
4409 4410 // (dirty cards).
4410 4411 // One, admittedly dumb, strategy is to give up
4411 4412 // after a certain number of abortable precleaning loops
4412 4413 // or after a certain maximum time. We want to make
4413 4414 // this smarter in the next iteration.
4414 4415 // XXX FIX ME!!! YSR
4415 4416 size_t loops = 0, workdone = 0, cumworkdone = 0, waited = 0;
4416 4417 while (!(should_abort_preclean() ||
4417 4418 ConcurrentMarkSweepThread::should_terminate())) {
4418 4419 workdone = preclean_work(CMSPrecleanRefLists2, CMSPrecleanSurvivors2);
4419 4420 cumworkdone += workdone;
4420 4421 loops++;
4421 4422 // Voluntarily terminate abortable preclean phase if we have
4422 4423 // been at it for too long.
4423 4424 if ((CMSMaxAbortablePrecleanLoops != 0) &&
4424 4425 loops >= CMSMaxAbortablePrecleanLoops) {
4425 4426 if (PrintGCDetails) {
4426 4427 gclog_or_tty->print(" CMS: abort preclean due to loops ");
4427 4428 }
4428 4429 break;
4429 4430 }
4430 4431 if (pa.wallclock_millis() > CMSMaxAbortablePrecleanTime) {
4431 4432 if (PrintGCDetails) {
4432 4433 gclog_or_tty->print(" CMS: abort preclean due to time ");
4433 4434 }
4434 4435 break;
4435 4436 }
4436 4437 // If we are doing little work each iteration, we should
4437 4438 // take a short break.
4438 4439 if (workdone < CMSAbortablePrecleanMinWorkPerIteration) {
4439 4440 // Sleep for some time, waiting for work to accumulate
4440 4441 stopTimer();
4441 4442 cmsThread()->wait_on_cms_lock(CMSAbortablePrecleanWaitMillis);
4442 4443 startTimer();
4443 4444 waited++;
4444 4445 }
4445 4446 }
4446 4447 if (PrintCMSStatistics > 0) {
4447 4448     gclog_or_tty->print(" [%d iterations, %d waits, %d cards] ",
4448 4449 loops, waited, cumworkdone);
4449 4450 }
4450 4451 }
4451 4452 CMSTokenSync x(true); // is cms thread
4452 4453 if (_collectorState != Idling) {
4453 4454 assert(_collectorState == AbortablePreclean,
4454 4455 "Spontaneous state transition?");
4455 4456 _collectorState = FinalMarking;
4456 4457 } // Else, a foreground collection completed this CMS cycle.
4457 4458 return;
4458 4459 }
4459 4460
4460 4461 // Respond to an Eden sampling opportunity
4461 4462 void CMSCollector::sample_eden() {
4462 4463 // Make sure a young gc cannot sneak in between our
4463 4464 // reading and recording of a sample.
4464 4465 assert(Thread::current()->is_ConcurrentGC_thread(),
4465 4466 "Only the cms thread may collect Eden samples");
4466 4467 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
4467 4468 "Should collect samples while holding CMS token");
4468 4469 if (!_start_sampling) {
4469 4470 return;
4470 4471 }
4471 4472 if (_eden_chunk_array) {
4472 4473 if (_eden_chunk_index < _eden_chunk_capacity) {
4473 4474 _eden_chunk_array[_eden_chunk_index] = *_top_addr; // take sample
4474 4475 assert(_eden_chunk_array[_eden_chunk_index] <= *_end_addr,
4475 4476 "Unexpected state of Eden");
4476 4477 // We'd like to check that what we just sampled is an oop-start address;
4477 4478 // however, we cannot do that here since the object may not yet have been
4478 4479 // initialized. So we'll instead do the check when we _use_ this sample
4479 4480 // later.
4480 4481 if (_eden_chunk_index == 0 ||
4481 4482 (pointer_delta(_eden_chunk_array[_eden_chunk_index],
4482 4483 _eden_chunk_array[_eden_chunk_index-1])
4483 4484 >= CMSSamplingGrain)) {
4484 4485 _eden_chunk_index++; // commit sample
4485 4486 }
4486 4487 }
4487 4488 }
4488 4489 if ((_collectorState == AbortablePreclean) && !_abort_preclean) {
4489 4490 size_t used = get_eden_used();
4490 4491 size_t capacity = get_eden_capacity();
4491 4492 assert(used <= capacity, "Unexpected state of Eden");
4492 4493 if (used > (capacity/100 * CMSScheduleRemarkEdenPenetration)) {
4493 4494 _abort_preclean = true;
4494 4495 }
4495 4496 }
4496 4497 }
4497 4498
4498 4499
4499 4500 size_t CMSCollector::preclean_work(bool clean_refs, bool clean_survivor) {
4500 4501 assert(_collectorState == Precleaning ||
4501 4502 _collectorState == AbortablePreclean, "incorrect state");
4502 4503 ResourceMark rm;
4503 4504 HandleMark hm;
4504 4505
4505 4506 // Precleaning is currently not MT but the reference processor
4506 4507 // may be set for MT. Disable it temporarily here.
4507 4508 ReferenceProcessor* rp = ref_processor();
4508 4509 ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
4509 4510
4510 4511 // Do one pass of scrubbing the discovered reference lists
4511 4512 // to remove any reference objects with strongly-reachable
4512 4513 // referents.
4513 4514 if (clean_refs) {
4514 4515 CMSPrecleanRefsYieldClosure yield_cl(this);
4515 4516 assert(rp->span().equals(_span), "Spans should be equal");
4516 4517 CMSKeepAliveClosure keep_alive(this, _span, &_markBitMap,
4517 4518 &_markStack, &_revisitStack,
4518 4519 true /* preclean */);
4519 4520 CMSDrainMarkingStackClosure complete_trace(this,
4520 4521 _span, &_markBitMap, &_markStack,
4521 4522 &keep_alive, true /* preclean */);
4522 4523
4523 4524 // We don't want this step to interfere with a young
4524 4525 // collection because we don't want to take CPU
4525 4526 // or memory bandwidth away from the young GC threads
4526 4527 // (which may be as many as there are CPUs).
4527 4528 // Note that we don't need to protect ourselves from
4528 4529 // interference with mutators because they can't
4529 4530 // manipulate the discovered reference lists nor affect
4530 4531 // the computed reachability of the referents, the
4531 4532 // only properties manipulated by the precleaning
4532 4533 // of these reference lists.
4533 4534 stopTimer();
4534 4535 CMSTokenSyncWithLocks x(true /* is cms thread */,
4535 4536 bitMapLock());
4536 4537 startTimer();
4537 4538 sample_eden();
4538 4539
4539 4540 // The following will yield to allow foreground
4540 4541 // collection to proceed promptly. XXX YSR:
4541 4542 // The code in this method may need further
4542 4543 // tweaking for better performance and some restructuring
4543 4544 // for cleaner interfaces.
4544 4545 rp->preclean_discovered_references(
4545 4546 rp->is_alive_non_header(), &keep_alive, &complete_trace,
4546 4547 &yield_cl, should_unload_classes());
4547 4548 }
4548 4549
4549 4550 if (clean_survivor) { // preclean the active survivor space(s)
4550 4551 assert(_young_gen->kind() == Generation::DefNew ||
4551 4552 _young_gen->kind() == Generation::ParNew ||
4552 4553 _young_gen->kind() == Generation::ASParNew,
4553 4554 "incorrect type for cast");
4554 4555 DefNewGeneration* dng = (DefNewGeneration*)_young_gen;
4555 4556 PushAndMarkClosure pam_cl(this, _span, ref_processor(),
4556 4557 &_markBitMap, &_modUnionTable,
4557 4558 &_markStack, &_revisitStack,
4558 4559 true /* precleaning phase */);
4559 4560 stopTimer();
4560 4561 CMSTokenSyncWithLocks ts(true /* is cms thread */,
4561 4562 bitMapLock());
4562 4563 startTimer();
4563 4564 unsigned int before_count =
4564 4565 GenCollectedHeap::heap()->total_collections();
4565 4566 SurvivorSpacePrecleanClosure
4566 4567 sss_cl(this, _span, &_markBitMap, &_markStack,
4567 4568 &pam_cl, before_count, CMSYield);
4568 4569 DEBUG_ONLY(RememberKlassesChecker mx(should_unload_classes());)
4569 4570 dng->from()->object_iterate_careful(&sss_cl);
4570 4571 dng->to()->object_iterate_careful(&sss_cl);
4571 4572 }
4572 4573 MarkRefsIntoAndScanClosure
4573 4574 mrias_cl(_span, ref_processor(), &_markBitMap, &_modUnionTable,
4574 4575 &_markStack, &_revisitStack, this, CMSYield,
4575 4576 true /* precleaning phase */);
4576 4577 // CAUTION: The following closure has persistent state that may need to
4577 4578 // be reset upon a decrease in the sequence of addresses it
4578 4579 // processes.
4579 4580 ScanMarkedObjectsAgainCarefullyClosure
4580 4581 smoac_cl(this, _span,
4581 4582 &_markBitMap, &_markStack, &_revisitStack, &mrias_cl, CMSYield);
4582 4583
4583 4584 // Preclean dirty cards in ModUnionTable and CardTable using
4584 4585 // appropriate convergence criterion;
4585 4586 // repeat CMSPrecleanIter times unless we find that
4586 4587 // we are losing.
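// For a concrete (illustrative) setting of CMSPrecleanNumerator = 2 and
// CMSPrecleanDenominator = 3, the loop below keeps iterating only while more
// than CMSPrecleanThreshold cards are found and each pass finds at most
// two-thirds as many dirty cards as the previous pass
// (curNumCards * 3 <= lastNumCards * 2); otherwise precleaning is judged to
// be losing ground and we break out to the final card-table pass.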
4587 4588 assert(CMSPrecleanIter < 10, "CMSPrecleanIter is too large");
4588 4589 assert(CMSPrecleanNumerator < CMSPrecleanDenominator,
4589 4590 "Bad convergence multiplier");
4590 4591 assert(CMSPrecleanThreshold >= 100,
4591 4592 "Unreasonably low CMSPrecleanThreshold");
4592 4593
4593 4594 size_t numIter, cumNumCards, lastNumCards, curNumCards;
4594 4595 for (numIter = 0, cumNumCards = lastNumCards = curNumCards = 0;
4595 4596 numIter < CMSPrecleanIter;
4596 4597 numIter++, lastNumCards = curNumCards, cumNumCards += curNumCards) {
4597 4598 curNumCards = preclean_mod_union_table(_cmsGen, &smoac_cl);
4598 4599 if (CMSPermGenPrecleaningEnabled) {
4599 4600 curNumCards += preclean_mod_union_table(_permGen, &smoac_cl);
4600 4601 }
4601 4602 if (Verbose && PrintGCDetails) {
4602 4603 gclog_or_tty->print(" (modUnionTable: %d cards)", curNumCards);
4603 4604 }
4604 4605 // Either there are very few dirty cards, so re-mark
4605 4606 // pause will be small anyway, or our pre-cleaning isn't
4606 4607 // that much faster than the rate at which cards are being
4607 4608 // dirtied, so we might as well stop and re-mark since
4608 4609 // precleaning won't improve our re-mark time by much.
4609 4610 if (curNumCards <= CMSPrecleanThreshold ||
4610 4611 (numIter > 0 &&
4611 4612 (curNumCards * CMSPrecleanDenominator >
4612 4613 lastNumCards * CMSPrecleanNumerator))) {
4613 4614 numIter++;
4614 4615 cumNumCards += curNumCards;
4615 4616 break;
4616 4617 }
4617 4618 }
4618 4619 curNumCards = preclean_card_table(_cmsGen, &smoac_cl);
4619 4620 if (CMSPermGenPrecleaningEnabled) {
4620 4621 curNumCards += preclean_card_table(_permGen, &smoac_cl);
4621 4622 }
4622 4623 cumNumCards += curNumCards;
4623 4624 if (PrintGCDetails && PrintCMSStatistics != 0) {
4624 4625 gclog_or_tty->print_cr(" (cardTable: %d cards, re-scanned %d cards, %d iterations)",
4625 4626 curNumCards, cumNumCards, numIter);
4626 4627 }
4627 4628 return cumNumCards; // as a measure of useful work done
4628 4629 }
4629 4630
4630 4631 // PRECLEANING NOTES:
4631 4632 // Precleaning involves:
4632 4633 // . reading the bits of the modUnionTable and clearing the set bits.
4633 4634 // . For the cards corresponding to the set bits, we scan the
4634 4635 // objects on those cards. This means we need the free_list_lock
4635 4636 // so that we can safely iterate over the CMS space when scanning
4636 4637 // for oops.
4637 4638 // . When we scan the objects, we'll be both reading and setting
4638 4639 // marks in the marking bit map, so we'll need the marking bit map.
4639 4640 // . For protecting _collector_state transitions, we take the CGC_lock.
4640 4641 // Note that any races in the reading of card table entries by the
4641 4642 // CMS thread on the one hand and the clearing of those entries by the
4642 4643 // VM thread or the setting of those entries by the mutator threads on the
4643 4644 // other are quite benign. However, for efficiency it makes sense to keep
4644 4645 // the VM thread from racing with the CMS thread while the latter is
4645 4646 // reading and clearing dirty card info in the modUnionTable. We therefore also use the
4646 4647 // CGC_lock to protect the reading of the card table and the mod union
4647 4648 // table by the CMS thread.
4648 4649 // . We run concurrently with mutator updates, so scanning
4649 4650 // needs to be done carefully -- we should not try to scan
4650 4651 // potentially uninitialized objects.
4651 4652 //
4652 4653 // Locking strategy: While holding the CGC_lock, we scan over and
4653 4654 // reset a maximal dirty range of the mod union / card tables, then lock
4654 4655 // the free_list_lock and bitmap lock to do a full marking, then
4655 4656 // release these locks; and repeat the cycle. This allows for a
4656 4657 // certain amount of fairness in the sharing of these locks between
4657 4658 // the CMS collector on the one hand, and the VM thread and the
4658 4659 // mutators on the other.
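//
// Schematically, each iteration of the precleaning loops below does:
//   stopTimer();  CMSTokenSync            // grab & clear a maximal dirty range
//   startTimer(); ...
//   stopTimer();  CMSTokenSyncWithLocks   // freelistLock + bitMapLock:
//   startTimer(); ...                     //   scan/mark the objects on those cards
// so that time spent blocked at these potential yield points is not counted
// by the collector's timer as precleaning work.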
4659 4660
4660 4661 // NOTE: preclean_mod_union_table() and preclean_card_table()
4661 4662 // further below are largely identical; if you need to modify
4662 4663 // one of these methods, please check the other method too.
4663 4664
4664 4665 size_t CMSCollector::preclean_mod_union_table(
4665 4666 ConcurrentMarkSweepGeneration* gen,
4666 4667 ScanMarkedObjectsAgainCarefullyClosure* cl) {
4667 4668 verify_work_stacks_empty();
4668 4669 verify_overflow_empty();
4669 4670
4670 4671 // Turn off checking for this method but turn it back on
4671 4672 // selectively. There are yield points in this method
4672 4673 // but it is difficult to turn the checking off just around
4673 4674 // the yield points. It is simpler to selectively turn
4674 4675 // it on.
4675 4676 DEBUG_ONLY(RememberKlassesChecker mux(false);)
4676 4677
4677 4678 // strategy: starting with the first card, accumulate contiguous
4678 4679 // ranges of dirty cards; clear these cards, then scan the region
4679 4680 // covered by these cards.
4680 4681
4681 4682 // Since all of the MUT is committed ahead, we can just use
4682 4683 // that, in case the generations expand while we are precleaning.
4683 4684 // It might also be fine to just use the committed part of the
4684 4685 // generation, but we might potentially miss cards when the
4685 4686 // generation is rapidly expanding while we are in the midst
4686 4687 // of precleaning.
4687 4688 HeapWord* startAddr = gen->reserved().start();
4688 4689 HeapWord* endAddr = gen->reserved().end();
4689 4690
4690 4691 cl->setFreelistLock(gen->freelistLock()); // needed for yielding
4691 4692
4692 4693 size_t numDirtyCards, cumNumDirtyCards;
4693 4694 HeapWord *nextAddr, *lastAddr;
4694 4695 for (cumNumDirtyCards = numDirtyCards = 0,
4695 4696 nextAddr = lastAddr = startAddr;
4696 4697 nextAddr < endAddr;
4697 4698 nextAddr = lastAddr, cumNumDirtyCards += numDirtyCards) {
4698 4699
4699 4700 ResourceMark rm;
4700 4701 HandleMark hm;
4701 4702
4702 4703 MemRegion dirtyRegion;
4703 4704 {
4704 4705 stopTimer();
4705 4706 // Potential yield point
4706 4707 CMSTokenSync ts(true);
4707 4708 startTimer();
4708 4709 sample_eden();
4709 4710       // Get dirty region starting at nextAddr (inclusive),
4710 4711 // simultaneously clearing it.
4711 4712 dirtyRegion =
4712 4713 _modUnionTable.getAndClearMarkedRegion(nextAddr, endAddr);
4713 4714 assert(dirtyRegion.start() >= nextAddr,
4714 4715 "returned region inconsistent?");
4715 4716 }
4716 4717 // Remember where the next search should begin.
4717 4718 // The returned region (if non-empty) is a right open interval,
4718 4719     // so lastAddr is obtained from the right end of that
4719 4720 // interval.
4720 4721 lastAddr = dirtyRegion.end();
4721 4722 // Should do something more transparent and less hacky XXX
4722 4723 numDirtyCards =
4723 4724 _modUnionTable.heapWordDiffToOffsetDiff(dirtyRegion.word_size());
4724 4725
4725 4726 // We'll scan the cards in the dirty region (with periodic
4726 4727 // yields for foreground GC as needed).
4727 4728 if (!dirtyRegion.is_empty()) {
4728 4729 assert(numDirtyCards > 0, "consistency check");
4729 4730 HeapWord* stop_point = NULL;
4730 4731 stopTimer();
4731 4732 // Potential yield point
4732 4733 CMSTokenSyncWithLocks ts(true, gen->freelistLock(),
4733 4734 bitMapLock());
4734 4735 startTimer();
4735 4736 {
4736 4737 verify_work_stacks_empty();
4737 4738 verify_overflow_empty();
4738 4739 sample_eden();
4739 4740 DEBUG_ONLY(RememberKlassesChecker mx(should_unload_classes());)
4740 4741 stop_point =
4741 4742 gen->cmsSpace()->object_iterate_careful_m(dirtyRegion, cl);
4742 4743 }
4743 4744 if (stop_point != NULL) {
4744 4745 // The careful iteration stopped early either because it found an
4745 4746 // uninitialized object, or because we were in the midst of an
4746 4747 // "abortable preclean", which should now be aborted. Redirty
4747 4748 // the bits corresponding to the partially-scanned or unscanned
4748 4749 // cards. We'll either restart at the next block boundary or
4749 4750 // abort the preclean.
4750 4751 assert((CMSPermGenPrecleaningEnabled && (gen == _permGen)) ||
4751 4752 (_collectorState == AbortablePreclean && should_abort_preclean()),
4752 4753 "Unparsable objects should only be in perm gen.");
4753 4754 _modUnionTable.mark_range(MemRegion(stop_point, dirtyRegion.end()));
4754 4755 if (should_abort_preclean()) {
4755 4756 break; // out of preclean loop
4756 4757 } else {
4757 4758 // Compute the next address at which preclean should pick up;
4758 4759 // might need bitMapLock in order to read P-bits.
4759 4760 lastAddr = next_card_start_after_block(stop_point);
4760 4761 }
4761 4762 }
4762 4763 } else {
4763 4764 assert(lastAddr == endAddr, "consistency check");
4764 4765 assert(numDirtyCards == 0, "consistency check");
4765 4766 break;
4766 4767 }
4767 4768 }
4768 4769 verify_work_stacks_empty();
4769 4770 verify_overflow_empty();
4770 4771 return cumNumDirtyCards;
4771 4772 }
4772 4773
4773 4774 // NOTE: preclean_mod_union_table() above and preclean_card_table()
4774 4775 // below are largely identical; if you need to modify
4775 4776 // one of these methods, please check the other method too.
4776 4777
4777 4778 size_t CMSCollector::preclean_card_table(ConcurrentMarkSweepGeneration* gen,
4778 4779 ScanMarkedObjectsAgainCarefullyClosure* cl) {
4779 4780   // strategy: it's similar to preclean_mod_union_table above, in that
4780 4781 // we accumulate contiguous ranges of dirty cards, mark these cards
4781 4782 // precleaned, then scan the region covered by these cards.
4782 4783 HeapWord* endAddr = (HeapWord*)(gen->_virtual_space.high());
4783 4784 HeapWord* startAddr = (HeapWord*)(gen->_virtual_space.low());
4784 4785
4785 4786 cl->setFreelistLock(gen->freelistLock()); // needed for yielding
4786 4787
4787 4788 size_t numDirtyCards, cumNumDirtyCards;
4788 4789 HeapWord *lastAddr, *nextAddr;
4789 4790
4790 4791 for (cumNumDirtyCards = numDirtyCards = 0,
4791 4792 nextAddr = lastAddr = startAddr;
4792 4793 nextAddr < endAddr;
4793 4794 nextAddr = lastAddr, cumNumDirtyCards += numDirtyCards) {
4794 4795
4795 4796 ResourceMark rm;
4796 4797 HandleMark hm;
4797 4798
4798 4799 MemRegion dirtyRegion;
4799 4800 {
4800 4801 // See comments in "Precleaning notes" above on why we
4801 4802 // do this locking. XXX Could the locking overheads be
4802 4803 // too high when dirty cards are sparse? [I don't think so.]
4803 4804 stopTimer();
4804 4805 CMSTokenSync x(true); // is cms thread
4805 4806 startTimer();
4806 4807 sample_eden();
4807 4808 // Get and clear dirty region from card table
4808 4809 dirtyRegion = _ct->ct_bs()->dirty_card_range_after_reset(
4809 4810 MemRegion(nextAddr, endAddr),
4810 4811 true,
4811 4812 CardTableModRefBS::precleaned_card_val());
4812 4813
4813 4814 assert(dirtyRegion.start() >= nextAddr,
4814 4815 "returned region inconsistent?");
4815 4816 }
4816 4817 lastAddr = dirtyRegion.end();
4817 4818 numDirtyCards =
4818 4819 dirtyRegion.word_size()/CardTableModRefBS::card_size_in_words;
4819 4820
4820 4821 if (!dirtyRegion.is_empty()) {
4821 4822 stopTimer();
4822 4823 CMSTokenSyncWithLocks ts(true, gen->freelistLock(), bitMapLock());
4823 4824 startTimer();
4824 4825 sample_eden();
4825 4826 verify_work_stacks_empty();
4826 4827 verify_overflow_empty();
4827 4828 DEBUG_ONLY(RememberKlassesChecker mx(should_unload_classes());)
4828 4829 HeapWord* stop_point =
4829 4830 gen->cmsSpace()->object_iterate_careful_m(dirtyRegion, cl);
4830 4831 if (stop_point != NULL) {
4831 4832 // The careful iteration stopped early because it found an
4832 4833 // uninitialized object. Redirty the bits corresponding to the
4833 4834 // partially-scanned or unscanned cards, and start again at the
4834 4835 // next block boundary.
4835 4836 assert(CMSPermGenPrecleaningEnabled ||
4836 4837 (_collectorState == AbortablePreclean && should_abort_preclean()),
4837 4838 "Unparsable objects should only be in perm gen.");
4838 4839 _ct->ct_bs()->invalidate(MemRegion(stop_point, dirtyRegion.end()));
4839 4840 if (should_abort_preclean()) {
4840 4841 break; // out of preclean loop
4841 4842 } else {
4842 4843 // Compute the next address at which preclean should pick up.
4843 4844 lastAddr = next_card_start_after_block(stop_point);
4844 4845 }
4845 4846 }
4846 4847 } else {
4847 4848 break;
4848 4849 }
4849 4850 }
4850 4851 verify_work_stacks_empty();
4851 4852 verify_overflow_empty();
4852 4853 return cumNumDirtyCards;
4853 4854 }
4854 4855
4855 4856 void CMSCollector::checkpointRootsFinal(bool asynch,
4856 4857 bool clear_all_soft_refs, bool init_mark_was_synchronous) {
4857 4858 assert(_collectorState == FinalMarking, "incorrect state transition?");
4858 4859 check_correct_thread_executing();
4859 4860 // world is stopped at this checkpoint
4860 4861 assert(SafepointSynchronize::is_at_safepoint(),
4861 4862 "world should be stopped");
4862 4863 TraceCMSMemoryManagerStats tms(_collectorState,GenCollectedHeap::heap()->gc_cause());
4863 4864
4864 4865 verify_work_stacks_empty();
4865 4866 verify_overflow_empty();
4866 4867
4867 4868 SpecializationStats::clear();
4868 4869 if (PrintGCDetails) {
4869 4870 gclog_or_tty->print("[YG occupancy: "SIZE_FORMAT" K ("SIZE_FORMAT" K)]",
4870 4871 _young_gen->used() / K,
4871 4872 _young_gen->capacity() / K);
4872 4873 }
4873 4874 if (asynch) {
4874 4875 if (CMSScavengeBeforeRemark) {
4875 4876 GenCollectedHeap* gch = GenCollectedHeap::heap();
4876 4877       // Temporarily set the flag to false; GCH->do_collection expects
4877 4878       // it to be false and will set it to true
4878 4879 FlagSetting fl(gch->_is_gc_active, false);
4879 4880 NOT_PRODUCT(TraceTime t("Scavenge-Before-Remark",
4880 4881 PrintGCDetails && Verbose, true, gclog_or_tty);)
4881 4882 int level = _cmsGen->level() - 1;
4882 4883 if (level >= 0) {
4883 4884 gch->do_collection(true, // full (i.e. force, see below)
4884 4885 false, // !clear_all_soft_refs
4885 4886 0, // size
4886 4887 false, // is_tlab
4887 4888 level // max_level
4888 4889 );
4889 4890 }
4890 4891 }
4891 4892 FreelistLocker x(this);
4892 4893 MutexLockerEx y(bitMapLock(),
4893 4894 Mutex::_no_safepoint_check_flag);
4894 4895 assert(!init_mark_was_synchronous, "but that's impossible!");
4895 4896 checkpointRootsFinalWork(asynch, clear_all_soft_refs, false);
4896 4897 } else {
4897 4898 // already have all the locks
4898 4899 checkpointRootsFinalWork(asynch, clear_all_soft_refs,
4899 4900 init_mark_was_synchronous);
4900 4901 }
4901 4902 verify_work_stacks_empty();
4902 4903 verify_overflow_empty();
4903 4904 SpecializationStats::print();
4904 4905 }
4905 4906
4906 4907 void CMSCollector::checkpointRootsFinalWork(bool asynch,
4907 4908 bool clear_all_soft_refs, bool init_mark_was_synchronous) {
4908 4909
4909 4910 NOT_PRODUCT(TraceTime tr("checkpointRootsFinalWork", PrintGCDetails, false, gclog_or_tty);)
4910 4911
4911 4912 assert(haveFreelistLocks(), "must have free list locks");
4912 4913 assert_lock_strong(bitMapLock());
4913 4914
4914 4915 if (UseAdaptiveSizePolicy) {
4915 4916 size_policy()->checkpoint_roots_final_begin();
4916 4917 }
4917 4918
4918 4919 ResourceMark rm;
4919 4920 HandleMark hm;
4920 4921
4921 4922 GenCollectedHeap* gch = GenCollectedHeap::heap();
4922 4923
4923 4924 if (should_unload_classes()) {
4924 4925 CodeCache::gc_prologue();
4925 4926 }
4926 4927 assert(haveFreelistLocks(), "must have free list locks");
4927 4928 assert_lock_strong(bitMapLock());
4928 4929
4929 4930 DEBUG_ONLY(RememberKlassesChecker fmx(should_unload_classes());)
4930 4931 if (!init_mark_was_synchronous) {
4931 4932 // We might assume that we need not fill TLAB's when
4932 4933 // CMSScavengeBeforeRemark is set, because we may have just done
4933 4934 // a scavenge which would have filled all TLAB's -- and besides
4934 4935 // Eden would be empty. This however may not always be the case --
4935 4936 // for instance although we asked for a scavenge, it may not have
4936 4937 // happened because of a JNI critical section. We probably need
4937 4938 // a policy for deciding whether we can in that case wait until
4938 4939 // the critical section releases and then do the remark following
4939 4940 // the scavenge, and skip it here. In the absence of that policy,
4940 4941 // or of an indication of whether the scavenge did indeed occur,
4941 4942 // we cannot rely on TLAB's having been filled and must do
4942 4943 // so here just in case a scavenge did not happen.
4943 4944 gch->ensure_parsability(false); // fill TLAB's, but no need to retire them
4944 4945 // Update the saved marks which may affect the root scans.
4945 4946 gch->save_marks();
4946 4947
4947 4948 {
4948 4949 COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;)
4949 4950
4950 4951 // Note on the role of the mod union table:
4951 4952 // Since the marker in "markFromRoots" marks concurrently with
4952 4953 // mutators, it is possible for some reachable objects not to have been
4953 4954       // scanned. For instance, the only reference to an object A may have been
4954 4955       // placed in object B after the marker scanned B. Unless B is rescanned,
4955 4956 // A would be collected. Such updates to references in marked objects
4956 4957 // are detected via the mod union table which is the set of all cards
4957 4958 // dirtied since the first checkpoint in this GC cycle and prior to
4958 4959 // the most recent young generation GC, minus those cleaned up by the
4959 4960 // concurrent precleaning.
4960 4961 if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
4961 4962 TraceTime t("Rescan (parallel) ", PrintGCDetails, false, gclog_or_tty);
4962 4963 do_remark_parallel();
4963 4964 } else {
4964 4965 TraceTime t("Rescan (non-parallel) ", PrintGCDetails, false,
4965 4966 gclog_or_tty);
4966 4967 do_remark_non_parallel();
4967 4968 }
4968 4969 }
4969 4970 } else {
4970 4971 assert(!asynch, "Can't have init_mark_was_synchronous in asynch mode");
4971 4972 // The initial mark was stop-world, so there's no rescanning to
4972 4973 // do; go straight on to the next step below.
4973 4974 }
4974 4975 verify_work_stacks_empty();
4975 4976 verify_overflow_empty();
4976 4977
4977 4978 {
4978 4979 NOT_PRODUCT(TraceTime ts("refProcessingWork", PrintGCDetails, false, gclog_or_tty);)
4979 4980 refProcessingWork(asynch, clear_all_soft_refs);
4980 4981 }
4981 4982 verify_work_stacks_empty();
4982 4983 verify_overflow_empty();
4983 4984
4984 4985 if (should_unload_classes()) {
4985 4986 CodeCache::gc_epilogue();
4986 4987 }
4987 4988 JvmtiExport::gc_epilogue();
4988 4989
4989 4990 // If we encountered any (marking stack / work queue) overflow
4990 4991 // events during the current CMS cycle, take appropriate
4991 4992 // remedial measures, where possible, so as to try and avoid
4992 4993 // recurrence of that condition.
4993 4994 assert(_markStack.isEmpty(), "No grey objects");
4994 4995 size_t ser_ovflw = _ser_pmc_remark_ovflw + _ser_pmc_preclean_ovflw +
4995 4996 _ser_kac_ovflw + _ser_kac_preclean_ovflw;
4996 4997 if (ser_ovflw > 0) {
4997 4998 if (PrintCMSStatistics != 0) {
4998 4999 gclog_or_tty->print_cr("Marking stack overflow (benign) "
4999 5000 "(pmc_pc="SIZE_FORMAT", pmc_rm="SIZE_FORMAT", kac="SIZE_FORMAT
5000 5001 ", kac_preclean="SIZE_FORMAT")",
5001 5002 _ser_pmc_preclean_ovflw, _ser_pmc_remark_ovflw,
5002 5003 _ser_kac_ovflw, _ser_kac_preclean_ovflw);
5003 5004 }
5004 5005 _markStack.expand();
5005 5006 _ser_pmc_remark_ovflw = 0;
5006 5007 _ser_pmc_preclean_ovflw = 0;
5007 5008 _ser_kac_preclean_ovflw = 0;
5008 5009 _ser_kac_ovflw = 0;
5009 5010 }
5010 5011 if (_par_pmc_remark_ovflw > 0 || _par_kac_ovflw > 0) {
5011 5012 if (PrintCMSStatistics != 0) {
5012 5013 gclog_or_tty->print_cr("Work queue overflow (benign) "
5013 5014 "(pmc_rm="SIZE_FORMAT", kac="SIZE_FORMAT")",
5014 5015 _par_pmc_remark_ovflw, _par_kac_ovflw);
5015 5016 }
5016 5017 _par_pmc_remark_ovflw = 0;
5017 5018 _par_kac_ovflw = 0;
5018 5019 }
5019 5020 if (PrintCMSStatistics != 0) {
5020 5021 if (_markStack._hit_limit > 0) {
5021 5022 gclog_or_tty->print_cr(" (benign) Hit max stack size limit ("SIZE_FORMAT")",
5022 5023 _markStack._hit_limit);
5023 5024 }
5024 5025 if (_markStack._failed_double > 0) {
5025 5026 gclog_or_tty->print_cr(" (benign) Failed stack doubling ("SIZE_FORMAT"),"
5026 5027 " current capacity "SIZE_FORMAT,
5027 5028 _markStack._failed_double,
5028 5029 _markStack.capacity());
5029 5030 }
5030 5031 }
5031 5032 _markStack._hit_limit = 0;
5032 5033 _markStack._failed_double = 0;
5033 5034
5034 5035 // Check that all the klasses have been checked
5035 5036 assert(_revisitStack.isEmpty(), "Not all klasses revisited");
5036 5037
5037 5038 if ((VerifyAfterGC || VerifyDuringGC) &&
5038 5039 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
5039 5040 verify_after_remark();
5040 5041 }
5041 5042
5042 5043 // Change under the freelistLocks.
5043 5044 _collectorState = Sweeping;
5044 5045 // Call isAllClear() under bitMapLock
5045 5046 assert(_modUnionTable.isAllClear(), "Should be clear by end of the"
5046 5047 " final marking");
5047 5048 if (UseAdaptiveSizePolicy) {
5048 5049 size_policy()->checkpoint_roots_final_end(gch->gc_cause());
5049 5050 }
5050 5051 }
5051 5052
5052 5053 // Parallel remark task
5053 5054 class CMSParRemarkTask: public AbstractGangTask {
5054 5055 CMSCollector* _collector;
5055 5056 int _n_workers;
5056 5057 CompactibleFreeListSpace* _cms_space;
5057 5058 CompactibleFreeListSpace* _perm_space;
5058 5059
5059 5060 // The per-thread work queues, available here for stealing.
5060 5061 OopTaskQueueSet* _task_queues;
5061 5062 ParallelTaskTerminator _term;
5062 5063
5063 5064 public:
5064 5065 CMSParRemarkTask(CMSCollector* collector,
5065 5066 CompactibleFreeListSpace* cms_space,
5066 5067 CompactibleFreeListSpace* perm_space,
5067 5068 int n_workers, FlexibleWorkGang* workers,
5068 5069 OopTaskQueueSet* task_queues):
5069 5070 AbstractGangTask("Rescan roots and grey objects in parallel"),
5070 5071 _collector(collector),
5071 5072 _cms_space(cms_space), _perm_space(perm_space),
5072 5073 _n_workers(n_workers),
5073 5074 _task_queues(task_queues),
5074 5075 _term(n_workers, task_queues) { }
5075 5076
5076 5077 OopTaskQueueSet* task_queues() { return _task_queues; }
5077 5078
5078 5079 OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
5079 5080
5080 5081 ParallelTaskTerminator* terminator() { return &_term; }
5081 5082 int n_workers() { return _n_workers; }
5082 5083
5083 5084 void work(int i);
5084 5085
5085 5086 private:
5086 5087 // Work method in support of parallel rescan ... of young gen spaces
5087 5088 void do_young_space_rescan(int i, Par_MarkRefsIntoAndScanClosure* cl,
5088 5089 ContiguousSpace* space,
5089 5090 HeapWord** chunk_array, size_t chunk_top);
5090 5091
5091 5092 // ... of dirty cards in old space
5092 5093 void do_dirty_card_rescan_tasks(CompactibleFreeListSpace* sp, int i,
5093 5094 Par_MarkRefsIntoAndScanClosure* cl);
5094 5095
5095 5096 // ... work stealing for the above
5096 5097 void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed);
5097 5098 };
5098 5099
5099 5100 // work_queue(i) is passed to the closure
5100 5101 // Par_MarkRefsIntoAndScanClosure. The "i" parameter
5101 5102 // also is passed to do_dirty_card_rescan_tasks() and to
5102 5103 // do_work_steal() to select the i-th task_queue.
5103 5104
5104 5105 void CMSParRemarkTask::work(int i) {
5105 5106 elapsedTimer _timer;
5106 5107 ResourceMark rm;
5107 5108 HandleMark hm;
5108 5109
5109 5110 // ---------- rescan from roots --------------
5110 5111 _timer.start();
5111 5112 GenCollectedHeap* gch = GenCollectedHeap::heap();
5112 5113 Par_MarkRefsIntoAndScanClosure par_mrias_cl(_collector,
5113 5114 _collector->_span, _collector->ref_processor(),
5114 5115 &(_collector->_markBitMap),
5115 5116 work_queue(i), &(_collector->_revisitStack));
5116 5117
5117 5118 // Rescan young gen roots first since these are likely
5118 5119 // coarsely partitioned and may, on that account, constitute
5119 5120 // the critical path; thus, it's best to start off that
5120 5121 // work first.
5121 5122 // ---------- young gen roots --------------
5122 5123 {
5123 5124 DefNewGeneration* dng = _collector->_young_gen->as_DefNewGeneration();
5124 5125 EdenSpace* eden_space = dng->eden();
5125 5126 ContiguousSpace* from_space = dng->from();
5126 5127 ContiguousSpace* to_space = dng->to();
5127 5128
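// The chunk arrays consulted below were filled in before this remark pause:
// the eden entries were sampled during the (abortable) preclean phase, and
// the survivor entries come from merging the per-thread PLAB arrays (see
// merge_survivor_plab_arrays()). They supply the task boundaries used by
// do_young_space_rescan() below.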
5128 5129 HeapWord** eca = _collector->_eden_chunk_array;
5129 5130 size_t ect = _collector->_eden_chunk_index;
5130 5131 HeapWord** sca = _collector->_survivor_chunk_array;
5131 5132 size_t sct = _collector->_survivor_chunk_index;
5132 5133
5133 5134 assert(ect <= _collector->_eden_chunk_capacity, "out of bounds");
5134 5135 assert(sct <= _collector->_survivor_chunk_capacity, "out of bounds");
5135 5136
5136 5137 do_young_space_rescan(i, &par_mrias_cl, to_space, NULL, 0);
5137 5138 do_young_space_rescan(i, &par_mrias_cl, from_space, sca, sct);
5138 5139 do_young_space_rescan(i, &par_mrias_cl, eden_space, eca, ect);
5139 5140
5140 5141 _timer.stop();
5141 5142 if (PrintCMSStatistics != 0) {
5142 5143 gclog_or_tty->print_cr(
5143 5144 "Finished young gen rescan work in %dth thread: %3.3f sec",
5144 5145 i, _timer.seconds());
5145 5146 }
5146 5147 }
5147 5148
5148 5149 // ---------- remaining roots --------------
5149 5150 _timer.reset();
5150 5151 _timer.start();
5151 5152 gch->gen_process_strong_roots(_collector->_cmsGen->level(),
5152 5153 false, // yg was scanned above
5153 5154 false, // this is parallel code
5154 5155 true, // collecting perm gen
5155 5156 SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
5156 5157 &par_mrias_cl,
5157 5158 true, // walk all of code cache if (so & SO_CodeCache)
5158 5159 NULL);
5159 5160 assert(_collector->should_unload_classes()
5160 5161 || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_CodeCache),
5161 5162 "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
5162 5163 _timer.stop();
5163 5164 if (PrintCMSStatistics != 0) {
5164 5165 gclog_or_tty->print_cr(
5165 5166 "Finished remaining root rescan work in %dth thread: %3.3f sec",
5166 5167 i, _timer.seconds());
5167 5168 }
5168 5169
5169 5170 // ---------- rescan dirty cards ------------
5170 5171 _timer.reset();
5171 5172 _timer.start();
5172 5173
5173 5174 // Do the rescan tasks for each of the two spaces
5174 5175 // (cms_space and perm_space) in turn.
5175 5176 // "i" is passed to select the "i-th" task_queue
5176 5177 do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl);
5177 5178 do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl);
5178 5179 _timer.stop();
5179 5180 if (PrintCMSStatistics != 0) {
5180 5181 gclog_or_tty->print_cr(
5181 5182 "Finished dirty card rescan work in %dth thread: %3.3f sec",
5182 5183 i, _timer.seconds());
5183 5184 }
5184 5185
5185 5186 // ---------- steal work from other threads ...
5186 5187 // ---------- ... and drain overflow list.
5187 5188 _timer.reset();
5188 5189 _timer.start();
5189 5190 do_work_steal(i, &par_mrias_cl, _collector->hash_seed(i));
5190 5191 _timer.stop();
5191 5192 if (PrintCMSStatistics != 0) {
5192 5193 gclog_or_tty->print_cr(
5193 5194 "Finished work stealing in %dth thread: %3.3f sec",
5194 5195 i, _timer.seconds());
5195 5196 }
5196 5197 }
5197 5198
5198 5199 // Note that parameter "i" is not used.
5199 5200 void
5200 5201 CMSParRemarkTask::do_young_space_rescan(int i,
5201 5202 Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space,
5202 5203 HeapWord** chunk_array, size_t chunk_top) {
5203 5204 // Until all tasks completed:
5204 5205 // . claim an unclaimed task
5205 5206 // . compute region boundaries corresponding to task claimed
5206 5207 // using chunk_array
5207 5208 // . par_oop_iterate(cl) over that region
5208 5209
5209 5210 ResourceMark rm;
5210 5211 HandleMark hm;
5211 5212
5212 5213 SequentialSubTasksDone* pst = space->par_seq_tasks();
5213 5214 assert(pst->valid(), "Uninitialized use?");
5214 5215
5215 5216 int nth_task = 0;
5216 5217 int n_tasks = pst->n_tasks();
5217 5218
5218 5219 HeapWord *start, *end;
5219 5220 while (!pst->is_task_claimed(/* reference */ nth_task)) {
5220 5221 // We claimed task # nth_task; compute its boundaries.
5221 5222 if (chunk_top == 0) { // no samples were taken
5222 5223 assert(nth_task == 0 && n_tasks == 1, "Can have only 1 EdenSpace task");
5223 5224 start = space->bottom();
5224 5225 end = space->top();
5225 5226 } else if (nth_task == 0) {
5226 5227 start = space->bottom();
5227 5228 end = chunk_array[nth_task];
5228 5229 } else if (nth_task < (jint)chunk_top) {
5229 5230 assert(nth_task >= 1, "Control point invariant");
5230 5231 start = chunk_array[nth_task - 1];
5231 5232 end = chunk_array[nth_task];
5232 5233 } else {
5233 5234 assert(nth_task == (jint)chunk_top, "Control point invariant");
5234 5235 start = chunk_array[chunk_top - 1];
5235 5236 end = space->top();
5236 5237 }
5237 5238 MemRegion mr(start, end);
5238 5239 // Verify that mr is in space
5239 5240 assert(mr.is_empty() || space->used_region().contains(mr),
5240 5241 "Should be in space");
5241 5242 // Verify that "start" is an object boundary
5242 5243 assert(mr.is_empty() || oop(mr.start())->is_oop(),
5243 5244 "Should be an oop");
5244 5245 space->par_oop_iterate(mr, cl);
5245 5246 }
5246 5247 pst->all_tasks_completed();
5247 5248 }
5248 5249
5249 5250 void
5250 5251 CMSParRemarkTask::do_dirty_card_rescan_tasks(
5251 5252 CompactibleFreeListSpace* sp, int i,
5252 5253 Par_MarkRefsIntoAndScanClosure* cl) {
5253 5254 // Until all tasks completed:
5254 5255 // . claim an unclaimed task
5255 5256 // . compute region boundaries corresponding to task claimed
5256 5257 // . transfer dirty bits ct->mut for that region
5257 5258 // . apply rescanclosure to dirty mut bits for that region
5258 5259
5259 5260 ResourceMark rm;
5260 5261 HandleMark hm;
5261 5262
5262 5263 OopTaskQueue* work_q = work_queue(i);
5263 5264 ModUnionClosure modUnionClosure(&(_collector->_modUnionTable));
5264 5265 // CAUTION! CAUTION! CAUTION! CAUTION! CAUTION! CAUTION! CAUTION!
5265 5266 // CAUTION: This closure has state that persists across calls to
5266 5267 // the work method dirty_range_iterate_clear() in that it has
5267 5268 // imbedded in it a (subtype of) UpwardsObjectClosure. The
5268 5269 // use of that state in the imbedded UpwardsObjectClosure instance
5269 5270 // assumes that the cards are always iterated (even if in parallel
5270 5271 // by several threads) in monotonically increasing order per each
5271 5272 // thread. This is true of the implementation below which picks
5272 5273 // card ranges (chunks) in monotonically increasing order globally
5273 5274 // and, a-fortiori, in monotonically increasing order per thread
5274 5275 // (the latter order being a subsequence of the former).
5275 5276 // If the work code below is ever reorganized into a more chaotic
5276 5277 // work-partitioning form than the current "sequential tasks"
5277 5278 // paradigm, the use of that persistent state will have to be
5278 5279 // revisited and modified appropriately. See also related
5279 5280     // bug 4756801, work on which should examine this code to make
5280 5281 // sure that the changes there do not run counter to the
5281 5282 // assumptions made here and necessary for correctness and
5282 5283 // efficiency. Note also that this code might yield inefficient
5283 5284 // behaviour in the case of very large objects that span one or
5284 5285 // more work chunks. Such objects would potentially be scanned
5285 5286 // several times redundantly. Work on 4756801 should try and
5286 5287 // address that performance anomaly if at all possible. XXX
5287 5288 MemRegion full_span = _collector->_span;
5288 5289 CMSBitMap* bm = &(_collector->_markBitMap); // shared
5289 5290 CMSMarkStack* rs = &(_collector->_revisitStack); // shared
5290 5291 MarkFromDirtyCardsClosure
5291 5292 greyRescanClosure(_collector, full_span, // entire span of interest
5292 5293 sp, bm, work_q, rs, cl);
5293 5294
5294 5295 SequentialSubTasksDone* pst = sp->conc_par_seq_tasks();
5295 5296 assert(pst->valid(), "Uninitialized use?");
5296 5297 int nth_task = 0;
5297 5298 const int alignment = CardTableModRefBS::card_size * BitsPerWord;
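// One word of the mod union table covers BitsPerWord cards, i.e.
// card_size * BitsPerWord bytes of heap; carving the space into chunks
// aligned at that granularity means no two workers ever share a MUT word,
// which is why no synchronization is needed when setting or clearing MUT
// bits (see the comments on the loop below).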
5298 5299 MemRegion span = sp->used_region();
5299 5300 HeapWord* start_addr = span.start();
5300 5301 HeapWord* end_addr = (HeapWord*)round_to((intptr_t)span.end(),
5301 5302 alignment);
5302 5303 const size_t chunk_size = sp->rescan_task_size(); // in HeapWord units
5303 5304 assert((HeapWord*)round_to((intptr_t)start_addr, alignment) ==
5304 5305 start_addr, "Check alignment");
5305 5306 assert((size_t)round_to((intptr_t)chunk_size, alignment) ==
5306 5307 chunk_size, "Check alignment");
5307 5308
5308 5309 while (!pst->is_task_claimed(/* reference */ nth_task)) {
5309 5310 // Having claimed the nth_task, compute corresponding mem-region,
5310 5311     // which is a-fortiori aligned correctly (i.e. at a MUT boundary).
5311 5312 // The alignment restriction ensures that we do not need any
5312 5313 // synchronization with other gang-workers while setting or
5313 5314     // clearing bits in this chunk of the MUT.
5314 5315 MemRegion this_span = MemRegion(start_addr + nth_task*chunk_size,
5315 5316 start_addr + (nth_task+1)*chunk_size);
5316 5317     // The last chunk's end might be way beyond the end of the
5317 5318 // used region. In that case pull back appropriately.
5318 5319 if (this_span.end() > end_addr) {
5319 5320 this_span.set_end(end_addr);
5320 5321 assert(!this_span.is_empty(), "Program logic (calculation of n_tasks)");
5321 5322 }
5322 5323 // Iterate over the dirty cards covering this chunk, marking them
5323 5324 // precleaned, and setting the corresponding bits in the mod union
5324 5325 // table. Since we have been careful to partition at Card and MUT-word
5325 5326 // boundaries no synchronization is needed between parallel threads.
5326 5327 _collector->_ct->ct_bs()->dirty_card_iterate(this_span,
5327 5328 &modUnionClosure);
5328 5329
5329 5330 // Having transferred these marks into the modUnionTable,
5330 5331 // rescan the marked objects on the dirty cards in the modUnionTable.
5331 5332 // Even if this is at a synchronous collection, the initial marking
5332 5333 // may have been done during an asynchronous collection so there
5333 5334 // may be dirty bits in the mod-union table.
5334 5335 _collector->_modUnionTable.dirty_range_iterate_clear(
5335 5336 this_span, &greyRescanClosure);
5336 5337 _collector->_modUnionTable.verifyNoOneBitsInRange(
5337 5338 this_span.start(),
5338 5339 this_span.end());
5339 5340 }
5340 5341   pst->all_tasks_completed();  // declare that I am done
5341 5342 }
5342 5343
5343 5344 // . see if we can share work_queues with ParNew? XXX
5344 5345 void
5345 5346 CMSParRemarkTask::do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl,
5346 5347 int* seed) {
5347 5348 OopTaskQueue* work_q = work_queue(i);
5348 5349 NOT_PRODUCT(int num_steals = 0;)
5349 5350 oop obj_to_scan;
5350 5351 CMSBitMap* bm = &(_collector->_markBitMap);
5351 5352
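// Work loop: drain our own queue first, then try to replenish it from the
// global overflow list, and only when both are empty attempt to steal from
// other workers' queues; exit once the terminator agrees that no work
// remains anywhere.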
5352 5353 while (true) {
5353 5354 // Completely finish any left over work from (an) earlier round(s)
5354 5355 cl->trim_queue(0);
5355 5356 size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
5356 5357 (size_t)ParGCDesiredObjsFromOverflowList);
5357 5358 // Now check if there's any work in the overflow list
5358 5359 // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
5359 5360 // only affects the number of attempts made to get work from the
5360 5361 // overflow list and does not affect the number of workers. Just
5361 5362 // pass ParallelGCThreads so this behavior is unchanged.
5362 5363 if (_collector->par_take_from_overflow_list(num_from_overflow_list,
5363 5364 work_q,
5364 5365 ParallelGCThreads)) {
5365 5366 // found something in global overflow list;
5366 5367 // not yet ready to go stealing work from others.
5367 5368 // We'd like to assert(work_q->size() != 0, ...)
5368 5369 // because we just took work from the overflow list,
5369 5370 // but of course we can't since all of that could have
5370 5371 // been already stolen from us.
5371 5372 // "He giveth and He taketh away."
5372 5373 continue;
5373 5374 }
5374 5375 // Verify that we have no work before we resort to stealing
5375 5376 assert(work_q->size() == 0, "Have work, shouldn't steal");
5376 5377 // Try to steal from other queues that have work
5377 5378 if (task_queues()->steal(i, seed, /* reference */ obj_to_scan)) {
5378 5379 NOT_PRODUCT(num_steals++;)
5379 5380 assert(obj_to_scan->is_oop(), "Oops, not an oop!");
5380 5381 assert(bm->isMarked((HeapWord*)obj_to_scan), "Stole an unmarked oop?");
5381 5382 // Do scanning work
5382 5383 obj_to_scan->oop_iterate(cl);
5383 5384 // Loop around, finish this work, and try to steal some more
5384 5385 } else if (terminator()->offer_termination()) {
5385 5386 break; // nirvana from the infinite cycle
5386 5387 }
5387 5388 }
5388 5389 NOT_PRODUCT(
5389 5390 if (PrintCMSStatistics != 0) {
5390 5391 gclog_or_tty->print("\n\t(%d: stole %d oops)", i, num_steals);
5391 5392 }
5392 5393 )
5393 5394 assert(work_q->size() == 0 && _collector->overflow_list_is_empty(),
5394 5395 "Else our work is not yet done");
5395 5396 }
5396 5397
5397 5398 // Return a thread-local PLAB recording array, as appropriate.
5398 5399 void* CMSCollector::get_data_recorder(int thr_num) {
5399 5400 if (_survivor_plab_array != NULL &&
5400 5401 (CMSPLABRecordAlways ||
5401 5402 (_collectorState > Marking && _collectorState < FinalMarking))) {
5402 5403 assert(thr_num < (int)ParallelGCThreads, "thr_num is out of bounds");
5403 5404 ChunkArray* ca = &_survivor_plab_array[thr_num];
5404 5405 ca->reset(); // clear it so that fresh data is recorded
5405 5406 return (void*) ca;
5406 5407 } else {
5407 5408 return NULL;
5408 5409 }
5409 5410 }
5410 5411
5411 5412 // Reset all the thread-local PLAB recording arrays
5412 5413 void CMSCollector::reset_survivor_plab_arrays() {
5413 5414 for (uint i = 0; i < ParallelGCThreads; i++) {
5414 5415 _survivor_plab_array[i].reset();
5415 5416 }
5416 5417 }
5417 5418
5418 5419 // Merge the per-thread plab arrays into the global survivor chunk
5419 5420 // array which will provide the partitioning of the survivor space
5420 5421 // for CMS rescan.
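// Each per-thread PLAB array is recorded in increasing address order, so this
// is effectively a k-way merge: each iteration selects the smallest
// unconsumed address across all threads, appends it to _survivor_chunk_array,
// and advances that thread's cursor.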
5421 5422 void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv,
5422 5423 int no_of_gc_threads) {
5423 5424 assert(_survivor_plab_array != NULL, "Error");
5424 5425 assert(_survivor_chunk_array != NULL, "Error");
5425 5426 assert(_collectorState == FinalMarking, "Error");
5426 5427 for (int j = 0; j < no_of_gc_threads; j++) {
5427 5428 _cursor[j] = 0;
5428 5429 }
5429 5430 HeapWord* top = surv->top();
5430 5431 size_t i;
5431 5432 for (i = 0; i < _survivor_chunk_capacity; i++) { // all sca entries
5432 5433 HeapWord* min_val = top; // Higher than any PLAB address
5433 5434 uint min_tid = 0; // position of min_val this round
5434 5435 for (int j = 0; j < no_of_gc_threads; j++) {
5435 5436 ChunkArray* cur_sca = &_survivor_plab_array[j];
5436 5437 if (_cursor[j] == cur_sca->end()) {
5437 5438 continue;
5438 5439 }
5439 5440 assert(_cursor[j] < cur_sca->end(), "ctl pt invariant");
5440 5441 HeapWord* cur_val = cur_sca->nth(_cursor[j]);
5441 5442 assert(surv->used_region().contains(cur_val), "Out of bounds value");
5442 5443 if (cur_val < min_val) {
5443 5444 min_tid = j;
5444 5445 min_val = cur_val;
5445 5446 } else {
5446 5447 assert(cur_val < top, "All recorded addresses should be less");
5447 5448 }
5448 5449 }
5449 5450 // At this point min_val and min_tid are respectively
5450 5451 // the least address in _survivor_plab_array[j]->nth(_cursor[j])
5451 5452 // and the thread (j) that witnesses that address.
5452 5453 // We record this address in the _survivor_chunk_array[i]
5453 5454 // and increment _cursor[min_tid] prior to the next round i.
5454 5455 if (min_val == top) {
5455 5456 break;
5456 5457 }
5457 5458 _survivor_chunk_array[i] = min_val;
5458 5459 _cursor[min_tid]++;
5459 5460 }
5460 5461 // We are all done; record the size of the _survivor_chunk_array
5461 5462 _survivor_chunk_index = i; // exclusive: [0, i)
5462 5463 if (PrintCMSStatistics > 0) {
5463 5464 gclog_or_tty->print(" (Survivor:" SIZE_FORMAT "chunks) ", i);
5464 5465 }
5465 5466 // Verify that we used up all the recorded entries
5466 5467 #ifdef ASSERT
5467 5468 size_t total = 0;
5468 5469 for (int j = 0; j < no_of_gc_threads; j++) {
5469 5470 assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant");
5470 5471 total += _cursor[j];
5471 5472 }
5472 5473 assert(total == _survivor_chunk_index, "Ctl Pt Invariant");
5473 5474 // Check that the merged array is in sorted order
5474 5475 if (total > 0) {
5475 5476 for (size_t i = 0; i < total - 1; i++) {
5476 5477 if (PrintCMSStatistics > 0) {
5477 5478 gclog_or_tty->print(" (chunk" SIZE_FORMAT ":" INTPTR_FORMAT ") ",
5478 5479 i, _survivor_chunk_array[i]);
5479 5480 }
5480 5481 assert(_survivor_chunk_array[i] < _survivor_chunk_array[i+1],
5481 5482 "Not sorted");
5482 5483 }
5483 5484 }
5484 5485 #endif // ASSERT
5485 5486 }
5486 5487
5487 5488 // Set up the space's par_seq_tasks structure for work claiming
5488 5489 // for parallel rescan of young gen.
5489 5490 // See CMSParRemarkTask where this is currently used.
5490 5491 void
5491 5492 CMSCollector::
5492 5493 initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
5493 5494 assert(n_threads > 0, "Unexpected n_threads argument");
5494 5495 DefNewGeneration* dng = (DefNewGeneration*)_young_gen;
5495 5496
5496 5497 // Eden space
5497 5498 {
5498 5499 SequentialSubTasksDone* pst = dng->eden()->par_seq_tasks();
5499 5500 assert(!pst->valid(), "Clobbering existing data?");
5500 5501 // Each valid entry in [0, _eden_chunk_index) represents a task.
5501 5502 size_t n_tasks = _eden_chunk_index + 1;
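// The "+ 1" covers the tail region between the last recorded sample and
// eden's top() (or all of eden when no samples were taken), which
// do_young_space_rescan() handles as the final task.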
5502 5503 assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error");
5503 5504 // Sets the condition for completion of the subtask (how many threads
5504 5505 // need to finish in order to be done).
5505 5506 pst->set_n_threads(n_threads);
5506 5507 pst->set_n_tasks((int)n_tasks);
5507 5508 }
5508 5509
5509 5510 // Merge the survivor plab arrays into _survivor_chunk_array
5510 5511 if (_survivor_plab_array != NULL) {
5511 5512 merge_survivor_plab_arrays(dng->from(), n_threads);
5512 5513 } else {
5513 5514 assert(_survivor_chunk_index == 0, "Error");
5514 5515 }
5515 5516
5516 5517 // To space
5517 5518 {
5518 5519 SequentialSubTasksDone* pst = dng->to()->par_seq_tasks();
5519 5520 assert(!pst->valid(), "Clobbering existing data?");
5520 5521 // Sets the condition for completion of the subtask (how many threads
5521 5522 // need to finish in order to be done).
5522 5523 pst->set_n_threads(n_threads);
5523 5524 pst->set_n_tasks(1);
5524 5525 assert(pst->valid(), "Error");
5525 5526 }
5526 5527
5527 5528 // From space
5528 5529 {
5529 5530 SequentialSubTasksDone* pst = dng->from()->par_seq_tasks();
5530 5531 assert(!pst->valid(), "Clobbering existing data?");
5531 5532 size_t n_tasks = _survivor_chunk_index + 1;
5532 5533 assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error");
5533 5534 // Sets the condition for completion of the subtask (how many threads
5534 5535 // need to finish in order to be done).
5535 5536 pst->set_n_threads(n_threads);
5536 5537 pst->set_n_tasks((int)n_tasks);
5537 5538 assert(pst->valid(), "Error");
5538 5539 }
5539 5540 }
5540 5541
5541 5542 // Parallel version of remark
5542 5543 void CMSCollector::do_remark_parallel() {
5543 5544 GenCollectedHeap* gch = GenCollectedHeap::heap();
5544 5545 FlexibleWorkGang* workers = gch->workers();
5545 5546 assert(workers != NULL, "Need parallel worker threads.");
5546 5547 int n_workers = workers->total_workers();
5547 5548 CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace();
5548 5549 CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();
5549 5550
5550 5551 CMSParRemarkTask tsk(this,
5551 5552 cms_space, perm_space,
5552 5553 n_workers, workers, task_queues());
5553 5554
5554 5555 // Set up for parallel process_strong_roots work.
5555 5556 gch->set_par_threads(n_workers);
5556 5557 // We won't be iterating over the cards in the card table updating
5557 5558   // the younger_gen cards, so we shouldn't call the following, else
5558 5559 // the verification code as well as subsequent younger_refs_iterate
5559 5560 // code would get confused. XXX
5560 5561 // gch->rem_set()->prepare_for_younger_refs_iterate(true); // parallel
5561 5562
5562 5563 // The young gen rescan work will not be done as part of
5563 5564   // process_strong_roots (which currently doesn't know how to
5564 5565 // parallelize such a scan), but rather will be broken up into
5565 5566 // a set of parallel tasks (via the sampling that the [abortable]
5566 5567 // preclean phase did of EdenSpace, plus the [two] tasks of
5567 5568   // scanning the [two] survivor spaces). Further fine-grain
5568 5569 // parallelization of the scanning of the survivor spaces
5569 5570 // themselves, and of precleaning of the younger gen itself
5570 5571 // is deferred to the future.
5571 5572 initialize_sequential_subtasks_for_young_gen_rescan(n_workers);
5572 5573
5573 5574 // The dirty card rescan work is broken up into a "sequence"
5574 5575 // of parallel tasks (per constituent space) that are dynamically
5575 5576 // claimed by the parallel threads.
5576 5577 cms_space->initialize_sequential_subtasks_for_rescan(n_workers);
5577 5578 perm_space->initialize_sequential_subtasks_for_rescan(n_workers);
5578 5579
5579 5580 // It turns out that even when we're using 1 thread, doing the work in a
5580 5581 // separate thread causes wide variance in run times. We can't help this
5581 5582 // in the multi-threaded case, but we special-case n=1 here to get
5582 5583 // repeatable measurements of the 1-thread overhead of the parallel code.
5583 5584 if (n_workers > 1) {
5584 5585 // Make refs discovery MT-safe, if it isn't already: it may not
5585 5586 // necessarily be so, since it's possible that we are doing
5586 5587 // ST marking.
5587 5588 ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), true);
5588 5589 GenCollectedHeap::StrongRootsScope srs(gch);
5589 5590 workers->run_task(&tsk);
5590 5591 } else {
5591 5592 GenCollectedHeap::StrongRootsScope srs(gch);
5592 5593 tsk.work(0);
5593 5594 }
5594 5595 gch->set_par_threads(0); // 0 ==> non-parallel.
5595 5596 // restore, single-threaded for now, any preserved marks
5596 5597 // as a result of work_q overflow
5597 5598 restore_preserved_marks_if_any();
5598 5599 }
5599 5600
5600 5601 // Non-parallel version of remark
5601 5602 void CMSCollector::do_remark_non_parallel() {
5602 5603 ResourceMark rm;
5603 5604 HandleMark hm;
5604 5605 GenCollectedHeap* gch = GenCollectedHeap::heap();
5605 5606 MarkRefsIntoAndScanClosure
5606 5607 mrias_cl(_span, ref_processor(), &_markBitMap, &_modUnionTable,
5607 5608 &_markStack, &_revisitStack, this,
5608 5609 false /* should_yield */, false /* not precleaning */);
5609 5610 MarkFromDirtyCardsClosure
5610 5611 markFromDirtyCardsClosure(this, _span,
5611 5612 NULL, // space is set further below
5612 5613 &_markBitMap, &_markStack, &_revisitStack,
5613 5614 &mrias_cl);
5614 5615 {
5615 5616 TraceTime t("grey object rescan", PrintGCDetails, false, gclog_or_tty);
5616 5617 // Iterate over the dirty cards, setting the corresponding bits in the
5617 5618 // mod union table.
5618 5619 {
5619 5620 ModUnionClosure modUnionClosure(&_modUnionTable);
5620 5621 _ct->ct_bs()->dirty_card_iterate(
5621 5622 _cmsGen->used_region(),
5622 5623 &modUnionClosure);
5623 5624 _ct->ct_bs()->dirty_card_iterate(
5624 5625 _permGen->used_region(),
5625 5626 &modUnionClosure);
5626 5627 }
5627 5628 // Having transferred these marks into the modUnionTable, we just need
5628 5629 // to rescan the marked objects on the dirty cards in the modUnionTable.
5629 5630 // The initial marking may have been done during an asynchronous
5630 5631 // collection so there may be dirty bits in the mod-union table.
5631 5632 const int alignment =
5632 5633 CardTableModRefBS::card_size * BitsPerWord;
5633 5634 {
5634 5635 // ... First handle dirty cards in CMS gen
5635 5636 markFromDirtyCardsClosure.set_space(_cmsGen->cmsSpace());
5636 5637 MemRegion ur = _cmsGen->used_region();
5637 5638 HeapWord* lb = ur.start();
5638 5639 HeapWord* ub = (HeapWord*)round_to((intptr_t)ur.end(), alignment);
5639 5640 MemRegion cms_span(lb, ub);
5640 5641 _modUnionTable.dirty_range_iterate_clear(cms_span,
5641 5642 &markFromDirtyCardsClosure);
5642 5643 verify_work_stacks_empty();
5643 5644 if (PrintCMSStatistics != 0) {
5644 5645 gclog_or_tty->print(" (re-scanned "SIZE_FORMAT" dirty cards in cms gen) ",
5645 5646 markFromDirtyCardsClosure.num_dirty_cards());
5646 5647 }
5647 5648 }
5648 5649 {
5649 5650 // .. and then repeat for dirty cards in perm gen
5650 5651 markFromDirtyCardsClosure.set_space(_permGen->cmsSpace());
5651 5652 MemRegion ur = _permGen->used_region();
5652 5653 HeapWord* lb = ur.start();
5653 5654 HeapWord* ub = (HeapWord*)round_to((intptr_t)ur.end(), alignment);
5654 5655 MemRegion perm_span(lb, ub);
5655 5656 _modUnionTable.dirty_range_iterate_clear(perm_span,
5656 5657 &markFromDirtyCardsClosure);
5657 5658 verify_work_stacks_empty();
5658 5659 if (PrintCMSStatistics != 0) {
5659 5660 gclog_or_tty->print(" (re-scanned "SIZE_FORMAT" dirty cards in perm gen) ",
5660 5661 markFromDirtyCardsClosure.num_dirty_cards());
5661 5662 }
5662 5663 }
5663 5664 }
5664 5665 if (VerifyDuringGC &&
5665 5666 GenCollectedHeap::heap()->total_collections() >= VerifyGCStartAt) {
5666 5667 HandleMark hm; // Discard invalid handles created during verification
5667 5668 Universe::verify(true);
5668 5669 }
5669 5670 {
5670 5671 TraceTime t("root rescan", PrintGCDetails, false, gclog_or_tty);
5671 5672
5672 5673 verify_work_stacks_empty();
5673 5674
5674 5675 gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
5675 5676 GenCollectedHeap::StrongRootsScope srs(gch);
5676 5677 gch->gen_process_strong_roots(_cmsGen->level(),
5677 5678 true, // younger gens as roots
5678 5679 false, // use the local StrongRootsScope
5679 5680 true, // collecting perm gen
5680 5681 SharedHeap::ScanningOption(roots_scanning_options()),
5681 5682 &mrias_cl,
5682 5683 true, // walk code active on stacks
5683 5684 NULL);
5684 5685 assert(should_unload_classes()
5685 5686 || (roots_scanning_options() & SharedHeap::SO_CodeCache),
5686 5687 "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
5687 5688 }
5688 5689 verify_work_stacks_empty();
5689 5690 // Restore evacuated mark words, if any, used for overflow list links
5690 5691 if (!CMSOverflowEarlyRestoration) {
5691 5692 restore_preserved_marks_if_any();
5692 5693 }
5693 5694 verify_overflow_empty();
5694 5695 }
5695 5696
5696 5697 ////////////////////////////////////////////////////////
5697 5698 // Parallel Reference Processing Task Proxy Class
5698 5699 ////////////////////////////////////////////////////////
5699 5700 class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues {
5700 5701 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
5701 5702 CMSCollector* _collector;
5702 5703 CMSBitMap* _mark_bit_map;
5703 5704 const MemRegion _span;
5704 5705 ProcessTask& _task;
5705 5706
5706 5707 public:
5707 5708 CMSRefProcTaskProxy(ProcessTask& task,
5708 5709 CMSCollector* collector,
5709 5710 const MemRegion& span,
5710 5711 CMSBitMap* mark_bit_map,
5711 5712 AbstractWorkGang* workers,
5712 5713 OopTaskQueueSet* task_queues):
5713 5714 // XXX Should superclass AGTWOQ also know about AWG since it knows
5714 5715 // about the task_queues used by the AWG? Then it could initialize
5715 5716 // the terminator() object. See 6984287. The set_for_termination()
5716 5717 // below is a temporary band-aid for the regression in 6984287.
5717 5718 AbstractGangTaskWOopQueues("Process referents by policy in parallel",
5718 5719 task_queues),
5719 5720 _task(task),
5720 5721 _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
5721 5722 {
5722 5723 assert(_collector->_span.equals(_span) && !_span.is_empty(),
5723 5724 "Inconsistency in _span");
5724 5725 set_for_termination(workers->active_workers());
5725 5726 }
5726 5727
5727 5728 OopTaskQueueSet* task_queues() { return queues(); }
5728 5729
5729 5730 OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
5730 5731
5731 5732 void do_work_steal(int i,
5732 5733 CMSParDrainMarkingStackClosure* drain,
5733 5734 CMSParKeepAliveClosure* keep_alive,
5734 5735 int* seed);
5735 5736
5736 5737 virtual void work(int i);
5737 5738 };
5738 5739
5739 5740 void CMSRefProcTaskProxy::work(int i) {
5740 5741 assert(_collector->_span.equals(_span), "Inconsistency in _span");
5741 5742 CMSParKeepAliveClosure par_keep_alive(_collector, _span,
5742 5743 _mark_bit_map,
5743 5744 &_collector->_revisitStack,
5744 5745 work_queue(i));
5745 5746 CMSParDrainMarkingStackClosure par_drain_stack(_collector, _span,
5746 5747 _mark_bit_map,
5747 5748 &_collector->_revisitStack,
5748 5749 work_queue(i));
5749 5750 CMSIsAliveClosure is_alive_closure(_span, _mark_bit_map);
5750 5751 _task.work(i, is_alive_closure, par_keep_alive, par_drain_stack);
5751 5752 if (_task.marks_oops_alive()) {
5752 5753 do_work_steal(i, &par_drain_stack, &par_keep_alive,
5753 5754 _collector->hash_seed(i));
5754 5755 }
5755 5756 assert(work_queue(i)->size() == 0, "work_queue should be empty");
5756 5757 assert(_collector->_overflow_list == NULL, "non-empty _overflow_list");
5757 5758 }
5758 5759
5759 5760 class CMSRefEnqueueTaskProxy: public AbstractGangTask {
5760 5761 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
5761 5762 EnqueueTask& _task;
5762 5763
5763 5764 public:
5764 5765 CMSRefEnqueueTaskProxy(EnqueueTask& task)
5765 5766 : AbstractGangTask("Enqueue reference objects in parallel"),
5766 5767 _task(task)
5767 5768 { }
5768 5769
5769 5770 virtual void work(int i)
5770 5771 {
5771 5772 _task.work(i);
5772 5773 }
5773 5774 };
5774 5775
5775 5776 CMSParKeepAliveClosure::CMSParKeepAliveClosure(CMSCollector* collector,
5776 5777 MemRegion span, CMSBitMap* bit_map, CMSMarkStack* revisit_stack,
5777 5778 OopTaskQueue* work_queue):
5778 5779 Par_KlassRememberingOopClosure(collector, NULL, revisit_stack),
5779 5780 _span(span),
5780 5781 _bit_map(bit_map),
5781 5782 _work_queue(work_queue),
5782 5783 _mark_and_push(collector, span, bit_map, revisit_stack, work_queue),
5783 5784 _low_water_mark(MIN2((uint)(work_queue->max_elems()/4),
5784 5785 (uint)(CMSWorkQueueDrainThreshold * ParallelGCThreads)))
5785 5786 { }
5786 5787
5787 5788 // . see if we can share work_queues with ParNew? XXX
5788 5789 void CMSRefProcTaskProxy::do_work_steal(int i,
5789 5790 CMSParDrainMarkingStackClosure* drain,
5790 5791 CMSParKeepAliveClosure* keep_alive,
5791 5792 int* seed) {
5792 5793 OopTaskQueue* work_q = work_queue(i);
5793 5794 NOT_PRODUCT(int num_steals = 0;)
5794 5795 oop obj_to_scan;
5795 5796
5796 5797 while (true) {
5797 5798 // Completely finish any left over work from (an) earlier round(s)
5798 5799 drain->trim_queue(0);
5799 5800 size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
5800 5801 (size_t)ParGCDesiredObjsFromOverflowList);
5801 5802 // Now check if there's any work in the overflow list
5802 5803 // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
5803 5804 // only affects the number of attempts made to get work from the
5804 5805 // overflow list and does not affect the number of workers. Just
5805 5806 // pass ParallelGCThreads so this behavior is unchanged.
5806 5807 if (_collector->par_take_from_overflow_list(num_from_overflow_list,
5807 5808 work_q,
5808 5809 ParallelGCThreads)) {
5809 5810 // Found something in global overflow list;
5810 5811 // not yet ready to go stealing work from others.
5811 5812 // We'd like to assert(work_q->size() != 0, ...)
5812 5813 // because we just took work from the overflow list,
5813 5814 // but of course we can't, since all of that might have
5814 5815 // been already stolen from us.
5815 5816 continue;
5816 5817 }
5817 5818 // Verify that we have no work before we resort to stealing
5818 5819 assert(work_q->size() == 0, "Have work, shouldn't steal");
5819 5820 // Try to steal from other queues that have work
5820 5821 if (task_queues()->steal(i, seed, /* reference */ obj_to_scan)) {
5821 5822 NOT_PRODUCT(num_steals++;)
5822 5823 assert(obj_to_scan->is_oop(), "Oops, not an oop!");
5823 5824 assert(_mark_bit_map->isMarked((HeapWord*)obj_to_scan), "Stole an unmarked oop?");
5824 5825 // Do scanning work
5825 5826 obj_to_scan->oop_iterate(keep_alive);
5826 5827 // Loop around, finish this work, and try to steal some more
5827 5828 } else if (terminator()->offer_termination()) {
5828 5829 break; // nirvana from the infinite cycle
5829 5830 }
5830 5831 }
5831 5832 NOT_PRODUCT(
5832 5833 if (PrintCMSStatistics != 0) {
5833 5834 gclog_or_tty->print("\n\t(%d: stole %d oops)", i, num_steals);
5834 5835 }
5835 5836 )
5836 5837 }
5837 5838
5838 5839 void CMSRefProcTaskExecutor::execute(ProcessTask& task)
5839 5840 {
5840 5841 GenCollectedHeap* gch = GenCollectedHeap::heap();
5841 5842 FlexibleWorkGang* workers = gch->workers();
5842 5843 assert(workers != NULL, "Need parallel worker threads.");
5843 5844 CMSRefProcTaskProxy rp_task(task, &_collector,
5844 5845 _collector.ref_processor()->span(),
5845 5846 _collector.markBitMap(),
5846 5847 workers, _collector.task_queues());
5847 5848 workers->run_task(&rp_task);
5848 5849 }
5849 5850
5850 5851 void CMSRefProcTaskExecutor::execute(EnqueueTask& task)
5851 5852 {
5852 5853
5853 5854 GenCollectedHeap* gch = GenCollectedHeap::heap();
5854 5855 FlexibleWorkGang* workers = gch->workers();
5855 5856 assert(workers != NULL, "Need parallel worker threads.");
5856 5857 CMSRefEnqueueTaskProxy enq_task(task);
5857 5858 workers->run_task(&enq_task);
5858 5859 }
5859 5860
5860 5861 void CMSCollector::refProcessingWork(bool asynch, bool clear_all_soft_refs) {
5861 5862
5862 5863 ResourceMark rm;
5863 5864 HandleMark hm;
5864 5865
5865 5866 ReferenceProcessor* rp = ref_processor();
5866 5867 assert(rp->span().equals(_span), "Spans should be equal");
5867 5868 assert(!rp->enqueuing_is_done(), "Enqueuing should not be complete");
5868 5869 // Process weak references.
5869 5870 rp->setup_policy(clear_all_soft_refs);
5870 5871 verify_work_stacks_empty();
5871 5872
5872 5873 CMSKeepAliveClosure cmsKeepAliveClosure(this, _span, &_markBitMap,
5873 5874 &_markStack, &_revisitStack,
5874 5875 false /* !preclean */);
5875 5876 CMSDrainMarkingStackClosure cmsDrainMarkingStackClosure(this,
5876 5877 _span, &_markBitMap, &_markStack,
5877 5878 &cmsKeepAliveClosure, false /* !preclean */);
5878 5879 {
5879 5880 TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty);
5880 5881 if (rp->processing_is_mt()) {
5881 5882 // Set the degree of MT here. If the discovery is done MT, there
5882 5883 // may have been a different number of threads doing the discovery
5883 5884 // and a different number of discovered lists may have Ref objects.
5884 5885 // That is OK as long as the Reference lists are balanced (see
5885 5886 // balance_all_queues() and balance_queues()).
5886 5887
5887 5888 rp->set_active_mt_degree(ParallelGCThreads);
5888 5889 CMSRefProcTaskExecutor task_executor(*this);
5889 5890 rp->process_discovered_references(&_is_alive_closure,
5890 5891 &cmsKeepAliveClosure,
5891 5892 &cmsDrainMarkingStackClosure,
5892 5893 &task_executor);
5893 5894 } else {
5894 5895 rp->process_discovered_references(&_is_alive_closure,
5895 5896 &cmsKeepAliveClosure,
5896 5897 &cmsDrainMarkingStackClosure,
5897 5898 NULL);
5898 5899 }
5899 5900 verify_work_stacks_empty();
5900 5901 }
5901 5902
5902 5903 if (should_unload_classes()) {
5903 5904 {
5904 5905 TraceTime t("class unloading", PrintGCDetails, false, gclog_or_tty);
5905 5906
5906 5907 // Follow SystemDictionary roots and unload classes
5907 5908 bool purged_class = SystemDictionary::do_unloading(&_is_alive_closure);
5908 5909
5909 5910 // Follow CodeCache roots and unload any methods marked for unloading
5910 5911 CodeCache::do_unloading(&_is_alive_closure,
5911 5912 &cmsKeepAliveClosure,
5912 5913 purged_class);
5913 5914
5914 5915 cmsDrainMarkingStackClosure.do_void();
5915 5916 verify_work_stacks_empty();
5916 5917
5917 5918 // Update subklass/sibling/implementor links in KlassKlass descendants
5918 5919 assert(!_revisitStack.isEmpty(), "revisit stack should not be empty");
5919 5920 oop k;
5920 5921 while ((k = _revisitStack.pop()) != NULL) {
5921 5922 ((Klass*)(oopDesc*)k)->follow_weak_klass_links(
5922 5923 &_is_alive_closure,
5923 5924 &cmsKeepAliveClosure);
5924 5925 }
5925 5926 assert(!ClassUnloading ||
5926 5927 (_markStack.isEmpty() && overflow_list_is_empty()),
5927 5928 "Should not have found new reachable objects");
5928 5929 assert(_revisitStack.isEmpty(), "revisit stack should have been drained");
5929 5930 cmsDrainMarkingStackClosure.do_void();
5930 5931 verify_work_stacks_empty();
5931 5932 }
5932 5933
5933 5934 {
5934 5935 TraceTime t("scrub symbol table", PrintGCDetails, false, gclog_or_tty);
5935 5936 // Clean up unreferenced symbols in symbol table.
5936 5937 SymbolTable::unlink();
5937 5938 }
5938 5939 }
5939 5940
5940 5941 if (should_unload_classes() || !JavaObjectsInPerm) {
5941 5942 TraceTime t("scrub string table", PrintGCDetails, false, gclog_or_tty);
5942 5943 // Now clean up stale oops in StringTable
5943 5944 StringTable::unlink(&_is_alive_closure);
5944 5945 }
5945 5946
5946 5947 verify_work_stacks_empty();
5947 5948 // Restore any preserved marks as a result of mark stack or
5948 5949 // work queue overflow
5949 5950 restore_preserved_marks_if_any(); // done single-threaded for now
5950 5951
5951 5952 rp->set_enqueuing_is_done(true);
5952 5953 if (rp->processing_is_mt()) {
5953 5954 rp->balance_all_queues();
5954 5955 CMSRefProcTaskExecutor task_executor(*this);
5955 5956 rp->enqueue_discovered_references(&task_executor);
5956 5957 } else {
5957 5958 rp->enqueue_discovered_references(NULL);
5958 5959 }
5959 5960 rp->verify_no_references_recorded();
5960 5961 assert(!rp->discovery_enabled(), "should have been disabled");
5961 5962 }
5962 5963
5963 5964 #ifndef PRODUCT
5964 5965 void CMSCollector::check_correct_thread_executing() {
5965 5966 Thread* t = Thread::current();
5966 5967 // Only the VM thread or the CMS thread should be here.
5967 5968 assert(t->is_ConcurrentGC_thread() || t->is_VM_thread(),
5968 5969 "Unexpected thread type");
5969 5970   // If this is the VM thread, the foreground collector
5970 5971 // should not be waiting. Note that _foregroundGCIsActive is
5971 5972 // true while the foreground collector is waiting.
5972 5973 if (_foregroundGCShouldWait) {
5973 5974 // We cannot be the VM thread
5974 5975 assert(t->is_ConcurrentGC_thread(),
5975 5976 "Should be CMS thread");
5976 5977 } else {
5977 5978 // We can be the CMS thread only if we are in a stop-world
5978 5979 // phase of CMS collection.
5979 5980 if (t->is_ConcurrentGC_thread()) {
5980 5981 assert(_collectorState == InitialMarking ||
5981 5982 _collectorState == FinalMarking,
5982 5983 "Should be a stop-world phase");
5983 5984 // The CMS thread should be holding the CMS_token.
5984 5985 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
5985 5986 "Potential interference with concurrently "
5986 5987 "executing VM thread");
5987 5988 }
5988 5989 }
5989 5990 }
5990 5991 #endif
5991 5992
5992 5993 void CMSCollector::sweep(bool asynch) {
5993 5994 assert(_collectorState == Sweeping, "just checking");
5994 5995 check_correct_thread_executing();
5995 5996 verify_work_stacks_empty();
5996 5997 verify_overflow_empty();
5997 5998 increment_sweep_count();
5998 5999   TraceCMSMemoryManagerStats tms(_collectorState, GenCollectedHeap::heap()->gc_cause());
5999 6000
6000 6001 _inter_sweep_timer.stop();
6001 6002 _inter_sweep_estimate.sample(_inter_sweep_timer.seconds());
6002 6003 size_policy()->avg_cms_free_at_sweep()->sample(_cmsGen->free());
6003 6004
6004 6005 // PermGen verification support: If perm gen sweeping is disabled in
6005 6006 // this cycle, we preserve the perm gen object "deadness" information
6006 6007 // in the perm_gen_verify_bit_map. In order to do that we traverse
6007 6008 // all blocks in perm gen and mark all dead objects.
6008 6009 if (verifying() && !should_unload_classes()) {
6009 6010 assert(perm_gen_verify_bit_map()->sizeInBits() != 0,
6010 6011 "Should have already been allocated");
6011 6012 MarkDeadObjectsClosure mdo(this, _permGen->cmsSpace(),
6012 6013 markBitMap(), perm_gen_verify_bit_map());
6013 6014 if (asynch) {
6014 6015 CMSTokenSyncWithLocks ts(true, _permGen->freelistLock(),
6015 6016 bitMapLock());
6016 6017 _permGen->cmsSpace()->blk_iterate(&mdo);
6017 6018 } else {
6018 6019 // In the case of synchronous sweep, we already have
6019 6020 // the requisite locks/tokens.
6020 6021 _permGen->cmsSpace()->blk_iterate(&mdo);
6021 6022 }
6022 6023 }
6023 6024
6024 6025 assert(!_intra_sweep_timer.is_active(), "Should not be active");
6025 6026 _intra_sweep_timer.reset();
6026 6027 _intra_sweep_timer.start();
6027 6028 if (asynch) {
6028 6029 TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
6029 6030 CMSPhaseAccounting pa(this, "sweep", !PrintGCDetails);
6030 6031 // First sweep the old gen then the perm gen
6031 6032 {
6032 6033 CMSTokenSyncWithLocks ts(true, _cmsGen->freelistLock(),
6033 6034 bitMapLock());
6034 6035 sweepWork(_cmsGen, asynch);
6035 6036 }
6036 6037
6037 6038 // Now repeat for perm gen
6038 6039 if (should_unload_classes()) {
6039 6040 CMSTokenSyncWithLocks ts(true, _permGen->freelistLock(),
6040 6041 bitMapLock());
6041 6042 sweepWork(_permGen, asynch);
6042 6043 }
6043 6044
6044 6045 // Update Universe::_heap_*_at_gc figures.
6045 6046 // We need all the free list locks to make the abstract state
6046 6047     // transition from Sweeping to Resizing. See detailed note
6047 6048 // further below.
6048 6049 {
6049 6050 CMSTokenSyncWithLocks ts(true, _cmsGen->freelistLock(),
6050 6051 _permGen->freelistLock());
6051 6052 // Update heap occupancy information which is used as
6052 6053 // input to soft ref clearing policy at the next gc.
6053 6054 Universe::update_heap_info_at_gc();
6054 6055 _collectorState = Resizing;
6055 6056 }
6056 6057 } else {
6057 6058 // already have needed locks
6058 6059 sweepWork(_cmsGen, asynch);
6059 6060
6060 6061 if (should_unload_classes()) {
6061 6062 sweepWork(_permGen, asynch);
6062 6063 }
6063 6064 // Update heap occupancy information which is used as
6064 6065 // input to soft ref clearing policy at the next gc.
6065 6066 Universe::update_heap_info_at_gc();
6066 6067 _collectorState = Resizing;
6067 6068 }
6068 6069 verify_work_stacks_empty();
6069 6070 verify_overflow_empty();
6070 6071
6071 6072 _intra_sweep_timer.stop();
6072 6073 _intra_sweep_estimate.sample(_intra_sweep_timer.seconds());
6073 6074
6074 6075 _inter_sweep_timer.reset();
6075 6076 _inter_sweep_timer.start();
6076 6077
6077 6078 update_time_of_last_gc(os::javaTimeMillis());
6078 6079
6079 6080 // NOTE on abstract state transitions:
6080 6081 // Mutators allocate-live and/or mark the mod-union table dirty
6081 6082 // based on the state of the collection. The former is done in
6082 6083 // the interval [Marking, Sweeping] and the latter in the interval
6083 6084 // [Marking, Sweeping). Thus the transitions into the Marking state
6084 6085 // and out of the Sweeping state must be synchronously visible
6085 6086 // globally to the mutators.
6086 6087 // The transition into the Marking state happens with the world
6087 6088 // stopped so the mutators will globally see it. Sweeping is
6088 6089 // done asynchronously by the background collector so the transition
6089 6090 // from the Sweeping state to the Resizing state must be done
6090 6091 // under the freelistLock (as is the check for whether to
6091 6092 // allocate-live and whether to dirty the mod-union table).
6092 6093 assert(_collectorState == Resizing, "Change of collector state to"
6093 6094 " Resizing must be done under the freelistLocks (plural)");
6094 6095
6095 6096 // Now that sweeping has been completed, we clear
6096 6097 // the incremental_collection_failed flag,
6097 6098 // thus inviting a younger gen collection to promote into
6098 6099 // this generation. If such a promotion may still fail,
6099 6100 // the flag will be set again when a young collection is
6100 6101 // attempted.
6101 6102 GenCollectedHeap* gch = GenCollectedHeap::heap();
6102 6103 gch->clear_incremental_collection_failed(); // Worth retrying as fresh space may have been freed up
6103 6104 gch->update_full_collections_completed(_collection_count_start);
6104 6105 }
6105 6106
6106 6107 // FIX ME!!! Looks like this belongs in CFLSpace, with
6107 6108 // CMSGen merely delegating to it.
6108 6109 void ConcurrentMarkSweepGeneration::setNearLargestChunk() {
6109 6110 double nearLargestPercent = FLSLargestBlockCoalesceProximity;
6110 6111 HeapWord* minAddr = _cmsSpace->bottom();
6111 6112 HeapWord* largestAddr =
6112 6113 (HeapWord*) _cmsSpace->dictionary()->findLargestDict();
6113 6114 if (largestAddr == NULL) {
6114 6115 // The dictionary appears to be empty. In this case
6115 6116 // try to coalesce at the end of the heap.
6116 6117 largestAddr = _cmsSpace->end();
6117 6118 }
6118 6119 size_t largestOffset = pointer_delta(largestAddr, minAddr);
6119 6120 size_t nearLargestOffset =
6120 6121 (size_t)((double)largestOffset * nearLargestPercent) - MinChunkSize;
6121 6122 if (PrintFLSStatistics != 0) {
6122 6123 gclog_or_tty->print_cr(
6123 6124 "CMS: Large Block: " PTR_FORMAT ";"
6124 6125 " Proximity: " PTR_FORMAT " -> " PTR_FORMAT,
6125 6126 largestAddr,
6126 6127 _cmsSpace->nearLargestChunk(), minAddr + nearLargestOffset);
6127 6128 }
6128 6129 _cmsSpace->set_nearLargestChunk(minAddr + nearLargestOffset);
6129 6130 }
6130 6131
6131 6132 bool ConcurrentMarkSweepGeneration::isNearLargestChunk(HeapWord* addr) {
6132 6133 return addr >= _cmsSpace->nearLargestChunk();
6133 6134 }
6134 6135
6135 6136 FreeChunk* ConcurrentMarkSweepGeneration::find_chunk_at_end() {
6136 6137 return _cmsSpace->find_chunk_at_end();
6137 6138 }
6138 6139
6139 6140 void ConcurrentMarkSweepGeneration::update_gc_stats(int current_level,
6140 6141 bool full) {
6141 6142 // The next lower level has been collected. Gather any statistics
6142 6143 // that are of interest at this point.
6143 6144 if (!full && (current_level + 1) == level()) {
6144 6145 // Gather statistics on the young generation collection.
6145 6146 collector()->stats().record_gc0_end(used());
6146 6147 }
6147 6148 }
6148 6149
6149 6150 CMSAdaptiveSizePolicy* ConcurrentMarkSweepGeneration::size_policy() {
6150 6151 GenCollectedHeap* gch = GenCollectedHeap::heap();
6151 6152 assert(gch->kind() == CollectedHeap::GenCollectedHeap,
6152 6153 "Wrong type of heap");
6153 6154 CMSAdaptiveSizePolicy* sp = (CMSAdaptiveSizePolicy*)
6154 6155 gch->gen_policy()->size_policy();
6155 6156 assert(sp->is_gc_cms_adaptive_size_policy(),
6156 6157 "Wrong type of size policy");
6157 6158 return sp;
6158 6159 }
6159 6160
6160 6161 void ConcurrentMarkSweepGeneration::rotate_debug_collection_type() {
6161 6162 if (PrintGCDetails && Verbose) {
6162 6163 gclog_or_tty->print("Rotate from %d ", _debug_collection_type);
6163 6164 }
6164 6165 _debug_collection_type = (CollectionTypes) (_debug_collection_type + 1);
6165 6166 _debug_collection_type =
6166 6167 (CollectionTypes) (_debug_collection_type % Unknown_collection_type);
6167 6168 if (PrintGCDetails && Verbose) {
6168 6169 gclog_or_tty->print_cr("to %d ", _debug_collection_type);
6169 6170 }
6170 6171 }
6171 6172
6172 6173 void CMSCollector::sweepWork(ConcurrentMarkSweepGeneration* gen,
6173 6174 bool asynch) {
6174 6175 // We iterate over the space(s) underlying this generation,
6175 6176 // checking the mark bit map to see if the bits corresponding
6176 6177 // to specific blocks are marked or not. Blocks that are
6177 6178 // marked are live and are not swept up. All remaining blocks
6178 6179 // are swept up, with coalescing on-the-fly as we sweep up
6179 6180 // contiguous free and/or garbage blocks:
6180 6181 // We need to ensure that the sweeper synchronizes with allocators
6181 6182 // and stop-the-world collectors. In particular, the following
6182 6183 // locks are used:
6183 6184 // . CMS token: if this is held, a stop the world collection cannot occur
6184 6185 // . freelistLock: if this is held no allocation can occur from this
6185 6186 // generation by another thread
6186 6187   // . bitMapLock: if this is held, no other thread can access or update
6187 6188   //     the marking bit map
6188 6189
6189 6190 // Note that we need to hold the freelistLock if we use
6190 6191 // block iterate below; else the iterator might go awry if
6191 6192 // a mutator (or promotion) causes block contents to change
6192 6193 // (for instance if the allocator divvies up a block).
6193 6194 // If we hold the free list lock, for all practical purposes
6194 6195 // young generation GC's can't occur (they'll usually need to
6195 6196 // promote), so we might as well prevent all young generation
6196 6197 // GC's while we do a sweeping step. For the same reason, we might
6197 6198 // as well take the bit map lock for the entire duration
6198 6199
6199 6200 // check that we hold the requisite locks
6200 6201 assert(have_cms_token(), "Should hold cms token");
6201 6202 assert( (asynch && ConcurrentMarkSweepThread::cms_thread_has_cms_token())
6202 6203 || (!asynch && ConcurrentMarkSweepThread::vm_thread_has_cms_token()),
6203 6204 "Should possess CMS token to sweep");
6204 6205 assert_lock_strong(gen->freelistLock());
6205 6206 assert_lock_strong(bitMapLock());
6206 6207
6207 6208 assert(!_inter_sweep_timer.is_active(), "Was switched off in an outer context");
6208 6209 assert(_intra_sweep_timer.is_active(), "Was switched on in an outer context");
6209 6210 gen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
6210 6211 _inter_sweep_estimate.padded_average(),
6211 6212 _intra_sweep_estimate.padded_average());
6212 6213 gen->setNearLargestChunk();
6213 6214
6214 6215 {
6215 6216 SweepClosure sweepClosure(this, gen, &_markBitMap,
6216 6217 CMSYield && asynch);
6217 6218 gen->cmsSpace()->blk_iterate_careful(&sweepClosure);
6218 6219 // We need to free-up/coalesce garbage/blocks from a
6219 6220 // co-terminal free run. This is done in the SweepClosure
6220 6221 // destructor; so, do not remove this scope, else the
6221 6222 // end-of-sweep-census below will be off by a little bit.
6222 6223 }
6223 6224 gen->cmsSpace()->sweep_completed();
6224 6225 gen->cmsSpace()->endSweepFLCensus(sweep_count());
6225 6226 if (should_unload_classes()) { // unloaded classes this cycle,
6226 6227 _concurrent_cycles_since_last_unload = 0; // ... reset count
6227 6228 } else { // did not unload classes,
6228 6229 _concurrent_cycles_since_last_unload++; // ... increment count
6229 6230 }
6230 6231 }
6231 6232
6232 6233 // Reset CMS data structures (for now just the marking bit map)
6233 6234 // preparatory for the next cycle.
6234 6235 void CMSCollector::reset(bool asynch) {
6235 6236 GenCollectedHeap* gch = GenCollectedHeap::heap();
6236 6237 CMSAdaptiveSizePolicy* sp = size_policy();
6237 6238 AdaptiveSizePolicyOutput(sp, gch->total_collections());
6238 6239 if (asynch) {
6239 6240 CMSTokenSyncWithLocks ts(true, bitMapLock());
6240 6241
6241 6242 // If the state is not "Resetting", the foreground thread
6242 6243 // has done a collection and the resetting.
6243 6244 if (_collectorState != Resetting) {
6244 6245 assert(_collectorState == Idling, "The state should only change"
6245 6246 " because the foreground collector has finished the collection");
6246 6247 return;
6247 6248 }
6248 6249
6249 6250 // Clear the mark bitmap (no grey objects to start with)
6250 6251 // for the next cycle.
6251 6252 TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
6252 6253 CMSPhaseAccounting cmspa(this, "reset", !PrintGCDetails);
6253 6254
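// Clear the bitmap in CMSBitMapYieldQuantum-sized chunks rather than all at
// once, so that the bitmap lock can be dropped and the CMS thread can yield
// between chunks (see the yield logic at the bottom of the loop).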
6254 6255 HeapWord* curAddr = _markBitMap.startWord();
6255 6256 while (curAddr < _markBitMap.endWord()) {
6256 6257 size_t remaining = pointer_delta(_markBitMap.endWord(), curAddr);
6257 6258 MemRegion chunk(curAddr, MIN2(CMSBitMapYieldQuantum, remaining));
6258 6259 _markBitMap.clear_large_range(chunk);
6259 6260 if (ConcurrentMarkSweepThread::should_yield() &&
6260 6261 !foregroundGCIsActive() &&
6261 6262 CMSYield) {
6262 6263 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
6263 6264 "CMS thread should hold CMS token");
6264 6265 assert_lock_strong(bitMapLock());
6265 6266 bitMapLock()->unlock();
6266 6267 ConcurrentMarkSweepThread::desynchronize(true);
6267 6268 ConcurrentMarkSweepThread::acknowledge_yield_request();
6268 6269 stopTimer();
6269 6270 if (PrintCMSStatistics != 0) {
6270 6271 incrementYields();
6271 6272 }
6272 6273 icms_wait();
6273 6274
6274 6275 // See the comment in coordinator_yield()
6275 6276 for (unsigned i = 0; i < CMSYieldSleepCount &&
6276 6277 ConcurrentMarkSweepThread::should_yield() &&
6277 6278 !CMSCollector::foregroundGCIsActive(); ++i) {
6278 6279 os::sleep(Thread::current(), 1, false);
6279 6280 ConcurrentMarkSweepThread::acknowledge_yield_request();
6280 6281 }
6281 6282
6282 6283 ConcurrentMarkSweepThread::synchronize(true);
6283 6284 bitMapLock()->lock_without_safepoint_check();
6284 6285 startTimer();
6285 6286 }
6286 6287 curAddr = chunk.end();
6287 6288 }
6288 6289 // A successful mostly concurrent collection has been done.
6289 6290 // Because only the full (i.e., concurrent mode failure) collections
6290 6291 // are being measured for gc overhead limits, clean the "near" flag
6291 6292 // and count.
6292 6293 sp->reset_gc_overhead_limit_count();
6293 6294 _collectorState = Idling;
6294 6295 } else {
6295 6296 // already have the lock
6296 6297 assert(_collectorState == Resetting, "just checking");
6297 6298 assert_lock_strong(bitMapLock());
6298 6299 _markBitMap.clear_all();
6299 6300 _collectorState = Idling;
6300 6301 }
6301 6302
6302 6303 // Stop incremental mode after a cycle completes, so that any future cycles
6303 6304 // are triggered by allocation.
6304 6305 stop_icms();
6305 6306
6306 6307 NOT_PRODUCT(
6307 6308 if (RotateCMSCollectionTypes) {
6308 6309 _cmsGen->rotate_debug_collection_type();
6309 6310 }
6310 6311 )
6311 6312 }
6312 6313
6313 6314 void CMSCollector::do_CMS_operation(CMS_op_type op) {
6314 6315 gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
6315 6316 TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
6316 6317 TraceTime t("GC", PrintGC, !PrintGCDetails, gclog_or_tty);
6317 6318 TraceCollectorStats tcs(counters());
6318 6319
6319 6320 switch (op) {
6320 6321 case CMS_op_checkpointRootsInitial: {
6321 6322 SvcGCMarker sgcm(SvcGCMarker::OTHER);
6322 6323 checkpointRootsInitial(true); // asynch
6323 6324 if (PrintGC) {
6324 6325 _cmsGen->printOccupancy("initial-mark");
6325 6326 }
6326 6327 break;
6327 6328 }
6328 6329 case CMS_op_checkpointRootsFinal: {
6329 6330 SvcGCMarker sgcm(SvcGCMarker::OTHER);
6330 6331 checkpointRootsFinal(true, // asynch
6331 6332 false, // !clear_all_soft_refs
6332 6333 false); // !init_mark_was_synchronous
6333 6334 if (PrintGC) {
6334 6335 _cmsGen->printOccupancy("remark");
6335 6336 }
6336 6337 break;
6337 6338 }
6338 6339 default:
6339 6340 fatal("No such CMS_op");
6340 6341 }
6341 6342 }
6342 6343
6343 6344 #ifndef PRODUCT
6344 6345 size_t const CMSCollector::skip_header_HeapWords() {
6345 6346 return FreeChunk::header_size();
6346 6347 }
6347 6348
6348 6349 // Try and collect here conditions that should hold when
6349 6350 // CMS thread is exiting. The idea is that the foreground GC
6350 6351 // thread should not be blocked if it wants to terminate
6351 6352 // the CMS thread and yet continue to run the VM for a while
6352 6353 // after that.
6353 6354 void CMSCollector::verify_ok_to_terminate() const {
6354 6355 assert(Thread::current()->is_ConcurrentGC_thread(),
6355 6356 "should be called by CMS thread");
6356 6357 assert(!_foregroundGCShouldWait, "should be false");
6357 6358 // We could check here that all the various low-level locks
6358 6359 // are not held by the CMS thread, but that is overkill; see
6359 6360 // also CMSThread::verify_ok_to_terminate() where the CGC_lock
6360 6361 // is checked.
6361 6362 }
6362 6363 #endif
6363 6364
6364 6365 size_t CMSCollector::block_size_using_printezis_bits(HeapWord* addr) const {
6365 6366 assert(_markBitMap.isMarked(addr) && _markBitMap.isMarked(addr + 1),
6366 6367 "missing Printezis mark?");
6367 6368 HeapWord* nextOneAddr = _markBitMap.getNextMarkedWordAddress(addr + 2);
6368 6369 size_t size = pointer_delta(nextOneAddr + 1, addr);
6369 6370 assert(size == CompactibleFreeListSpace::adjustObjectSize(size),
6370 6371 "alignment problem");
6371 6372 assert(size >= 3, "Necessary for Printezis marks to work");
6372 6373 return size;
6373 6374 }
6374 6375
6375 6376 // A variant of the above (block_size_using_printezis_bits()) except
6376 6377 // that we return 0 if the P-bits are not yet set.
6377 6378 size_t CMSCollector::block_size_if_printezis_bits(HeapWord* addr) const {
6378 6379 if (_markBitMap.isMarked(addr + 1)) {
6379 6380 assert(_markBitMap.isMarked(addr), "P-bit can be set only for marked objects");
6380 6381 HeapWord* nextOneAddr = _markBitMap.getNextMarkedWordAddress(addr + 2);
6381 6382 size_t size = pointer_delta(nextOneAddr + 1, addr);
6382 6383 assert(size == CompactibleFreeListSpace::adjustObjectSize(size),
6383 6384 "alignment problem");
6384 6385 assert(size >= 3, "Necessary for Printezis marks to work");
6385 6386 return size;
6386 6387 }
6387 6388 return 0;
6388 6389 }
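
// ---------------------------------------------------------------------
// Editorial sketch, not part of this webrev: how the two "Printezis
// bits" used above encode the size of a block whose klass pointer has
// not yet been installed. A plain bool array stands in for CMSBitMap;
// every name below is hypothetical, and the real code works in
// HeapWords and additionally asserts size >= 3.
#include <cassert>
#include <cstddef>

static size_t next_marked(const bool* bm, size_t from, size_t limit) {
  for (size_t i = from; i < limit; i++) if (bm[i]) return i;
  return limit;
}

static size_t block_size_from_p_bits(const bool* bm, size_t addr, size_t limit) {
  // Convention: bits at addr and addr+1 are set, and so is the bit at
  // the last word of the block (addr + size - 1).
  assert(bm[addr] && bm[addr + 1]);
  size_t last = next_marked(bm, addr + 2, limit);
  return last + 1 - addr;            // mirrors pointer_delta(nextOneAddr + 1, addr)
}

int main() {
  bool bm[16] = {};
  bm[4] = bm[5] = bm[9] = true;      // object at word 4, six words long
  assert(block_size_from_p_bits(bm, 4, 16) == 6);
  return 0;
}
// ---------------------------------------------------------------------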
6389 6390
6390 6391 HeapWord* CMSCollector::next_card_start_after_block(HeapWord* addr) const {
6391 6392 size_t sz = 0;
6392 6393 oop p = (oop)addr;
6393 6394 if (p->klass_or_null() != NULL && p->is_parsable()) {
6394 6395 sz = CompactibleFreeListSpace::adjustObjectSize(p->size());
6395 6396 } else {
6396 6397 sz = block_size_using_printezis_bits(addr);
6397 6398 }
6398 6399 assert(sz > 0, "size must be nonzero");
6399 6400 HeapWord* next_block = addr + sz;
6400 6401 HeapWord* next_card = (HeapWord*)round_to((uintptr_t)next_block,
6401 6402 CardTableModRefBS::card_size);
6402 6403 assert(round_down((uintptr_t)addr, CardTableModRefBS::card_size) <
6403 6404 round_down((uintptr_t)next_card, CardTableModRefBS::card_size),
6404 6405 "must be different cards");
6405 6406 return next_card;
6406 6407 }
6407 6408
6408 6409
6409 6410 // CMS Bit Map Wrapper /////////////////////////////////////////
6410 6411
6411 6412 // Construct a CMS bit map infrastructure, but don't create the
6412 6413 // bit vector itself. That is done by a separate call CMSBitMap::allocate()
6413 6414 // further below.
6414 6415 CMSBitMap::CMSBitMap(int shifter, int mutex_rank, const char* mutex_name):
6415 6416 _bm(),
6416 6417 _shifter(shifter),
6417 6418 _lock(mutex_rank >= 0 ? new Mutex(mutex_rank, mutex_name, true) : NULL)
6418 6419 {
6419 6420 _bmStartWord = 0;
6420 6421 _bmWordSize = 0;
6421 6422 }
6422 6423
6423 6424 bool CMSBitMap::allocate(MemRegion mr) {
6424 6425 _bmStartWord = mr.start();
6425 6426 _bmWordSize = mr.word_size();
6426 6427 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
6427 6428 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
6428 6429 if (!brs.is_reserved()) {
6429 6430 warning("CMS bit map allocation failure");
6430 6431 return false;
6431 6432 }
6432 6433 // For now we'll just commit all of the bit map up front.
6433 6434 // Later on we'll try to be more parsimonious with swap.
6434 6435 if (!_virtual_space.initialize(brs, brs.size())) {
6435 6436 warning("CMS bit map backing store failure");
6436 6437 return false;
6437 6438 }
6438 6439 assert(_virtual_space.committed_size() == brs.size(),
6439 6440 "didn't reserve backing store for all of CMS bit map?");
6440 6441 _bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
6441 6442 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
6442 6443 _bmWordSize, "inconsistency in bit map sizing");
6443 6444 _bm.set_size(_bmWordSize >> _shifter);
6444 6445
6445 6446 // bm.clear(); // can we rely on getting zero'd memory? verify below
6446 6447 assert(isAllClear(),
6447 6448 "Expected zero'd memory from ReservedSpace constructor");
6448 6449 assert(_bm.size() == heapWordDiffToOffsetDiff(sizeInWords()),
6449 6450 "consistency check");
6450 6451 return true;
6451 6452 }
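
// ---------------------------------------------------------------------
// Editorial sketch, not part of this webrev: the backing-store sizing
// used in CMSBitMap::allocate() above, worked through for a 1 GiB CMS
// generation on a 64-bit VM. The concrete numbers (shifter == 0,
// 8-byte HeapWords) are assumptions for illustration only.
#include <cstdio>
#include <cstddef>

int main() {
  const size_t gen_bytes         = 1UL << 30;                   // 1 GiB generation
  const size_t bytes_per_word    = 8;                           // HeapWord on 64-bit
  const size_t bm_word_size      = gen_bytes / bytes_per_word;  // 2^27 words
  const int    shifter           = 0;                           // one bit per word
  const int    log_bits_per_byte = 3;
  // Mirrors (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1, before
  // allocation_align_size_up rounds it up to an allocation granule.
  size_t bm_bytes = (bm_word_size >> (shifter + log_bits_per_byte)) + 1;
  printf("bit map needs ~%zu bytes (~%zu MiB)\n", bm_bytes, bm_bytes >> 20);
  return 0;
}
// ---------------------------------------------------------------------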
6452 6453
6453 6454 void CMSBitMap::dirty_range_iterate_clear(MemRegion mr, MemRegionClosure* cl) {
6454 6455 HeapWord *next_addr, *end_addr, *last_addr;
6455 6456 assert_locked();
6456 6457 assert(covers(mr), "out-of-range error");
6457 6458 // XXX assert that start and end are appropriately aligned
6458 6459 for (next_addr = mr.start(), end_addr = mr.end();
6459 6460 next_addr < end_addr; next_addr = last_addr) {
6460 6461 MemRegion dirty_region = getAndClearMarkedRegion(next_addr, end_addr);
6461 6462 last_addr = dirty_region.end();
6462 6463 if (!dirty_region.is_empty()) {
6463 6464 cl->do_MemRegion(dirty_region);
6464 6465 } else {
6465 6466 assert(last_addr == end_addr, "program logic");
6466 6467 return;
6467 6468 }
6468 6469 }
6469 6470 }
6470 6471
6471 6472 #ifndef PRODUCT
6472 6473 void CMSBitMap::assert_locked() const {
6473 6474 CMSLockVerifier::assert_locked(lock());
6474 6475 }
6475 6476
6476 6477 bool CMSBitMap::covers(MemRegion mr) const {
6477 6478 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
6478 6479 assert((size_t)_bm.size() == (_bmWordSize >> _shifter),
6479 6480 "size inconsistency");
6480 6481 return (mr.start() >= _bmStartWord) &&
6481 6482 (mr.end() <= endWord());
6482 6483 }
6483 6484
6484 6485 bool CMSBitMap::covers(HeapWord* start, size_t size) const {
6485 6486 return (start >= _bmStartWord && (start + size) <= endWord());
6486 6487 }
6487 6488
6488 6489 void CMSBitMap::verifyNoOneBitsInRange(HeapWord* left, HeapWord* right) {
6489 6490 // verify that there are no 1 bits in the interval [left, right)
6490 6491 FalseBitMapClosure falseBitMapClosure;
6491 6492 iterate(&falseBitMapClosure, left, right);
6492 6493 }
6493 6494
6494 6495 void CMSBitMap::region_invariant(MemRegion mr)
6495 6496 {
6496 6497 assert_locked();
6497 6498 // mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
6498 6499 assert(!mr.is_empty(), "unexpected empty region");
6499 6500 assert(covers(mr), "mr should be covered by bit map");
6500 6501 // convert address range into offset range
6501 6502 size_t start_ofs = heapWordToOffset(mr.start());
6502 6503 // Make sure that end() is appropriately aligned
6503 6504 assert(mr.end() == (HeapWord*)round_to((intptr_t)mr.end(),
6504 6505 (1 << (_shifter+LogHeapWordSize))),
6505 6506 "Misaligned mr.end()");
6506 6507 size_t end_ofs = heapWordToOffset(mr.end());
6507 6508 assert(end_ofs > start_ofs, "Should mark at least one bit");
6508 6509 }
6509 6510
6510 6511 #endif
6511 6512
6512 6513 bool CMSMarkStack::allocate(size_t size) {
6513 6514 // allocate a stack of the requisite depth
6514 6515 ReservedSpace rs(ReservedSpace::allocation_align_size_up(
6515 6516 size * sizeof(oop)));
6516 6517 if (!rs.is_reserved()) {
6517 6518 warning("CMSMarkStack allocation failure");
6518 6519 return false;
6519 6520 }
6520 6521 if (!_virtual_space.initialize(rs, rs.size())) {
6521 6522 warning("CMSMarkStack backing store failure");
6522 6523 return false;
6523 6524 }
6524 6525 assert(_virtual_space.committed_size() == rs.size(),
6525 6526 "didn't reserve backing store for all of CMS stack?");
6526 6527 _base = (oop*)(_virtual_space.low());
6527 6528 _index = 0;
6528 6529 _capacity = size;
6529 6530 NOT_PRODUCT(_max_depth = 0);
6530 6531 return true;
6531 6532 }
6532 6533
6533 6534 // XXX FIX ME !!! In the MT case we come in here holding a
6534 6535 // leaf lock. For printing we need to take a further lock
6535 6536 // which has lower rank. We need to recalibrate the two
6536 6537 // lock-ranks involved in order to be able to print the
6537 6538 // messages below. (Or defer the printing to the caller.
6538 6539 // For now we take the expedient path of just disabling the
6539 6540 // messages for the problematic case.)
6540 6541 void CMSMarkStack::expand() {
6541 6542 assert(_capacity <= MarkStackSizeMax, "stack bigger than permitted");
6542 6543 if (_capacity == MarkStackSizeMax) {
6543 6544 if (_hit_limit++ == 0 && !CMSConcurrentMTEnabled && PrintGCDetails) {
6544 6545 // We print a warning message only once per CMS cycle.
6545 6546 gclog_or_tty->print_cr(" (benign) Hit CMSMarkStack max size limit");
6546 6547 }
6547 6548 return;
6548 6549 }
6549 6550 // Double capacity if possible
6550 6551 size_t new_capacity = MIN2(_capacity*2, MarkStackSizeMax);
6551 6552 // Do not give up existing stack until we have managed to
6552 6553 // get the double capacity that we desired.
6553 6554 ReservedSpace rs(ReservedSpace::allocation_align_size_up(
6554 6555 new_capacity * sizeof(oop)));
6555 6556 if (rs.is_reserved()) {
6556 6557 // Release the backing store associated with old stack
6557 6558 _virtual_space.release();
6558 6559 // Reinitialize virtual space for new stack
6559 6560 if (!_virtual_space.initialize(rs, rs.size())) {
6560 6561 fatal("Not enough swap for expanded marking stack");
6561 6562 }
6562 6563 _base = (oop*)(_virtual_space.low());
6563 6564 _index = 0;
6564 6565 _capacity = new_capacity;
6565 6566 } else if (_failed_double++ == 0 && !CMSConcurrentMTEnabled && PrintGCDetails) {
6566 6567 // Failed to double capacity, continue;
6567 6568 // we print a detail message only once per CMS cycle.
6568 6569 gclog_or_tty->print(" (benign) Failed to expand marking stack from "SIZE_FORMAT"K to "
6569 6570 SIZE_FORMAT"K",
6570 6571 _capacity / K, new_capacity / K);
6571 6572 }
6572 6573 }
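
// ---------------------------------------------------------------------
// Editorial sketch, not part of this webrev: the expansion policy of
// CMSMarkStack::expand() above in miniature -- double the capacity but
// never beyond a fixed maximum, and keep the old stack when the larger
// reservation cannot be obtained. Types and numbers are made up.
#include <algorithm>
#include <cstddef>
#include <cstdio>

struct ToyMarkStack {
  size_t capacity;
  size_t max_capacity;
  void expand() {
    if (capacity == max_capacity) return;              // already at the limit
    size_t new_capacity = std::min(capacity * 2, max_capacity);
    // The real code commits to new_capacity only after the new backing
    // store has been reserved; here the reservation is assumed to succeed.
    capacity = new_capacity;
  }
};

int main() {
  ToyMarkStack s = { 32 * 1024, 4 * 1024 * 1024 };
  for (int i = 0; i < 10; i++) s.expand();
  printf("capacity = %zu entries\n", s.capacity);      // capped at the maximum
  return 0;
}
// ---------------------------------------------------------------------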
6573 6574
6574 6575
6575 6576 // Closures
6576 6577 // XXX: there seems to be a lot of code duplication here;
6577 6578 // should refactor and consolidate common code.
6578 6579
6579 6580 // This closure is used to mark refs into the CMS generation in
6580 6581 // the CMS bit map. Called at the first checkpoint. This closure
6581 6582 // assumes that we do not need to re-mark dirty cards; if the CMS
6582 6583 // generation on which this is used is not an oldest (modulo perm gen)
6583 6584 // generation then this will lose younger_gen cards!
6584 6585
6585 6586 MarkRefsIntoClosure::MarkRefsIntoClosure(
6586 6587 MemRegion span, CMSBitMap* bitMap):
6587 6588 _span(span),
6588 6589 _bitMap(bitMap)
6589 6590 {
6590 6591 assert(_ref_processor == NULL, "deliberately left NULL");
6591 6592 assert(_bitMap->covers(_span), "_bitMap/_span mismatch");
6592 6593 }
6593 6594
6594 6595 void MarkRefsIntoClosure::do_oop(oop obj) {
6595 6596 // if p points into _span, then mark corresponding bit in _markBitMap
6596 6597 assert(obj->is_oop(), "expected an oop");
6597 6598 HeapWord* addr = (HeapWord*)obj;
6598 6599 if (_span.contains(addr)) {
6599 6600 // this should be made more efficient
6600 6601 _bitMap->mark(addr);
6601 6602 }
6602 6603 }
6603 6604
6604 6605 void MarkRefsIntoClosure::do_oop(oop* p) { MarkRefsIntoClosure::do_oop_work(p); }
6605 6606 void MarkRefsIntoClosure::do_oop(narrowOop* p) { MarkRefsIntoClosure::do_oop_work(p); }
6606 6607
6607 6608 // A variant of the above, used for CMS marking verification.
6608 6609 MarkRefsIntoVerifyClosure::MarkRefsIntoVerifyClosure(
6609 6610 MemRegion span, CMSBitMap* verification_bm, CMSBitMap* cms_bm):
6610 6611 _span(span),
6611 6612 _verification_bm(verification_bm),
6612 6613 _cms_bm(cms_bm)
6613 6614 {
6614 6615 assert(_ref_processor == NULL, "deliberately left NULL");
6615 6616 assert(_verification_bm->covers(_span), "_verification_bm/_span mismatch");
6616 6617 }
6617 6618
6618 6619 void MarkRefsIntoVerifyClosure::do_oop(oop obj) {
6619 6620 // if p points into _span, then mark corresponding bit in _markBitMap
6620 6621 assert(obj->is_oop(), "expected an oop");
6621 6622 HeapWord* addr = (HeapWord*)obj;
6622 6623 if (_span.contains(addr)) {
6623 6624 _verification_bm->mark(addr);
6624 6625 if (!_cms_bm->isMarked(addr)) {
6625 6626 oop(addr)->print();
6626 6627 gclog_or_tty->print_cr(" (" INTPTR_FORMAT " should have been marked)", addr);
6627 6628 fatal("... aborting");
6628 6629 }
6629 6630 }
6630 6631 }
6631 6632
6632 6633 void MarkRefsIntoVerifyClosure::do_oop(oop* p) { MarkRefsIntoVerifyClosure::do_oop_work(p); }
6633 6634 void MarkRefsIntoVerifyClosure::do_oop(narrowOop* p) { MarkRefsIntoVerifyClosure::do_oop_work(p); }
6634 6635
6635 6636 //////////////////////////////////////////////////
6636 6637 // MarkRefsIntoAndScanClosure
6637 6638 //////////////////////////////////////////////////
6638 6639
6639 6640 MarkRefsIntoAndScanClosure::MarkRefsIntoAndScanClosure(MemRegion span,
6640 6641 ReferenceProcessor* rp,
6641 6642 CMSBitMap* bit_map,
6642 6643 CMSBitMap* mod_union_table,
6643 6644 CMSMarkStack* mark_stack,
6644 6645 CMSMarkStack* revisit_stack,
6645 6646 CMSCollector* collector,
6646 6647 bool should_yield,
6647 6648 bool concurrent_precleaning):
6648 6649 _collector(collector),
6649 6650 _span(span),
6650 6651 _bit_map(bit_map),
6651 6652 _mark_stack(mark_stack),
6652 6653 _pushAndMarkClosure(collector, span, rp, bit_map, mod_union_table,
6653 6654 mark_stack, revisit_stack, concurrent_precleaning),
6654 6655 _yield(should_yield),
6655 6656 _concurrent_precleaning(concurrent_precleaning),
6656 6657 _freelistLock(NULL)
6657 6658 {
6658 6659 _ref_processor = rp;
6659 6660 assert(_ref_processor != NULL, "_ref_processor shouldn't be NULL");
6660 6661 }
6661 6662
6662 6663 // This closure is used to mark refs into the CMS generation at the
6663 6664 // second (final) checkpoint, and to scan and transitively follow
6664 6665 // the unmarked oops. It is also used during the concurrent precleaning
6665 6666 // phase while scanning objects on dirty cards in the CMS generation.
6666 6667 // The marks are made in the marking bit map and the marking stack is
6667 6668 // used for keeping the (newly) grey objects during the scan.
6668 6669 // The parallel version (Par_...) appears further below.
6669 6670 void MarkRefsIntoAndScanClosure::do_oop(oop obj) {
6670 6671 if (obj != NULL) {
6671 6672 assert(obj->is_oop(), "expected an oop");
6672 6673 HeapWord* addr = (HeapWord*)obj;
6673 6674 assert(_mark_stack->isEmpty(), "pre-condition (eager drainage)");
6674 6675 assert(_collector->overflow_list_is_empty(),
6675 6676 "overflow list should be empty");
6676 6677 if (_span.contains(addr) &&
6677 6678 !_bit_map->isMarked(addr)) {
6678 6679 // mark bit map (object is now grey)
6679 6680 _bit_map->mark(addr);
6680 6681 // push on marking stack (stack should be empty), and drain the
6681 6682 // stack by applying this closure to the oops in the oops popped
6682 6683 // from the stack (i.e. blacken the grey objects)
6683 6684 bool res = _mark_stack->push(obj);
6684 6685 assert(res, "Should have space to push on empty stack");
6685 6686 do {
6686 6687 oop new_oop = _mark_stack->pop();
6687 6688 assert(new_oop != NULL && new_oop->is_oop(), "Expected an oop");
6688 6689 assert(new_oop->is_parsable(), "Found unparsable oop");
6689 6690 assert(_bit_map->isMarked((HeapWord*)new_oop),
6690 6691 "only grey objects on this stack");
6691 6692 // iterate over the oops in this oop, marking and pushing
6692 6693 // the ones in CMS heap (i.e. in _span).
6693 6694 new_oop->oop_iterate(&_pushAndMarkClosure);
6694 6695 // check if it's time to yield
6695 6696 do_yield_check();
6696 6697 } while (!_mark_stack->isEmpty() ||
6697 6698 (!_concurrent_precleaning && take_from_overflow_list()));
6698 6699 // if marking stack is empty, and we are not doing this
6699 6700 // during precleaning, then check the overflow list
6700 6701 }
6701 6702 assert(_mark_stack->isEmpty(), "post-condition (eager drainage)");
6702 6703 assert(_collector->overflow_list_is_empty(),
6703 6704 "overflow list was drained above");
6704 6705 // We could restore evacuated mark words, if any, used for
6705 6706 // overflow list links here because the overflow list is
6706 6707 // provably empty here. That would reduce the maximum
6707 6708 // size requirements for preserved_{oop,mark}_stack.
6708 6709 // But we'll just postpone it until we are all done
6709 6710 // so we can just stream through.
6710 6711 if (!_concurrent_precleaning && CMSOverflowEarlyRestoration) {
6711 6712 _collector->restore_preserved_marks_if_any();
6712 6713 assert(_collector->no_preserved_marks(), "No preserved marks");
6713 6714 }
6714 6715 assert(!CMSOverflowEarlyRestoration || _collector->no_preserved_marks(),
6715 6716 "All preserved marks should have been restored above");
6716 6717 }
6717 6718 }
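
// ---------------------------------------------------------------------
// Editorial sketch, not part of this webrev: the grey-object draining
// loop above in miniature -- mark the root grey, push it, then
// repeatedly pop a grey object and scan its fields, greying any
// unmarked targets, until the stack is empty. Types are made up; the
// real closure also consults the overflow list outside precleaning.
#include <cstddef>
#include <stack>
#include <unordered_set>
#include <vector>

struct Node { std::vector<Node*> refs; };

static void mark_and_drain(Node* root, std::unordered_set<Node*>& marked) {
  if (root == NULL || !marked.insert(root).second) return;   // already marked
  std::stack<Node*> grey;
  grey.push(root);
  while (!grey.empty()) {                  // drain eagerly, as the closure does
    Node* obj = grey.top(); grey.pop();    // scanning obj blackens it
    for (size_t i = 0; i < obj->refs.size(); i++) {
      Node* ref = obj->refs[i];
      if (ref != NULL && marked.insert(ref).second) {
        grey.push(ref);                    // newly grey object
      }
    }
  }
}

int main() {
  Node a, b, c;
  a.refs.push_back(&b); a.refs.push_back(&c);
  b.refs.push_back(&c);
  std::unordered_set<Node*> marked;
  mark_and_drain(&a, marked);
  return marked.size() == 3 ? 0 : 1;
}
// ---------------------------------------------------------------------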
6718 6719
6719 6720 void MarkRefsIntoAndScanClosure::do_oop(oop* p) { MarkRefsIntoAndScanClosure::do_oop_work(p); }
6720 6721 void MarkRefsIntoAndScanClosure::do_oop(narrowOop* p) { MarkRefsIntoAndScanClosure::do_oop_work(p); }
6721 6722
6722 6723 void MarkRefsIntoAndScanClosure::do_yield_work() {
6723 6724 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
6724 6725 "CMS thread should hold CMS token");
6725 6726 assert_lock_strong(_freelistLock);
6726 6727 assert_lock_strong(_bit_map->lock());
6727 6728 // relinquish the free_list_lock and bitMaplock()
6728 6729 DEBUG_ONLY(RememberKlassesChecker mux(false);)
6729 6730 _bit_map->lock()->unlock();
6730 6731 _freelistLock->unlock();
6731 6732 ConcurrentMarkSweepThread::desynchronize(true);
6732 6733 ConcurrentMarkSweepThread::acknowledge_yield_request();
6733 6734 _collector->stopTimer();
6734 6735 GCPauseTimer p(_collector->size_policy()->concurrent_timer_ptr());
6735 6736 if (PrintCMSStatistics != 0) {
6736 6737 _collector->incrementYields();
6737 6738 }
6738 6739 _collector->icms_wait();
6739 6740
6740 6741 // See the comment in coordinator_yield()
6741 6742 for (unsigned i = 0;
6742 6743 i < CMSYieldSleepCount &&
6743 6744 ConcurrentMarkSweepThread::should_yield() &&
6744 6745 !CMSCollector::foregroundGCIsActive();
6745 6746 ++i) {
6746 6747 os::sleep(Thread::current(), 1, false);
6747 6748 ConcurrentMarkSweepThread::acknowledge_yield_request();
6748 6749 }
6749 6750
6750 6751 ConcurrentMarkSweepThread::synchronize(true);
6751 6752 _freelistLock->lock_without_safepoint_check();
6752 6753 _bit_map->lock()->lock_without_safepoint_check();
6753 6754 _collector->startTimer();
6754 6755 }
6755 6756
6756 6757 ///////////////////////////////////////////////////////////
6757 6758 // Par_MarkRefsIntoAndScanClosure: a parallel version of
6758 6759 // MarkRefsIntoAndScanClosure
6759 6760 ///////////////////////////////////////////////////////////
6760 6761 Par_MarkRefsIntoAndScanClosure::Par_MarkRefsIntoAndScanClosure(
6761 6762 CMSCollector* collector, MemRegion span, ReferenceProcessor* rp,
6762 6763 CMSBitMap* bit_map, OopTaskQueue* work_queue, CMSMarkStack* revisit_stack):
6763 6764 _span(span),
6764 6765 _bit_map(bit_map),
6765 6766 _work_queue(work_queue),
6766 6767 _low_water_mark(MIN2((uint)(work_queue->max_elems()/4),
6767 6768 (uint)(CMSWorkQueueDrainThreshold * ParallelGCThreads))),
6768 6769 _par_pushAndMarkClosure(collector, span, rp, bit_map, work_queue,
6769 6770 revisit_stack)
6770 6771 {
6771 6772 _ref_processor = rp;
6772 6773 assert(_ref_processor != NULL, "_ref_processor shouldn't be NULL");
6773 6774 }
6774 6775
6775 6776 // This closure is used to mark refs into the CMS generation at the
6776 6777 // second (final) checkpoint, and to scan and transitively follow
6777 6778 // the unmarked oops. The marks are made in the marking bit map and
6778 6779 // the work_queue is used for keeping the (newly) grey objects during
6779 6780 // the scan phase whence they are also available for stealing by parallel
6780 6781 // threads. Since the marking bit map is shared, updates are
6781 6782 // synchronized (via CAS).
6782 6783 void Par_MarkRefsIntoAndScanClosure::do_oop(oop obj) {
6783 6784 if (obj != NULL) {
6784 6785 // Ignore mark word because this could be an already marked oop
6785 6786 // that may be chained at the end of the overflow list.
6786 6787 assert(obj->is_oop(true), "expected an oop");
6787 6788 HeapWord* addr = (HeapWord*)obj;
6788 6789 if (_span.contains(addr) &&
6789 6790 !_bit_map->isMarked(addr)) {
6790 6791 // mark bit map (object will become grey):
6791 6792 // It is possible for several threads to be
6792 6793 // trying to "claim" this object concurrently;
6793 6794 // the unique thread that succeeds in marking the
6794 6795 // object first will do the subsequent push on
6795 6796 // to the work queue (or overflow list).
6796 6797 if (_bit_map->par_mark(addr)) {
6797 6798 // push on work_queue (which may not be empty), and trim the
6798 6799 // queue to an appropriate length by applying this closure to
6799 6800 // the oops in the oops popped from the stack (i.e. blacken the
6800 6801 // grey objects)
6801 6802 bool res = _work_queue->push(obj);
6802 6803 assert(res, "Low water mark should be less than capacity?");
6803 6804 trim_queue(_low_water_mark);
6804 6805 } // Else, another thread claimed the object
6805 6806 }
6806 6807 }
6807 6808 }
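
// ---------------------------------------------------------------------
// Editorial sketch, not part of this webrev: the parallel claim above
// in miniature -- the one worker whose atomic test-and-set of the mark
// bit succeeds owns the push; everyone else skips the object. Types
// are made up, and the scan that trim_queue() performs is elided.
#include <atomic>
#include <cstddef>
#include <deque>

struct ToyObj { std::atomic<bool> marked; ToyObj() : marked(false) {} };

static void par_mark_and_push(ToyObj* obj, std::deque<ToyObj*>& work_queue,
                              size_t low_water_mark) {
  bool expected = false;
  // compare_exchange plays the role of CMSBitMap::par_mark(): exactly
  // one thread observes the transition false -> true.
  if (obj->marked.compare_exchange_strong(expected, true)) {
    work_queue.push_back(obj);
    // Like trim_queue(_low_water_mark): process entries locally until
    // the queue drops back to the low-water mark (the scan is elided).
    while (work_queue.size() > low_water_mark) {
      work_queue.pop_front();
    }
  } // else: another thread claimed the object first
}

int main() {
  std::deque<ToyObj*> q;
  ToyObj a;
  par_mark_and_push(&a, q, 4);
  return q.size() == 1 ? 0 : 1;
}
// ---------------------------------------------------------------------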
6808 6809
6809 6810 void Par_MarkRefsIntoAndScanClosure::do_oop(oop* p) { Par_MarkRefsIntoAndScanClosure::do_oop_work(p); }
6810 6811 void Par_MarkRefsIntoAndScanClosure::do_oop(narrowOop* p) { Par_MarkRefsIntoAndScanClosure::do_oop_work(p); }
6811 6812
6812 6813 // This closure is used to rescan the marked objects on the dirty cards
6813 6814 // in the mod union table and the card table proper.
6814 6815 size_t ScanMarkedObjectsAgainCarefullyClosure::do_object_careful_m(
6815 6816 oop p, MemRegion mr) {
6816 6817
6817 6818 size_t size = 0;
6818 6819 HeapWord* addr = (HeapWord*)p;
6819 6820 DEBUG_ONLY(_collector->verify_work_stacks_empty();)
6820 6821 assert(_span.contains(addr), "we are scanning the CMS generation");
6821 6822 // check if it's time to yield
6822 6823 if (do_yield_check()) {
6823 6824 // We yielded for some foreground stop-world work,
6824 6825 // and we have been asked to abort this ongoing preclean cycle.
6825 6826 return 0;
6826 6827 }
6827 6828 if (_bitMap->isMarked(addr)) {
6828 6829 // it's marked; is it potentially uninitialized?
6829 6830 if (p->klass_or_null() != NULL) {
6830 6831 // If is_conc_safe is false, the object may be undergoing
6831 6832 // change by the VM outside a safepoint. Don't try to
6832 6833 // scan it, but rather leave it for the remark phase.
6833 6834 if (CMSPermGenPrecleaningEnabled &&
6834 6835 (!p->is_conc_safe() || !p->is_parsable())) {
6835 6836 // Signal precleaning to redirty the card since
6836 6837 // the klass pointer is already installed.
6837 6838 assert(size == 0, "Initial value");
6838 6839 } else {
6839 6840 assert(p->is_parsable(), "must be parsable.");
6840 6841 // an initialized object; ignore mark word in verification below
6841 6842 // since we are running concurrent with mutators
6842 6843 assert(p->is_oop(true), "should be an oop");
6843 6844 if (p->is_objArray()) {
6844 6845 // objArrays are precisely marked; restrict scanning
6845 6846 // to dirty cards only.
6846 6847 size = CompactibleFreeListSpace::adjustObjectSize(
6847 6848 p->oop_iterate(_scanningClosure, mr));
6848 6849 } else {
6849 6850 // A non-array may have been imprecisely marked; we need
6850 6851 // to scan object in its entirety.
6851 6852 size = CompactibleFreeListSpace::adjustObjectSize(
6852 6853 p->oop_iterate(_scanningClosure));
6853 6854 }
6854 6855 #ifdef DEBUG
6855 6856 size_t direct_size =
6856 6857 CompactibleFreeListSpace::adjustObjectSize(p->size());
6857 6858 assert(size == direct_size, "Inconsistency in size");
6858 6859 assert(size >= 3, "Necessary for Printezis marks to work");
6859 6860 if (!_bitMap->isMarked(addr+1)) {
6860 6861 _bitMap->verifyNoOneBitsInRange(addr+2, addr+size);
6861 6862 } else {
6862 6863 _bitMap->verifyNoOneBitsInRange(addr+2, addr+size-1);
6863 6864 assert(_bitMap->isMarked(addr+size-1),
6864 6865 "inconsistent Printezis mark");
6865 6866 }
6866 6867 #endif // DEBUG
6867 6868 }
6868 6869 } else {
6869 6870 // an uninitialized object
6870 6871 assert(_bitMap->isMarked(addr+1), "missing Printezis mark?");
6871 6872 HeapWord* nextOneAddr = _bitMap->getNextMarkedWordAddress(addr + 2);
6872 6873 size = pointer_delta(nextOneAddr + 1, addr);
6873 6874 assert(size == CompactibleFreeListSpace::adjustObjectSize(size),
6874 6875 "alignment problem");
6875 6876 // Note that pre-cleaning needn't redirty the card. OopDesc::set_klass()
6876 6877 // will dirty the card when the klass pointer is installed in the
6877 6878 // object (signalling the completion of initialization).
6878 6879 }
6879 6880 } else {
6880 6881 // Either a not yet marked object or an uninitialized object
6881 6882 if (p->klass_or_null() == NULL || !p->is_parsable()) {
6882 6883 // An uninitialized object, skip to the next card, since
6883 6884 // we may not be able to read its P-bits yet.
6884 6885 assert(size == 0, "Initial value");
6885 6886 } else {
6886 6887 // An object not (yet) reached by marking: we merely need to
6887 6888 // compute its size so as to go look at the next block.
6888 6889 assert(p->is_oop(true), "should be an oop");
6889 6890 size = CompactibleFreeListSpace::adjustObjectSize(p->size());
6890 6891 }
6891 6892 }
6892 6893 DEBUG_ONLY(_collector->verify_work_stacks_empty();)
6893 6894 return size;
6894 6895 }
6895 6896
6896 6897 void ScanMarkedObjectsAgainCarefullyClosure::do_yield_work() {
6897 6898 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
6898 6899 "CMS thread should hold CMS token");
6899 6900 assert_lock_strong(_freelistLock);
6900 6901 assert_lock_strong(_bitMap->lock());
6901 6902 DEBUG_ONLY(RememberKlassesChecker mux(false);)
6902 6903 // relinquish the free_list_lock and bitMaplock()
6903 6904 _bitMap->lock()->unlock();
6904 6905 _freelistLock->unlock();
6905 6906 ConcurrentMarkSweepThread::desynchronize(true);
6906 6907 ConcurrentMarkSweepThread::acknowledge_yield_request();
6907 6908 _collector->stopTimer();
6908 6909 GCPauseTimer p(_collector->size_policy()->concurrent_timer_ptr());
6909 6910 if (PrintCMSStatistics != 0) {
6910 6911 _collector->incrementYields();
6911 6912 }
6912 6913 _collector->icms_wait();
6913 6914
6914 6915 // See the comment in coordinator_yield()
6915 6916 for (unsigned i = 0; i < CMSYieldSleepCount &&
6916 6917 ConcurrentMarkSweepThread::should_yield() &&
6917 6918 !CMSCollector::foregroundGCIsActive(); ++i) {
6918 6919 os::sleep(Thread::current(), 1, false);
6919 6920 ConcurrentMarkSweepThread::acknowledge_yield_request();
6920 6921 }
6921 6922
6922 6923 ConcurrentMarkSweepThread::synchronize(true);
6923 6924 _freelistLock->lock_without_safepoint_check();
6924 6925 _bitMap->lock()->lock_without_safepoint_check();
6925 6926 _collector->startTimer();
6926 6927 }
6927 6928
6928 6929
6929 6930 //////////////////////////////////////////////////////////////////
6930 6931 // SurvivorSpacePrecleanClosure
6931 6932 //////////////////////////////////////////////////////////////////
6932 6933 // This (single-threaded) closure is used to preclean the oops in
6933 6934 // the survivor spaces.
6934 6935 size_t SurvivorSpacePrecleanClosure::do_object_careful(oop p) {
6935 6936
6936 6937 HeapWord* addr = (HeapWord*)p;
6937 6938 DEBUG_ONLY(_collector->verify_work_stacks_empty();)
6938 6939 assert(!_span.contains(addr), "we are scanning the survivor spaces");
6939 6940 assert(p->klass_or_null() != NULL, "object should be initialized");
6940 6941 assert(p->is_parsable(), "must be parsable.");
6941 6942 // an initialized object; ignore mark word in verification below
6942 6943 // since we are running concurrent with mutators
6943 6944 assert(p->is_oop(true), "should be an oop");
6944 6945 // Note that we do not yield while we iterate over
6945 6946 // the interior oops of p, pushing the relevant ones
6946 6947 // on our marking stack.
6947 6948 size_t size = p->oop_iterate(_scanning_closure);
6948 6949 do_yield_check();
6949 6950 // Observe that below, we do not abandon the preclean
6950 6951 // phase as soon as we should; rather we empty the
6951 6952 // marking stack before returning. This is to satisfy
6952 6953 // some existing assertions. In general, it may be a
6953 6954 // good idea to abort immediately and complete the marking
6954 6955 // from the grey objects at a later time.
6955 6956 while (!_mark_stack->isEmpty()) {
6956 6957 oop new_oop = _mark_stack->pop();
6957 6958 assert(new_oop != NULL && new_oop->is_oop(), "Expected an oop");
6958 6959 assert(new_oop->is_parsable(), "Found unparsable oop");
6959 6960 assert(_bit_map->isMarked((HeapWord*)new_oop),
6960 6961 "only grey objects on this stack");
6961 6962 // iterate over the oops in this oop, marking and pushing
6962 6963 // the ones in CMS heap (i.e. in _span).
6963 6964 new_oop->oop_iterate(_scanning_closure);
6964 6965 // check if it's time to yield
6965 6966 do_yield_check();
6966 6967 }
6967 6968 unsigned int after_count =
6968 6969 GenCollectedHeap::heap()->total_collections();
6969 6970 bool abort = (_before_count != after_count) ||
6970 6971 _collector->should_abort_preclean();
6971 6972 return abort ? 0 : size;
6972 6973 }
6973 6974
6974 6975 void SurvivorSpacePrecleanClosure::do_yield_work() {
6975 6976 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
6976 6977 "CMS thread should hold CMS token");
6977 6978 assert_lock_strong(_bit_map->lock());
6978 6979 DEBUG_ONLY(RememberKlassesChecker smx(false);)
6979 6980 // Relinquish the bit map lock
6980 6981 _bit_map->lock()->unlock();
6981 6982 ConcurrentMarkSweepThread::desynchronize(true);
6982 6983 ConcurrentMarkSweepThread::acknowledge_yield_request();
6983 6984 _collector->stopTimer();
6984 6985 GCPauseTimer p(_collector->size_policy()->concurrent_timer_ptr());
6985 6986 if (PrintCMSStatistics != 0) {
6986 6987 _collector->incrementYields();
6987 6988 }
6988 6989 _collector->icms_wait();
6989 6990
6990 6991 // See the comment in coordinator_yield()
6991 6992 for (unsigned i = 0; i < CMSYieldSleepCount &&
6992 6993 ConcurrentMarkSweepThread::should_yield() &&
6993 6994 !CMSCollector::foregroundGCIsActive(); ++i) {
6994 6995 os::sleep(Thread::current(), 1, false);
6995 6996 ConcurrentMarkSweepThread::acknowledge_yield_request();
6996 6997 }
6997 6998
6998 6999 ConcurrentMarkSweepThread::synchronize(true);
6999 7000 _bit_map->lock()->lock_without_safepoint_check();
7000 7001 _collector->startTimer();
7001 7002 }
7002 7003
7003 7004 // This closure is used to rescan the marked objects on the dirty cards
7004 7005 // in the mod union table and the card table proper. In the parallel
7005 7006 // case, although the bitMap is shared, we do a single read so the
7006 7007 // isMarked() query is "safe".
7007 7008 bool ScanMarkedObjectsAgainClosure::do_object_bm(oop p, MemRegion mr) {
7008 7009 // Ignore mark word because we are running concurrent with mutators
7009 7010 assert(p->is_oop_or_null(true), "expected an oop or null");
7010 7011 HeapWord* addr = (HeapWord*)p;
7011 7012 assert(_span.contains(addr), "we are scanning the CMS generation");
7012 7013 bool is_obj_array = false;
7013 7014 #ifdef DEBUG
7014 7015 if (!_parallel) {
7015 7016 assert(_mark_stack->isEmpty(), "pre-condition (eager drainage)");
7016 7017 assert(_collector->overflow_list_is_empty(),
7017 7018 "overflow list should be empty");
7018 7019
7019 7020 }
7020 7021 #endif // DEBUG
7021 7022 if (_bit_map->isMarked(addr)) {
7022 7023 // Obj arrays are precisely marked, non-arrays are not;
7023 7024 // so we scan objArrays precisely and non-arrays in their
7024 7025 // entirety.
7025 7026 if (p->is_objArray()) {
7026 7027 is_obj_array = true;
7027 7028 if (_parallel) {
7028 7029 p->oop_iterate(_par_scan_closure, mr);
7029 7030 } else {
7030 7031 p->oop_iterate(_scan_closure, mr);
7031 7032 }
7032 7033 } else {
7033 7034 if (_parallel) {
7034 7035 p->oop_iterate(_par_scan_closure);
7035 7036 } else {
7036 7037 p->oop_iterate(_scan_closure);
7037 7038 }
7038 7039 }
7039 7040 }
7040 7041 #ifdef DEBUG
7041 7042 if (!_parallel) {
7042 7043 assert(_mark_stack->isEmpty(), "post-condition (eager drainage)");
7043 7044 assert(_collector->overflow_list_is_empty(),
7044 7045 "overflow list should be empty");
7045 7046
7046 7047 }
7047 7048 #endif // DEBUG
7048 7049 return is_obj_array;
7049 7050 }
7050 7051
7051 7052 MarkFromRootsClosure::MarkFromRootsClosure(CMSCollector* collector,
7052 7053 MemRegion span,
7053 7054 CMSBitMap* bitMap, CMSMarkStack* markStack,
7054 7055 CMSMarkStack* revisitStack,
7055 7056 bool should_yield, bool verifying):
7056 7057 _collector(collector),
7057 7058 _span(span),
7058 7059 _bitMap(bitMap),
7059 7060 _mut(&collector->_modUnionTable),
7060 7061 _markStack(markStack),
7061 7062 _revisitStack(revisitStack),
7062 7063 _yield(should_yield),
7063 7064 _skipBits(0)
7064 7065 {
7065 7066 assert(_markStack->isEmpty(), "stack should be empty");
7066 7067 _finger = _bitMap->startWord();
7067 7068 _threshold = _finger;
7068 7069 assert(_collector->_restart_addr == NULL, "Sanity check");
7069 7070 assert(_span.contains(_finger), "Out of bounds _finger?");
7070 7071 DEBUG_ONLY(_verifying = verifying;)
7071 7072 }
7072 7073
7073 7074 void MarkFromRootsClosure::reset(HeapWord* addr) {
7074 7075 assert(_markStack->isEmpty(), "would cause duplicates on stack");
7075 7076 assert(_span.contains(addr), "Out of bounds _finger?");
7076 7077 _finger = addr;
7077 7078 _threshold = (HeapWord*)round_to(
7078 7079 (intptr_t)_finger, CardTableModRefBS::card_size);
7079 7080 }
7080 7081
7081 7082 // Should revisit to see if this should be restructured for
7082 7083 // greater efficiency.
7083 7084 bool MarkFromRootsClosure::do_bit(size_t offset) {
7084 7085 if (_skipBits > 0) {
7085 7086 _skipBits--;
7086 7087 return true;
7087 7088 }
7088 7089 // convert offset into a HeapWord*
7089 7090 HeapWord* addr = _bitMap->startWord() + offset;
7090 7091 assert(_bitMap->endWord() && addr < _bitMap->endWord(),
7091 7092 "address out of range");
7092 7093 assert(_bitMap->isMarked(addr), "tautology");
7093 7094 if (_bitMap->isMarked(addr+1)) {
7094 7095 // this is an allocated but not yet initialized object
7095 7096 assert(_skipBits == 0, "tautology");
7096 7097 _skipBits = 2; // skip next two marked bits ("Printezis-marks")
7097 7098 oop p = oop(addr);
7098 7099 if (p->klass_or_null() == NULL || !p->is_parsable()) {
7099 7100 DEBUG_ONLY(if (!_verifying) {)
7100 7101 // We re-dirty the cards on which this object lies and increase
7101 7102 // the _threshold so that we'll come back to scan this object
7102 7103 // during the preclean or remark phase. (CMSCleanOnEnter)
7103 7104 if (CMSCleanOnEnter) {
7104 7105 size_t sz = _collector->block_size_using_printezis_bits(addr);
7105 7106 HeapWord* end_card_addr = (HeapWord*)round_to(
7106 7107 (intptr_t)(addr+sz), CardTableModRefBS::card_size);
7107 7108 MemRegion redirty_range = MemRegion(addr, end_card_addr);
7108 7109 assert(!redirty_range.is_empty(), "Arithmetical tautology");
7109 7110 // Bump _threshold to end_card_addr; note that
7110 7111 // _threshold cannot possibly exceed end_card_addr, anyhow.
7111 7112 // This prevents future clearing of the card as the scan proceeds
7112 7113 // to the right.
7113 7114 assert(_threshold <= end_card_addr,
7114 7115 "Because we are just scanning into this object");
7115 7116 if (_threshold < end_card_addr) {
7116 7117 _threshold = end_card_addr;
7117 7118 }
7118 7119 if (p->klass_or_null() != NULL) {
7119 7120 // Redirty the range of cards...
7120 7121 _mut->mark_range(redirty_range);
7121 7122 } // ...else the setting of klass will dirty the card anyway.
7122 7123 }
7123 7124 DEBUG_ONLY(})
7124 7125 return true;
7125 7126 }
7126 7127 }
7127 7128 scanOopsInOop(addr);
7128 7129 return true;
7129 7130 }
7130 7131
7131 7132 // We take a break if we've been at this for a while,
7132 7133 // so as to avoid monopolizing the locks involved.
7133 7134 void MarkFromRootsClosure::do_yield_work() {
7134 7135 // First give up the locks, then yield, then re-lock
7135 7136 // We should probably use a constructor/destructor idiom to
7136 7137 // do this unlock/lock or modify the MutexUnlocker class to
7137 7138 // serve our purpose. XXX
7138 7139 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
7139 7140 "CMS thread should hold CMS token");
7140 7141 assert_lock_strong(_bitMap->lock());
7141 7142 DEBUG_ONLY(RememberKlassesChecker mux(false);)
7142 7143 _bitMap->lock()->unlock();
7143 7144 ConcurrentMarkSweepThread::desynchronize(true);
7144 7145 ConcurrentMarkSweepThread::acknowledge_yield_request();
7145 7146 _collector->stopTimer();
7146 7147 GCPauseTimer p(_collector->size_policy()->concurrent_timer_ptr());
7147 7148 if (PrintCMSStatistics != 0) {
7148 7149 _collector->incrementYields();
7149 7150 }
7150 7151 _collector->icms_wait();
7151 7152
7152 7153 // See the comment in coordinator_yield()
7153 7154 for (unsigned i = 0; i < CMSYieldSleepCount &&
7154 7155 ConcurrentMarkSweepThread::should_yield() &&
7155 7156 !CMSCollector::foregroundGCIsActive(); ++i) {
7156 7157 os::sleep(Thread::current(), 1, false);
7157 7158 ConcurrentMarkSweepThread::acknowledge_yield_request();
7158 7159 }
7159 7160
7160 7161 ConcurrentMarkSweepThread::synchronize(true);
7161 7162 _bitMap->lock()->lock_without_safepoint_check();
7162 7163 _collector->startTimer();
7163 7164 }
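
// ---------------------------------------------------------------------
// Editorial sketch, not part of this webrev: the constructor/destructor
// idiom the comment above wishes for (cf. MutexUnlocker) -- release a
// lock for the duration of a scope and reacquire it on exit, so a yield
// path cannot forget the re-lock. std::mutex stands in for the VM's
// Mutex; all names are hypothetical.
#include <mutex>

class ScopedUnlocker {
  std::mutex& _m;
 public:
  explicit ScopedUnlocker(std::mutex& m) : _m(m) { _m.unlock(); }
  ~ScopedUnlocker() { _m.lock(); }            // re-acquire on scope exit
};

static std::mutex bit_map_lock;

static void yield_point() {
  // caller holds bit_map_lock on entry
  ScopedUnlocker unlock(bit_map_lock);
  // ... desynchronize, sleep briefly, let foreground work proceed ...
}                                             // bit_map_lock re-acquired here

int main() {
  bit_map_lock.lock();
  yield_point();
  bit_map_lock.unlock();
  return 0;
}
// ---------------------------------------------------------------------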
7164 7165
7165 7166 void MarkFromRootsClosure::scanOopsInOop(HeapWord* ptr) {
7166 7167 assert(_bitMap->isMarked(ptr), "expected bit to be set");
7167 7168 assert(_markStack->isEmpty(),
7168 7169 "should drain stack to limit stack usage");
7169 7170 // convert ptr to an oop preparatory to scanning
7170 7171 oop obj = oop(ptr);
7171 7172 // Ignore mark word in verification below, since we
7172 7173 // may be running concurrent with mutators.
7173 7174 assert(obj->is_oop(true), "should be an oop");
7174 7175 assert(_finger <= ptr, "_finger runneth ahead");
7175 7176 // advance the finger to right end of this object
7176 7177 _finger = ptr + obj->size();
7177 7178 assert(_finger > ptr, "we just incremented it above");
7178 7179 // On large heaps, it may take us some time to get through
7179 7180 // the marking phase (especially if running iCMS). During
7180 7181 // this time it's possible that a lot of mutations have
7181 7182 // accumulated in the card table and the mod union table --
7182 7183 // these mutation records are redundant until we have
7183 7184 // actually traced into the corresponding card.
7184 7185 // Here, we check whether advancing the finger would make
7185 7186 // us cross into a new card, and if so clear corresponding
7186 7187 // cards in the MUT (preclean them in the card-table in the
7187 7188 // future).
7188 7189
7189 7190 DEBUG_ONLY(if (!_verifying) {)
7190 7191 // The clean-on-enter optimization is disabled by default,
7191 7192 // until we fix 6178663.
7192 7193 if (CMSCleanOnEnter && (_finger > _threshold)) {
7193 7194 // [_threshold, _finger) represents the interval
7194 7195 // of cards to be cleared in MUT (or precleaned in card table).
7195 7196 // The set of cards to be cleared is all those that overlap
7196 7197 // with the interval [_threshold, _finger); note that
7197 7198 // _threshold is always kept card-aligned but _finger isn't
7198 7199 // always card-aligned.
7199 7200 HeapWord* old_threshold = _threshold;
7200 7201 assert(old_threshold == (HeapWord*)round_to(
7201 7202 (intptr_t)old_threshold, CardTableModRefBS::card_size),
7202 7203 "_threshold should always be card-aligned");
7203 7204 _threshold = (HeapWord*)round_to(
7204 7205 (intptr_t)_finger, CardTableModRefBS::card_size);
7205 7206 MemRegion mr(old_threshold, _threshold);
7206 7207 assert(!mr.is_empty(), "Control point invariant");
7207 7208 assert(_span.contains(mr), "Should clear within span");
7208 7209 // XXX When _finger crosses from old gen into perm gen
7209 7210 // we may be doing unnecessary cleaning; do better in the
7210 7211 // future by detecting that condition and clearing fewer
7211 7212 // MUT/CT entries.
7212 7213 _mut->clear_range(mr);
7213 7214 }
7214 7215 DEBUG_ONLY(})
7215 7216 // Note: the finger doesn't advance while we drain
7216 7217 // the stack below.
7217 7218 PushOrMarkClosure pushOrMarkClosure(_collector,
7218 7219 _span, _bitMap, _markStack,
7219 7220 _revisitStack,
7220 7221 _finger, this);
7221 7222 bool res = _markStack->push(obj);
7222 7223 assert(res, "Empty non-zero size stack should have space for single push");
7223 7224 while (!_markStack->isEmpty()) {
7224 7225 oop new_oop = _markStack->pop();
7225 7226 // Skip verifying header mark word below because we are
7226 7227 // running concurrent with mutators.
7227 7228 assert(new_oop->is_oop(true), "Oops! expected to pop an oop");
7228 7229 // now scan this oop's oops
7229 7230 new_oop->oop_iterate(&pushOrMarkClosure);
7230 7231 do_yield_check();
7231 7232 }
7232 7233 assert(_markStack->isEmpty(), "tautology, emphasizing post-condition");
7233 7234 }
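
// ---------------------------------------------------------------------
// Editorial sketch, not part of this webrev: the card-aligned threshold
// arithmetic used above, worked through with assumed numbers (512-byte
// cards, raw byte addresses). Cards overlapping [_threshold, _finger)
// are the ones cleared in the mod union table.
#include <cassert>
#include <stdint.h>

static uintptr_t round_to_card(uintptr_t p, uintptr_t card_size) {
  return (p + card_size - 1) & ~(card_size - 1);      // round up to a card boundary
}

int main() {
  const uintptr_t card    = 512;
  uintptr_t old_threshold = 0x1000;                   // already card-aligned
  uintptr_t finger        = 0x1234;                   // just crossed into a new card
  uintptr_t new_threshold = round_to_card(finger, card);
  assert(new_threshold == 0x1400);
  // Two whole cards lie in [old_threshold, new_threshold) and get cleared.
  assert((new_threshold - old_threshold) / card == 2);
  return 0;
}
// ---------------------------------------------------------------------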
7234 7235
7235 7236 Par_MarkFromRootsClosure::Par_MarkFromRootsClosure(CMSConcMarkingTask* task,
7236 7237 CMSCollector* collector, MemRegion span,
7237 7238 CMSBitMap* bit_map,
7238 7239 OopTaskQueue* work_queue,
7239 7240 CMSMarkStack* overflow_stack,
7240 7241 CMSMarkStack* revisit_stack,
7241 7242 bool should_yield):
7242 7243 _collector(collector),
7243 7244 _whole_span(collector->_span),
7244 7245 _span(span),
7245 7246 _bit_map(bit_map),
7246 7247 _mut(&collector->_modUnionTable),
7247 7248 _work_queue(work_queue),
7248 7249 _overflow_stack(overflow_stack),
7249 7250 _revisit_stack(revisit_stack),
7250 7251 _yield(should_yield),
7251 7252 _skip_bits(0),
7252 7253 _task(task)
7253 7254 {
7254 7255 assert(_work_queue->size() == 0, "work_queue should be empty");
7255 7256 _finger = span.start();
7256 7257 _threshold = _finger; // XXX Defer clear-on-enter optimization for now
7257 7258 assert(_span.contains(_finger), "Out of bounds _finger?");
7258 7259 }
7259 7260
7260 7261 // Should revisit to see if this should be restructured for
7261 7262 // greater efficiency.
7262 7263 bool Par_MarkFromRootsClosure::do_bit(size_t offset) {
7263 7264 if (_skip_bits > 0) {
7264 7265 _skip_bits--;
7265 7266 return true;
7266 7267 }
7267 7268 // convert offset into a HeapWord*
7268 7269 HeapWord* addr = _bit_map->startWord() + offset;
7269 7270 assert(_bit_map->endWord() && addr < _bit_map->endWord(),
7270 7271 "address out of range");
7271 7272 assert(_bit_map->isMarked(addr), "tautology");
7272 7273 if (_bit_map->isMarked(addr+1)) {
7273 7274 // this is an allocated object that might not yet be initialized
7274 7275 assert(_skip_bits == 0, "tautology");
7275 7276 _skip_bits = 2; // skip next two marked bits ("Printezis-marks")
7276 7277 oop p = oop(addr);
7277 7278 if (p->klass_or_null() == NULL || !p->is_parsable()) {
7278 7279 // in the case of Clean-on-Enter optimization, redirty card
7279 7280 // and avoid clearing card by increasing the threshold.
7280 7281 return true;
7281 7282 }
7282 7283 }
7283 7284 scan_oops_in_oop(addr);
7284 7285 return true;
7285 7286 }
7286 7287
7287 7288 void Par_MarkFromRootsClosure::scan_oops_in_oop(HeapWord* ptr) {
7288 7289 assert(_bit_map->isMarked(ptr), "expected bit to be set");
7289 7290 // Should we assert that our work queue is empty or
7290 7291 // below some drain limit?
7291 7292 assert(_work_queue->size() == 0,
7292 7293 "should drain stack to limit stack usage");
7293 7294 // convert ptr to an oop preparatory to scanning
7294 7295 oop obj = oop(ptr);
7295 7296 // Ignore mark word in verification below, since we
7296 7297 // may be running concurrent with mutators.
7297 7298 assert(obj->is_oop(true), "should be an oop");
7298 7299 assert(_finger <= ptr, "_finger runneth ahead");
7299 7300 // advance the finger to right end of this object
7300 7301 _finger = ptr + obj->size();
7301 7302 assert(_finger > ptr, "we just incremented it above");
7302 7303 // On large heaps, it may take us some time to get through
7303 7304 // the marking phase (especially if running iCMS). During
7304 7305 // this time it's possible that a lot of mutations have
7305 7306 // accumulated in the card table and the mod union table --
7306 7307 // these mutation records are redundant until we have
7307 7308 // actually traced into the corresponding card.
7308 7309 // Here, we check whether advancing the finger would make
7309 7310 // us cross into a new card, and if so clear corresponding
7310 7311 // cards in the MUT (preclean them in the card-table in the
7311 7312 // future).
7312 7313
7313 7314 // The clean-on-enter optimization is disabled by default,
7314 7315 // until we fix 6178663.
7315 7316 if (CMSCleanOnEnter && (_finger > _threshold)) {
7316 7317 // [_threshold, _finger) represents the interval
7317 7318 // of cards to be cleared in MUT (or precleaned in card table).
7318 7319 // The set of cards to be cleared is all those that overlap
7319 7320 // with the interval [_threshold, _finger); note that
7320 7321 // _threshold is always kept card-aligned but _finger isn't
7321 7322 // always card-aligned.
7322 7323 HeapWord* old_threshold = _threshold;
7323 7324 assert(old_threshold == (HeapWord*)round_to(
7324 7325 (intptr_t)old_threshold, CardTableModRefBS::card_size),
7325 7326 "_threshold should always be card-aligned");
7326 7327 _threshold = (HeapWord*)round_to(
7327 7328 (intptr_t)_finger, CardTableModRefBS::card_size);
7328 7329 MemRegion mr(old_threshold, _threshold);
7329 7330 assert(!mr.is_empty(), "Control point invariant");
7330 7331 assert(_span.contains(mr), "Should clear within span"); // _whole_span ??
7331 7332 // XXX When _finger crosses from old gen into perm gen
7332 7333 // we may be doing unnecessary cleaning; do better in the
7333 7334 // future by detecting that condition and clearing fewer
7334 7335 // MUT/CT entries.
7335 7336 _mut->clear_range(mr);
7336 7337 }
7337 7338
7338 7339 // Note: the local finger doesn't advance while we drain
7339 7340 // the stack below, but the global finger sure can and will.
7340 7341 HeapWord** gfa = _task->global_finger_addr();
7341 7342 Par_PushOrMarkClosure pushOrMarkClosure(_collector,
7342 7343 _span, _bit_map,
7343 7344 _work_queue,
7344 7345 _overflow_stack,
7345 7346 _revisit_stack,
7346 7347 _finger,
7347 7348 gfa, this);
7348 7349 bool res = _work_queue->push(obj); // overflow could occur here
7349 7350 assert(res, "Will hold once we use workqueues");
7350 7351 while (true) {
7351 7352 oop new_oop;
7352 7353 if (!_work_queue->pop_local(new_oop)) {
7353 7354 // We emptied our work_queue; check if there's stuff that can
7354 7355 // be gotten from the overflow stack.
7355 7356 if (CMSConcMarkingTask::get_work_from_overflow_stack(
7356 7357 _overflow_stack, _work_queue)) {
7357 7358 do_yield_check();
7358 7359 continue;
7359 7360 } else { // done
7360 7361 break;
7361 7362 }
7362 7363 }
7363 7364 // Skip verifying header mark word below because we are
7364 7365 // running concurrent with mutators.
7365 7366 assert(new_oop->is_oop(true), "Oops! expected to pop an oop");
7366 7367 // now scan this oop's oops
7367 7368 new_oop->oop_iterate(&pushOrMarkClosure);
7368 7369 do_yield_check();
7369 7370 }
7370 7371 assert(_work_queue->size() == 0, "tautology, emphasizing post-condition");
7371 7372 }
7372 7373
7373 7374 // Yield in response to a request from VM Thread or
7374 7375 // from mutators.
7375 7376 void Par_MarkFromRootsClosure::do_yield_work() {
7376 7377 assert(_task != NULL, "sanity");
7377 7378 _task->yield();
7378 7379 }
7379 7380
7380 7381 // A variant of the above used for verifying CMS marking work.
7381 7382 MarkFromRootsVerifyClosure::MarkFromRootsVerifyClosure(CMSCollector* collector,
7382 7383 MemRegion span,
7383 7384 CMSBitMap* verification_bm, CMSBitMap* cms_bm,
7384 7385 CMSMarkStack* mark_stack):
7385 7386 _collector(collector),
7386 7387 _span(span),
7387 7388 _verification_bm(verification_bm),
7388 7389 _cms_bm(cms_bm),
7389 7390 _mark_stack(mark_stack),
7390 7391 _pam_verify_closure(collector, span, verification_bm, cms_bm,
7391 7392 mark_stack)
7392 7393 {
7393 7394 assert(_mark_stack->isEmpty(), "stack should be empty");
7394 7395 _finger = _verification_bm->startWord();
7395 7396 assert(_collector->_restart_addr == NULL, "Sanity check");
7396 7397 assert(_span.contains(_finger), "Out of bounds _finger?");
7397 7398 }
7398 7399
7399 7400 void MarkFromRootsVerifyClosure::reset(HeapWord* addr) {
7400 7401 assert(_mark_stack->isEmpty(), "would cause duplicates on stack");
7401 7402 assert(_span.contains(addr), "Out of bounds _finger?");
7402 7403 _finger = addr;
7403 7404 }
7404 7405
7405 7406 // Should revisit to see if this should be restructured for
7406 7407 // greater efficiency.
7407 7408 bool MarkFromRootsVerifyClosure::do_bit(size_t offset) {
7408 7409 // convert offset into a HeapWord*
7409 7410 HeapWord* addr = _verification_bm->startWord() + offset;
7410 7411 assert(_verification_bm->endWord() && addr < _verification_bm->endWord(),
7411 7412 "address out of range");
7412 7413 assert(_verification_bm->isMarked(addr), "tautology");
7413 7414 assert(_cms_bm->isMarked(addr), "tautology");
7414 7415
7415 7416 assert(_mark_stack->isEmpty(),
7416 7417 "should drain stack to limit stack usage");
7417 7418 // convert addr to an oop preparatory to scanning
7418 7419 oop obj = oop(addr);
7419 7420 assert(obj->is_oop(), "should be an oop");
7420 7421 assert(_finger <= addr, "_finger runneth ahead");
7421 7422 // advance the finger to right end of this object
7422 7423 _finger = addr + obj->size();
7423 7424 assert(_finger > addr, "we just incremented it above");
7424 7425 // Note: the finger doesn't advance while we drain
7425 7426 // the stack below.
7426 7427 bool res = _mark_stack->push(obj);
7427 7428 assert(res, "Empty non-zero size stack should have space for single push");
7428 7429 while (!_mark_stack->isEmpty()) {
7429 7430 oop new_oop = _mark_stack->pop();
7430 7431 assert(new_oop->is_oop(), "Oops! expected to pop an oop");
7431 7432 // now scan this oop's oops
7432 7433 new_oop->oop_iterate(&_pam_verify_closure);
7433 7434 }
7434 7435 assert(_mark_stack->isEmpty(), "tautology, emphasizing post-condition");
7435 7436 return true;
7436 7437 }
7437 7438
7438 7439 PushAndMarkVerifyClosure::PushAndMarkVerifyClosure(
7439 7440 CMSCollector* collector, MemRegion span,
7440 7441 CMSBitMap* verification_bm, CMSBitMap* cms_bm,
7441 7442 CMSMarkStack* mark_stack):
7442 7443 OopClosure(collector->ref_processor()),
7443 7444 _collector(collector),
7444 7445 _span(span),
7445 7446 _verification_bm(verification_bm),
7446 7447 _cms_bm(cms_bm),
7447 7448 _mark_stack(mark_stack)
7448 7449 { }
7449 7450
7450 7451 void PushAndMarkVerifyClosure::do_oop(oop* p) { PushAndMarkVerifyClosure::do_oop_work(p); }
7451 7452 void PushAndMarkVerifyClosure::do_oop(narrowOop* p) { PushAndMarkVerifyClosure::do_oop_work(p); }
7452 7453
7453 7454 // Upon stack overflow, we discard (part of) the stack,
7454 7455 // remembering the least address amongst those discarded
7455 7456 // in CMSCollector's _restart_address.
7456 7457 void PushAndMarkVerifyClosure::handle_stack_overflow(HeapWord* lost) {
7457 7458 // Remember the least grey address discarded
7458 7459 HeapWord* ra = (HeapWord*)_mark_stack->least_value(lost);
7459 7460 _collector->lower_restart_addr(ra);
7460 7461 _mark_stack->reset(); // discard stack contents
7461 7462 _mark_stack->expand(); // expand the stack if possible
7462 7463 }
7463 7464
7464 7465 void PushAndMarkVerifyClosure::do_oop(oop obj) {
7465 7466 assert(obj->is_oop_or_null(), "expected an oop or NULL");
7466 7467 HeapWord* addr = (HeapWord*)obj;
7467 7468 if (_span.contains(addr) && !_verification_bm->isMarked(addr)) {
7468 7469 // Oop lies in _span and isn't yet grey or black
7469 7470 _verification_bm->mark(addr); // now grey
7470 7471 if (!_cms_bm->isMarked(addr)) {
7471 7472 oop(addr)->print();
7472 7473 gclog_or_tty->print_cr(" (" INTPTR_FORMAT " should have been marked)",
7473 7474 addr);
7474 7475 fatal("... aborting");
7475 7476 }
7476 7477
7477 7478 if (!_mark_stack->push(obj)) { // stack overflow
7478 7479 if (PrintCMSStatistics != 0) {
7479 7480 gclog_or_tty->print_cr("CMS marking stack overflow (benign) at "
7480 7481 SIZE_FORMAT, _mark_stack->capacity());
7481 7482 }
7482 7483 assert(_mark_stack->isFull(), "Else push should have succeeded");
7483 7484 handle_stack_overflow(addr);
7484 7485 }
7485 7486 // anything including and to the right of _finger
7486 7487 // will be scanned as we iterate over the remainder of the
7487 7488 // bit map
7488 7489 }
7489 7490 }
7490 7491
7491 7492 PushOrMarkClosure::PushOrMarkClosure(CMSCollector* collector,
7492 7493 MemRegion span,
7493 7494 CMSBitMap* bitMap, CMSMarkStack* markStack,
7494 7495 CMSMarkStack* revisitStack,
7495 7496 HeapWord* finger, MarkFromRootsClosure* parent) :
7496 7497 KlassRememberingOopClosure(collector, collector->ref_processor(), revisitStack),
7497 7498 _span(span),
7498 7499 _bitMap(bitMap),
7499 7500 _markStack(markStack),
7500 7501 _finger(finger),
7501 7502 _parent(parent)
7502 7503 { }
7503 7504
7504 7505 Par_PushOrMarkClosure::Par_PushOrMarkClosure(CMSCollector* collector,
7505 7506 MemRegion span,
7506 7507 CMSBitMap* bit_map,
7507 7508 OopTaskQueue* work_queue,
7508 7509 CMSMarkStack* overflow_stack,
7509 7510 CMSMarkStack* revisit_stack,
7510 7511 HeapWord* finger,
7511 7512 HeapWord** global_finger_addr,
7512 7513 Par_MarkFromRootsClosure* parent) :
7513 7514 Par_KlassRememberingOopClosure(collector,
7514 7515 collector->ref_processor(),
7515 7516 revisit_stack),
7516 7517 _whole_span(collector->_span),
7517 7518 _span(span),
7518 7519 _bit_map(bit_map),
7519 7520 _work_queue(work_queue),
7520 7521 _overflow_stack(overflow_stack),
7521 7522 _finger(finger),
7522 7523 _global_finger_addr(global_finger_addr),
7523 7524 _parent(parent)
7524 7525 { }
7525 7526
7526 7527 // Assumes thread-safe access by callers, who are
7527 7528 // responsible for mutual exclusion.
7528 7529 void CMSCollector::lower_restart_addr(HeapWord* low) {
7529 7530 assert(_span.contains(low), "Out of bounds addr");
7530 7531 if (_restart_addr == NULL) {
7531 7532 _restart_addr = low;
7532 7533 } else {
7533 7534 _restart_addr = MIN2(_restart_addr, low);
7534 7535 }
7535 7536 }
7536 7537
7537 7538 // Upon stack overflow, we discard (part of) the stack,
7538 7539 // remembering the least address amongst those discarded
7539 7540 // in CMSCollector's _restart_address.
7540 7541 void PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) {
7541 7542 // Remember the least grey address discarded
7542 7543 HeapWord* ra = (HeapWord*)_markStack->least_value(lost);
7543 7544 _collector->lower_restart_addr(ra);
7544 7545 _markStack->reset(); // discard stack contents
7545 7546 _markStack->expand(); // expand the stack if possible
7546 7547 }
7547 7548
7548 7549 // Upon stack overflow, we discard (part of) the stack,
7549 7550 // remembering the least address amongst those discarded
7550 7551 // in CMSCollector's _restart_address.
7551 7552 void Par_PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) {
7552 7553 // We need to do this under a mutex to prevent other
7553 7554 // workers from interfering with the work done below.
7554 7555 MutexLockerEx ml(_overflow_stack->par_lock(),
7555 7556 Mutex::_no_safepoint_check_flag);
7556 7557 // Remember the least grey address discarded
7557 7558 HeapWord* ra = (HeapWord*)_overflow_stack->least_value(lost);
7558 7559 _collector->lower_restart_addr(ra);
7559 7560 _overflow_stack->reset(); // discard stack contents
7560 7561 _overflow_stack->expand(); // expand the stack if possible
7561 7562 }
7562 7563
7563 7564 void PushOrMarkClosure::do_oop(oop obj) {
7564 7565 // Ignore mark word because we are running concurrently with mutators.
7565 7566 assert(obj->is_oop_or_null(true), "expected an oop or NULL");
7566 7567 HeapWord* addr = (HeapWord*)obj;
7567 7568 if (_span.contains(addr) && !_bitMap->isMarked(addr)) {
7568 7569 // Oop lies in _span and isn't yet grey or black
7569 7570 _bitMap->mark(addr); // now grey
7570 7571 if (addr < _finger) {
7571 7572 // the bit map iteration has already either passed, or
7572 7573 // sampled, this bit in the bit map; we'll need to
7573 7574 // use the marking stack to scan this oop's oops.
7574 7575 bool simulate_overflow = false;
7575 7576 NOT_PRODUCT(
7576 7577 if (CMSMarkStackOverflowALot &&
7577 7578 _collector->simulate_overflow()) {
7578 7579 // simulate a stack overflow
7579 7580 simulate_overflow = true;
7580 7581 }
7581 7582 )
7582 7583 if (simulate_overflow || !_markStack->push(obj)) { // stack overflow
7583 7584 if (PrintCMSStatistics != 0) {
7584 7585 gclog_or_tty->print_cr("CMS marking stack overflow (benign) at "
7585 7586 SIZE_FORMAT, _markStack->capacity());
7586 7587 }
7587 7588 assert(simulate_overflow || _markStack->isFull(), "Else push should have succeeded");
7588 7589 handle_stack_overflow(addr);
7589 7590 }
7590 7591 }
7591 7592 // anything including and to the right of _finger
7592 7593 // will be scanned as we iterate over the remainder of the
7593 7594 // bit map
7594 7595 do_yield_check();
7595 7596 }
7596 7597 }
7597 7598
7598 7599 void PushOrMarkClosure::do_oop(oop* p) { PushOrMarkClosure::do_oop_work(p); }
7599 7600 void PushOrMarkClosure::do_oop(narrowOop* p) { PushOrMarkClosure::do_oop_work(p); }
7600 7601
7601 7602 void Par_PushOrMarkClosure::do_oop(oop obj) {
7602 7603 // Ignore mark word because we are running concurrently with mutators.
7603 7604 assert(obj->is_oop_or_null(true), "expected an oop or NULL");
7604 7605 HeapWord* addr = (HeapWord*)obj;
7605 7606 if (_whole_span.contains(addr) && !_bit_map->isMarked(addr)) {
7606 7607 // Oop lies in _span and isn't yet grey or black
7607 7608 // We read the global_finger (volatile read) strictly after marking oop
7608 7609 bool res = _bit_map->par_mark(addr); // now grey
7609 7610 volatile HeapWord** gfa = (volatile HeapWord**)_global_finger_addr;
7610 7611 // Should we push this marked oop on our stack?
7611 7612 // -- if someone else marked it, nothing to do
7612 7613 // -- if target oop is above global finger nothing to do
7613 7614 // -- if target oop is in chunk and above local finger
7614 7615 // then nothing to do
7615 7616 // -- else push on work queue
7616 7617 if ( !res // someone else marked it, they will deal with it
7617 7618 || (addr >= *gfa) // will be scanned in a later task
7618 7619 || (_span.contains(addr) && addr >= _finger)) { // later in this chunk
7619 7620 return;
7620 7621 }
7621 7622 // the bit map iteration has already either passed, or
7622 7623 // sampled, this bit in the bit map; we'll need to
7623 7624 // use the marking stack to scan this oop's oops.
7624 7625 bool simulate_overflow = false;
7625 7626 NOT_PRODUCT(
7626 7627 if (CMSMarkStackOverflowALot &&
7627 7628 _collector->simulate_overflow()) {
7628 7629 // simulate a stack overflow
7629 7630 simulate_overflow = true;
7630 7631 }
7631 7632 )
7632 7633 if (simulate_overflow ||
7633 7634 !(_work_queue->push(obj) || _overflow_stack->par_push(obj))) {
7634 7635 // stack overflow
7635 7636 if (PrintCMSStatistics != 0) {
7636 7637 gclog_or_tty->print_cr("CMS marking stack overflow (benign) at "
7637 7638 SIZE_FORMAT, _overflow_stack->capacity());
7638 7639 }
7639 7640 // We cannot assert that the overflow stack is full because
7640 7641 // it may have been emptied since.
7641 7642 assert(simulate_overflow ||
7642 7643 _work_queue->size() == _work_queue->max_elems(),
7643 7644 "Else push should have succeeded");
7644 7645 handle_stack_overflow(addr);
7645 7646 }
7646 7647 do_yield_check();
7647 7648 }
7648 7649 }
7649 7650
7650 7651 void Par_PushOrMarkClosure::do_oop(oop* p) { Par_PushOrMarkClosure::do_oop_work(p); }
7651 7652 void Par_PushOrMarkClosure::do_oop(narrowOop* p) { Par_PushOrMarkClosure::do_oop_work(p); }
7652 7653
7653 7654 KlassRememberingOopClosure::KlassRememberingOopClosure(CMSCollector* collector,
7654 7655 ReferenceProcessor* rp,
7655 7656 CMSMarkStack* revisit_stack) :
7656 7657 OopClosure(rp),
7657 7658 _collector(collector),
7658 7659 _revisit_stack(revisit_stack),
7659 7660 _should_remember_klasses(collector->should_unload_classes()) {}
7660 7661
7661 7662 PushAndMarkClosure::PushAndMarkClosure(CMSCollector* collector,
7662 7663 MemRegion span,
7663 7664 ReferenceProcessor* rp,
7664 7665 CMSBitMap* bit_map,
7665 7666 CMSBitMap* mod_union_table,
7666 7667 CMSMarkStack* mark_stack,
7667 7668 CMSMarkStack* revisit_stack,
7668 7669 bool concurrent_precleaning):
7669 7670 KlassRememberingOopClosure(collector, rp, revisit_stack),
7670 7671 _span(span),
7671 7672 _bit_map(bit_map),
7672 7673 _mod_union_table(mod_union_table),
7673 7674 _mark_stack(mark_stack),
7674 7675 _concurrent_precleaning(concurrent_precleaning)
7675 7676 {
7676 7677 assert(_ref_processor != NULL, "_ref_processor shouldn't be NULL");
7677 7678 }
7678 7679
7679 7680 // Grey object rescan during pre-cleaning and second checkpoint phases --
7680 7681 // the non-parallel version (the parallel version appears further below.)
7681 7682 void PushAndMarkClosure::do_oop(oop obj) {
7682 7683 // Ignore mark word verification. During concurrent precleaning,
7683 7684 // the object monitor may be locked. During the checkpoint
7684 7685 // phases, the object may already have been reached by a different
7685 7686 // path and may be at the end of the global overflow list (so
7686 7687 // the mark word may be NULL).
7687 7688 assert(obj->is_oop_or_null(true /* ignore mark word */),
7688 7689 "expected an oop or NULL");
7689 7690 HeapWord* addr = (HeapWord*)obj;
7690 7691 // Check if oop points into the CMS generation
7691 7692 // and is not marked
7692 7693 if (_span.contains(addr) && !_bit_map->isMarked(addr)) {
7693 7694 // a white object ...
7694 7695 _bit_map->mark(addr); // ... now grey
7695 7696 // push on the marking stack (grey set)
7696 7697 bool simulate_overflow = false;
7697 7698 NOT_PRODUCT(
7698 7699 if (CMSMarkStackOverflowALot &&
7699 7700 _collector->simulate_overflow()) {
7700 7701 // simulate a stack overflow
7701 7702 simulate_overflow = true;
7702 7703 }
7703 7704 )
7704 7705 if (simulate_overflow || !_mark_stack->push(obj)) {
7705 7706 if (_concurrent_precleaning) {
7706 7707 // During precleaning we can just dirty the appropriate card(s)
7707 7708 // in the mod union table, thus ensuring that the object remains
7708 7709 // in the grey set and continue. In the case of object arrays
7709 7710 // we need to dirty all of the cards that the object spans,
7710 7711 // since the rescan of object arrays will be limited to the
7711 7712 // dirty cards.
7712 7713 // Note that no one can be interfering with us in this action
7713 7714 // of dirtying the mod union table, so no locking or atomics
7714 7715 // are required.
7715 7716 if (obj->is_objArray()) {
7716 7717 size_t sz = obj->size();
7717 7718 HeapWord* end_card_addr = (HeapWord*)round_to(
7718 7719 (intptr_t)(addr+sz), CardTableModRefBS::card_size);
7719 7720 MemRegion redirty_range = MemRegion(addr, end_card_addr);
7720 7721 assert(!redirty_range.is_empty(), "Arithmetical tautology");
7721 7722 _mod_union_table->mark_range(redirty_range);
7722 7723 } else {
7723 7724 _mod_union_table->mark(addr);
7724 7725 }
7725 7726 _collector->_ser_pmc_preclean_ovflw++;
7726 7727 } else {
7727 7728 // During the remark phase, we need to remember this oop
7728 7729 // in the overflow list.
7729 7730 _collector->push_on_overflow_list(obj);
7730 7731 _collector->_ser_pmc_remark_ovflw++;
7731 7732 }
7732 7733 }
7733 7734 }
7734 7735 }
7735 7736
7736 7737 Par_PushAndMarkClosure::Par_PushAndMarkClosure(CMSCollector* collector,
7737 7738 MemRegion span,
7738 7739 ReferenceProcessor* rp,
7739 7740 CMSBitMap* bit_map,
7740 7741 OopTaskQueue* work_queue,
7741 7742 CMSMarkStack* revisit_stack):
7742 7743 Par_KlassRememberingOopClosure(collector, rp, revisit_stack),
7743 7744 _span(span),
7744 7745 _bit_map(bit_map),
7745 7746 _work_queue(work_queue)
7746 7747 {
7747 7748 assert(_ref_processor != NULL, "_ref_processor shouldn't be NULL");
7748 7749 }
7749 7750
7750 7751 void PushAndMarkClosure::do_oop(oop* p) { PushAndMarkClosure::do_oop_work(p); }
7751 7752 void PushAndMarkClosure::do_oop(narrowOop* p) { PushAndMarkClosure::do_oop_work(p); }
7752 7753
7753 7754 // Grey object rescan during second checkpoint phase --
7754 7755 // the parallel version.
7755 7756 void Par_PushAndMarkClosure::do_oop(oop obj) {
7756 7757 // In the assert below, we ignore the mark word because
7757 7758 // this oop may point to an already visited object that is
7758 7759 // on the overflow stack (in which case the mark word has
7759 7760 // been hijacked for chaining into the overflow stack --
7760 7761 // if this is the last object in the overflow stack then
7761 7762 // its mark word will be NULL). Because this object may
7762 7763 // have been subsequently popped off the global overflow
7763 7764 // stack, and the mark word possibly restored to the prototypical
7764 7765 // value, by the time we get to examine this failing assert in
7765 7766 // the debugger, is_oop_or_null(false) may subsequently start
7766 7767 // to hold.
7767 7768 assert(obj->is_oop_or_null(true),
7768 7769 "expected an oop or NULL");
7769 7770 HeapWord* addr = (HeapWord*)obj;
7770 7771 // Check if oop points into the CMS generation
7771 7772 // and is not marked
7772 7773 if (_span.contains(addr) && !_bit_map->isMarked(addr)) {
7773 7774 // a white object ...
7774 7775 // If we manage to "claim" the object, by being the
7775 7776 // first thread to mark it, then we push it on our
7776 7777 // marking stack
7777 7778 if (_bit_map->par_mark(addr)) { // ... now grey
7778 7779 // push on work queue (grey set)
7779 7780 bool simulate_overflow = false;
7780 7781 NOT_PRODUCT(
7781 7782 if (CMSMarkStackOverflowALot &&
7782 7783 _collector->par_simulate_overflow()) {
7783 7784 // simulate a stack overflow
7784 7785 simulate_overflow = true;
7785 7786 }
7786 7787 )
7787 7788 if (simulate_overflow || !_work_queue->push(obj)) {
7788 7789 _collector->par_push_on_overflow_list(obj);
7789 7790 _collector->_par_pmc_remark_ovflw++; // imprecise OK: no need to CAS
7790 7791 }
7791 7792 } // Else, some other thread got there first
7792 7793 }
7793 7794 }
7794 7795
7795 7796 void Par_PushAndMarkClosure::do_oop(oop* p) { Par_PushAndMarkClosure::do_oop_work(p); }
7796 7797 void Par_PushAndMarkClosure::do_oop(narrowOop* p) { Par_PushAndMarkClosure::do_oop_work(p); }
7797 7798
7798 7799 void PushAndMarkClosure::remember_mdo(DataLayout* v) {
7799 7800 // TBD
7800 7801 }
7801 7802
7802 7803 void Par_PushAndMarkClosure::remember_mdo(DataLayout* v) {
7803 7804 // TBD
7804 7805 }
7805 7806
7806 7807 void CMSPrecleanRefsYieldClosure::do_yield_work() {
7807 7808 DEBUG_ONLY(RememberKlassesChecker mux(false);)
7808 7809 Mutex* bml = _collector->bitMapLock();
7809 7810 assert_lock_strong(bml);
7810 7811 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
7811 7812 "CMS thread should hold CMS token");
7812 7813
7813 7814 bml->unlock();
7814 7815 ConcurrentMarkSweepThread::desynchronize(true);
7815 7816
7816 7817 ConcurrentMarkSweepThread::acknowledge_yield_request();
7817 7818
7818 7819 _collector->stopTimer();
7819 7820 GCPauseTimer p(_collector->size_policy()->concurrent_timer_ptr());
7820 7821 if (PrintCMSStatistics != 0) {
7821 7822 _collector->incrementYields();
7822 7823 }
7823 7824 _collector->icms_wait();
7824 7825
7825 7826 // See the comment in coordinator_yield()
7826 7827 for (unsigned i = 0; i < CMSYieldSleepCount &&
7827 7828 ConcurrentMarkSweepThread::should_yield() &&
7828 7829 !CMSCollector::foregroundGCIsActive(); ++i) {
7829 7830 os::sleep(Thread::current(), 1, false);
7830 7831 ConcurrentMarkSweepThread::acknowledge_yield_request();
7831 7832 }
7832 7833
7833 7834 ConcurrentMarkSweepThread::synchronize(true);
7834 7835 bml->lock();
7835 7836
7836 7837 _collector->startTimer();
7837 7838 }
7838 7839
7839 7840 bool CMSPrecleanRefsYieldClosure::should_return() {
7840 7841 if (ConcurrentMarkSweepThread::should_yield()) {
7841 7842 do_yield_work();
7842 7843 }
7843 7844 return _collector->foregroundGCIsActive();
7844 7845 }
7845 7846
7846 7847 void MarkFromDirtyCardsClosure::do_MemRegion(MemRegion mr) {
7847 7848 assert(((size_t)mr.start())%CardTableModRefBS::card_size_in_words == 0,
7848 7849 "mr should be aligned to start at a card boundary");
7849 7850 // We'd like to assert:
7850 7851 // assert(mr.word_size()%CardTableModRefBS::card_size_in_words == 0,
7851 7852 // "mr should be a range of cards");
7852 7853 // However, that would be too strong in one case -- the last
7853 7854 // partition ends at _unallocated_block which, in general, can be
7854 7855 // an arbitrary boundary, not necessarily card aligned.
7855 7856 if (PrintCMSStatistics != 0) {
7856 7857 _num_dirty_cards +=
7857 7858 mr.word_size()/CardTableModRefBS::card_size_in_words;
7858 7859 }
7859 7860 _space->object_iterate_mem(mr, &_scan_cl);
7860 7861 }
7861 7862
7862 7863 SweepClosure::SweepClosure(CMSCollector* collector,
7863 7864 ConcurrentMarkSweepGeneration* g,
7864 7865 CMSBitMap* bitMap, bool should_yield) :
7865 7866 _collector(collector),
7866 7867 _g(g),
7867 7868 _sp(g->cmsSpace()),
7868 7869 _limit(_sp->sweep_limit()),
7869 7870 _freelistLock(_sp->freelistLock()),
7870 7871 _bitMap(bitMap),
7871 7872 _yield(should_yield),
7872 7873 _inFreeRange(false), // No free range at beginning of sweep
7873 7874 _freeRangeInFreeLists(false), // No free range at beginning of sweep
7874 7875 _lastFreeRangeCoalesced(false),
7875 7876 _freeFinger(g->used_region().start())
7876 7877 {
7877 7878 NOT_PRODUCT(
7878 7879 _numObjectsFreed = 0;
7879 7880 _numWordsFreed = 0;
7880 7881 _numObjectsLive = 0;
7881 7882 _numWordsLive = 0;
7882 7883 _numObjectsAlreadyFree = 0;
7883 7884 _numWordsAlreadyFree = 0;
7884 7885 _last_fc = NULL;
7885 7886
7886 7887 _sp->initializeIndexedFreeListArrayReturnedBytes();
7887 7888 _sp->dictionary()->initializeDictReturnedBytes();
7888 7889 )
7889 7890 assert(_limit >= _sp->bottom() && _limit <= _sp->end(),
7890 7891 "sweep _limit out of bounds");
7891 7892 if (CMSTraceSweeper) {
7892 7893 gclog_or_tty->print_cr("\n====================\nStarting new sweep with limit " PTR_FORMAT,
7893 7894 _limit);
7894 7895 }
7895 7896 }
7896 7897
7897 7898 void SweepClosure::print_on(outputStream* st) const {
7898 7899 tty->print_cr("_sp = [" PTR_FORMAT "," PTR_FORMAT ")",
7899 7900 _sp->bottom(), _sp->end());
7900 7901 tty->print_cr("_limit = " PTR_FORMAT, _limit);
7901 7902 tty->print_cr("_freeFinger = " PTR_FORMAT, _freeFinger);
7902 7903 NOT_PRODUCT(tty->print_cr("_last_fc = " PTR_FORMAT, _last_fc);)
7903 7904 tty->print_cr("_inFreeRange = %d, _freeRangeInFreeLists = %d, _lastFreeRangeCoalesced = %d",
7904 7905 _inFreeRange, _freeRangeInFreeLists, _lastFreeRangeCoalesced);
7905 7906 }
7906 7907
7907 7908 #ifndef PRODUCT
7908 7909 // Assertion checking only: no useful work in product mode --
7909 7910 // however, if any of the flags below become product flags,
7910 7911 // you may need to review this code to see if it needs to be
7911 7912 // enabled in product mode.
7912 7913 SweepClosure::~SweepClosure() {
7913 7914 assert_lock_strong(_freelistLock);
7914 7915 assert(_limit >= _sp->bottom() && _limit <= _sp->end(),
7915 7916 "sweep _limit out of bounds");
7916 7917 if (inFreeRange()) {
7917 7918 warning("inFreeRange() should have been reset; dumping state of SweepClosure");
7918 7919 print();
7919 7920 ShouldNotReachHere();
7920 7921 }
7921 7922 if (Verbose && PrintGC) {
7922 7923 gclog_or_tty->print("Collected "SIZE_FORMAT" objects, " SIZE_FORMAT " bytes",
7923 7924 _numObjectsFreed, _numWordsFreed*sizeof(HeapWord));
7924 7925 gclog_or_tty->print_cr("\nLive "SIZE_FORMAT" objects, "
7925 7926 SIZE_FORMAT" bytes "
7926 7927 "Already free "SIZE_FORMAT" objects, "SIZE_FORMAT" bytes",
7927 7928 _numObjectsLive, _numWordsLive*sizeof(HeapWord),
7928 7929 _numObjectsAlreadyFree, _numWordsAlreadyFree*sizeof(HeapWord));
7929 7930 size_t totalBytes = (_numWordsFreed + _numWordsLive + _numWordsAlreadyFree)
7930 7931 * sizeof(HeapWord);
7931 7932 gclog_or_tty->print_cr("Total sweep: "SIZE_FORMAT" bytes", totalBytes);
7932 7933
7933 7934 if (PrintCMSStatistics && CMSVerifyReturnedBytes) {
7934 7935 size_t indexListReturnedBytes = _sp->sumIndexedFreeListArrayReturnedBytes();
7935 7936 size_t dictReturnedBytes = _sp->dictionary()->sumDictReturnedBytes();
7936 7937 size_t returnedBytes = indexListReturnedBytes + dictReturnedBytes;
7937 7938 gclog_or_tty->print("Returned "SIZE_FORMAT" bytes", returnedBytes);
7938 7939 gclog_or_tty->print(" Indexed List Returned "SIZE_FORMAT" bytes",
7939 7940 indexListReturnedBytes);
7940 7941 gclog_or_tty->print_cr(" Dictionary Returned "SIZE_FORMAT" bytes",
7941 7942 dictReturnedBytes);
7942 7943 }
7943 7944 }
7944 7945 if (CMSTraceSweeper) {
7945 7946 gclog_or_tty->print_cr("end of sweep with _limit = " PTR_FORMAT "\n================",
7946 7947 _limit);
7947 7948 }
7948 7949 }
7949 7950 #endif // PRODUCT
7950 7951
7951 7952 void SweepClosure::initialize_free_range(HeapWord* freeFinger,
7952 7953 bool freeRangeInFreeLists) {
7953 7954 if (CMSTraceSweeper) {
7954 7955 gclog_or_tty->print("---- Start free range at 0x%x with free block (%d)\n",
7955 7956 freeFinger, freeRangeInFreeLists);
7956 7957 }
7957 7958 assert(!inFreeRange(), "Trampling existing free range");
7958 7959 set_inFreeRange(true);
7959 7960 set_lastFreeRangeCoalesced(false);
7960 7961
7961 7962 set_freeFinger(freeFinger);
7962 7963 set_freeRangeInFreeLists(freeRangeInFreeLists);
7963 7964 if (CMSTestInFreeList) {
7964 7965 if (freeRangeInFreeLists) {
7965 7966 FreeChunk* fc = (FreeChunk*) freeFinger;
7966 7967 assert(fc->isFree(), "A chunk on the free list should be free.");
7967 7968 assert(fc->size() > 0, "Free range should have a size");
7968 7969 assert(_sp->verifyChunkInFreeLists(fc), "Chunk is not in free lists");
7969 7970 }
7970 7971 }
7971 7972 }
7972 7973
7973 7974 // Note that the sweeper runs concurrently with mutators. Thus,
7974 7975 // it is possible for direct allocation in this generation to happen
7975 7976 // in the middle of the sweep. Note that the sweeper also coalesces
7976 7977 // contiguous free blocks. Thus, unless the sweeper and the allocator
7977 7978 // synchronize appropriately, freshly allocated blocks may get swept up.
7978 7979 // This is accomplished by the sweeper locking the free lists while
7979 7980 // it is sweeping. Thus blocks that are determined to be free are
7980 7981 // indeed free. There is however one additional complication:
7981 7982 // blocks that have been allocated since the final checkpoint and
7982 7983 // mark, will not have been marked and so would be treated as
7983 7984 // unreachable and swept up. To prevent this, the allocator marks
7984 7985 // the bit map when allocating during the sweep phase. This leads,
7985 7986 // however, to a further complication -- objects may have been allocated
7986 7987 // but not yet initialized -- in the sense that the header isn't yet
7987 7988 // installed. The sweeper can not then determine the size of the block
7988 7989 // in order to skip over it. To deal with this case, we use a technique
7989 7990 // (due to Printezis) to encode such uninitialized block sizes in the
7990 7991 // bit map. Since the bit map uses a bit per every HeapWord, but the
7991 7992 // CMS generation has a minimum object size of 3 HeapWords, it follows
7992 7993 // that "normal marks" won't be adjacent in the bit map (there will
7993 7994 // always be at least two 0 bits between successive 1 bits). We make use
7994 7995 // of these "unused" bits to represent uninitialized blocks -- the bit
7995 7996 // corresponding to the start of the uninitialized object and the next
7996 7997 // bit are both set. Finally, a 1 bit marks the end of the object that
7997 7998 // started with the two consecutive 1 bits to indicate its potentially
7998 7999 // uninitialized state.
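// An illustrative sketch of the encoding (hypothetical addresses): for an
// uninitialized block starting at word A with size 5, the bits at A and
// A+1 are both set and a third bit is set at A+4, the block's last word;
// the sweeper then recovers the size as (A+4) + 1 - A = 5 without reading
// the (possibly not yet installed) object header. A normally marked live
// object at A sets only the bit at A.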
7999 8000
8000 8001 size_t SweepClosure::do_blk_careful(HeapWord* addr) {
8001 8002 FreeChunk* fc = (FreeChunk*)addr;
8002 8003 size_t res;
8003 8004
8004 8005 // Check if we are done sweeping. Below we check "addr >= _limit" rather
8005 8006 // than "addr == _limit" because although _limit was a block boundary when
8006 8007 // we started the sweep, it may no longer be one because heap expansion
8007 8008 // may have caused us to coalesce the block ending at the address _limit
8008 8009 // with a newly expanded chunk (this happens when _limit was set to the
8009 8010 // previous _end of the space), so we may have stepped past _limit:
8010 8011 // see the following Zeno-like trail of CRs 6977970, 7008136, 7042740.
8011 8012 if (addr >= _limit) { // we have swept up to or past the limit: finish up
8012 8013 assert(_limit >= _sp->bottom() && _limit <= _sp->end(),
8013 8014 "sweep _limit out of bounds");
8014 8015 assert(addr < _sp->end(), "addr out of bounds");
8015 8016 // Flush any free range we might be holding as a single
8016 8017 // coalesced chunk to the appropriate free list.
8017 8018 if (inFreeRange()) {
8018 8019 assert(freeFinger() >= _sp->bottom() && freeFinger() < _limit,
8019 8020 err_msg("freeFinger() " PTR_FORMAT" is out-of-bounds", freeFinger()));
8020 8021 flush_cur_free_chunk(freeFinger(),
8021 8022 pointer_delta(addr, freeFinger()));
8022 8023 if (CMSTraceSweeper) {
8023 8024 gclog_or_tty->print("Sweep: last chunk: ");
8024 8025 gclog_or_tty->print("put_free_blk 0x%x ("SIZE_FORMAT") "
8025 8026 "[coalesced:"SIZE_FORMAT"]\n",
8026 8027 freeFinger(), pointer_delta(addr, freeFinger()),
8027 8028 lastFreeRangeCoalesced());
8028 8029 }
8029 8030 }
8030 8031
8031 8032 // help the iterator loop finish
8032 8033 return pointer_delta(_sp->end(), addr);
8033 8034 }
8034 8035
8035 8036 assert(addr < _limit, "sweep invariant");
8036 8037 // check if we should yield
8037 8038 do_yield_check(addr);
8038 8039 if (fc->isFree()) {
8039 8040 // Chunk that is already free
8040 8041 res = fc->size();
8041 8042 do_already_free_chunk(fc);
8042 8043 debug_only(_sp->verifyFreeLists());
8043 8044 // If we flush the chunk at hand in lookahead_and_flush()
8044 8045 // and it's coalesced with a preceding chunk, then the
8045 8046 // process of "mangling" the payload of the coalesced block
8046 8047 // will cause erasure of the size information from the
8047 8048 // (erstwhile) header of all the coalesced blocks but the
8048 8049 // first, so the first disjunct in the assert will not hold
8049 8050 // in that specific case (in which case the second disjunct
8050 8051 // will hold).
8051 8052 assert(res == fc->size() || ((HeapWord*)fc) + res >= _limit,
8052 8053 "Otherwise the size info doesn't change at this step");
8053 8054 NOT_PRODUCT(
8054 8055 _numObjectsAlreadyFree++;
8055 8056 _numWordsAlreadyFree += res;
8056 8057 )
8057 8058 NOT_PRODUCT(_last_fc = fc;)
8058 8059 } else if (!_bitMap->isMarked(addr)) {
8059 8060 // Chunk is fresh garbage
8060 8061 res = do_garbage_chunk(fc);
8061 8062 debug_only(_sp->verifyFreeLists());
8062 8063 NOT_PRODUCT(
8063 8064 _numObjectsFreed++;
8064 8065 _numWordsFreed += res;
8065 8066 )
8066 8067 } else {
8067 8068 // Chunk that is alive.
8068 8069 res = do_live_chunk(fc);
8069 8070 debug_only(_sp->verifyFreeLists());
8070 8071 NOT_PRODUCT(
8071 8072 _numObjectsLive++;
8072 8073 _numWordsLive += res;
8073 8074 )
8074 8075 }
8075 8076 return res;
8076 8077 }
8077 8078
8078 8079 // For the smart allocation, record following
8079 8080 // split deaths - a free chunk is removed from its free list because
8080 8081 // it is being split into two or more chunks.
8081 8082 // split birth - a free chunk is being added to its free list because
8082 8083 // a larger free chunk has been split and resulted in this free chunk.
8083 8084 // coal death - a free chunk is being removed from its free list because
8084 8085 //   it is being coalesced into a larger free chunk.
8085 8086 // coal birth - a free chunk is being added to its free list because
8086 8087 //   it was created when two or more free chunks were coalesced into
8087 8088 // this free chunk.
8088 8089 //
8089 8090 // These statistics are used to determine the desired number of free
8090 8091 // chunks of a given size. The desired number is chosen to be relative
8091 8092 // to the end of a CMS sweep. The desired number at the end of a sweep
8092 8093 // is the
8093 8094 // count-at-end-of-previous-sweep (an amount that was enough)
8094 8095 // - count-at-beginning-of-current-sweep (the excess)
8095 8096 // + split-births (gains in this size during interval)
8096 8097 // - split-deaths (demands on this size during interval)
8097 8098 // where the interval is from the end of one sweep to the end of the
8098 8099 // next.
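//      For example (illustrative numbers only): if the previous sweep
//      ended with 100 free chunks of a given size, the current sweep
//      began with 20, and the interval saw 30 split-births and 10
//      split-deaths, then the desired count at the end of this sweep
//      is 100 - 20 + 30 - 10 = 100.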
8099 8100 //
8100 8101 // When sweeping the sweeper maintains an accumulated chunk which is
8101 8102 // the chunk that is made up of chunks that have been coalesced. That
8102 8103 // will be termed the left-hand chunk. A new chunk of garbage that
8103 8104 // is being considered for coalescing will be referred to as the
8104 8105 // right-hand chunk.
8105 8106 //
8106 8107 // When making a decision on whether to coalesce a right-hand chunk with
8107 8108 // the current left-hand chunk, the current count vs. the desired count
8108 8109 // of the left-hand chunk is considered. Also if the right-hand chunk
8109 8110 // is near the large chunk at the end of the heap (see
8110 8111 // ConcurrentMarkSweepGeneration::isNearLargestChunk()), then the
8111 8112 // left-hand chunk is coalesced.
8112 8113 //
8113 8114 // When making a decision about whether to split a chunk, the desired count
8114 8115 // vs. the current count of the candidate to be split is also considered.
8115 8116 // If the candidate is underpopulated (currently fewer chunks than desired)
8116 8117 // a chunk of an overpopulated (currently more chunks than desired) size may
8117 8118 // be chosen. The "hint" associated with a free list, if non-null, points
8118 8119 // to a free list which may be overpopulated.
8119 8120 //
8120 8121
8121 8122 void SweepClosure::do_already_free_chunk(FreeChunk* fc) {
8122 8123 const size_t size = fc->size();
8123 8124 // Chunks that cannot be coalesced are not in the
8124 8125 // free lists.
8125 8126 if (CMSTestInFreeList && !fc->cantCoalesce()) {
8126 8127 assert(_sp->verifyChunkInFreeLists(fc),
8127 8128 "free chunk should be in free lists");
8128 8129 }
8129 8130 // a chunk that is already free should not have been
8130 8131 // marked in the bit map
8131 8132 HeapWord* const addr = (HeapWord*) fc;
8132 8133 assert(!_bitMap->isMarked(addr), "free chunk should be unmarked");
8133 8134 // Verify that the bit map has no bits marked between
8134 8135 // addr and purported end of this block.
8135 8136 _bitMap->verifyNoOneBitsInRange(addr + 1, addr + size);
8136 8137
8137 8138 // Some chunks cannot be coalesced under any circumstances.
8138 8139 // See the definition of cantCoalesce().
8139 8140 if (!fc->cantCoalesce()) {
8140 8141 // This chunk can potentially be coalesced.
8141 8142 if (_sp->adaptive_freelists()) {
8142 8143 // All the work is done in do_post_free_or_garbage_chunk().
8143 8144 do_post_free_or_garbage_chunk(fc, size);
8144 8145 } else { // Not adaptive free lists
8145 8146 // this is a free chunk that can potentially be coalesced by the sweeper;
8146 8147 if (!inFreeRange()) {
8147 8148 // if the next chunk is a free block that can't be coalesced
8148 8149 // it doesn't make sense to remove this chunk from the free lists
8149 8150 FreeChunk* nextChunk = (FreeChunk*)(addr + size);
8150 8151 assert((HeapWord*)nextChunk <= _sp->end(), "Chunk size out of bounds?");
8151 8152 if ((HeapWord*)nextChunk < _sp->end() && // There is another free chunk to the right ...
8152 8153 nextChunk->isFree() && // ... which is free...
8153 8154 nextChunk->cantCoalesce()) { // ... but can't be coalesced
8154 8155 // nothing to do
8155 8156 } else {
8156 8157 // Potentially the start of a new free range:
8157 8158 // Don't eagerly remove it from the free lists.
8158 8159 // No need to remove it if it will just be put
8159 8160 // back again. (Also from a pragmatic point of view
8160 8161 // if it is a free block in a region that is beyond
8161 8162 // any allocated blocks, an assertion will fail)
8162 8163 // Remember the start of a free run.
8163 8164 initialize_free_range(addr, true);
8164 8165 // end - can coalesce with next chunk
8165 8166 }
8166 8167 } else {
8167 8168 // the midst of a free range, we are coalescing
8168 8169 print_free_block_coalesced(fc);
8169 8170 if (CMSTraceSweeper) {
8170 8171 gclog_or_tty->print(" -- pick up free block 0x%x (%d)\n", fc, size);
8171 8172 }
8172 8173 // remove it from the free lists
8173 8174 _sp->removeFreeChunkFromFreeLists(fc);
8174 8175 set_lastFreeRangeCoalesced(true);
8175 8176 // If the chunk is being coalesced and the current free range is
8176 8177 // in the free lists, remove the current free range so that it
8177 8178 // will be returned to the free lists in its entirety - all
8178 8179 // the coalesced pieces included.
8179 8180 if (freeRangeInFreeLists()) {
8180 8181 FreeChunk* ffc = (FreeChunk*) freeFinger();
8181 8182 assert(ffc->size() == pointer_delta(addr, freeFinger()),
8182 8183 "Size of free range is inconsistent with chunk size.");
8183 8184 if (CMSTestInFreeList) {
8184 8185 assert(_sp->verifyChunkInFreeLists(ffc),
8185 8186 "free range is not in free lists");
8186 8187 }
8187 8188 _sp->removeFreeChunkFromFreeLists(ffc);
8188 8189 set_freeRangeInFreeLists(false);
8189 8190 }
8190 8191 }
8191 8192 }
8192 8193 // Note that if the chunk is not coalescable (the else arm
8193 8194 // below), we unconditionally flush, without needing to do
8194 8195 // a "lookahead," as we do below.
8195 8196 if (inFreeRange()) lookahead_and_flush(fc, size);
8196 8197 } else {
8197 8198 // Code path common to both original and adaptive free lists.
8198 8199
8199 8200 // can't coalesce with previous block; this should be treated
8200 8201 // as the end of a free run if any
8201 8202 if (inFreeRange()) {
8202 8203 // we kicked some butt; time to pick up the garbage
8203 8204 assert(freeFinger() < addr, "freeFinger points too high");
8204 8205 flush_cur_free_chunk(freeFinger(), pointer_delta(addr, freeFinger()));
8205 8206 }
8206 8207 // else, nothing to do, just continue
8207 8208 }
8208 8209 }
8209 8210
8210 8211 size_t SweepClosure::do_garbage_chunk(FreeChunk* fc) {
8211 8212 // This is a chunk of garbage. It is not in any free list.
8212 8213 // Add it to a free list or let it possibly be coalesced into
8213 8214 // a larger chunk.
8214 8215 HeapWord* const addr = (HeapWord*) fc;
8215 8216 const size_t size = CompactibleFreeListSpace::adjustObjectSize(oop(addr)->size());
8216 8217
8217 8218 if (_sp->adaptive_freelists()) {
8218 8219 // Verify that the bit map has no bits marked between
8219 8220 // addr and purported end of just dead object.
8220 8221 _bitMap->verifyNoOneBitsInRange(addr + 1, addr + size);
8221 8222
8222 8223 do_post_free_or_garbage_chunk(fc, size);
8223 8224 } else {
8224 8225 if (!inFreeRange()) {
8225 8226 // start of a new free range
8226 8227 assert(size > 0, "A free range should have a size");
8227 8228 initialize_free_range(addr, false);
8228 8229 } else {
8229 8230 // this will be swept up when we hit the end of the
8230 8231 // free range
8231 8232 if (CMSTraceSweeper) {
8232 8233 gclog_or_tty->print(" -- pick up garbage 0x%x (%d) \n", fc, size);
8233 8234 }
8234 8235 // If the chunk is being coalesced and the current free range is
8235 8236 // in the free lists, remove the current free range so that it
8236 8237 // will be returned to the free lists in its entirety - all
8237 8238 // the coalesced pieces included.
8238 8239 if (freeRangeInFreeLists()) {
8239 8240 FreeChunk* ffc = (FreeChunk*)freeFinger();
8240 8241 assert(ffc->size() == pointer_delta(addr, freeFinger()),
8241 8242 "Size of free range is inconsistent with chunk size.");
8242 8243 if (CMSTestInFreeList) {
8243 8244 assert(_sp->verifyChunkInFreeLists(ffc),
8244 8245 "free range is not in free lists");
8245 8246 }
8246 8247 _sp->removeFreeChunkFromFreeLists(ffc);
8247 8248 set_freeRangeInFreeLists(false);
8248 8249 }
8249 8250 set_lastFreeRangeCoalesced(true);
8250 8251 }
8251 8252 // this will be swept up when we hit the end of the free range
8252 8253
8253 8254 // Verify that the bit map has no bits marked between
8254 8255 // addr and purported end of just dead object.
8255 8256 _bitMap->verifyNoOneBitsInRange(addr + 1, addr + size);
8256 8257 }
8257 8258 assert(_limit >= addr + size,
8258 8259 "A freshly garbage chunk can't possibly straddle over _limit");
8259 8260 if (inFreeRange()) lookahead_and_flush(fc, size);
8260 8261 return size;
8261 8262 }
8262 8263
8263 8264 size_t SweepClosure::do_live_chunk(FreeChunk* fc) {
8264 8265 HeapWord* addr = (HeapWord*) fc;
8265 8266 // The sweeper has just found a live object. Return any accumulated
8266 8267 // left hand chunk to the free lists.
8267 8268 if (inFreeRange()) {
8268 8269 assert(freeFinger() < addr, "freeFinger points too high");
8269 8270 flush_cur_free_chunk(freeFinger(), pointer_delta(addr, freeFinger()));
8270 8271 }
8271 8272
8272 8273 // This object is live: we'd normally expect this to be
8273 8274 // an oop, and like to assert the following:
8274 8275 // assert(oop(addr)->is_oop(), "live block should be an oop");
8275 8276 // However, as we commented above, this may be an object whose
8276 8277 // header hasn't yet been initialized.
8277 8278 size_t size;
8278 8279 assert(_bitMap->isMarked(addr), "Tautology for this control point");
8279 8280 if (_bitMap->isMarked(addr + 1)) {
8280 8281 // Determine the size from the bit map, rather than trying to
8281 8282 // compute it from the object header.
8282 8283 HeapWord* nextOneAddr = _bitMap->getNextMarkedWordAddress(addr + 2);
8283 8284 size = pointer_delta(nextOneAddr + 1, addr);
8284 8285 assert(size == CompactibleFreeListSpace::adjustObjectSize(size),
8285 8286 "alignment problem");
8286 8287
8287 8288 #ifdef DEBUG
8288 8289 if (oop(addr)->klass_or_null() != NULL &&
8289 8290 ( !_collector->should_unload_classes()
8290 8291 || (oop(addr)->is_parsable()) &&
8291 8292 oop(addr)->is_conc_safe())) {
8292 8293 // Ignore mark word because we are running concurrently with mutators
8293 8294 assert(oop(addr)->is_oop(true), "live block should be an oop");
8294 8295 // is_conc_safe is checked before performing this assertion
8295 8296 // because an object that is not is_conc_safe may yet have
8296 8297 // the return from size() correct.
8297 8298 assert(size ==
8298 8299 CompactibleFreeListSpace::adjustObjectSize(oop(addr)->size()),
8299 8300 "P-mark and computed size do not agree");
8300 8301 }
8301 8302 #endif
8302 8303
8303 8304 } else {
8304 8305 // This should be an initialized object that's alive.
8305 8306 assert(oop(addr)->klass_or_null() != NULL &&
8306 8307 (!_collector->should_unload_classes()
8307 8308 || oop(addr)->is_parsable()),
8308 8309 "Should be an initialized object");
8309 8310 // Note that there are objects used during class redefinition,
8310 8311 // e.g. merge_cp in VM_RedefineClasses::merge_cp_and_rewrite(),
8311 8312 // which are discarded with their is_conc_safe state still
8312 8313 // false. These object may be floating garbage so may be
8313 8314 // seen here. If they are floating garbage their size
8314 8315 // should be attainable from their klass. Do not that
8315 8316 // is_conc_safe() is true for oop(addr).
8316 8317 // Ignore mark word because we are running concurrently with mutators
8317 8318 assert(oop(addr)->is_oop(true), "live block should be an oop");
8318 8319 // Verify that the bit map has no bits marked between
8319 8320 // addr and purported end of this block.
8320 8321 size = CompactibleFreeListSpace::adjustObjectSize(oop(addr)->size());
8321 8322 assert(size >= 3, "Necessary for Printezis marks to work");
8322 8323 assert(!_bitMap->isMarked(addr+1), "Tautology for this control point");
8323 8324 DEBUG_ONLY(_bitMap->verifyNoOneBitsInRange(addr+2, addr+size);)
8324 8325 }
8325 8326 return size;
8326 8327 }
8327 8328
8328 8329 void SweepClosure::do_post_free_or_garbage_chunk(FreeChunk* fc,
8329 8330 size_t chunkSize) {
8330 8331 // do_post_free_or_garbage_chunk() should only be called in the case
8331 8332 // of the adaptive free list allocator.
8332 8333 const bool fcInFreeLists = fc->isFree();
8333 8334 assert(_sp->adaptive_freelists(), "Should only be used in this case.");
8334 8335 assert((HeapWord*)fc <= _limit, "sweep invariant");
8335 8336 if (CMSTestInFreeList && fcInFreeLists) {
8336 8337 assert(_sp->verifyChunkInFreeLists(fc), "free chunk is not in free lists");
8337 8338 }
8338 8339
8339 8340 if (CMSTraceSweeper) {
8340 8341 gclog_or_tty->print_cr(" -- pick up another chunk at 0x%x (%d)", fc, chunkSize);
8341 8342 }
8342 8343
8343 8344 HeapWord* const fc_addr = (HeapWord*) fc;
8344 8345
8345 8346 bool coalesce;
8346 8347 const size_t left = pointer_delta(fc_addr, freeFinger());
8347 8348 const size_t right = chunkSize;
8348 8349 switch (FLSCoalescePolicy) {
8349 8350 // numeric value forms a coalition aggressiveness metric
8350 8351 case 0: { // never coalesce
8351 8352 coalesce = false;
8352 8353 break;
8353 8354 }
8354 8355 case 1: { // coalesce if left & right chunks on overpopulated lists
8355 8356 coalesce = _sp->coalOverPopulated(left) &&
8356 8357 _sp->coalOverPopulated(right);
8357 8358 break;
8358 8359 }
8359 8360 case 2: { // coalesce if left chunk on overpopulated list (default)
8360 8361 coalesce = _sp->coalOverPopulated(left);
8361 8362 break;
8362 8363 }
8363 8364 case 3: { // coalesce if left OR right chunk on overpopulated list
8364 8365 coalesce = _sp->coalOverPopulated(left) ||
8365 8366 _sp->coalOverPopulated(right);
8366 8367 break;
8367 8368 }
8368 8369 case 4: { // always coalesce
8369 8370 coalesce = true;
8370 8371 break;
8371 8372 }
8372 8373 default:
8373 8374 ShouldNotReachHere();
8374 8375 }
8375 8376
8376 8377 // Should the current free range be coalesced?
8377 8378 // If the chunk is in a free range and either we decided to coalesce above
8378 8379 // or the chunk is near the large block at the end of the heap
8379 8380 // (isNearLargestChunk() returns true), then coalesce this chunk.
8380 8381 const bool doCoalesce = inFreeRange()
8381 8382 && (coalesce || _g->isNearLargestChunk(fc_addr));
8382 8383 if (doCoalesce) {
8383 8384 // Coalesce the current free range on the left with the new
8384 8385 // chunk on the right. If either is on a free list,
8385 8386 // it must be removed from the list and stashed in the closure.
8386 8387 if (freeRangeInFreeLists()) {
8387 8388 FreeChunk* const ffc = (FreeChunk*)freeFinger();
8388 8389 assert(ffc->size() == pointer_delta(fc_addr, freeFinger()),
8389 8390 "Size of free range is inconsistent with chunk size.");
8390 8391 if (CMSTestInFreeList) {
8391 8392 assert(_sp->verifyChunkInFreeLists(ffc),
8392 8393 "Chunk is not in free lists");
8393 8394 }
8394 8395 _sp->coalDeath(ffc->size());
8395 8396 _sp->removeFreeChunkFromFreeLists(ffc);
8396 8397 set_freeRangeInFreeLists(false);
8397 8398 }
8398 8399 if (fcInFreeLists) {
8399 8400 _sp->coalDeath(chunkSize);
8400 8401 assert(fc->size() == chunkSize,
8401 8402 "The chunk has the wrong size or is not in the free lists");
8402 8403 _sp->removeFreeChunkFromFreeLists(fc);
8403 8404 }
8404 8405 set_lastFreeRangeCoalesced(true);
8405 8406 print_free_block_coalesced(fc);
8406 8407 } else { // not in a free range and/or should not coalesce
8407 8408 // Return the current free range and start a new one.
8408 8409 if (inFreeRange()) {
8409 8410 // In a free range but cannot coalesce with the right hand chunk.
8410 8411 // Put the current free range into the free lists.
8411 8412 flush_cur_free_chunk(freeFinger(),
8412 8413 pointer_delta(fc_addr, freeFinger()));
8413 8414 }
8414 8415 // Set up for new free range. Pass along whether the right hand
8415 8416 // chunk is in the free lists.
8416 8417 initialize_free_range((HeapWord*)fc, fcInFreeLists);
8417 8418 }
8418 8419 }
8419 8420
8420 8421 // Lookahead flush:
8421 8422 // If we are tracking a free range, and this is the last chunk that
8422 8423 // we'll look at because its end crosses past _limit, we'll preemptively
8423 8424 // flush it along with any free range we may be holding on to. Note that
8424 8425 // this can be the case only for an already free or freshly garbage
8425 8426 // chunk. If this block is an object, it can never straddle
8426 8427 // over _limit. The "straddling" occurs when _limit is set at
8427 8428 // the previous end of the space when this cycle started, and
8428 8429 // a subsequent heap expansion caused the previously co-terminal
8429 8430 // free block to be coalesced with the newly expanded portion,
8430 8431 // thus rendering _limit a non-block-boundary making it dangerous
8431 8432 // for the sweeper to step over and examine.
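// For instance (hypothetical addresses): if _limit was set to the old end
// E of the space and the free block [B, E) has since been coalesced with
// newly expanded space into [B, E'), then E is no longer a block boundary,
// so stepping to it and reading a block size there would be unsafe; hence
// the preemptive flush once a chunk's end reaches or crosses _limit.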
8432 8433 void SweepClosure::lookahead_and_flush(FreeChunk* fc, size_t chunk_size) {
8433 8434 assert(inFreeRange(), "Should only be called if currently in a free range.");
8434 8435 HeapWord* const eob = ((HeapWord*)fc) + chunk_size;
8435 8436 assert(_sp->used_region().contains(eob - 1),
8436 8437 err_msg("eob = " PTR_FORMAT " out of bounds wrt _sp = [" PTR_FORMAT "," PTR_FORMAT ")"
8437 8438 " when examining fc = " PTR_FORMAT "(" SIZE_FORMAT ")",
8438 8439 eob, _sp->bottom(), _sp->end(), fc, chunk_size));
8439 8440 if (eob >= _limit) {
8440 8441 assert(eob == _limit || fc->isFree(), "Only a free chunk should allow us to cross over the limit");
8441 8442 if (CMSTraceSweeper) {
8442 8443 gclog_or_tty->print_cr("_limit " PTR_FORMAT " reached or crossed by block "
8443 8444 "[" PTR_FORMAT "," PTR_FORMAT ") in space "
8444 8445 "[" PTR_FORMAT "," PTR_FORMAT ")",
8445 8446 _limit, fc, eob, _sp->bottom(), _sp->end());
8446 8447 }
8447 8448 // Return the storage we are tracking back into the free lists.
8448 8449 if (CMSTraceSweeper) {
8449 8450 gclog_or_tty->print_cr("Flushing ... ");
8450 8451 }
8451 8452 assert(freeFinger() < eob, "Error");
8452 8453 flush_cur_free_chunk( freeFinger(), pointer_delta(eob, freeFinger()));
8453 8454 }
8454 8455 }
8455 8456
8456 8457 void SweepClosure::flush_cur_free_chunk(HeapWord* chunk, size_t size) {
8457 8458 assert(inFreeRange(), "Should only be called if currently in a free range.");
8458 8459 assert(size > 0,
8459 8460 "A zero sized chunk cannot be added to the free lists.");
8460 8461 if (!freeRangeInFreeLists()) {
8461 8462 if (CMSTestInFreeList) {
8462 8463 FreeChunk* fc = (FreeChunk*) chunk;
8463 8464 fc->setSize(size);
8464 8465 assert(!_sp->verifyChunkInFreeLists(fc),
8465 8466 "chunk should not be in free lists yet");
8466 8467 }
8467 8468 if (CMSTraceSweeper) {
8468 8469 gclog_or_tty->print_cr(" -- add free block 0x%x (%d) to free lists",
8469 8470 chunk, size);
8470 8471 }
8471 8472 // A new free range is going to be starting. The current
8472 8473 // free range has not been added to the free lists yet or
8473 8474 // was removed so add it back.
8474 8475 // If the current free range was coalesced, then the death
8475 8476 // of the free range was recorded. Record a birth now.
8476 8477 if (lastFreeRangeCoalesced()) {
8477 8478 _sp->coalBirth(size);
8478 8479 }
8479 8480 _sp->addChunkAndRepairOffsetTable(chunk, size,
8480 8481 lastFreeRangeCoalesced());
8481 8482 } else if (CMSTraceSweeper) {
8482 8483 gclog_or_tty->print_cr("Already in free list: nothing to flush");
8483 8484 }
8484 8485 set_inFreeRange(false);
8485 8486 set_freeRangeInFreeLists(false);
8486 8487 }
8487 8488
8488 8489 // We take a break if we've been at this for a while,
8489 8490 // so as to avoid monopolizing the locks involved.
8490 8491 void SweepClosure::do_yield_work(HeapWord* addr) {
8491 8492 // Return current free chunk being used for coalescing (if any)
8492 8493 // to the appropriate freelist. After yielding, the next
8493 8494 // free block encountered will start a coalescing range of
8494 8495 // free blocks. If the next free block is adjacent to the
8495 8496 // chunk just flushed, they will need to wait for the next
8496 8497 // sweep to be coalesced.
8497 8498 if (inFreeRange()) {
8498 8499 flush_cur_free_chunk(freeFinger(), pointer_delta(addr, freeFinger()));
8499 8500 }
8500 8501
8501 8502 // First give up the locks, then yield, then re-lock.
8502 8503 // We should probably use a constructor/destructor idiom to
8503 8504 // do this unlock/lock or modify the MutexUnlocker class to
8504 8505 // serve our purpose. XXX
8505 8506 assert_lock_strong(_bitMap->lock());
8506 8507 assert_lock_strong(_freelistLock);
8507 8508 assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),
8508 8509 "CMS thread should hold CMS token");
8509 8510 _bitMap->lock()->unlock();
8510 8511 _freelistLock->unlock();
8511 8512 ConcurrentMarkSweepThread::desynchronize(true);
8512 8513 ConcurrentMarkSweepThread::acknowledge_yield_request();
8513 8514 _collector->stopTimer();
8514 8515 GCPauseTimer p(_collector->size_policy()->concurrent_timer_ptr());
8515 8516 if (PrintCMSStatistics != 0) {
8516 8517 _collector->incrementYields();
8517 8518 }
8518 8519 _collector->icms_wait();
8519 8520
8520 8521 // See the comment in coordinator_yield()
8521 8522 for (unsigned i = 0; i < CMSYieldSleepCount &&
8522 8523 ConcurrentMarkSweepThread::should_yield() &&
8523 8524 !CMSCollector::foregroundGCIsActive(); ++i) {
8524 8525 os::sleep(Thread::current(), 1, false);
8525 8526 ConcurrentMarkSweepThread::acknowledge_yield_request();
8526 8527 }
8527 8528
8528 8529 ConcurrentMarkSweepThread::synchronize(true);
8529 8530 _freelistLock->lock();
8530 8531 _bitMap->lock()->lock_without_safepoint_check();
8531 8532 _collector->startTimer();
8532 8533 }
8533 8534
8534 8535 #ifndef PRODUCT
8535 8536 // This is actually very useful in a product build if it can
8536 8537 // be called from the debugger. Compile it into the product
8537 8538 // as needed.
8538 8539 bool debug_verifyChunkInFreeLists(FreeChunk* fc) {
8539 8540 return debug_cms_space->verifyChunkInFreeLists(fc);
8540 8541 }
8541 8542 #endif
8542 8543
8543 8544 void SweepClosure::print_free_block_coalesced(FreeChunk* fc) const {
8544 8545 if (CMSTraceSweeper) {
8545 8546 gclog_or_tty->print_cr("Sweep:coal_free_blk " PTR_FORMAT " (" SIZE_FORMAT ")",
8546 8547 fc, fc->size());
8547 8548 }
8548 8549 }
8549 8550
8550 8551 // CMSIsAliveClosure
8551 8552 bool CMSIsAliveClosure::do_object_b(oop obj) {
8552 8553 HeapWord* addr = (HeapWord*)obj;
8553 8554 return addr != NULL &&
8554 8555 (!_span.contains(addr) || _bit_map->isMarked(addr));
8555 8556 }
8556 8557
8557 8558 CMSKeepAliveClosure::CMSKeepAliveClosure( CMSCollector* collector,
8558 8559 MemRegion span,
8559 8560 CMSBitMap* bit_map, CMSMarkStack* mark_stack,
8560 8561 CMSMarkStack* revisit_stack, bool cpc):
8561 8562 KlassRememberingOopClosure(collector, NULL, revisit_stack),
8562 8563 _span(span),
8563 8564 _bit_map(bit_map),
8564 8565 _mark_stack(mark_stack),
8565 8566 _concurrent_precleaning(cpc) {
8566 8567 assert(!_span.is_empty(), "Empty span could spell trouble");
8567 8568 }
8568 8569
8569 8570
8570 8571 // CMSKeepAliveClosure: the serial version
8571 8572 void CMSKeepAliveClosure::do_oop(oop obj) {
8572 8573 HeapWord* addr = (HeapWord*)obj;
8573 8574 if (_span.contains(addr) &&
8574 8575 !_bit_map->isMarked(addr)) {
8575 8576 _bit_map->mark(addr);
8576 8577 bool simulate_overflow = false;
8577 8578 NOT_PRODUCT(
8578 8579 if (CMSMarkStackOverflowALot &&
8579 8580 _collector->simulate_overflow()) {
8580 8581 // simulate a stack overflow
8581 8582 simulate_overflow = true;
8582 8583 }
8583 8584 )
8584 8585 if (simulate_overflow || !_mark_stack->push(obj)) {
8585 8586 if (_concurrent_precleaning) {
8586 8587 // We dirty the overflown object and let the remark
8587 8588 // phase deal with it.
8588 8589 assert(_collector->overflow_list_is_empty(), "Error");
8589 8590 // In the case of object arrays, we need to dirty all of
8590 8591 // the cards that the object spans. No locking or atomics
8591 8592 // are needed since no one else can be mutating the mod union
8592 8593 // table.
8593 8594 if (obj->is_objArray()) {
8594 8595 size_t sz = obj->size();
8595 8596 HeapWord* end_card_addr =
8596 8597 (HeapWord*)round_to((intptr_t)(addr+sz), CardTableModRefBS::card_size);
8597 8598 MemRegion redirty_range = MemRegion(addr, end_card_addr);
8598 8599 assert(!redirty_range.is_empty(), "Arithmetical tautology");
8599 8600 _collector->_modUnionTable.mark_range(redirty_range);
8600 8601 } else {
8601 8602 _collector->_modUnionTable.mark(addr);
8602 8603 }
8603 8604 _collector->_ser_kac_preclean_ovflw++;
8604 8605 } else {
8605 8606 _collector->push_on_overflow_list(obj);
8606 8607 _collector->_ser_kac_ovflw++;
8607 8608 }
8608 8609 }
8609 8610 }
8610 8611 }
8611 8612
8612 8613 void CMSKeepAliveClosure::do_oop(oop* p) { CMSKeepAliveClosure::do_oop_work(p); }
8613 8614 void CMSKeepAliveClosure::do_oop(narrowOop* p) { CMSKeepAliveClosure::do_oop_work(p); }
8614 8615
8615 8616 // CMSParKeepAliveClosure: a parallel version of the above.
8616 8617 // The work queues are private to each closure (thread),
8617 8618 // but (may be) available for stealing by other threads.
8618 8619 void CMSParKeepAliveClosure::do_oop(oop obj) {
8619 8620 HeapWord* addr = (HeapWord*)obj;
8620 8621 if (_span.contains(addr) &&
8621 8622 !_bit_map->isMarked(addr)) {
8622 8623 // In general, during recursive tracing, several threads
8623 8624 // may be concurrently getting here; the first one to
8624 8625 // "tag" it, claims it.
8625 8626 if (_bit_map->par_mark(addr)) {
8626 8627 bool res = _work_queue->push(obj);
8627 8628 assert(res, "Low water mark should be much less than capacity");
8628 8629 // Do a recursive trim in the hope that this will keep
8629 8630 // stack usage lower, but leave some oops for potential stealers
8630 8631 trim_queue(_low_water_mark);
8631 8632 } // Else, another thread got there first
8632 8633 }
8633 8634 }
8634 8635
8635 8636 void CMSParKeepAliveClosure::do_oop(oop* p) { CMSParKeepAliveClosure::do_oop_work(p); }
8636 8637 void CMSParKeepAliveClosure::do_oop(narrowOop* p) { CMSParKeepAliveClosure::do_oop_work(p); }
8637 8638
8638 8639 void CMSParKeepAliveClosure::trim_queue(uint max) {
8639 8640 while (_work_queue->size() > max) {
8640 8641 oop new_oop;
8641 8642 if (_work_queue->pop_local(new_oop)) {
8642 8643 assert(new_oop != NULL && new_oop->is_oop(), "Expected an oop");
8643 8644 assert(_bit_map->isMarked((HeapWord*)new_oop),
8644 8645 "no white objects on this stack!");
8645 8646 assert(_span.contains((HeapWord*)new_oop), "Out of bounds oop");
8646 8647 // iterate over the oops in this oop, marking and pushing
8647 8648 // the ones in CMS heap (i.e. in _span).
8648 8649 new_oop->oop_iterate(&_mark_and_push);
8649 8650 }
8650 8651 }
8651 8652 }
8652 8653
8653 8654 CMSInnerParMarkAndPushClosure::CMSInnerParMarkAndPushClosure(
8654 8655 CMSCollector* collector,
8655 8656 MemRegion span, CMSBitMap* bit_map,
8656 8657 CMSMarkStack* revisit_stack,
8657 8658 OopTaskQueue* work_queue):
8658 8659 Par_KlassRememberingOopClosure(collector, NULL, revisit_stack),
8659 8660 _span(span),
8660 8661 _bit_map(bit_map),
8661 8662 _work_queue(work_queue) { }
8662 8663
8663 8664 void CMSInnerParMarkAndPushClosure::do_oop(oop obj) {
8664 8665 HeapWord* addr = (HeapWord*)obj;
8665 8666 if (_span.contains(addr) &&
8666 8667 !_bit_map->isMarked(addr)) {
8667 8668 if (_bit_map->par_mark(addr)) {
8668 8669 bool simulate_overflow = false;
8669 8670 NOT_PRODUCT(
8670 8671 if (CMSMarkStackOverflowALot &&
8671 8672 _collector->par_simulate_overflow()) {
8672 8673 // simulate a stack overflow
8673 8674 simulate_overflow = true;
8674 8675 }
8675 8676 )
8676 8677 if (simulate_overflow || !_work_queue->push(obj)) {
8677 8678 _collector->par_push_on_overflow_list(obj);
8678 8679 _collector->_par_kac_ovflw++;
8679 8680 }
8680 8681 } // Else another thread got there already
8681 8682 }
8682 8683 }
8683 8684
8684 8685 void CMSInnerParMarkAndPushClosure::do_oop(oop* p) { CMSInnerParMarkAndPushClosure::do_oop_work(p); }
8685 8686 void CMSInnerParMarkAndPushClosure::do_oop(narrowOop* p) { CMSInnerParMarkAndPushClosure::do_oop_work(p); }
8686 8687
8687 8688 //////////////////////////////////////////////////////////////////
8688 8689 // CMSExpansionCause /////////////////////////////
8689 8690 //////////////////////////////////////////////////////////////////
8690 8691 const char* CMSExpansionCause::to_string(CMSExpansionCause::Cause cause) {
8691 8692 switch (cause) {
8692 8693 case _no_expansion:
8693 8694 return "No expansion";
8694 8695 case _satisfy_free_ratio:
8695 8696 return "Free ratio";
8696 8697 case _satisfy_promotion:
8697 8698 return "Satisfy promotion";
8698 8699 case _satisfy_allocation:
8699 8700 return "allocation";
8700 8701 case _allocate_par_lab:
8701 8702 return "Par LAB";
8702 8703 case _allocate_par_spooling_space:
8703 8704 return "Par Spooling Space";
8704 8705 case _adaptive_size_policy:
8705 8706 return "Ergonomics";
8706 8707 default:
8707 8708 return "unknown";
8708 8709 }
8709 8710 }
8710 8711
8711 8712 void CMSDrainMarkingStackClosure::do_void() {
8712 8713 // the max number to take from overflow list at a time
8713 8714 const size_t num = _mark_stack->capacity()/4;
8714 8715 assert(!_concurrent_precleaning || _collector->overflow_list_is_empty(),
8715 8716 "Overflow list should be NULL during concurrent phases");
8716 8717 while (!_mark_stack->isEmpty() ||
8717 8718 // if stack is empty, check the overflow list
8718 8719 _collector->take_from_overflow_list(num, _mark_stack)) {
8719 8720 oop obj = _mark_stack->pop();
8720 8721 HeapWord* addr = (HeapWord*)obj;
8721 8722 assert(_span.contains(addr), "Should be within span");
8722 8723 assert(_bit_map->isMarked(addr), "Should be marked");
8723 8724 assert(obj->is_oop(), "Should be an oop");
8724 8725 obj->oop_iterate(_keep_alive);
8725 8726 }
8726 8727 }
8727 8728
8728 8729 void CMSParDrainMarkingStackClosure::do_void() {
8729 8730 // drain queue
8730 8731 trim_queue(0);
8731 8732 }
8732 8733
8733 8734 // Trim our work_queue so its length is below max at return
8734 8735 void CMSParDrainMarkingStackClosure::trim_queue(uint max) {
8735 8736 while (_work_queue->size() > max) {
8736 8737 oop new_oop;
8737 8738 if (_work_queue->pop_local(new_oop)) {
8738 8739 assert(new_oop->is_oop(), "Expected an oop");
8739 8740 assert(_bit_map->isMarked((HeapWord*)new_oop),
8740 8741 "no white objects on this stack!");
8741 8742 assert(_span.contains((HeapWord*)new_oop), "Out of bounds oop");
8742 8743 // iterate over the oops in this oop, marking and pushing
8743 8744 // the ones in CMS heap (i.e. in _span).
8744 8745 new_oop->oop_iterate(&_mark_and_push);
8745 8746 }
8746 8747 }
8747 8748 }
8748 8749
8749 8750 ////////////////////////////////////////////////////////////////////
8750 8751 // Support for Marking Stack Overflow list handling and related code
8751 8752 ////////////////////////////////////////////////////////////////////
8752 8753 // Much of the following code is similar in shape and spirit to the
8753 8754 // code used in ParNewGC. We should try and share that code
8754 8755 // as much as possible in the future.
8755 8756
8756 8757 #ifndef PRODUCT
8757 8758 // Debugging support for CMSStackOverflowALot
8758 8759
8759 8760 // It's OK to call this multi-threaded; the worst thing
8760 8761 // that can happen is that we'll get a bunch of closely
8761 8762 // spaced simulated overflows, but that's OK, in fact
8762 8763 // probably good as it would exercise the overflow code
8763 8764 // under contention.
8764 8765 bool CMSCollector::simulate_overflow() {
8765 8766 if (_overflow_counter-- <= 0) { // just being defensive
8766 8767 _overflow_counter = CMSMarkStackOverflowInterval;
8767 8768 return true;
8768 8769 } else {
8769 8770 return false;
8770 8771 }
8771 8772 }
8772 8773
8773 8774 bool CMSCollector::par_simulate_overflow() {
8774 8775 return simulate_overflow();
8775 8776 }
8776 8777 #endif
8777 8778
8778 8779 // Single-threaded
8779 8780 bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
8780 8781 assert(stack->isEmpty(), "Expected precondition");
8781 8782 assert(stack->capacity() > num, "Shouldn't bite more than can chew");
8782 8783 size_t i = num;
8783 8784 oop cur = _overflow_list;
8784 8785 const markOop proto = markOopDesc::prototype();
8785 8786 NOT_PRODUCT(ssize_t n = 0;)
8786 8787 for (oop next; i > 0 && cur != NULL; cur = next, i--) {
8787 8788 next = oop(cur->mark());
8788 8789 cur->set_mark(proto); // until proven otherwise
8789 8790 assert(cur->is_oop(), "Should be an oop");
8790 8791 bool res = stack->push(cur);
8791 8792 assert(res, "Bit off more than can chew?");
8792 8793 NOT_PRODUCT(n++;)
8793 8794 }
8794 8795 _overflow_list = cur;
8795 8796 #ifndef PRODUCT
8796 8797 assert(_num_par_pushes >= n, "Too many pops?");
8797 8798   _num_par_pushes -= n;
8798 8799 #endif
8799 8800 return !stack->isEmpty();
8800 8801 }
8801 8802
8802 8803 #define BUSY (oop(0x1aff1aff))
8803 8804 // (MT-safe) Get a prefix of at most "num" from the list.
8804 8805 // The overflow list is chained through the mark word of
8805 8806 // each object in the list. We fetch the entire list,
8806 8807 // break off a prefix of the right size and return the
8807 8808 // remainder. If other threads try to take objects from
8808 8809 // the overflow list at that time, they will wait for
8809 8810 // some time to see if data becomes available. If (and
8810 8811 // only if) another thread places one or more object(s)
8811 8812 // on the global list before we have returned the suffix
8812 8813 // to the global list, we will walk down our local list
8813 8814 // to find its end and append the global list to
8814 8815 // our suffix before returning it. This suffix walk can
8815 8816 // prove to be expensive (quadratic in the amount of traffic)
8816 8817 // when there are many objects in the overflow list and
8817 8818 // there is much producer-consumer contention on the list.
8818 8819 // *NOTE*: The overflow list manipulation code here and
8819 8820 // in ParNewGeneration:: are very similar in shape,
8820 8821 // except that in the ParNew case we use the old (from/eden)
8821 8822 // copy of the object to thread the list via its klass word.
8822 8823 // Because of the common code, if you make any changes in
8823 8824 // the code below, please check the ParNew version to see if
8824 8825 // similar changes might be needed.
8825 8826 // CR 6797058 has been filed to consolidate the common code.
8826 8827 bool CMSCollector::par_take_from_overflow_list(size_t num,
8827 8828 OopTaskQueue* work_q,
8828 8829 int no_of_gc_threads) {
8829 8830 assert(work_q->size() == 0, "First empty local work queue");
8830 8831 assert(num < work_q->max_elems(), "Can't bite more than we can chew");
8831 8832 if (_overflow_list == NULL) {
8832 8833 return false;
8833 8834 }
8834 8835 // Grab the entire list; we'll put back a suffix
8835 8836 oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
8836 8837 Thread* tid = Thread::current();
8837 8838   // Before "no_of_gc_threads" was introduced, CMSOverflowSpinCount was
8838 8839 // set to ParallelGCThreads.
8839 8840 size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
8840 8841 size_t sleep_time_millis = MAX2((size_t)1, num/100);
8841 8842 // If the list is busy, we spin for a short while,
8842 8843 // sleeping between attempts to get the list.
8843 8844 for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
8844 8845 os::sleep(tid, sleep_time_millis, false);
8845 8846 if (_overflow_list == NULL) {
8846 8847 // Nothing left to take
8847 8848 return false;
8848 8849 } else if (_overflow_list != BUSY) {
8849 8850 // Try and grab the prefix
8850 8851 prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
8851 8852 }
8852 8853 }
8853 8854 // If the list was found to be empty, or we spun long
8854 8855 // enough, we give up and return empty-handed. If we leave
8855 8856 // the list in the BUSY state below, it must be the case that
8856 8857 // some other thread holds the overflow list and will set it
8857 8858 // to a non-BUSY state in the future.
8858 8859 if (prefix == NULL || prefix == BUSY) {
8859 8860 // Nothing to take or waited long enough
8860 8861 if (prefix == NULL) {
8861 8862 // Write back the NULL in case we overwrote it with BUSY above
8862 8863 // and it is still the same value.
8863 8864 (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
8864 8865 }
8865 8866 return false;
8866 8867 }
8867 8868 assert(prefix != NULL && prefix != BUSY, "Error");
8868 8869 size_t i = num;
8869 8870 oop cur = prefix;
8870 8871 // Walk down the first "num" objects, unless we reach the end.
8871 8872 for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
8872 8873 if (cur->mark() == NULL) {
8873 8874 // We have "num" or fewer elements in the list, so there
8874 8875 // is nothing to return to the global list.
8875 8876 // Write back the NULL in lieu of the BUSY we wrote
8876 8877 // above, if it is still the same value.
8877 8878 if (_overflow_list == BUSY) {
8878 8879 (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
8879 8880 }
8880 8881 } else {
8881 8882     // Chop off the suffix and return it to the global list.
8882 8883 assert(cur->mark() != BUSY, "Error");
8883 8884 oop suffix_head = cur->mark(); // suffix will be put back on global list
8884 8885 cur->set_mark(NULL); // break off suffix
8885 8886     // It's possible that the list is still in the empty (BUSY) state
8886 8887 // we left it in a short while ago; in that case we may be
8887 8888 // able to place back the suffix without incurring the cost
8888 8889 // of a walk down the list.
8889 8890 oop observed_overflow_list = _overflow_list;
8890 8891 oop cur_overflow_list = observed_overflow_list;
8891 8892 bool attached = false;
8892 8893 while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
8893 8894 observed_overflow_list =
8894 8895 (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
8895 8896 if (cur_overflow_list == observed_overflow_list) {
8896 8897 attached = true;
8897 8898 break;
8898 8899 } else cur_overflow_list = observed_overflow_list;
8899 8900 }
8900 8901 if (!attached) {
8901 8902 // Too bad, someone else sneaked in (at least) an element; we'll need
8902 8903 // to do a splice. Find tail of suffix so we can prepend suffix to global
8903 8904 // list.
8904 8905 for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
8905 8906 oop suffix_tail = cur;
8906 8907 assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
8907 8908 "Tautology");
8908 8909 observed_overflow_list = _overflow_list;
8909 8910 do {
8910 8911 cur_overflow_list = observed_overflow_list;
8911 8912 if (cur_overflow_list != BUSY) {
8912 8913 // Do the splice ...
8913 8914 suffix_tail->set_mark(markOop(cur_overflow_list));
8914 8915 } else { // cur_overflow_list == BUSY
8915 8916 suffix_tail->set_mark(NULL);
8916 8917 }
8917 8918 // ... and try to place spliced list back on overflow_list ...
8918 8919 observed_overflow_list =
8919 8920 (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
8920 8921 } while (cur_overflow_list != observed_overflow_list);
8921 8922 // ... until we have succeeded in doing so.
8922 8923 }
8923 8924 }
8924 8925
8925 8926 // Push the prefix elements on work_q
8926 8927 assert(prefix != NULL, "control point invariant");
8927 8928 const markOop proto = markOopDesc::prototype();
8928 8929 oop next;
8929 8930 NOT_PRODUCT(ssize_t n = 0;)
8930 8931 for (cur = prefix; cur != NULL; cur = next) {
8931 8932 next = oop(cur->mark());
8932 8933 cur->set_mark(proto); // until proven otherwise
8933 8934 assert(cur->is_oop(), "Should be an oop");
8934 8935 bool res = work_q->push(cur);
8935 8936 assert(res, "Bit off more than we can chew?");
8936 8937 NOT_PRODUCT(n++;)
8937 8938 }
8938 8939 #ifndef PRODUCT
8939 8940 assert(_num_par_pushes >= n, "Too many pops?");
8940 8941 Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
8941 8942 #endif
8942 8943 return true;
8943 8944 }
8944 8945
8945 8946 // Single-threaded
8946 8947 void CMSCollector::push_on_overflow_list(oop p) {
8947 8948 NOT_PRODUCT(_num_par_pushes++;)
8948 8949 assert(p->is_oop(), "Not an oop");
8949 8950 preserve_mark_if_necessary(p);
8950 8951 p->set_mark((markOop)_overflow_list);
8951 8952 _overflow_list = p;
8952 8953 }
8953 8954
8954 8955 // Multi-threaded; use CAS to prepend to overflow list
8955 8956 void CMSCollector::par_push_on_overflow_list(oop p) {
8956 8957 NOT_PRODUCT(Atomic::inc_ptr(&_num_par_pushes);)
8957 8958 assert(p->is_oop(), "Not an oop");
8958 8959 par_preserve_mark_if_necessary(p);
8959 8960 oop observed_overflow_list = _overflow_list;
8960 8961 oop cur_overflow_list;
8961 8962 do {
8962 8963 cur_overflow_list = observed_overflow_list;
8963 8964 if (cur_overflow_list != BUSY) {
8964 8965 p->set_mark(markOop(cur_overflow_list));
8965 8966 } else {
8966 8967 p->set_mark(NULL);
8967 8968 }
8968 8969 observed_overflow_list =
8969 8970 (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list);
8970 8971 } while (cur_overflow_list != observed_overflow_list);
8971 8972 }
8972 8973 #undef BUSY
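
// The claim-and-publish protocol above (grab the whole list by exchanging in
// the BUSY sentinel, detach a prefix, CAS the suffix back, and splice if a
// producer raced in) is easier to see in a standalone form. The sketch below
// is illustrative only: it uses std::atomic and a hypothetical Node type in
// place of HotSpot's oop/mark-word threading, and it omits the spin/sleep
// retry performed while another consumer holds the list.

#include <atomic>
#include <cstddef>

struct Node { Node* next; };                       // stand-in for an oop threaded via its mark word
static Node* const BUSY_NODE = reinterpret_cast<Node*>(0x1);
static std::atomic<Node*> overflow_head(nullptr);

// Producer: CAS-prepend, treating BUSY as "temporarily empty"
// (the shape of par_push_on_overflow_list).
void par_push(Node* n) {
  Node* observed = overflow_head.load();
  do {
    n->next = (observed == BUSY_NODE) ? nullptr : observed;
  } while (!overflow_head.compare_exchange_weak(observed, n));
}

// Consumer: claim the whole list, keep at most num nodes, publish the rest
// (the shape of par_take_from_overflow_list).
Node* par_take_prefix(size_t num) {
  Node* head = overflow_head.exchange(BUSY_NODE);  // claim the entire list
  if (head == BUSY_NODE) {
    return nullptr;                                // another consumer owns it right now
  }
  if (head == nullptr) {
    Node* expected = BUSY_NODE;
    overflow_head.compare_exchange_strong(expected, nullptr);  // undo our BUSY
    return nullptr;
  }
  Node* cur = head;
  for (size_t i = 1; i < num && cur->next != nullptr; i++) {
    cur = cur->next;                               // walk off a prefix of at most num nodes
  }
  Node* suffix = cur->next;
  cur->next = nullptr;                             // detach the prefix
  if (suffix != nullptr) {
    Node* expected = BUSY_NODE;
    if (!overflow_head.compare_exchange_strong(expected, suffix)) {
      // A producer slipped something in: find the suffix tail and keep
      // trying to prepend the whole suffix onto whatever is there now.
      Node* tail = suffix;
      while (tail->next != nullptr) tail = tail->next;
      Node* observed = overflow_head.load();
      do {
        tail->next = (observed == BUSY_NODE) ? nullptr : observed;
      } while (!overflow_head.compare_exchange_weak(observed, suffix));
    }
  }
  return head;                                     // caller drains these into its work queue
}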
8973 8974
8974 8975 // Single threaded
8975 8976 // General Note on GrowableArray: pushes may silently fail
8976 8977 // because we are (temporarily) out of C-heap for expanding
8977 8978 // the stack. The problem is quite ubiquitous and affects
8978 8979 // a lot of code in the JVM. The prudent thing for GrowableArray
8979 8980 // to do (for now) is to exit with an error. However, that may
8980 8981 // be too draconian in some cases because the caller may be
8981 8982 // able to recover without much harm. For such cases, we
8982 8983 // should probably introduce a "soft_push" method which returns
8983 8984 // an indication of success or failure with the assumption that
8984 8985 // the caller may be able to recover from a failure; code in
8985 8986 // the VM can then be changed, incrementally, to deal with such
8986 8987 // failures where possible, thus gradually hardening the VM
8987 8988 // against such low-resource situations.
8988 8989 void CMSCollector::preserve_mark_work(oop p, markOop m) {
8989 8990 _preserved_oop_stack.push(p);
8990 8991 _preserved_mark_stack.push(m);
8991 8992 assert(m == p->mark(), "Mark word changed");
8992 8993 assert(_preserved_oop_stack.size() == _preserved_mark_stack.size(),
8993 8994 "bijection");
8994 8995 }
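
// The GrowableArray note above suggests a "soft_push" that reports expansion
// failure to the caller instead of exiting. A minimal, self-contained
// illustration of that contract (hypothetical; GrowableArray has no such
// method today, and the element type is assumed trivially copyable):

#include <cstdlib>
#include <cstddef>

template <class E>
class SoftStack {
  E*     _data;
  size_t _len;
  size_t _cap;
 public:
  SoftStack() : _data(NULL), _len(0), _cap(0) {}
  ~SoftStack() { free(_data); }
  bool soft_push(const E& e) {
    if (_len == _cap) {
      size_t new_cap = (_cap == 0) ? 8 : _cap * 2;
      E* p = (E*) realloc(_data, new_cap * sizeof(E));
      if (p == NULL) return false;   // out of C-heap: report failure, let the caller recover
      _data = p;
      _cap  = new_cap;
    }
    _data[_len++] = e;
    return true;
  }
  size_t length() const { return _len; }
};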
8995 8996
8996 8997 // Single threaded
8997 8998 void CMSCollector::preserve_mark_if_necessary(oop p) {
8998 8999 markOop m = p->mark();
8999 9000 if (m->must_be_preserved(p)) {
9000 9001 preserve_mark_work(p, m);
9001 9002 }
9002 9003 }
9003 9004
9004 9005 void CMSCollector::par_preserve_mark_if_necessary(oop p) {
9005 9006 markOop m = p->mark();
9006 9007 if (m->must_be_preserved(p)) {
9007 9008 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
9008 9009 // Even though we read the mark word without holding
9009 9010 // the lock, we are assured that it will not change
9010 9011 // because we "own" this oop, so no other thread can
9011 9012 // be trying to push it on the overflow list; see
9012 9013 // the assertion in preserve_mark_work() that checks
9013 9014 // that m == p->mark().
9014 9015 preserve_mark_work(p, m);
9015 9016 }
9016 9017 }
9017 9018
9018 9019 // We should be able to do this multi-threaded,
9019 9020 // a chunk of stack being a task (this is
9020 9021 // correct because each oop only ever appears
9021 9022 // once in the overflow list). However, it's
9022 9023 // not very easy to completely overlap this with
9023 9024 // other operations, so it will generally not be done
9024 9025 // until all work's been completed. Because we
9025 9026 // expect the preserved oop stack (set) to be small,
9026 9027 // it's probably fine to do this single-threaded.
9027 9028 // We can explore cleverer concurrent/overlapped/parallel
9028 9029 // processing of preserved marks if we feel the
9029 9030 // need for this in the future. Stack overflow should
9030 9031 // be so rare in practice and, when it happens, its
9031 9032 // effect on performance so great that this will
9032 9033 // likely just be in the noise anyway.
9033 9034 void CMSCollector::restore_preserved_marks_if_any() {
9034 9035 assert(SafepointSynchronize::is_at_safepoint(),
9035 9036 "world should be stopped");
9036 9037 assert(Thread::current()->is_ConcurrentGC_thread() ||
9037 9038 Thread::current()->is_VM_thread(),
9038 9039 "should be single-threaded");
9039 9040 assert(_preserved_oop_stack.size() == _preserved_mark_stack.size(),
9040 9041 "bijection");
9041 9042
9042 9043 while (!_preserved_oop_stack.is_empty()) {
9043 9044 oop p = _preserved_oop_stack.pop();
9044 9045 assert(p->is_oop(), "Should be an oop");
9045 9046 assert(_span.contains(p), "oop should be in _span");
9046 9047 assert(p->mark() == markOopDesc::prototype(),
9047 9048 "Set when taken from overflow list");
9048 9049 markOop m = _preserved_mark_stack.pop();
9049 9050 p->set_mark(m);
9050 9051 }
9051 9052 assert(_preserved_mark_stack.is_empty() && _preserved_oop_stack.is_empty(),
9052 9053 "stacks were cleared above");
9053 9054 }
9054 9055
9055 9056 #ifndef PRODUCT
9056 9057 bool CMSCollector::no_preserved_marks() const {
9057 9058 return _preserved_mark_stack.is_empty() && _preserved_oop_stack.is_empty();
9058 9059 }
9059 9060 #endif
9060 9061
9061 9062 CMSAdaptiveSizePolicy* ASConcurrentMarkSweepGeneration::cms_size_policy() const
9062 9063 {
9063 9064 GenCollectedHeap* gch = (GenCollectedHeap*) GenCollectedHeap::heap();
9064 9065 CMSAdaptiveSizePolicy* size_policy =
9065 9066 (CMSAdaptiveSizePolicy*) gch->gen_policy()->size_policy();
9066 9067 assert(size_policy->is_gc_cms_adaptive_size_policy(),
9067 9068 "Wrong type for size policy");
9068 9069 return size_policy;
9069 9070 }
9070 9071
9071 9072 void ASConcurrentMarkSweepGeneration::resize(size_t cur_promo_size,
9072 9073 size_t desired_promo_size) {
9073 9074 if (cur_promo_size < desired_promo_size) {
9074 9075 size_t expand_bytes = desired_promo_size - cur_promo_size;
9075 9076 if (PrintAdaptiveSizePolicy && Verbose) {
9076 9077 gclog_or_tty->print_cr(" ASConcurrentMarkSweepGeneration::resize "
9077 9078 "Expanding tenured generation by " SIZE_FORMAT " (bytes)",
9078 9079 expand_bytes);
9079 9080 }
9080 9081 expand(expand_bytes,
9081 9082 MinHeapDeltaBytes,
9082 9083 CMSExpansionCause::_adaptive_size_policy);
9083 9084 } else if (desired_promo_size < cur_promo_size) {
9084 9085 size_t shrink_bytes = cur_promo_size - desired_promo_size;
9085 9086 if (PrintAdaptiveSizePolicy && Verbose) {
9086 9087 gclog_or_tty->print_cr(" ASConcurrentMarkSweepGeneration::resize "
9087 9088 "Shrinking tenured generation by " SIZE_FORMAT " (bytes)",
9088 9089 shrink_bytes);
9089 9090 }
9090 9091 shrink(shrink_bytes);
9091 9092 }
9092 9093 }
9093 9094
9094 9095 CMSGCAdaptivePolicyCounters* ASConcurrentMarkSweepGeneration::gc_adaptive_policy_counters() {
9095 9096 GenCollectedHeap* gch = GenCollectedHeap::heap();
9096 9097 CMSGCAdaptivePolicyCounters* counters =
9097 9098 (CMSGCAdaptivePolicyCounters*) gch->collector_policy()->counters();
9098 9099 assert(counters->kind() == GCPolicyCounters::CMSGCAdaptivePolicyCountersKind,
9099 9100 "Wrong kind of counters");
9100 9101 return counters;
9101 9102 }
9102 9103
9103 9104
9104 9105 void ASConcurrentMarkSweepGeneration::update_counters() {
9105 9106 if (UsePerfData) {
9106 9107 _space_counters->update_all();
9107 9108 _gen_counters->update_all();
9108 9109 CMSGCAdaptivePolicyCounters* counters = gc_adaptive_policy_counters();
9109 9110 GenCollectedHeap* gch = GenCollectedHeap::heap();
9110 9111 CMSGCStats* gc_stats_l = (CMSGCStats*) gc_stats();
9111 9112 assert(gc_stats_l->kind() == GCStats::CMSGCStatsKind,
9112 9113 "Wrong gc statistics type");
9113 9114 counters->update_counters(gc_stats_l);
9114 9115 }
9115 9116 }
9116 9117
9117 9118 void ASConcurrentMarkSweepGeneration::update_counters(size_t used) {
9118 9119 if (UsePerfData) {
9119 9120 _space_counters->update_used(used);
9120 9121 _space_counters->update_capacity();
9121 9122 _gen_counters->update_all();
9122 9123
9123 9124 CMSGCAdaptivePolicyCounters* counters = gc_adaptive_policy_counters();
9124 9125 GenCollectedHeap* gch = GenCollectedHeap::heap();
9125 9126 CMSGCStats* gc_stats_l = (CMSGCStats*) gc_stats();
9126 9127 assert(gc_stats_l->kind() == GCStats::CMSGCStatsKind,
9127 9128 "Wrong gc statistics type");
9128 9129 counters->update_counters(gc_stats_l);
9129 9130 }
9130 9131 }
9131 9132
9132 9133 // The desired expansion delta is computed so that:
9133 9134 // . desired free percentage or greater is used
9134 9135 void ASConcurrentMarkSweepGeneration::compute_new_size() {
9135 9136 assert_locked_or_safepoint(Heap_lock);
9136 9137
9137 9138 GenCollectedHeap* gch = (GenCollectedHeap*) GenCollectedHeap::heap();
9138 9139
9139 9140 // If incremental collection failed, we just want to expand
9140 9141 // to the limit.
9141 9142 if (incremental_collection_failed()) {
9142 9143 clear_incremental_collection_failed();
9143 9144 grow_to_reserved();
9144 9145 return;
9145 9146 }
9146 9147
9147 9148 assert(UseAdaptiveSizePolicy, "Should be using adaptive sizing");
9148 9149
9149 9150 assert(gch->kind() == CollectedHeap::GenCollectedHeap,
9150 9151 "Wrong type of heap");
9151 9152 int prev_level = level() - 1;
9152 9153 assert(prev_level >= 0, "The cms generation is the lowest generation");
9153 9154 Generation* prev_gen = gch->get_gen(prev_level);
9154 9155 assert(prev_gen->kind() == Generation::ASParNew,
9155 9156 "Wrong type of young generation");
9156 9157 ParNewGeneration* younger_gen = (ParNewGeneration*) prev_gen;
9157 9158 size_t cur_eden = younger_gen->eden()->capacity();
9158 9159 CMSAdaptiveSizePolicy* size_policy = cms_size_policy();
9159 9160 size_t cur_promo = free();
9160 9161 size_policy->compute_tenured_generation_free_space(cur_promo,
9161 9162 max_available(),
9162 9163 cur_eden);
9163 9164 resize(cur_promo, size_policy->promo_size());
9164 9165
9165 9166 // Record the new size of the space in the cms generation
9166 9167 // that is available for promotions. This is temporary.
9167 9168 // It should be the desired promo size.
9168 9169 size_policy->avg_cms_promo()->sample(free());
9169 9170 size_policy->avg_old_live()->sample(used());
9170 9171
9171 9172 if (UsePerfData) {
9172 9173 CMSGCAdaptivePolicyCounters* counters = gc_adaptive_policy_counters();
9173 9174 counters->update_cms_capacity_counter(capacity());
9174 9175 }
9175 9176 }
9176 9177
9177 9178 void ASConcurrentMarkSweepGeneration::shrink_by(size_t desired_bytes) {
9178 9179 assert_locked_or_safepoint(Heap_lock);
9179 9180 assert_lock_strong(freelistLock());
9180 9181 HeapWord* old_end = _cmsSpace->end();
9181 9182 HeapWord* unallocated_start = _cmsSpace->unallocated_block();
9182 9183 assert(old_end >= unallocated_start, "Miscalculation of unallocated_start");
9183 9184 FreeChunk* chunk_at_end = find_chunk_at_end();
9184 9185 if (chunk_at_end == NULL) {
9185 9186 // No room to shrink
9186 9187 if (PrintGCDetails && Verbose) {
9187 9188 gclog_or_tty->print_cr("No room to shrink: old_end "
9188 9189 PTR_FORMAT " unallocated_start " PTR_FORMAT
9189 9190 " chunk_at_end " PTR_FORMAT,
9190 9191 old_end, unallocated_start, chunk_at_end);
9191 9192 }
9192 9193 return;
9193 9194 } else {
9194 9195
9195 9196 // Find the chunk at the end of the space and determine
9196 9197 // how much it can be shrunk.
9197 9198 size_t shrinkable_size_in_bytes = chunk_at_end->size();
9198 9199 size_t aligned_shrinkable_size_in_bytes =
9199 9200 align_size_down(shrinkable_size_in_bytes, os::vm_page_size());
9200 9201 assert(unallocated_start <= chunk_at_end->end(),
9201 9202 "Inconsistent chunk at end of space");
9202 9203 size_t bytes = MIN2(desired_bytes, aligned_shrinkable_size_in_bytes);
9203 9204 size_t word_size_before = heap_word_size(_virtual_space.committed_size());
9204 9205
9205 9206 // Shrink the underlying space
9206 9207 _virtual_space.shrink_by(bytes);
9207 9208 if (PrintGCDetails && Verbose) {
9208 9209 gclog_or_tty->print_cr("ConcurrentMarkSweepGeneration::shrink_by:"
9209 9210 " desired_bytes " SIZE_FORMAT
9210 9211 " shrinkable_size_in_bytes " SIZE_FORMAT
9211 9212 " aligned_shrinkable_size_in_bytes " SIZE_FORMAT
9212 9213 " bytes " SIZE_FORMAT,
9213 9214 desired_bytes, shrinkable_size_in_bytes,
9214 9215 aligned_shrinkable_size_in_bytes, bytes);
9215 9216       gclog_or_tty->print_cr("  old_end  " PTR_FORMAT
9216 9217         "  unallocated_start  " PTR_FORMAT,
9217 9218 old_end, unallocated_start);
9218 9219 }
9219 9220
9220 9221 // If the space did shrink (shrinking is not guaranteed),
9221 9222 // shrink the chunk at the end by the appropriate amount.
9222 9223 if (((HeapWord*)_virtual_space.high()) < old_end) {
9223 9224 size_t new_word_size =
9224 9225 heap_word_size(_virtual_space.committed_size());
9225 9226
9226 9227 // Have to remove the chunk from the dictionary because it is changing
9227 9228       // size and might be somewhere else in the dictionary.
9228 9229
9229 9230 // Get the chunk at end, shrink it, and put it
9230 9231 // back.
9231 9232 _cmsSpace->removeChunkFromDictionary(chunk_at_end);
9232 9233 size_t word_size_change = word_size_before - new_word_size;
9233 9234 size_t chunk_at_end_old_size = chunk_at_end->size();
9234 9235 assert(chunk_at_end_old_size >= word_size_change,
9235 9236 "Shrink is too large");
9236 9237 chunk_at_end->setSize(chunk_at_end_old_size -
9237 9238 word_size_change);
9238 9239 _cmsSpace->freed((HeapWord*) chunk_at_end->end(),
9239 9240 word_size_change);
9240 9241
9241 9242 _cmsSpace->returnChunkToDictionary(chunk_at_end);
9242 9243
9243 9244 MemRegion mr(_cmsSpace->bottom(), new_word_size);
9244 9245 _bts->resize(new_word_size); // resize the block offset shared array
9245 9246 Universe::heap()->barrier_set()->resize_covered_region(mr);
9246 9247 _cmsSpace->assert_locked();
9247 9248 _cmsSpace->set_end((HeapWord*)_virtual_space.high());
9248 9249
9249 9250 NOT_PRODUCT(_cmsSpace->dictionary()->verify());
9250 9251
9251 9252 // update the space and generation capacity counters
9252 9253 if (UsePerfData) {
9253 9254 _space_counters->update_capacity();
9254 9255 _gen_counters->update_all();
9255 9256 }
9256 9257
9257 9258 if (Verbose && PrintGCDetails) {
9258 9259 size_t new_mem_size = _virtual_space.committed_size();
9259 9260 size_t old_mem_size = new_mem_size + bytes;
9260 9261 gclog_or_tty->print_cr("Shrinking %s from %ldK by %ldK to %ldK",
9261 9262 name(), old_mem_size/K, bytes/K, new_mem_size/K);
9262 9263 }
9263 9264 }
9264 9265
9265 9266 assert(_cmsSpace->unallocated_block() <= _cmsSpace->end(),
9266 9267 "Inconsistency at end of space");
9267 9268 assert(chunk_at_end->end() == _cmsSpace->end(),
9268 9269 "Shrinking is inconsistent");
9269 9270 return;
9270 9271 }
9271 9272 }
9272 9273
9273 9274 // Transfer some number of overflowed objects to the usual marking
9274 9275 // stack. Return true if some objects were transferred.
9275 9276 bool MarkRefsIntoAndScanClosure::take_from_overflow_list() {
9276 9277 size_t num = MIN2((size_t)(_mark_stack->capacity() - _mark_stack->length())/4,
9277 9278 (size_t)ParGCDesiredObjsFromOverflowList);
9278 9279
9279 9280 bool res = _collector->take_from_overflow_list(num, _mark_stack);
9280 9281 assert(_collector->overflow_list_is_empty() || res,
9281 9282 "If list is not empty, we should have taken something");
9282 9283 assert(!res || !_mark_stack->isEmpty(),
9283 9284 "If we took something, it should now be on our stack");
9284 9285 return res;
9285 9286 }
9286 9287
9287 9288 size_t MarkDeadObjectsClosure::do_blk(HeapWord* addr) {
9288 9289 size_t res = _sp->block_size_no_stall(addr, _collector);
9289 9290 if (_sp->block_is_obj(addr)) {
9290 9291 if (_live_bit_map->isMarked(addr)) {
9291 9292 // It can't have been dead in a previous cycle
9292 9293 guarantee(!_dead_bit_map->isMarked(addr), "No resurrection!");
9293 9294 } else {
9294 9295 _dead_bit_map->mark(addr); // mark the dead object
9295 9296 }
9296 9297 }
9297 9298 // Could be 0, if the block size could not be computed without stalling.
9298 9299 return res;
9299 9300 }
9300 9301
9301 9302 TraceCMSMemoryManagerStats::TraceCMSMemoryManagerStats(CMSCollector::CollectorState phase, GCCause::Cause cause): TraceMemoryManagerStats() {
9302 9303
9303 9304 switch (phase) {
9304 9305 case CMSCollector::InitialMarking:
9305 9306 initialize(true /* fullGC */ ,
9306 9307 cause /* cause of the GC */,
9307 9308 true /* recordGCBeginTime */,
9308 9309 true /* recordPreGCUsage */,
9309 9310 false /* recordPeakUsage */,
9310 9311                false /* recordPostGCUsage */,
9311 9312 true /* recordAccumulatedGCTime */,
9312 9313 false /* recordGCEndTime */,
9313 9314 false /* countCollection */ );
9314 9315 break;
9315 9316
9316 9317 case CMSCollector::FinalMarking:
9317 9318 initialize(true /* fullGC */ ,
9318 9319 cause /* cause of the GC */,
9319 9320 false /* recordGCBeginTime */,
9320 9321 false /* recordPreGCUsage */,
9321 9322 false /* recordPeakUsage */,
9322 9323                false /* recordPostGCUsage */,
9323 9324 true /* recordAccumulatedGCTime */,
9324 9325 false /* recordGCEndTime */,
9325 9326 false /* countCollection */ );
9326 9327 break;
9327 9328
9328 9329 case CMSCollector::Sweeping:
9329 9330 initialize(true /* fullGC */ ,
9330 9331 cause /* cause of the GC */,
9331 9332 false /* recordGCBeginTime */,
9332 9333 false /* recordPreGCUsage */,
9333 9334 true /* recordPeakUsage */,
9334 9335                true /* recordPostGCUsage */,
9335 9336 false /* recordAccumulatedGCTime */,
9336 9337 true /* recordGCEndTime */,
9337 9338 true /* countCollection */ );
9338 9339 break;
9339 9340
9340 9341 default:
9341 9342 ShouldNotReachHere();
9342 9343 }
9343 9344 }
9344 9345
5828 lines elided