src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
rev 5732 : [mq]: comments2


 100 // Two important conditions that we have to satisfy:
 101 // 1. if a thread does a low-level wait on the CMS lock, then it
 102 //    relinquishes the CMS token if it were holding that token
 103 //    when it acquired the low-level CMS lock.
 104 // 2. any low-level notifications on the low-level lock
 105 //    should only be sent when a thread has relinquished the token.
 106 //
 107 // In the absence of either property, we'd have potential deadlock.
 108 //
 109 // We protect each of the CMS (concurrent and sequential) phases
 110 // with the CMS _token_, not the CMS _lock_.
 111 //
 112 // The only code protected by CMS lock is the token acquisition code
 113 // itself, see ConcurrentMarkSweepThread::[de]synchronize(), and the
 114 // baton-passing code.
 115 //
 116 // Unfortunately, I couldn't come up with a good abstraction to factor and
 117 // hide the naked CGC_lock manipulation in the baton-passing code
 118 // further below. That's something we should try to do. Also, the proof
 119 // of correctness of this 2-level locking scheme is far from obvious,
 120 // and potentially quite slippery. We have an uneasy suspicion, for instance,
 121 // that there may be a theoretical possibility of delay/starvation in the
 122 // low-level lock/wait/notify scheme used for the baton-passing because of
 123 // potential interference with the priority scheme embodied in the
 124 // CMS-token-passing protocol. See related comments at a CGC_lock->wait()
 125 // invocation further below and marked with "XXX 20011219YSR".
 126 // Indeed, as we note elsewhere, this may become yet more slippery
 127 // in the presence of multiple CMS and/or multiple VM threads. XXX
 128 
 129 class CMSTokenSync: public StackObj {
 130  private:
 131   bool _is_cms_thread;
 132  public:
 133   CMSTokenSync(bool is_cms_thread):
 134     _is_cms_thread(is_cms_thread) {
 135     assert(is_cms_thread == Thread::current()->is_ConcurrentGC_thread(),
 136            "Incorrect argument to constructor");
 137     ConcurrentMarkSweepThread::synchronize(_is_cms_thread);
 138   }
 139 
 140   ~CMSTokenSync() {
 141     assert(_is_cms_thread ?
 142              ConcurrentMarkSweepThread::cms_thread_has_cms_token() :
 143              ConcurrentMarkSweepThread::vm_thread_has_cms_token(),

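A standalone sketch of the token/baton discipline described in the comment block above (lines 100-127). This is not HotSpot code: std::mutex and std::condition_variable stand in for CGC_lock, and the type names are illustrative. It only models the two rules, relinquish the token before a low-level wait and send the low-level notification only after the token has been relinquished.

```cpp
#include <condition_variable>
#include <mutex>

enum class TokenHolder { None, CMSThread, VMThread };

struct BatonSketch {
  std::mutex              low_level_lock;   // plays the role of CGC_lock
  std::condition_variable cv;
  TokenHolder             holder = TokenHolder::None;

  // Rule 1: a thread waiting on the low-level lock must not be holding the
  // token; here the wait only completes once the token is free, and the
  // waiter takes it as it wakes up.
  void acquire(TokenHolder who) {
    std::unique_lock<std::mutex> ml(low_level_lock);
    cv.wait(ml, [this] { return holder == TokenHolder::None; });
    holder = who;
  }

  // Rule 2: the low-level notification is sent only after the token has been
  // relinquished, so a woken waiter can always make progress.
  void release() {
    {
      std::lock_guard<std::mutex> ml(low_level_lock);
      holder = TokenHolder::None;
    }
    cv.notify_all();
  }
};
```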

 242     if (_par_gc_thread_states == NULL) {
 243       vm_exit_during_initialization("Could not allocate par gc structs");
 244     }
 245     for (uint i = 0; i < ParallelGCThreads; i++) {
 246       _par_gc_thread_states[i] = new CMSParGCThreadState(cmsSpace());
 247       if (_par_gc_thread_states[i] == NULL) {
 248         vm_exit_during_initialization("Could not allocate par gc structs");
 249       }
 250     }
 251   } else {
 252     _par_gc_thread_states = NULL;
 253   }
 254   _incremental_collection_failed = false;
 255   // The "dilatation_factor" is the expansion that can occur on
 256   // account of the fact that the minimum object size in the CMS
 257   // generation may be larger than that in, say, a contiguous young
 258   //  generation.
 259   // Ideally, in the calculation below, we'd compute the dilatation
 260   // factor as: MinChunkSize/(promoting_gen's min object size)
 261   // Since we do not have such a general query interface for the
 262 // promoting generation, we'll instead just use the minimum
 263   // object size (which today is a header's worth of space);
 264   // note that all arithmetic is in units of HeapWords.
 265   assert(MinChunkSize >= CollectedHeap::min_fill_size(), "just checking");
 266   assert(_dilatation_factor >= 1.0, "from previous assert");
 267 }
 268 
 269 
 270 // The field "_initiating_occupancy" represents the occupancy percentage
 271 // at which we trigger a new collection cycle.  Unless explicitly specified
 272 // via CMSInitiatingOccupancyFraction (argument "io" below), it
 273 // is calculated by:
 274 //
 275 //   Let "f" be MinHeapFreeRatio in
 276 //
 277 //    _initiating_occupancy = 100-f +
 278 //                           f * (CMSTriggerRatio/100)
 279 //   where CMSTriggerRatio is the argument "tr" below.
 280 //
 281 // That is, if we assume the heap is at its desired maximum occupancy at the
 282 // end of a collection, we let CMSTriggerRatio of the (purported) free
 283 // space be allocated before initiating a new collection cycle.
 284 //
 285 void ConcurrentMarkSweepGeneration::init_initiating_occupancy(intx io, uintx tr) {
 286   assert(io <= 100 && tr <= 100, "Check the arguments");
 287   if (io >= 0) {
 288     _initiating_occupancy = (double)io / 100.0;
 289   } else {
 290     _initiating_occupancy = ((100 - MinHeapFreeRatio) +
 291                              (double)(tr * MinHeapFreeRatio) / 100.0)
 292                             / 100.0;
 293   }
 294 }
 295 
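A minimal, standalone restatement of the formula above, using example flag values (MinHeapFreeRatio = 40, CMSTriggerRatio = 80 are illustrative numbers, not values taken from this code):

```cpp
#include <cstdio>

// Mirrors init_initiating_occupancy() above: if "io" is negative (the flag
// was not set explicitly), derive the occupancy from MinHeapFreeRatio (f)
// and CMSTriggerRatio (tr).
static double initiating_occupancy(long io, unsigned f, unsigned tr) {
  if (io >= 0) {
    return (double)io / 100.0;
  }
  return ((100 - f) + (double)(tr * f) / 100.0) / 100.0;
}

int main() {
  // Example only: f = 40, tr = 80 gives (100 - 40) + 0.80 * 40 = 92,
  // i.e. a new cycle is triggered at 92% occupancy.
  std::printf("%.2f\n", initiating_occupancy(-1, 40, 80));  // prints 0.92
  std::printf("%.2f\n", initiating_occupancy(75, 40, 80));  // explicit io: 0.75
  return 0;
}
```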
 296 void ConcurrentMarkSweepGeneration::ref_processor_init() {
 297   assert(collector() != NULL, "no collector");


2654       ConcurrentMarkSweepThread::CMS_cms_wants_token);
2655   }
2656   if (TraceCMSState) {
2657     gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT " continuing at CMS state %d",
2658       Thread::current(), _collectorState);
2659   }
2660   return res;
2661 }
2662 
2663 // Because of the need to lock the free lists and other structures in
2664 // the collector, common to all the generations that the collector is
2665 // collecting, we need the gc_prologues of individual CMS generations
2666 // delegate to their collector. It may have been simpler had the
2667 // current infrastructure allowed one to call a prologue on a
2668 // collector. In the absence of that we have the generation's
2669 // prologue delegate to the collector, which delegates back
2670 // some "local" work to a worker method in the individual generations
2671 // that it's responsible for collecting, while itself doing any
2672 // work common to all generations it's responsible for. A similar
2673 // comment applies to the  gc_epilogue()'s.
2674 // The role of the variable _between_prologue_and_epilogue is to
2675 // enforce the invocation protocol.
2676 void CMSCollector::gc_prologue(bool full) {
2677   // Call gc_prologue_work() for the CMSGen
2678   // we are responsible for.
2679 
2680   // The following locking discipline assumes that we are only called
2681   // when the world is stopped.
2682   assert(SafepointSynchronize::is_at_safepoint(), "world is stopped assumption");
2683 
2684   // The CMSCollector prologue must call the gc_prologues for the
2685   // "generations" that it's responsible
2686   // for.
2687 
2688   assert(   Thread::current()->is_VM_thread()
2689          || (   CMSScavengeBeforeRemark
2690              && Thread::current()->is_ConcurrentGC_thread()),
2691          "Incorrect thread type for prologue execution");
2692 
2693   if (_between_prologue_and_epilogue) {
2694     // We have already been invoked; this is a gc_prologue delegation

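The delegation shape described in the comment before gc_prologue() can be sketched with toy types (illustrative names, not the real CMSCollector/generation classes): the generation's prologue forwards to the collector, the collector does the work common to all its generations once per pause and calls back into the generation for the local part, and a flag analogous to _between_prologue_and_epilogue records that a prologue is in flight.

```cpp
struct ToyGeneration;

struct ToyCollector {
  bool _between_prologue_and_epilogue = false;

  void gc_prologue(ToyGeneration* gen);           // shared entry point
  void gc_epilogue() { _between_prologue_and_epilogue = false; }
  void common_prologue_work() { /* e.g. take locks common to all gens */ }
};

struct ToyGeneration {
  ToyCollector* _collector;
  void gc_prologue()      { _collector->gc_prologue(this); }    // delegate up
  void gc_prologue_work() { /* per-generation "local" work */ } // called back
};

void ToyCollector::gc_prologue(ToyGeneration* gen) {
  if (!_between_prologue_and_epilogue) {   // first delegation this pause
    common_prologue_work();
    _between_prologue_and_epilogue = true;
  }
  gen->gc_prologue_work();                 // hand the local part back down
}
```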

2861 }
2862 
2863 #ifndef PRODUCT
2864 bool CMSCollector::have_cms_token() {
2865   Thread* thr = Thread::current();
2866   if (thr->is_VM_thread()) {
2867     return ConcurrentMarkSweepThread::vm_thread_has_cms_token();
2868   } else if (thr->is_ConcurrentGC_thread()) {
2869     return ConcurrentMarkSweepThread::cms_thread_has_cms_token();
2870   } else if (thr->is_GC_task_thread()) {
2871     return ConcurrentMarkSweepThread::vm_thread_has_cms_token() &&
2872            ParGCRareEvent_lock->owned_by_self();
2873   }
2874   return false;
2875 }
2876 #endif
2877 
2878 // Check reachability of the given heap address in CMS generation,
2879 // treating all other generations as roots.
2880 bool CMSCollector::is_cms_reachable(HeapWord* addr) {
2881   // We could "guarantee" below, rather than assert, but I'll
2882   // leave these as "asserts" so that an adventurous debugger
2883   // could try this in the product build provided some subset of
2884   // the conditions were met, provided they were interested in the
2885   // results and knew that the computation below wouldn't interfere
2886   // with other concurrent computations mutating the structures
2887   // being read or written.
2888   assert(SafepointSynchronize::is_at_safepoint(),
2889          "Else mutations in object graph will make answer suspect");
2890   assert(have_cms_token(), "Should hold cms token");
2891   assert(haveFreelistLocks(), "must hold free list locks");
2892   assert_lock_strong(bitMapLock());
2893 
2894   // Clear the marking bit map array before starting, but, just
2895   // for kicks, first report if the given address is already marked
2896   gclog_or_tty->print_cr("Start: Address 0x%x is%s marked", addr,
2897                 _markBitMap.isMarked(addr) ? "" : " not");
2898 
2899   if (verify_after_remark()) {
2900     MutexLockerEx x(verification_mark_bm()->lock(), Mutex::_no_safepoint_check_flag);
2901     bool result = verification_mark_bm()->isMarked(addr);
2902     gclog_or_tty->print_cr("TransitiveMark: Address 0x%x %s marked", addr,
2903                            result ? "IS" : "is NOT");
2904     return result;


2965   assert(_collectorState > Marking && _collectorState <= Sweeping,
2966          "Else marking info checked here may be obsolete");
2967   assert(haveFreelistLocks(), "must hold free list locks");
2968   assert_lock_strong(bitMapLock());
2969 
2970 
2971   // Allocate marking bit map if not already allocated
2972   if (!init) { // first time
2973     if (!verification_mark_bm()->allocate(_span)) {
2974       return false;
2975     }
2976     init = true;
2977   }
2978 
2979   assert(verification_mark_stack()->isEmpty(), "Should be empty");
2980 
2981   // Turn off refs discovery -- so we will be tracing through refs.
2982   // This is as intended, because by this time
2983   // GC must already have cleared any refs that need to be cleared,
2984   // and traced those that need to be marked; moreover,
2985   // the marking done here is not going to interfere in any
2986   // way with the marking information used by GC.
2987   NoRefDiscovery no_discovery(ref_processor());
2988 
2989   COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;)
2990 
2991   // Clear any marks from a previous round
2992   verification_mark_bm()->clear_all();
2993   assert(verification_mark_stack()->isEmpty(), "markStack should be empty");
2994   verify_work_stacks_empty();
2995 
2996   GenCollectedHeap* gch = GenCollectedHeap::heap();
2997   gch->ensure_parsability(false);  // fill TLABs, but no need to retire them
2998   // Update the saved marks which may affect the root scans.
2999   gch->save_marks();
3000 
3001   if (CMSRemarkVerifyVariant == 1) {
3002     // In this first variant of verification, we complete
3003   // all marking, then check if the new marks-vector is
3004     // a subset of the CMS marks-vector.
3005     verify_after_remark_work_1();
3006   } else if (CMSRemarkVerifyVariant == 2) {
3007     // In this second variant of verification, we flag an error
3008     // (i.e. an object reachable in the new marks-vector not reachable
3009     // in the CMS marks-vector) immediately, also indicating the
3010   // identity of an object (A) that references the unmarked object (B) --
3011     // presumably, a mutation to A failed to be picked up by preclean/remark?
3012     verify_after_remark_work_2();
3013   } else {
3014     warning("Unrecognized value %d for CMSRemarkVerifyVariant",
3015             CMSRemarkVerifyVariant);
3016   }
3017   if (!silent) gclog_or_tty->print(" done] ");
3018   return true;
3019 }
3020 
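A miniature version of the CMSRemarkVerifyVariant == 1 check described above, with plain std::vector<bool> bitmaps standing in for CMSBitMap (one bit per heap word): every mark in the freshly computed verification bitmap must also appear in the CMS bitmap.

```cpp
#include <cstddef>
#include <vector>

// Returns true if the verification marks are a subset of the CMS marks.
// A false return corresponds to an object reachable now that CMS marking
// missed, which is what the variant-1 verification is looking for.
static bool is_subset(const std::vector<bool>& verification_bm,
                      const std::vector<bool>& cms_bm) {
  for (std::size_t i = 0; i < verification_bm.size(); ++i) {
    if (verification_bm[i] && !cms_bm[i]) {
      return false;
    }
  }
  return true;
}
```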
3021 void CMSCollector::verify_after_remark_work_1() {
3022   ResourceMark rm;
3023   HandleMark  hm;


3384     }
3385   }
3386 }
3387 
3388 HeapWord* ConcurrentMarkSweepGeneration::expand_and_par_lab_allocate(CMSParGCThreadState* ps, size_t word_sz) {
3389   HeapWord* res = NULL;
3390   MutexLocker x(ParGCRareEvent_lock);
3391   while (true) {
3392     // Expansion by some other thread might make alloc OK now:
3393     res = ps->lab.alloc(word_sz);
3394     if (res != NULL) return res;
3395     // If there's not enough expansion space available, give up.
3396     if (_virtual_space.uncommitted_size() < (word_sz * HeapWordSize)) {
3397       return NULL;
3398     }
3399     // Otherwise, we try expansion.
3400     expand(word_sz*HeapWordSize, MinHeapDeltaBytes,
3401       CMSExpansionCause::_allocate_par_lab);
3402     // Now go around the loop and try alloc again;
3403     // A competing par_promote might beat us to the expansion space,
3404     // so we may go around the loop again if promotion fails again.
3405     if (GCExpandToAllocateDelayMillis > 0) {
3406       os::sleep(Thread::current(), GCExpandToAllocateDelayMillis, false);
3407     }
3408   }
3409 }
3410 
3411 
3412 bool ConcurrentMarkSweepGeneration::expand_and_ensure_spooling_space(
3413   PromotionInfo* promo) {
3414   MutexLocker x(ParGCRareEvent_lock);
3415   size_t refill_size_bytes = promo->refillSize() * HeapWordSize;
3416   while (true) {
3417     // Expansion by some other thread might make alloc OK now:
3418     if (promo->ensure_spooling_space()) {
3419       assert(promo->has_spooling_space(),
3420              "Post-condition of successful ensure_spooling_space()");
3421       return true;
3422     }
3423     // If there's not enough expansion space available, give up.
3424     if (_virtual_space.uncommitted_size() < refill_size_bytes) {


4356   // It is possible for whichever thread initiated the yield request
4357   // not to get a chance to wake up and take the bitmap lock between
4358   // this thread releasing it and reacquiring it. So, while the
4359   // should_yield() flag is on, let's sleep for a bit to give the
4360   // other thread a chance to wake up. The limit imposed on the number
4361   // of iterations is defensive, to avoid any unforeseen circumstances
4362   // putting us into an infinite loop. Since it's always been this
4363   // (coordinator_yield()) method that was observed to cause the
4364   // problem, we are using a parameter (CMSCoordinatorYieldSleepCount)
4365   // which is by default non-zero. For the other seven methods that
4366   // also perform the yield operation, we are using a different
4367   // parameter (CMSYieldSleepCount) which is by default zero. This way we
4368   // can enable the sleeping for those methods too, if necessary.
4369   // See 6442774.
4370   //
4371   // We really need to reconsider the synchronization between the GC
4372   // thread and the yield-requesting threads in the future and we
4373   // should really use wait/notify, which is the recommended
4374   // way of doing this type of interaction. Additionally, we should
4375   // consolidate the eight methods that do the yield operation, which
4376   // are almost identical, into one for better maintainability and
4377   // readability. See 6445193.
4378   //
4379   // Tony 2006.06.29
4380   for (unsigned i = 0; i < CMSCoordinatorYieldSleepCount &&
4381                    ConcurrentMarkSweepThread::should_yield() &&
4382                    !CMSCollector::foregroundGCIsActive(); ++i) {
4383     os::sleep(Thread::current(), 1, false);
4384     ConcurrentMarkSweepThread::acknowledge_yield_request();
4385   }
4386 
4387   ConcurrentMarkSweepThread::synchronize(true);
4388   _bit_map_lock->lock_without_safepoint_check();
4389   _collector->startTimer();
4390 }
4391 
4392 bool CMSCollector::do_marking_mt(bool asynch) {
4393   assert(ConcGCThreads > 0 && conc_workers() != NULL, "precondition");
4394   int num_workers = AdaptiveSizePolicy::calc_active_conc_workers(
4395                                        conc_workers()->total_workers(),
4396                                        conc_workers()->active_workers(),


4524   if (CMSPrecleaningEnabled) {
4525     sample_eden();
4526     _collectorState = AbortablePreclean;
4527   } else {
4528     _collectorState = FinalMarking;
4529   }
4530   verify_work_stacks_empty();
4531   verify_overflow_empty();
4532 }
4533 
4534 // Try and schedule the remark such that young gen
4535 // occupancy is CMSScheduleRemarkEdenPenetration %.
4536 void CMSCollector::abortable_preclean() {
4537   check_correct_thread_executing();
4538   assert(CMSPrecleaningEnabled,  "Inconsistent control state");
4539   assert(_collectorState == AbortablePreclean, "Inconsistent control state");
4540 
4541   // If Eden's current occupancy is below this threshold,
4542   // immediately schedule the remark; else preclean
4543   // past the next scavenge in an effort to
4544     // schedule the pause as described above. By choosing
4545   // CMSScheduleRemarkEdenSizeThreshold >= max eden size
4546   // we will never do an actual abortable preclean cycle.
4547   if (get_eden_used() > CMSScheduleRemarkEdenSizeThreshold) {
4548     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
4549     CMSPhaseAccounting pa(this, "abortable-preclean", !PrintGCDetails);
4550     // We need more smarts in the abortable preclean
4551     // loop below to deal with cases where allocation
4552     // in young gen is very very slow, and our precleaning
4553     // is running a losing race against a horde of
4554     // mutators intent on flooding us with CMS updates
4555     // (dirty cards).
4556     // One, admittedly dumb, strategy is to give up
4557     // after a certain number of abortable precleaning loops
4558     // or after a certain maximum time. We want to make
4559     // this smarter in the next iteration.
4560     // XXX FIX ME!!! YSR
4561     size_t loops = 0, workdone = 0, cumworkdone = 0, waited = 0;
4562     while (!(should_abort_preclean() ||
4563              ConcurrentMarkSweepThread::should_terminate())) {
4564       workdone = preclean_work(CMSPrecleanRefLists2, CMSPrecleanSurvivors2);

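A rough sketch of the scheduling decision described above, under the assumption (illustrative, not taken from this file) that CMSScheduleRemarkEdenSizeThreshold is compared against Eden's used bytes and CMSScheduleRemarkEdenPenetration is a percentage of Eden's capacity at which precleaning stops and the remark is scheduled:

```cpp
#include <cstddef>

// `size_threshold` plays the role of CMSScheduleRemarkEdenSizeThreshold:
// below it, Eden is so empty that an abortable preclean phase is not worth
// starting at all.
static bool worth_starting_abortable_preclean(std::size_t eden_used,
                                              std::size_t size_threshold) {
  return eden_used > size_threshold;
}

// `penetration_percent` plays the role of CMSScheduleRemarkEdenPenetration:
// once Eden is this full, abort precleaning so the remark pause lands near
// the desired Eden occupancy.
static bool should_abort_preclean(std::size_t eden_used,
                                  std::size_t eden_capacity,
                                  unsigned penetration_percent) {
  return eden_used * 100 >= eden_capacity * (std::size_t)penetration_percent;
}
```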

5520 }
5521 
5522 void
5523 CMSParRemarkTask::do_dirty_card_rescan_tasks(
5524   CompactibleFreeListSpace* sp, int i,
5525   Par_MarkRefsIntoAndScanClosure* cl) {
5526   // Until all tasks completed:
5527   // . claim an unclaimed task
5528   // . compute region boundaries corresponding to task claimed
5529   // . transfer dirty bits ct->mut for that region
5530   // . apply rescanclosure to dirty mut bits for that region
5531 
5532   ResourceMark rm;
5533   HandleMark   hm;
5534 
5535   OopTaskQueue* work_q = work_queue(i);
5536   ModUnionClosure modUnionClosure(&(_collector->_modUnionTable));
5537   // CAUTION! CAUTION! CAUTION! CAUTION! CAUTION! CAUTION! CAUTION!
5538   // CAUTION: This closure has state that persists across calls to
5539   // the work method dirty_range_iterate_clear() in that it has
5540   // embedded in it a (subtype of) UpwardsObjectClosure. The
5541   // use of that state in the embedded UpwardsObjectClosure instance
5542   // assumes that the cards are always iterated (even if in parallel
5543   // by several threads) in monotonically increasing order per each
5544   // thread. This is true of the implementation below which picks
5545   // card ranges (chunks) in monotonically increasing order globally
5546   // and, a-fortiori, in monotonically increasing order per thread
5547   // (the latter order being a subsequence of the former).
5548   // If the work code below is ever reorganized into a more chaotic
5549   // work-partitioning form than the current "sequential tasks"
5550   // paradigm, the use of that persistent state will have to be
5551   // revisited and modified appropriately. See also related
5552   // bug 4756801 work on which should examine this code to make
5553   // sure that the changes there do not run counter to the
5554   // assumptions made here and necessary for correctness and
5555   // efficiency. Note also that this code might yield inefficient
5556   // behavior in the case of very large objects that span one or
5557   // more work chunks. Such objects would potentially be scanned
5558   // several times redundantly. Work on 4756801 should try and
5559   // address that performance anomaly if at all possible. XXX
5560   MemRegion  full_span  = _collector->_span;
5561   CMSBitMap* bm    = &(_collector->_markBitMap);     // shared
5562   MarkFromDirtyCardsClosure
5563     greyRescanClosure(_collector, full_span, // entire span of interest
5564                       sp, bm, work_q, cl);
5565 
5566   SequentialSubTasksDone* pst = sp->conc_par_seq_tasks();
5567   assert(pst->valid(), "Uninitialized use?");
5568   uint nth_task = 0;
5569   const int alignment = CardTableModRefBS::card_size * BitsPerWord;
5570   MemRegion span = sp->used_region();
5571   HeapWord* start_addr = span.start();
5572   HeapWord* end_addr = (HeapWord*)round_to((intptr_t)span.end(),
5573                                            alignment);
5574   const size_t chunk_size = sp->rescan_task_size(); // in HeapWord units
5575   assert((HeapWord*)round_to((intptr_t)start_addr, alignment) ==
5576          start_addr, "Check alignment");
5577   assert((size_t)round_to((intptr_t)chunk_size, alignment) ==
5578          chunk_size, "Check alignment");
5579 
5580   while (!pst->is_task_claimed(/* reference */ nth_task)) {
5581     // Having claimed the nth_task, compute corresponding mem-region,
5582     // which is a-fortiori aligned correctly (i.e. at a MUT boundary).
5583     // The alignment restriction ensures that we do not need any
5584     // synchronization with other gang-workers while setting or
5585     // clearing bits in this chunk of the MUT.
5586     MemRegion this_span = MemRegion(start_addr + nth_task*chunk_size,
5587                                     start_addr + (nth_task+1)*chunk_size);
5588     // The last chunk's end might be way beyond end of the
5589     // used region. In that case pull back appropriately.
5590     if (this_span.end() > end_addr) {
5591       this_span.set_end(end_addr);
5592       assert(!this_span.is_empty(), "Program logic (calculation of n_tasks)");
5593     }
5594     // Iterate over the dirty cards covering this chunk, marking them
5595     // precleaned, and setting the corresponding bits in the mod union
5596     // table. Since we have been careful to partition at Card and MUT-word
5597     // boundaries no synchronization is needed between parallel threads.
5598     _collector->_ct->ct_bs()->dirty_card_iterate(this_span,
5599                                                  &modUnionClosure);
5600 
5601     // Having transferred these marks into the modUnionTable,
5602     // rescan the marked objects on the dirty cards in the modUnionTable.

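A miniature version of the chunked rescan partitioning used above: the used region is cut into equally sized, alignment-friendly chunks, the nth claimed task covers [start + n*chunk, start + (n+1)*chunk), and the last chunk is pulled back so it does not run past the end of the region. Types and names are illustrative, with plain integers standing in for MemRegion.

```cpp
#include <algorithm>
#include <cstdint>
#include <vector>

struct Chunk { std::uintptr_t begin; std::uintptr_t end; };

// `start` and `end` are assumed to already be aligned as in the asserts
// above; `chunk_size` corresponds to sp->rescan_task_size().
static std::vector<Chunk> make_rescan_chunks(std::uintptr_t start,
                                             std::uintptr_t end,
                                             std::uintptr_t chunk_size) {
  std::vector<Chunk> chunks;
  for (std::uintptr_t lo = start; lo < end; lo += chunk_size) {
    std::uintptr_t hi = std::min(lo + chunk_size, end);  // clamp last chunk
    chunks.push_back(Chunk{lo, hi});
  }
  return chunks;
}
```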

6354     // already have needed locks
6355     sweepWork(_cmsGen,  asynch);
6356     // Update heap occupancy information which is used as
6357     // input to soft ref clearing policy at the next gc.
6358     Universe::update_heap_info_at_gc();
6359     _collectorState = Resizing;
6360   }
6361   verify_work_stacks_empty();
6362   verify_overflow_empty();
6363 
6364   if (should_unload_classes()) {
6365     ClassLoaderDataGraph::purge();
6366   }
6367 
6368   _intra_sweep_timer.stop();
6369   _intra_sweep_estimate.sample(_intra_sweep_timer.seconds());
6370 
6371   _inter_sweep_timer.reset();
6372   _inter_sweep_timer.start();
6373 
6374   // We need to use a monotonically non-decreasing time in ms
6375   // or we will see time-warp warnings and os::javaTimeMillis()
6376   // does not guarantee monotonicity.
6377   jlong now = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
6378   update_time_of_last_gc(now);
6379 
6380   // NOTE on abstract state transitions:
6381   // Mutators allocate-live and/or mark the mod-union table dirty
6382   // based on the state of the collection.  The former is done in
6383   // the interval [Marking, Sweeping] and the latter in the interval
6384   // [Marking, Sweeping).  Thus the transitions into the Marking state
6385   // and out of the Sweeping state must be synchronously visible
6386   // globally to the mutators.
6387   // The transition into the Marking state happens with the world
6388   // stopped so the mutators will globally see it.  Sweeping is
6389   // done asynchronously by the background collector so the transition
6390   // from the Sweeping state to the Resizing state must be done
6391   // under the freelistLock (as is the check for whether to
6392   // allocate-live and whether to dirty the mod-union table).
6393   assert(_collectorState == Resizing, "Change of collector state to"
6394     " Resizing must be done under the freelistLocks (plural)");


6715 // bit vector itself. That is done by a separate call CMSBitMap::allocate()
6716 // further below.
6717 CMSBitMap::CMSBitMap(int shifter, int mutex_rank, const char* mutex_name):
6718   _bm(),
6719   _shifter(shifter),
6720   _lock(mutex_rank >= 0 ? new Mutex(mutex_rank, mutex_name, true) : NULL)
6721 {
6722   _bmStartWord = 0;
6723   _bmWordSize  = 0;
6724 }
6725 
6726 bool CMSBitMap::allocate(MemRegion mr) {
6727   _bmStartWord = mr.start();
6728   _bmWordSize  = mr.word_size();
6729   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
6730                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
6731   if (!brs.is_reserved()) {
6732     warning("CMS bit map allocation failure");
6733     return false;
6734   }
6735   // For now we'll just commit all of the bit map up front.
6736   // Later on we'll try to be more parsimonious with swap.
6737   if (!_virtual_space.initialize(brs, brs.size())) {
6738     warning("CMS bit map backing store failure");
6739     return false;
6740   }
6741   assert(_virtual_space.committed_size() == brs.size(),
6742          "didn't reserve backing store for all of CMS bit map?");
6743   _bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
6744   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
6745          _bmWordSize, "inconsistency in bit map sizing");
6746   _bm.set_size(_bmWordSize >> _shifter);
6747 
6748   // bm.clear(); // can we rely on getting zero'd memory? verify below
6749   assert(isAllClear(),
6750          "Expected zero'd memory from ReservedSpace constructor");
6751   assert(_bm.size() == heapWordDiffToOffsetDiff(sizeInWords()),
6752          "consistency check");
6753   return true;
6754 }
6755 
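A standalone restatement of the sizing arithmetic in CMSBitMap::allocate() above, with an example span; the 512 MB figure and HeapWordSize == 8 are illustrative assumptions, not values from this file.

```cpp
#include <cstddef>
#include <cstdio>

// One bit covers 2^shifter heap words, so the backing store needs
// (word_size >> (shifter + log2(BitsPerByte))) bytes, plus one for rounding,
// matching the ReservedSpace request above.
static std::size_t bitmap_bytes(std::size_t bm_word_size, int shifter) {
  const int LogBitsPerByte = 3;                        // 8 bits per byte
  return (bm_word_size >> (shifter + LogBitsPerByte)) + 1;
}

int main() {
  // Example: a 512 MB span on a 64-bit VM is 64M heap words; with
  // shifter == 0 (one bit per word) that is roughly 8 MB of bit map.
  std::size_t words = (512u * 1024 * 1024) / 8;        // HeapWordSize == 8
  std::printf("%zu bytes\n", bitmap_bytes(words, 0));  // prints 8388609
  return 0;
}
```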


6822                    size * sizeof(oop)));
6823   if (!rs.is_reserved()) {
6824     warning("CMSMarkStack allocation failure");
6825     return false;
6826   }
6827   if (!_virtual_space.initialize(rs, rs.size())) {
6828     warning("CMSMarkStack backing store failure");
6829     return false;
6830   }
6831   assert(_virtual_space.committed_size() == rs.size(),
6832          "didn't reserve backing store for all of CMS stack?");
6833   _base = (oop*)(_virtual_space.low());
6834   _index = 0;
6835   _capacity = size;
6836   NOT_PRODUCT(_max_depth = 0);
6837   return true;
6838 }
6839 
6840 // XXX FIX ME !!! In the MT case we come in here holding a
6841 // leaf lock. For printing we need to take a further lock
6842 // which has lower rank. We need to recalibrate the two
6843 // lock-ranks involved in order to be able to print the
6844 // messages below. (Or defer the printing to the caller.
6845 // For now we take the expedient path of just disabling the
6846 // messages for the problematic case.)
6847 void CMSMarkStack::expand() {
6848   assert(_capacity <= MarkStackSizeMax, "stack bigger than permitted");
6849   if (_capacity == MarkStackSizeMax) {
6850     if (_hit_limit++ == 0 && !CMSConcurrentMTEnabled && PrintGCDetails) {
6851       // We print a warning message only once per CMS cycle.
6852       gclog_or_tty->print_cr(" (benign) Hit CMSMarkStack max size limit");
6853     }
6854     return;
6855   }
6856   // Double capacity if possible
6857   size_t new_capacity = MIN2(_capacity*2, MarkStackSizeMax);
6858   // Do not give up existing stack until we have managed to
6859   // get the double capacity that we desired.
6860   ReservedSpace rs(ReservedSpace::allocation_align_size_up(
6861                    new_capacity * sizeof(oop)));
6862   if (rs.is_reserved()) {
6863     // Release the backing store associated with old stack

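A sketch of the growth policy used by CMSMarkStack::expand() above, with std::vector standing in for the reserved backing store and `max_capacity` playing the role of MarkStackSizeMax: double the capacity, cap it at the maximum, and keep the old storage until the larger allocation has actually been obtained.

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

template <typename T>
static bool try_expand(std::vector<T>& stack_storage,
                       std::size_t max_capacity) {
  std::size_t capacity = stack_storage.capacity();
  if (capacity >= max_capacity) {
    return false;                                  // hit the hard limit
  }
  std::size_t new_capacity = std::min(capacity * 2, max_capacity);
  // Reserve the larger store first (this sketch throws on failure, where the
  // real code returns and keeps using the old, smaller stack).
  std::vector<T> bigger;
  bigger.reserve(new_capacity);
  bigger.assign(stack_storage.begin(), stack_storage.end());
  stack_storage.swap(bigger);                      // old store released here
  return true;
}
```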

7163         } else {
7164           // A non-array may have been imprecisely marked; we need
7165           // to scan object in its entirety.
7166           size = CompactibleFreeListSpace::adjustObjectSize(
7167                    p->oop_iterate(_scanningClosure));
7168         }
7169         #ifdef ASSERT
7170           size_t direct_size =
7171             CompactibleFreeListSpace::adjustObjectSize(p->size());
7172           assert(size == direct_size, "Inconsistency in size");
7173           assert(size >= 3, "Necessary for Printezis marks to work");
7174           if (!_bitMap->isMarked(addr+1)) {
7175             _bitMap->verifyNoOneBitsInRange(addr+2, addr+size);
7176           } else {
7177             _bitMap->verifyNoOneBitsInRange(addr+2, addr+size-1);
7178             assert(_bitMap->isMarked(addr+size-1),
7179                    "inconsistent Printezis mark");
7180           }
7181         #endif // ASSERT
7182     } else {
7183       // an uninitialized object
7184       assert(_bitMap->isMarked(addr+1), "missing Printezis mark?");
7185       HeapWord* nextOneAddr = _bitMap->getNextMarkedWordAddress(addr + 2);
7186       size = pointer_delta(nextOneAddr + 1, addr);
7187       assert(size == CompactibleFreeListSpace::adjustObjectSize(size),
7188              "alignment problem");
7189       // Note that pre-cleaning needn't redirty the card. OopDesc::set_klass()
7190       // will dirty the card when the klass pointer is installed in the
7191       // object (signalling the completion of initialization).
7192     }
7193   } else {
7194     // Either a not yet marked object or an uninitialized object
7195     if (p->klass_or_null() == NULL) {
7196       // An uninitialized object, skip to the next card, since
7197       // we may not be able to read its P-bits yet.
7198       assert(size == 0, "Initial value");
7199     } else {
7200       // An object not (yet) reached by marking: we merely need to
7201       // compute its size so as to go look at the next block.
7202       assert(p->is_oop(true), "should be an oop");
7203       size = CompactibleFreeListSpace::adjustObjectSize(p->size());
7204     }
7205   }
7206   DEBUG_ONLY(_collector->verify_work_stacks_empty();)
7207   return size;
7208 }
7209 
7210 void ScanMarkedObjectsAgainCarefullyClosure::do_yield_work() {
7211   assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),

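A sketch of the Printezis-mark decoding used in the uninitialized-object branch above: two extra bits, at addr+1 and at the last word of the block, let the block size be recovered without reading the object header. A std::vector<bool> indexed by heap-word offset stands in for the real mark bit map.

```cpp
#include <cstddef>
#include <vector>

// Assumes, as the assert above does, that bit_map[addr + 1] is set (the
// first Printezis bit); the second bit marks the last word of the block.
static std::size_t printezis_block_size(const std::vector<bool>& bit_map,
                                        std::size_t addr /* word index */) {
  std::size_t next_one = addr + 2;
  while (next_one < bit_map.size() && !bit_map[next_one]) {
    ++next_one;                       // getNextMarkedWordAddress(addr + 2)
  }
  return (next_one + 1) - addr;       // pointer_delta(nextOneAddr + 1, addr)
}
```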

7982   if (_span.contains(addr) && !_bit_map->isMarked(addr)) {
7983     // a white object ...
7984     _bit_map->mark(addr);         // ... now grey
7985     // push on the marking stack (grey set)
7986     bool simulate_overflow = false;
7987     NOT_PRODUCT(
7988       if (CMSMarkStackOverflowALot &&
7989           _collector->simulate_overflow()) {
7990         // simulate a stack overflow
7991         simulate_overflow = true;
7992       }
7993     )
7994     if (simulate_overflow || !_mark_stack->push(obj)) {
7995       if (_concurrent_precleaning) {
7996          // During precleaning we can just dirty the appropriate card(s)
7997          // in the mod union table, thus ensuring that the object remains
7998          // in the grey set  and continue. In the case of object arrays
7999          // we need to dirty all of the cards that the object spans,
8000          // since the rescan of object arrays will be limited to the
8001          // dirty cards.
8002          // Note that no one can be interfering with us in this action
8003          // of dirtying the mod union table, so no locking or atomics
8004          // are required.
8005          if (obj->is_objArray()) {
8006            size_t sz = obj->size();
8007            HeapWord* end_card_addr = (HeapWord*)round_to(
8008                                         (intptr_t)(addr+sz), CardTableModRefBS::card_size);
8009            MemRegion redirty_range = MemRegion(addr, end_card_addr);
8010            assert(!redirty_range.is_empty(), "Arithmetical tautology");
8011            _mod_union_table->mark_range(redirty_range);
8012          } else {
8013            _mod_union_table->mark(addr);
8014          }
8015          _collector->_ser_pmc_preclean_ovflw++;
8016       } else {
8017          // During the remark phase, we need to remember this oop
8018          // in the overflow list.
8019          _collector->push_on_overflow_list(obj);
8020          _collector->_ser_pmc_remark_ovflw++;
8021       }
8022     }

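A sketch of the redirty-range computation in the object-array branch above: the end address is rounded up to a card boundary so the dirtied range covers every card the array spans. The 512-byte card size is an assumption for illustration, and the arithmetic is done in bytes for simplicity where the real code works in HeapWords.

```cpp
#include <cstddef>
#include <cstdint>

static const std::uintptr_t card_size_in_bytes = 512;  // assumed card size

struct Range { std::uintptr_t start; std::uintptr_t end; };

// Mirrors round_to((intptr_t)(addr + sz), card_size) followed by
// MemRegion(addr, end_card_addr); requires a power-of-two card size.
static Range redirty_range(std::uintptr_t addr, std::size_t size_in_bytes) {
  std::uintptr_t end = addr + size_in_bytes;
  std::uintptr_t end_card = (end + card_size_in_bytes - 1) &
                            ~(card_size_in_bytes - 1);
  return Range{addr, end_card};
}
```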

9008       assert(_span.contains((HeapWord*)new_oop), "Out of bounds oop");
9009       // iterate over the oops in this oop, marking and pushing
9010       // the ones in CMS heap (i.e. in _span).
9011       new_oop->oop_iterate(&_mark_and_push);
9012     }
9013   }
9014 }
9015 
9016 ////////////////////////////////////////////////////////////////////
9017 // Support for Marking Stack Overflow list handling and related code
9018 ////////////////////////////////////////////////////////////////////
9019 // Much of the following code is similar in shape and spirit to the
9020 // code used in ParNewGC. We should try and share that code
9021 // as much as possible in the future.
9022 
9023 #ifndef PRODUCT
9024 // Debugging support for CMSStackOverflowALot
9025 
9026 // It's OK to call this multi-threaded;  the worst thing
9027 // that can happen is that we'll get a bunch of closely
9028 // spaced simulated overflows, but that's OK, in fact
9029 // probably good as it would exercise the overflow code
9030 // under contention.
9031 bool CMSCollector::simulate_overflow() {
9032   if (_overflow_counter-- <= 0) { // just being defensive
9033     _overflow_counter = CMSMarkStackOverflowInterval;
9034     return true;
9035   } else {
9036     return false;
9037   }
9038 }
9039 
9040 bool CMSCollector::par_simulate_overflow() {
9041   return simulate_overflow();
9042 }
9043 #endif
9044 
9045 // Single-threaded
9046 bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
9047   assert(stack->isEmpty(), "Expected precondition");
9048   assert(stack->capacity() > num, "Shouldn't bite more than can chew");


9128        // Write back the NULL in case we overwrote it with BUSY above
9129        // and it is still the same value.
9130        (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
9131      }
9132      return false;
9133   }
9134   assert(prefix != NULL && prefix != BUSY, "Error");
9135   size_t i = num;
9136   oop cur = prefix;
9137   // Walk down the first "num" objects, unless we reach the end.
9138   for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
9139   if (cur->mark() == NULL) {
9140     // We have "num" or fewer elements in the list, so there
9141     // is nothing to return to the global list.
9142     // Write back the NULL in lieu of the BUSY we wrote
9143     // above, if it is still the same value.
9144     if (_overflow_list == BUSY) {
9145       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
9146     }
9147   } else {
9148     // Chop off the suffix and return it to the global list.
9149     assert(cur->mark() != BUSY, "Error");
9150     oop suffix_head = cur->mark(); // suffix will be put back on global list
9151     cur->set_mark(NULL);           // break off suffix
9152     // It's possible that the list is still in the empty(busy) state
9153     // we left it in a short while ago; in that case we may be
9154     // able to place back the suffix without incurring the cost
9155     // of a walk down the list.
9156     oop observed_overflow_list = _overflow_list;
9157     oop cur_overflow_list = observed_overflow_list;
9158     bool attached = false;
9159     while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
9160       observed_overflow_list =
9161         (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
9162       if (cur_overflow_list == observed_overflow_list) {
9163         attached = true;
9164         break;
9165       } else cur_overflow_list = observed_overflow_list;
9166     }
9167     if (!attached) {
9168       // Too bad, someone else sneaked in (at least) an element; we'll need


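A minimal sketch of the cmpxchg retry pattern used above when putting the suffix back on the overflow list. The types are illustrative (Node instead of oop, std::atomic instead of Atomic::cmpxchg_ptr), and it omits the slower fallback path the real code takes when another thread has already repopulated the list.

```cpp
#include <atomic>

struct Node { Node* next; };             // stands in for an oop chain

static Node* const BUSY = reinterpret_cast<Node*>(0x1);

// Keep trying to install `suffix_head` as the list head as long as the list
// is observed empty (or still holding the BUSY placeholder we left there).
static bool try_attach_suffix(std::atomic<Node*>& overflow_list,
                              Node* suffix_head) {
  Node* observed = overflow_list.load();
  while (observed == nullptr || observed == BUSY) {
    // On failure compare_exchange_weak refreshes `observed` with the current
    // value, mirroring the re-read of _overflow_list after a failed cmpxchg.
    if (overflow_list.compare_exchange_weak(observed, suffix_head)) {
      return true;                       // attached
    }
  }
  return false;                          // someone else got there first
}
```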


 100 // Two important conditions that we have to satisfy:
 101 // 1. if a thread does a low-level wait on the CMS lock, then it
 102 //    relinquishes the CMS token if it were holding that token
 103 //    when it acquired the low-level CMS lock.
 104 // 2. any low-level notifications on the low-level lock
 105 //    should only be sent when a thread has relinquished the token.
 106 //
 107 // In the absence of either property, we'd have potential deadlock.
 108 //
 109 // We protect each of the CMS (concurrent and sequential) phases
 110 // with the CMS _token_, not the CMS _lock_.
 111 //
 112 // The only code protected by CMS lock is the token acquisition code
 113 // itself, see ConcurrentMarkSweepThread::[de]synchronize(), and the
 114 // baton-passing code.
 115 //
 116 // Unfortunately, I couldn't come up with a good abstraction to factor and
 117 // hide the naked CGC_lock manipulation in the baton-passing code
 118 // further below. That's something we should try to do. Also, the proof
 119 // of correctness of this 2-level locking scheme is far from obvious,
 120 // and potentially quite slippery. We have an uneasy suspicion, for instance,
 121 // that there may be a theoretical possibility of delay/starvation in the
 122 // low-level lock/wait/notify scheme used for the baton-passing because of
 123 // potential interference with the priority scheme embodied in the
 124 // CMS-token-passing protocol. See related comments at a CGC_lock->wait()
 125 // invocation further below and marked with "XXX 20011219YSR".
 126 // Indeed, as we note elsewhere, this may become yet more slippery
 127 // in the presence of multiple CMS and/or multiple VM threads. XXX
 128 
 129 class CMSTokenSync: public StackObj {
 130  private:
 131   bool _is_cms_thread;
 132  public:
 133   CMSTokenSync(bool is_cms_thread):
 134     _is_cms_thread(is_cms_thread) {
 135     assert(is_cms_thread == Thread::current()->is_ConcurrentGC_thread(),
 136            "Incorrect argument to constructor");
 137     ConcurrentMarkSweepThread::synchronize(_is_cms_thread);
 138   }
 139 
 140   ~CMSTokenSync() {
 141     assert(_is_cms_thread ?
 142              ConcurrentMarkSweepThread::cms_thread_has_cms_token() :
 143              ConcurrentMarkSweepThread::vm_thread_has_cms_token(),


 242     if (_par_gc_thread_states == NULL) {
 243       vm_exit_during_initialization("Could not allocate par gc structs");
 244     }
 245     for (uint i = 0; i < ParallelGCThreads; i++) {
 246       _par_gc_thread_states[i] = new CMSParGCThreadState(cmsSpace());
 247       if (_par_gc_thread_states[i] == NULL) {
 248         vm_exit_during_initialization("Could not allocate par gc structs");
 249       }
 250     }
 251   } else {
 252     _par_gc_thread_states = NULL;
 253   }
 254   _incremental_collection_failed = false;
 255   // The "dilatation_factor" is the expansion that can occur on
 256   // account of the fact that the minimum object size in the CMS
 257   // generation may be larger than that in, say, a contiguous young
 258   //  generation.
 259   // Ideally, in the calculation below, we'd compute the dilatation
 260   // factor as: MinChunkSize/(promoting_gen's min object size)
 261   // Since we do not have such a general query interface for the
 262   // promoting generation, we'll instead just use the minimum
 263   // object size (which today is a header's worth of space);
 264   // note that all arithmetic is in units of HeapWords.
 265   assert(MinChunkSize >= CollectedHeap::min_fill_size(), "just checking");
 266   assert(_dilatation_factor >= 1.0, "from previous assert");
 267 }
 268 
 269 
 270 // The field "_initiating_occupancy" represents the occupancy percentage
 271 // at which we trigger a new collection cycle.  Unless explicitly specified
 272 // via CMSInitiatingOccupancyFraction (argument "io" below), it
 273 // is calculated by:
 274 //
 275 //   Let "f" be MinHeapFreeRatio in
 276 //
 277 //    _initiating_occupancy = 100-f +
 278 //                           f * (CMSTriggerRatio/100)
 279 //   where CMSTriggerRatio is the argument "tr" below.
 280 //
 281 // That is, if we assume the heap is at its desired maximum occupancy at the
 282 // end of a collection, we let CMSTriggerRatio of the (purported) free
 283 // space be allocated before initiating a new collection cycle.
 284 //
 285 void ConcurrentMarkSweepGeneration::init_initiating_occupancy(intx io, uintx tr) {
 286   assert(io <= 100 && tr <= 100, "Check the arguments");
 287   if (io >= 0) {
 288     _initiating_occupancy = (double)io / 100.0;
 289   } else {
 290     _initiating_occupancy = ((100 - MinHeapFreeRatio) +
 291                              (double)(tr * MinHeapFreeRatio) / 100.0)
 292                             / 100.0;
 293   }
 294 }
 295 
 296 void ConcurrentMarkSweepGeneration::ref_processor_init() {
 297   assert(collector() != NULL, "no collector");


2654       ConcurrentMarkSweepThread::CMS_cms_wants_token);
2655   }
2656   if (TraceCMSState) {
2657     gclog_or_tty->print_cr("CMS Thread " INTPTR_FORMAT " continuing at CMS state %d",
2658       Thread::current(), _collectorState);
2659   }
2660   return res;
2661 }
2662 
2663 // Because of the need to lock the free lists and other structures in
2664 // the collector, common to all the generations that the collector is
2665 // collecting, we need the gc_prologues of individual CMS generations
2666 // delegate to their collector. It may have been simpler had the
2667 // current infrastructure allowed one to call a prologue on a
2668 // collector. In the absence of that we have the generation's
2669 // prologue delegate to the collector, which delegates back
2670 // some "local" work to a worker method in the individual generations
2671 // that it's responsible for collecting, while itself doing any
2672 // work common to all generations it's responsible for. A similar
2673 // comment applies to the  gc_epilogue()'s.
2674 // The role of the variable _between_prologue_and_epilogue is to
2675 // enforce the invocation protocol.
2676 void CMSCollector::gc_prologue(bool full) {
2677   // Call gc_prologue_work() for the CMSGen
2678   // we are responsible for.
2679 
2680   // The following locking discipline assumes that we are only called
2681   // when the world is stopped.
2682   assert(SafepointSynchronize::is_at_safepoint(), "world is stopped assumption");
2683 
2684   // The CMSCollector prologue must call the gc_prologues for the
2685   // "generations" that it's responsible
2686   // for.
2687 
2688   assert(   Thread::current()->is_VM_thread()
2689          || (   CMSScavengeBeforeRemark
2690              && Thread::current()->is_ConcurrentGC_thread()),
2691          "Incorrect thread type for prologue execution");
2692 
2693   if (_between_prologue_and_epilogue) {
2694     // We have already been invoked; this is a gc_prologue delegation


2861 }
2862 
2863 #ifndef PRODUCT
2864 bool CMSCollector::have_cms_token() {
2865   Thread* thr = Thread::current();
2866   if (thr->is_VM_thread()) {
2867     return ConcurrentMarkSweepThread::vm_thread_has_cms_token();
2868   } else if (thr->is_ConcurrentGC_thread()) {
2869     return ConcurrentMarkSweepThread::cms_thread_has_cms_token();
2870   } else if (thr->is_GC_task_thread()) {
2871     return ConcurrentMarkSweepThread::vm_thread_has_cms_token() &&
2872            ParGCRareEvent_lock->owned_by_self();
2873   }
2874   return false;
2875 }
2876 #endif
2877 
2878 // Check reachability of the given heap address in CMS generation,
2879 // treating all other generations as roots.
2880 bool CMSCollector::is_cms_reachable(HeapWord* addr) {
2881   // We could "guarantee" below, rather than assert, but I'll
2882   // leave these as "asserts" so that an adventurous debugger
2883   // could try this in the product build provided some subset of
2884   // the conditions were met, provided they were interested in the
2885   // results and knew that the computation below wouldn't interfere
2886   // with other concurrent computations mutating the structures
2887   // being read or written.
2888   assert(SafepointSynchronize::is_at_safepoint(),
2889          "Else mutations in object graph will make answer suspect");
2890   assert(have_cms_token(), "Should hold cms token");
2891   assert(haveFreelistLocks(), "must hold free list locks");
2892   assert_lock_strong(bitMapLock());
2893 
2894   // Clear the marking bit map array before starting, but, just
2895   // for kicks, first report if the given address is already marked
2896   gclog_or_tty->print_cr("Start: Address 0x%x is%s marked", addr,
2897                 _markBitMap.isMarked(addr) ? "" : " not");
2898 
2899   if (verify_after_remark()) {
2900     MutexLockerEx x(verification_mark_bm()->lock(), Mutex::_no_safepoint_check_flag);
2901     bool result = verification_mark_bm()->isMarked(addr);
2902     gclog_or_tty->print_cr("TransitiveMark: Address 0x%x %s marked", addr,
2903                            result ? "IS" : "is NOT");
2904     return result;


2965   assert(_collectorState > Marking && _collectorState <= Sweeping,
2966          "Else marking info checked here may be obsolete");
2967   assert(haveFreelistLocks(), "must hold free list locks");
2968   assert_lock_strong(bitMapLock());
2969 
2970 
2971   // Allocate marking bit map if not already allocated
2972   if (!init) { // first time
2973     if (!verification_mark_bm()->allocate(_span)) {
2974       return false;
2975     }
2976     init = true;
2977   }
2978 
2979   assert(verification_mark_stack()->isEmpty(), "Should be empty");
2980 
2981   // Turn off refs discovery -- so we will be tracing through refs.
2982   // This is as intended, because by this time
2983   // GC must already have cleared any refs that need to be cleared,
2984   // and traced those that need to be marked; moreover,
2985   // the marking done here is not going to interfere in any
2986   // way with the marking information used by GC.
2987   NoRefDiscovery no_discovery(ref_processor());
2988 
2989   COMPILER2_PRESENT(DerivedPointerTableDeactivate dpt_deact;)
2990 
2991   // Clear any marks from a previous round
2992   verification_mark_bm()->clear_all();
2993   assert(verification_mark_stack()->isEmpty(), "markStack should be empty");
2994   verify_work_stacks_empty();
2995 
2996   GenCollectedHeap* gch = GenCollectedHeap::heap();
2997   gch->ensure_parsability(false);  // fill TLABs, but no need to retire them
2998   // Update the saved marks which may affect the root scans.
2999   gch->save_marks();
3000 
3001   if (CMSRemarkVerifyVariant == 1) {
3002     // In this first variant of verification, we complete
3003     // all marking, then check if the new marks-vector is
3004     // a subset of the CMS marks-vector.
3005     verify_after_remark_work_1();
3006   } else if (CMSRemarkVerifyVariant == 2) {
3007     // In this second variant of verification, we flag an error
3008     // (i.e. an object reachable in the new marks-vector not reachable
3009     // in the CMS marks-vector) immediately, also indicating the
3010   // identity of an object (A) that references the unmarked object (B) --
3011     // presumably, a mutation to A failed to be picked up by preclean/remark?
3012     verify_after_remark_work_2();
3013   } else {
3014     warning("Unrecognized value %d for CMSRemarkVerifyVariant",
3015             CMSRemarkVerifyVariant);
3016   }
3017   if (!silent) gclog_or_tty->print(" done] ");
3018   return true;
3019 }
3020 
3021 void CMSCollector::verify_after_remark_work_1() {
3022   ResourceMark rm;
3023   HandleMark  hm;


3384     }
3385   }
3386 }
3387 
3388 HeapWord* ConcurrentMarkSweepGeneration::expand_and_par_lab_allocate(CMSParGCThreadState* ps, size_t word_sz) {
3389   HeapWord* res = NULL;
3390   MutexLocker x(ParGCRareEvent_lock);
3391   while (true) {
3392     // Expansion by some other thread might make alloc OK now:
3393     res = ps->lab.alloc(word_sz);
3394     if (res != NULL) return res;
3395     // If there's not enough expansion space available, give up.
3396     if (_virtual_space.uncommitted_size() < (word_sz * HeapWordSize)) {
3397       return NULL;
3398     }
3399     // Otherwise, we try expansion.
3400     expand(word_sz*HeapWordSize, MinHeapDeltaBytes,
3401       CMSExpansionCause::_allocate_par_lab);
3402     // Now go around the loop and try alloc again;
3403     // A competing par_promote might beat us to the expansion space,
3404     // so we may go around the loop again if promotion fails again.
3405     if (GCExpandToAllocateDelayMillis > 0) {
3406       os::sleep(Thread::current(), GCExpandToAllocateDelayMillis, false);
3407     }
3408   }
3409 }
3410 
3411 
3412 bool ConcurrentMarkSweepGeneration::expand_and_ensure_spooling_space(
3413   PromotionInfo* promo) {
3414   MutexLocker x(ParGCRareEvent_lock);
3415   size_t refill_size_bytes = promo->refillSize() * HeapWordSize;
3416   while (true) {
3417     // Expansion by some other thread might make alloc OK now:
3418     if (promo->ensure_spooling_space()) {
3419       assert(promo->has_spooling_space(),
3420              "Post-condition of successful ensure_spooling_space()");
3421       return true;
3422     }
3423     // If there's not enough expansion space available, give up.
3424     if (_virtual_space.uncommitted_size() < refill_size_bytes) {


4356   // It is possible for whichever thread initiated the yield request
4357   // not to get a chance to wake up and take the bitmap lock between
4358   // this thread releasing it and reacquiring it. So, while the
4359   // should_yield() flag is on, let's sleep for a bit to give the
4360   // other thread a chance to wake up. The limit imposed on the number
4361   // of iterations is defensive, to avoid any unforeseen circumstances
4362   // putting us into an infinite loop. Since it's always been this
4363   // (coordinator_yield()) method that was observed to cause the
4364   // problem, we are using a parameter (CMSCoordinatorYieldSleepCount)
4365   // which is by default non-zero. For the other seven methods that
4366   // also perform the yield operation, we are using a different
4367   // parameter (CMSYieldSleepCount) which is by default zero. This way we
4368   // can enable the sleeping for those methods too, if necessary.
4369   // See 6442774.
4370   //
4371   // We really need to reconsider the synchronization between the GC
4372   // thread and the yield-requesting threads in the future and we
4373   // should really use wait/notify, which is the recommended
4374   // way of doing this type of interaction. Additionally, we should
4375   // consolidate the eight methods that do the yield operation, which
4376   // are almost identical, into one for better maintainability and
4377   // readability. See 6445193.
4378   //
4379   // Tony 2006.06.29
4380   for (unsigned i = 0; i < CMSCoordinatorYieldSleepCount &&
4381                    ConcurrentMarkSweepThread::should_yield() &&
4382                    !CMSCollector::foregroundGCIsActive(); ++i) {
4383     os::sleep(Thread::current(), 1, false);
4384     ConcurrentMarkSweepThread::acknowledge_yield_request();
4385   }
4386 
4387   ConcurrentMarkSweepThread::synchronize(true);
4388   _bit_map_lock->lock_without_safepoint_check();
4389   _collector->startTimer();
4390 }
4391 
4392 bool CMSCollector::do_marking_mt(bool asynch) {
4393   assert(ConcGCThreads > 0 && conc_workers() != NULL, "precondition");
4394   int num_workers = AdaptiveSizePolicy::calc_active_conc_workers(
4395                                        conc_workers()->total_workers(),
4396                                        conc_workers()->active_workers(),


4524   if (CMSPrecleaningEnabled) {
4525     sample_eden();
4526     _collectorState = AbortablePreclean;
4527   } else {
4528     _collectorState = FinalMarking;
4529   }
4530   verify_work_stacks_empty();
4531   verify_overflow_empty();
4532 }
4533 
4534 // Try and schedule the remark such that young gen
4535 // occupancy is CMSScheduleRemarkEdenPenetration %.
4536 void CMSCollector::abortable_preclean() {
4537   check_correct_thread_executing();
4538   assert(CMSPrecleaningEnabled,  "Inconsistent control state");
4539   assert(_collectorState == AbortablePreclean, "Inconsistent control state");
4540 
4541   // If Eden's current occupancy is below this threshold,
4542   // immediately schedule the remark; else preclean
4543   // past the next scavenge in an effort to
4544   // schedule the pause as described above. By choosing
4545   // CMSScheduleRemarkEdenSizeThreshold >= max eden size
4546   // we will never do an actual abortable preclean cycle.
4547   if (get_eden_used() > CMSScheduleRemarkEdenSizeThreshold) {
4548     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
4549     CMSPhaseAccounting pa(this, "abortable-preclean", !PrintGCDetails);
4550     // We need more smarts in the abortable preclean
4551     // loop below to deal with cases where allocation
4552     // in young gen is very very slow, and our precleaning
4553     // is running a losing race against a horde of
4554     // mutators intent on flooding us with CMS updates
4555     // (dirty cards).
4556     // One, admittedly dumb, strategy is to give up
4557     // after a certain number of abortable precleaning loops
4558     // or after a certain maximum time. We want to make
4559     // this smarter in the next iteration.
4560     // XXX FIX ME!!! YSR
4561     size_t loops = 0, workdone = 0, cumworkdone = 0, waited = 0;
4562     while (!(should_abort_preclean() ||
4563              ConcurrentMarkSweepThread::should_terminate())) {
4564       workdone = preclean_work(CMSPrecleanRefLists2, CMSPrecleanSurvivors2);


5520 }
5521 
5522 void
5523 CMSParRemarkTask::do_dirty_card_rescan_tasks(
5524   CompactibleFreeListSpace* sp, int i,
5525   Par_MarkRefsIntoAndScanClosure* cl) {
5526   // Until all tasks completed:
5527   // . claim an unclaimed task
5528   // . compute region boundaries corresponding to task claimed
5529   // . transfer dirty bits ct->mut for that region
5530   // . apply rescanclosure to dirty mut bits for that region
5531 
5532   ResourceMark rm;
5533   HandleMark   hm;
5534 
5535   OopTaskQueue* work_q = work_queue(i);
5536   ModUnionClosure modUnionClosure(&(_collector->_modUnionTable));
5537   // CAUTION! CAUTION! CAUTION! CAUTION! CAUTION! CAUTION! CAUTION!
5538   // CAUTION: This closure has state that persists across calls to
5539   // the work method dirty_range_iterate_clear() in that it has
5540   // embedded in it a (subtype of) UpwardsObjectClosure. The
5541   // use of that state in the embedded UpwardsObjectClosure instance
5542   // assumes that the cards are always iterated (even if in parallel
5543   // by several threads) in monotonically increasing order per each
5544   // thread. This is true of the implementation below which picks
5545   // card ranges (chunks) in monotonically increasing order globally
5546   // and, a-fortiori, in monotonically increasing order per thread
5547   // (the latter order being a subsequence of the former).
5548   // If the work code below is ever reorganized into a more chaotic
5549   // work-partitioning form than the current "sequential tasks"
5550   // paradigm, the use of that persistent state will have to be
5551   // revisited and modified appropriately. See also related
5552   // bug 4756801 work on which should examine this code to make
5553   // sure that the changes there do not run counter to the
5554   // assumptions made here and necessary for correctness and
5555   // efficiency. Note also that this code might yield inefficient
5556   // behavior in the case of very large objects that span one or
5557   // more work chunks. Such objects would potentially be scanned
5558   // several times redundantly. Work on 4756801 should try and
5559   // address that performance anomaly if at all possible. XXX
5560   MemRegion  full_span  = _collector->_span;
5561   CMSBitMap* bm    = &(_collector->_markBitMap);     // shared
5562   MarkFromDirtyCardsClosure
5563     greyRescanClosure(_collector, full_span, // entire span of interest
5564                       sp, bm, work_q, cl);
5565 
5566   SequentialSubTasksDone* pst = sp->conc_par_seq_tasks();
5567   assert(pst->valid(), "Uninitialized use?");
5568   uint nth_task = 0;
5569   const int alignment = CardTableModRefBS::card_size * BitsPerWord;
5570   MemRegion span = sp->used_region();
5571   HeapWord* start_addr = span.start();
5572   HeapWord* end_addr = (HeapWord*)round_to((intptr_t)span.end(),
5573                                            alignment);
5574   const size_t chunk_size = sp->rescan_task_size(); // in HeapWord units
5575   assert((HeapWord*)round_to((intptr_t)start_addr, alignment) ==
5576          start_addr, "Check alignment");
5577   assert((size_t)round_to((intptr_t)chunk_size, alignment) ==
5578          chunk_size, "Check alignment");
5579 
5580   while (!pst->is_task_claimed(/* reference */ nth_task)) {
5581     // Having claimed the nth_task, compute corresponding mem-region,
5582     // which is a-fortiori aligned correctly (i.e. at a MUT boundary).
5583     // The alignment restriction ensures that we do not need any
5584     // synchronization with other gang-workers while setting or
5585     // clearing bits in this chunk of the MUT.
5586     MemRegion this_span = MemRegion(start_addr + nth_task*chunk_size,
5587                                     start_addr + (nth_task+1)*chunk_size);
5588     // The last chunk's end might be way beyond end of the
5589     // used region. In that case pull back appropriately.
5590     if (this_span.end() > end_addr) {
5591       this_span.set_end(end_addr);
5592       assert(!this_span.is_empty(), "Program logic (calculation of n_tasks)");
5593     }
5594     // Iterate over the dirty cards covering this chunk, marking them
5595     // precleaned, and setting the corresponding bits in the mod union
5596     // table. Since we have been careful to partition at Card and MUT-word
5597     // boundaries no synchronization is needed between parallel threads.
5598     _collector->_ct->ct_bs()->dirty_card_iterate(this_span,
5599                                                  &modUnionClosure);
5600 
5601     // Having transferred these marks into the modUnionTable,
5602     // rescan the marked objects on the dirty cards in the modUnionTable.
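The partitioning used above can be illustrated with a small standalone sketch (not HotSpot code): workers atomically claim increasing task indices and map each index to an aligned chunk of the span, clamping the final chunk to the end of the used region, so every worker visits its chunks in monotonically increasing address order. Addresses are modelled here as plain word indices.

#include <atomic>
#include <cstddef>
#include <utility>

struct ChunkClaimer {
  std::atomic<size_t> next_task{0};

  // Claim the next task and return its half-open word range [lo, hi);
  // returns an empty range once the whole span has been handed out.
  std::pair<size_t, size_t> claim(size_t start_word, size_t end_word,
                                  size_t chunk_words) {
    const size_t n = next_task.fetch_add(1, std::memory_order_relaxed);
    size_t lo = start_word + n * chunk_words;
    if (lo >= end_word) {
      return std::make_pair(end_word, end_word);   // nothing left to claim
    }
    size_t hi = lo + chunk_words;
    if (hi > end_word) {
      hi = end_word;                               // pull back the last chunk
    }
    return std::make_pair(lo, hi);
  }
};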


6354     // already have needed locks
6355     sweepWork(_cmsGen,  asynch);
6356     // Update heap occupancy information which is used as
6357     // input to soft ref clearing policy at the next gc.
6358     Universe::update_heap_info_at_gc();
6359     _collectorState = Resizing;
6360   }
6361   verify_work_stacks_empty();
6362   verify_overflow_empty();
6363 
6364   if (should_unload_classes()) {
6365     ClassLoaderDataGraph::purge();
6366   }
6367 
6368   _intra_sweep_timer.stop();
6369   _intra_sweep_estimate.sample(_intra_sweep_timer.seconds());
6370 
6371   _inter_sweep_timer.reset();
6372   _inter_sweep_timer.start();
6373 
6374   // We need to use a monotonically non-decreasing time in ms,
6375   // or we will see time-warp warnings; os::javaTimeMillis()
6376   // does not guarantee monotonicity.
6377   jlong now = os::javaTimeNanos() / NANOSECS_PER_MILLISEC;
6378   update_time_of_last_gc(now);
6379 
6380   // NOTE on abstract state transitions:
6381   // Mutators allocate-live and/or mark the mod-union table dirty
6382   // based on the state of the collection.  The former is done in
6383   // the interval [Marking, Sweeping] and the latter in the interval
6384   // [Marking, Sweeping).  Thus the transitions into the Marking state
6385   // and out of the Sweeping state must be synchronously visible
6386   // globally to the mutators.
6387   // The transition into the Marking state happens with the world
6388   // stopped so the mutators will globally see it.  Sweeping is
6389   // done asynchronously by the background collector so the transition
6390   // from the Sweeping state to the Resizing state must be done
6391   // under the freelistLock (as is the check for whether to
6392   // allocate-live and whether to dirty the mod-union table).
6393   assert(_collectorState == Resizing, "Change of collector state to"
6394     " Resizing must be done under the freelistLocks (plural)");
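The time-stamping caveat a few lines above can be sketched outside HotSpot by deriving the millisecond stamp from a monotonic nanosecond clock, with std::chrono::steady_clock standing in for os::javaTimeNanos(); this only illustrates the idea, it is not the VM's implementation:

#include <chrono>
#include <cstdint>

// Monotonic, non-decreasing time in ms; a wall-clock source could jump
// backwards and trigger the "time warp" warnings mentioned above.
int64_t monotonic_millis() {
  using namespace std::chrono;
  return duration_cast<milliseconds>(
           steady_clock::now().time_since_epoch()).count();
}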


6715 // bit vector itself. That is done by a separate call CMSBitMap::allocate()
6716 // further below.
6717 CMSBitMap::CMSBitMap(int shifter, int mutex_rank, const char* mutex_name):
6718   _bm(),
6719   _shifter(shifter),
6720   _lock(mutex_rank >= 0 ? new Mutex(mutex_rank, mutex_name, true) : NULL)
6721 {
6722   _bmStartWord = 0;
6723   _bmWordSize  = 0;
6724 }
6725 
6726 bool CMSBitMap::allocate(MemRegion mr) {
6727   _bmStartWord = mr.start();
6728   _bmWordSize  = mr.word_size();
6729   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
6730                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
6731   if (!brs.is_reserved()) {
6732     warning("CMS bit map allocation failure");
6733     return false;
6734   }
6735   // For now we'll just commit all of the bit map up front.
6736   // Later on we'll try to be more parsimonious with swap.
6737   if (!_virtual_space.initialize(brs, brs.size())) {
6738     warning("CMS bit map backing store failure");
6739     return false;
6740   }
6741   assert(_virtual_space.committed_size() == brs.size(),
6742          "didn't reserve backing store for all of CMS bit map?");
6743   _bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
6744   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
6745          _bmWordSize, "inconsistency in bit map sizing");
6746   _bm.set_size(_bmWordSize >> _shifter);
6747 
6748   // bm.clear(); // can we rely on getting zero'd memory? verify below
6749   assert(isAllClear(),
6750          "Expected zero'd memory from ReservedSpace constructor");
6751   assert(_bm.size() == heapWordDiffToOffsetDiff(sizeInWords()),
6752          "consistency check");
6753   return true;
6754 }
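The sizing arithmetic in CMSBitMap::allocate() reduces to: one bit per (1 << shifter) heap words, hence (word_size >> shifter) bits and roughly (word_size >> (shifter + LogBitsPerByte)) bytes of backing store. A standalone sketch of just that arithmetic (not HotSpot code; LogBitsPerByte hard-coded to 3):

#include <cstddef>

struct BitMapSizing {
  size_t bits;    // number of bits the map needs
  size_t bytes;   // bytes of backing store to reserve
};

BitMapSizing cms_bitmap_sizing(size_t word_size, int shifter) {
  const int log_bits_per_byte = 3;   // 8 bits per byte
  BitMapSizing s;
  s.bits  = word_size >> shifter;
  s.bytes = (word_size >> (shifter + log_bits_per_byte)) + 1;  // +1 absorbs rounding
  return s;
}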
6755 


6822                    size * sizeof(oop)));
6823   if (!rs.is_reserved()) {
6824     warning("CMSMarkStack allocation failure");
6825     return false;
6826   }
6827   if (!_virtual_space.initialize(rs, rs.size())) {
6828     warning("CMSMarkStack backing store failure");
6829     return false;
6830   }
6831   assert(_virtual_space.committed_size() == rs.size(),
6832          "didn't reserve backing store for all of CMS stack?");
6833   _base = (oop*)(_virtual_space.low());
6834   _index = 0;
6835   _capacity = size;
6836   NOT_PRODUCT(_max_depth = 0);
6837   return true;
6838 }
6839 
6840 // XXX FIX ME !!! In the MT case we come in here holding a
6841 // leaf lock. For printing we need to take a further lock
6842 // which has lower rank. We need to recalibrate the two
6843 // lock-ranks involved in order to be able to print the
6844 // messages below. (Or defer the printing to the caller.
6845 // For now we take the expedient path of just disabling the
6846 // messages for the problematic case.)
6847 void CMSMarkStack::expand() {
6848   assert(_capacity <= MarkStackSizeMax, "stack bigger than permitted");
6849   if (_capacity == MarkStackSizeMax) {
6850     if (_hit_limit++ == 0 && !CMSConcurrentMTEnabled && PrintGCDetails) {
6851       // We print a warning message only once per CMS cycle.
6852       gclog_or_tty->print_cr(" (benign) Hit CMSMarkStack max size limit");
6853     }
6854     return;
6855   }
6856   // Double capacity if possible
6857   size_t new_capacity = MIN2(_capacity*2, MarkStackSizeMax);
6858   // Do not give up existing stack until we have managed to
6859   // get the double capacity that we desired.
6860   ReservedSpace rs(ReservedSpace::allocation_align_size_up(
6861                    new_capacity * sizeof(oop)));
6862   if (rs.is_reserved()) {
6863     // Release the backing store associated with old stack
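A standalone sketch (not HotSpot code) of the expansion policy used here: double the capacity up to a hard cap, and keep the old storage until the larger allocation has actually succeeded, so a failed expansion never loses the existing stack.

#include <algorithm>
#include <cstddef>
#include <new>

template <typename T>
bool try_expand(T*& base, size_t& capacity, size_t index, size_t capacity_max) {
  if (capacity >= capacity_max) {
    return false;                                   // already at the limit
  }
  const size_t new_capacity = std::min(capacity * 2, capacity_max);
  T* new_base = new (std::nothrow) T[new_capacity];
  if (new_base == nullptr) {
    return false;                                   // keep the old stack intact
  }
  std::copy(base, base + index, new_base);          // carry over the live entries
  delete[] base;
  base = new_base;
  capacity = new_capacity;
  return true;
}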


7163         } else {
7164           // A non-array may have been imprecisely marked; we need
7165           // to scan object in its entirety.
7166           size = CompactibleFreeListSpace::adjustObjectSize(
7167                    p->oop_iterate(_scanningClosure));
7168         }
7169         #ifdef ASSERT
7170           size_t direct_size =
7171             CompactibleFreeListSpace::adjustObjectSize(p->size());
7172           assert(size == direct_size, "Inconsistency in size");
7173           assert(size >= 3, "Necessary for Printezis marks to work");
7174           if (!_bitMap->isMarked(addr+1)) {
7175             _bitMap->verifyNoOneBitsInRange(addr+2, addr+size);
7176           } else {
7177             _bitMap->verifyNoOneBitsInRange(addr+2, addr+size-1);
7178             assert(_bitMap->isMarked(addr+size-1),
7179                    "inconsistent Printezis mark");
7180           }
7181         #endif // ASSERT
7182     } else {
7183       // An uninitialized object.
7184       assert(_bitMap->isMarked(addr+1), "missing Printezis mark?");
7185       HeapWord* nextOneAddr = _bitMap->getNextMarkedWordAddress(addr + 2);
7186       size = pointer_delta(nextOneAddr + 1, addr);
7187       assert(size == CompactibleFreeListSpace::adjustObjectSize(size),
7188              "alignment problem");
7189       // Note that pre-cleaning needn't redirty the card. OopDesc::set_klass()
7190       // will dirty the card when the klass pointer is installed in the
7191       // object (signaling the completion of initialization).
7192     }
7193   } else {
7194     // Either a not yet marked object or an uninitialized object
7195     if (p->klass_or_null() == NULL) {
7196       // An uninitialized object, skip to the next card, since
7197       // we may not be able to read its P-bits yet.
7198       assert(size == 0, "Initial value");
7199     } else {
7200       // An object not (yet) reached by marking: we merely need to
7201       // compute its size so as to go look at the next block.
7202       assert(p->is_oop(true), "should be an oop");
7203       size = CompactibleFreeListSpace::adjustObjectSize(p->size());
7204     }
7205   }
7206   DEBUG_ONLY(_collector->verify_work_stacks_empty();)
7207   return size;
7208 }
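The Printezis-mark convention checked by the asserts above can be sketched on its own (not HotSpot code): two bits are set, at addr+1 and at addr+size-1, so the size of a not-yet-initialized object can be recovered from the mark bit map alone by finding the next set bit at or after addr+2. Addresses are modelled as word indices into a std::vector<bool>.

#include <cassert>
#include <cstddef>
#include <vector>

// Record the two Printezis marks for a block [addr, addr + size), size >= 3.
void set_printezis_marks(std::vector<bool>& bits, size_t addr, size_t size) {
  assert(size >= 3);
  bits[addr + 1] = true;             // "grey but not yet initialized"
  bits[addr + size - 1] = true;      // marks the last word of the block
}

// Recover the block size from the bit map (cf. getNextMarkedWordAddress above).
size_t printezis_block_size(const std::vector<bool>& bits, size_t addr) {
  assert(bits[addr + 1] && "missing Printezis mark?");
  size_t next = addr + 2;
  while (next < bits.size() && !bits[next]) {
    ++next;                          // scan for the next marked word
  }
  return next + 1 - addr;            // 'next' is the block's last word
}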
7209 
7210 void ScanMarkedObjectsAgainCarefullyClosure::do_yield_work() {
7211   assert(ConcurrentMarkSweepThread::cms_thread_has_cms_token(),


7982   if (_span.contains(addr) && !_bit_map->isMarked(addr)) {
7983     // a white object ...
7984     _bit_map->mark(addr);         // ... now grey
7985     // push on the marking stack (grey set)
7986     bool simulate_overflow = false;
7987     NOT_PRODUCT(
7988       if (CMSMarkStackOverflowALot &&
7989           _collector->simulate_overflow()) {
7990         // simulate a stack overflow
7991         simulate_overflow = true;
7992       }
7993     )
7994     if (simulate_overflow || !_mark_stack->push(obj)) {
7995       if (_concurrent_precleaning) {
7996          // During precleaning we can just dirty the appropriate card(s)
7997          // in the mod union table, thus ensuring that the object remains
7998          // in the grey set, and continue. In the case of object arrays
7999          // we need to dirty all of the cards that the object spans,
8000          // since the rescan of object arrays will be limited to the
8001          // dirty cards.
8002          // Note that no one can be interfering with us in this action
8003          // of dirtying the mod union table, so no locking or atomics
8004          // are required.
8005          if (obj->is_objArray()) {
8006            size_t sz = obj->size();
8007            HeapWord* end_card_addr = (HeapWord*)round_to(
8008                                         (intptr_t)(addr+sz), CardTableModRefBS::card_size);
8009            MemRegion redirty_range = MemRegion(addr, end_card_addr);
8010            assert(!redirty_range.is_empty(), "Arithmetical tautology");
8011            _mod_union_table->mark_range(redirty_range);
8012          } else {
8013            _mod_union_table->mark(addr);
8014          }
8015          _collector->_ser_pmc_preclean_ovflw++;
8016       } else {
8017          // During the remark phase, we need to remember this oop
8018          // in the overflow list.
8019          _collector->push_on_overflow_list(obj);
8020          _collector->_ser_pmc_remark_ovflw++;
8021       }
8022     }
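The redirtying arithmetic above amounts to rounding the array's end address up to a card boundary so that every card the array spans is dirtied. A standalone sketch under simplified assumptions (addresses and card size in word units, card size a power of two; not HotSpot code):

#include <cassert>
#include <cstddef>
#include <cstdint>

struct WordRange { uintptr_t start; uintptr_t end; };   // half-open [start, end)

uintptr_t round_up_to_card(uintptr_t addr, uintptr_t card_size_words) {
  return (addr + card_size_words - 1) & ~(card_size_words - 1);
}

// The range of words whose cards must be redirtied for an object array at
// obj_addr spanning obj_size_words words.
WordRange redirty_range_for(uintptr_t obj_addr, size_t obj_size_words,
                            uintptr_t card_size_words) {
  const uintptr_t end =
      round_up_to_card(obj_addr + obj_size_words, card_size_words);
  assert(end > obj_addr && "arithmetical tautology: range is never empty");
  WordRange r = { obj_addr, end };
  return r;
}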


9008       assert(_span.contains((HeapWord*)new_oop), "Out of bounds oop");
9009       // iterate over the oops in this oop, marking and pushing
9010       // the ones in CMS heap (i.e. in _span).
9011       new_oop->oop_iterate(&_mark_and_push);
9012     }
9013   }
9014 }
9015 
9016 ////////////////////////////////////////////////////////////////////
9017 // Support for Marking Stack Overflow list handling and related code
9018 ////////////////////////////////////////////////////////////////////
9019 // Much of the following code is similar in shape and spirit to the
9020 // code used in ParNewGC. We should try and share that code
9021 // as much as possible in the future.
9022 
9023 #ifndef PRODUCT
9024 // Debugging support for CMSStackOverflowALot
9025 
9026 // It's OK to call this multi-threaded;  the worst thing
9027 // that can happen is that we'll get a bunch of closely
9028 // spaced simulated overflows, but that's OK, in fact
9029 // probably good as it would exercise the overflow code
9030 // under contention.
9031 bool CMSCollector::simulate_overflow() {
9032   if (_overflow_counter-- <= 0) { // just being defensive
9033     _overflow_counter = CMSMarkStackOverflowInterval;
9034     return true;
9035   } else {
9036     return false;
9037   }
9038 }
9039 
9040 bool CMSCollector::par_simulate_overflow() {
9041   return simulate_overflow();
9042 }
9043 #endif
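A standalone sketch (not HotSpot code) of the same stress-testing idea: decrement a shared counter on every call and report a simulated overflow once it runs out, then reset it. An atomic counter keeps the sketch free of data races, although, as the comment above notes, occasional races would only make the simulated overflows more frequent, which is harmless.

#include <atomic>

class OverflowSimulator {
  std::atomic<long> _counter;
  const long _interval;
 public:
  explicit OverflowSimulator(long interval)
    : _counter(interval), _interval(interval) {}

  // Returns true roughly once every '_interval' calls.
  bool simulate_overflow() {
    if (_counter.fetch_sub(1, std::memory_order_relaxed) <= 0) {
      _counter.store(_interval, std::memory_order_relaxed);
      return true;
    }
    return false;
  }
};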
9044 
9045 // Single-threaded
9046 bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
9047   assert(stack->isEmpty(), "Expected precondition");
9048   assert(stack->capacity() > num, "Shouldn't bite more than can chew");


9128        // Write back the NULL in case we overwrote it with BUSY above
9129        // and it is still the same value.
9130        (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
9131      }
9132      return false;
9133   }
9134   assert(prefix != NULL && prefix != BUSY, "Error");
9135   size_t i = num;
9136   oop cur = prefix;
9137   // Walk down the first "num" objects, unless we reach the end.
9138   for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
9139   if (cur->mark() == NULL) {
9140     // We have "num" or fewer elements in the list, so there
9141     // is nothing to return to the global list.
9142     // Write back the NULL in lieu of the BUSY we wrote
9143     // above, if it is still the same value.
9144     if (_overflow_list == BUSY) {
9145       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
9146     }
9147   } else {
9148     // Chop off the suffix and return it to the global list.
9149     assert(cur->mark() != BUSY, "Error");
9150     oop suffix_head = cur->mark(); // suffix will be put back on global list
9151     cur->set_mark(NULL);           // break off suffix
9152     // It's possible that the list is still in the empty(busy) state
9153     // we left it in a short while ago; in that case we may be
9154     // able to place back the suffix without incurring the cost
9155     // of a walk down the list.
9156     oop observed_overflow_list = _overflow_list;
9157     oop cur_overflow_list = observed_overflow_list;
9158     bool attached = false;
9159     while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
9160       observed_overflow_list =
9161         (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
9162       if (cur_overflow_list == observed_overflow_list) {
9163         attached = true;
9164         break;
9165       } else cur_overflow_list = observed_overflow_list;
9166     }
9167     if (!attached) {
9168       // Too bad, someone else sneaked in (at least) an element; we'll need

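The fast path of the suffix hand-back above is a single compare-and-swap that succeeds only if the shared list head is still in the empty state it was left in; otherwise the caller falls back to the slower append. A standalone sketch of just that fast path (not HotSpot code), with a plain nullptr standing in for both the NULL and BUSY sentinels:

#include <atomic>

struct Node { Node* next; };

// Try to splice a whole suffix chain back as the new list head. Returns false
// if another thread has pushed something in the meantime (take the slow path).
bool try_splice_suffix(std::atomic<Node*>& head, Node* suffix_head) {
  Node* observed = head.load();
  while (observed == nullptr) {
    // On failure, compare_exchange_weak reloads 'observed' with the real head.
    if (head.compare_exchange_weak(observed, suffix_head)) {
      return true;                  // suffix is back on the global list
    }
  }
  return false;                     // someone sneaked in an element
}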
