--- old/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp 2011-09-14 14:54:04.922412961 -0700 +++ new/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp 2011-09-14 14:54:04.676319935 -0700 @@ -2004,7 +2004,7 @@ ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false); ref_processor()->set_enqueuing_is_done(false); - ref_processor()->enable_discovery(); + ref_processor()->enable_discovery(false /*verify_disabled*/, false /*check_no_refs*/); ref_processor()->setup_policy(clear_all_soft_refs); // If an asynchronous collection finishes, the _modUnionTable is // all clear. If we are assuming the collection from an asynchronous @@ -3490,8 +3490,8 @@ MutexLockerEx x(bitMapLock(), Mutex::_no_safepoint_check_flag); checkpointRootsInitialWork(asynch); - rp->verify_no_references_recorded(); - rp->enable_discovery(); // enable ("weak") refs discovery + // enable ("weak") refs discovery + rp->enable_discovery(true /*verify_disabled*/, true /*check_no_refs*/); _collectorState = Marking; } else { // (Weak) Refs discovery: this is controlled from genCollectedHeap::do_collection @@ -3503,7 +3503,8 @@ "ref discovery for this generation kind"); // already have locks checkpointRootsInitialWork(asynch); - rp->enable_discovery(); // now enable ("weak") refs discovery + // now enable ("weak") refs discovery + rp->enable_discovery(true /*verify_disabled*/, false /*verify_no_refs*/); _collectorState = Marking; } SpecializationStats::print(); --- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp 2011-09-14 14:54:07.174491899 -0700 +++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp 2011-09-14 14:54:06.908830168 -0700 @@ -818,10 +818,10 @@ NoteStartOfMarkHRClosure startcl; g1h->heap_region_iterate(&startcl); - // Start weak-reference discovery. - ReferenceProcessor* rp = g1h->ref_processor(); - rp->verify_no_references_recorded(); - rp->enable_discovery(); // enable ("weak") refs discovery + // Start Concurrent Marking weak-reference discovery. + ReferenceProcessor* rp = g1h->ref_processor_cm(); + // enable ("weak") refs discovery + rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/); rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); @@ -1133,6 +1133,7 @@ // world is stopped at this checkpoint assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); + G1CollectedHeap* g1h = G1CollectedHeap::heap(); // If a full collection has happened, we shouldn't do this. @@ -1837,6 +1838,10 @@ size_t cleaned_up_bytes = start_used_bytes - g1h->used(); g1p->decrease_known_garbage_bytes(cleaned_up_bytes); + // Clean up will have freed any regions completely full of garbage. + // Update the soft reference policy with the new heap occupancy. + Universe::update_heap_info_at_gc(); + // We need to make this be a "collection" so any collection pause that // races with it goes around and waits for completeCleanup to finish. 
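The reworked enable_discovery() calls above now take two boolean arguments. Below is a minimal standalone sketch (not HotSpot code; RefProcModel and its members are invented names) of the contract those flags appear to request, inferred from the code this patch replaces: the old sites called verify_no_references_recorded() immediately before enable_discovery(), and the new flags fold that check, plus a check that discovery is currently off, into the call itself. The CMS full-GC path above passes false/false, presumably because it cannot assume either precondition, while the initial-mark paths pass true for the checks they can rely on.

// Standalone model (not HotSpot code): a minimal reference-processor-like
// class illustrating what the two boolean arguments to enable_discovery()
// appear to verify in this patch. All names here are invented.
#include <cassert>
#include <cstddef>

class RefProcModel {
  bool   _discovery_enabled;
  size_t _discovered_count;   // stand-in for the discovered lists
public:
  RefProcModel() : _discovery_enabled(false), _discovered_count(0) {}

  void enable_discovery(bool verify_disabled, bool check_no_refs) {
    if (verify_disabled) {
      // Caller claims discovery is currently off; check it.
      assert(!_discovery_enabled && "discovery should be disabled");
    }
    if (check_no_refs) {
      // Caller claims the discovered lists are empty; check it.
      assert(_discovered_count == 0 && "no references should be recorded");
    }
    _discovery_enabled = true;
  }

  void disable_discovery() { _discovery_enabled = false; }
  bool discovery_enabled() const { return _discovery_enabled; }
};

int main() {
  RefProcModel rp;
  // Initial-mark style call: verify both preconditions.
  rp.enable_discovery(true /*verify_disabled*/, true /*check_no_refs*/);
  rp.disable_discovery();
  // Full-GC style call: skip the checks (discovery state is unknown here).
  rp.enable_discovery(false /*verify_disabled*/, false /*check_no_refs*/);
  return 0;
}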
g1h->increment_total_collections(); @@ -2072,8 +2077,10 @@ } }; -// Implementation of AbstractRefProcTaskExecutor for G1 -class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor { +// Implementation of AbstractRefProcTaskExecutor for parallel +// reference processing at the end of G1 concurrent marking + +class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { private: G1CollectedHeap* _g1h; ConcurrentMark* _cm; @@ -2082,7 +2089,7 @@ int _active_workers; public: - G1RefProcTaskExecutor(G1CollectedHeap* g1h, + G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, ConcurrentMark* cm, CMBitMap* bitmap, WorkGang* workers, @@ -2096,7 +2103,7 @@ virtual void execute(EnqueueTask& task); }; -class G1RefProcTaskProxy: public AbstractGangTask { +class G1CMRefProcTaskProxy: public AbstractGangTask { typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; ProcessTask& _proc_task; G1CollectedHeap* _g1h; @@ -2104,7 +2111,7 @@ CMBitMap* _bitmap; public: - G1RefProcTaskProxy(ProcessTask& proc_task, + G1CMRefProcTaskProxy(ProcessTask& proc_task, G1CollectedHeap* g1h, ConcurrentMark* cm, CMBitMap* bitmap) : @@ -2122,10 +2129,10 @@ } }; -void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) { +void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { assert(_workers != NULL, "Need parallel worker threads."); - G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap); + G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap); // We need to reset the phase for each task execution so that // the termination protocol of CMTask::do_marking_step works. @@ -2135,12 +2142,12 @@ _g1h->set_par_threads(0); } -class G1RefEnqueueTaskProxy: public AbstractGangTask { +class G1CMRefEnqueueTaskProxy: public AbstractGangTask { typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; EnqueueTask& _enq_task; public: - G1RefEnqueueTaskProxy(EnqueueTask& enq_task) : + G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : AbstractGangTask("Enqueue reference objects in parallel"), _enq_task(enq_task) { } @@ -2150,10 +2157,10 @@ } }; -void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) { +void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { assert(_workers != NULL, "Need parallel worker threads."); - G1RefEnqueueTaskProxy enq_task_proxy(enq_task); + G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); _g1h->set_par_threads(_active_workers); _workers->run_task(&enq_task_proxy); @@ -2164,7 +2171,7 @@ ResourceMark rm; HandleMark hm; G1CollectedHeap* g1h = G1CollectedHeap::heap(); - ReferenceProcessor* rp = g1h->ref_processor(); + ReferenceProcessor* rp = g1h->ref_processor_cm(); // See the comment in G1CollectedHeap::ref_processing_init() // about how reference processing currently works in G1. @@ -2182,7 +2189,7 @@ int active_workers = g1h->workers() ? 
g1h->workers()->total_workers() : 1; active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1); - G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(), + G1CMRefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(), g1h->workers(), active_workers); @@ -2226,7 +2233,7 @@ } rp->verify_no_references_recorded(); - assert(!rp->discovery_enabled(), "should have been disabled"); + assert(!rp->discovery_enabled(), "Post condition"); // Now clean up stale oops in StringTable StringTable::unlink(&g1_is_alive); @@ -3329,7 +3336,7 @@ assert(_ref_processor == NULL, "should be initialized to NULL"); if (G1UseConcMarkReferenceProcessing) { - _ref_processor = g1h->ref_processor(); + _ref_processor = g1h->ref_processor_cm(); assert(_ref_processor != NULL, "should not be NULL"); } } --- old/src/share/vm/gc_implementation/g1/concurrentMark.hpp 2011-09-14 14:54:08.819431084 -0700 +++ new/src/share/vm/gc_implementation/g1/concurrentMark.hpp 2011-09-14 14:54:08.575312447 -0700 @@ -366,8 +366,8 @@ friend class CMConcurrentMarkingTask; friend class G1ParNoteEndTask; friend class CalcLiveObjectsClosure; - friend class G1RefProcTaskProxy; - friend class G1RefProcTaskExecutor; + friend class G1CMRefProcTaskProxy; + friend class G1CMRefProcTaskExecutor; friend class G1CMParKeepAliveAndDrainClosure; friend class G1CMParDrainMarkingStackClosure; --- old/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp 2011-09-14 14:54:10.212628664 -0700 +++ new/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp 2011-09-14 14:54:09.960632454 -0700 @@ -42,6 +42,7 @@ #include "memory/gcLocker.inline.hpp" #include "memory/genOopClosures.inline.hpp" #include "memory/generationSpec.hpp" +#include "memory/referenceProcessor.hpp" #include "oops/oop.inline.hpp" #include "oops/oop.pcgc.inline.hpp" #include "runtime/aprofiler.hpp" @@ -1244,15 +1245,11 @@ COMPILER2_PRESENT(DerivedPointerTable::clear()); - // We want to discover references, but not process them yet. - // This mode is disabled in - // instanceRefKlass::process_discovered_references if the - // generation does some collection work, or - // instanceRefKlass::enqueue_discovered_references if the - // generation returns without doing any work. - ref_processor()->disable_discovery(); - ref_processor()->abandon_partial_discovery(); - ref_processor()->verify_no_references_recorded(); + // Disable discovery and empty the discovered lists + // for the CM ref processor. + ref_processor_cm()->disable_discovery(); + ref_processor_cm()->abandon_partial_discovery(); + ref_processor_cm()->verify_no_references_recorded(); // Abandon current iterations of concurrent marking and concurrent // refinement, if any are in progress. @@ -1280,31 +1277,33 @@ empty_young_list(); g1_policy()->set_full_young_gcs(true); - // See the comment in G1CollectedHeap::ref_processing_init() about + // See the comments in g1CollectedHeap.hpp and + // G1CollectedHeap::ref_processing_init() about // how reference processing currently works in G1. - // Temporarily make reference _discovery_ single threaded (non-MT). - ReferenceProcessorMTDiscoveryMutator rp_disc_ser(ref_processor(), false); + // Temporarily make discovery by the STW ref processor single threaded (non-MT). + ReferenceProcessorMTDiscoveryMutator stw_rp_disc_ser(ref_processor_stw(), false); - // Temporarily make refs discovery atomic - ReferenceProcessorAtomicMutator rp_disc_atomic(ref_processor(), true); + // Temporarily clear the STW ref processor's _is_alive_non_header field. 
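The full-GC prologue above switches to ref_processor_stw() but keeps using the ReferenceProcessorMTDiscoveryMutator and ReferenceProcessorIsAliveMutator helpers to override the STW processor's settings for the duration of the collection. The sketch below is a standalone illustration of that save-on-construction / restore-on-destruction idiom; ScopedSetting and RefProcSettings are invented names, not the HotSpot classes.

// Standalone sketch (invented names): the save/restore pattern the
// ReferenceProcessor*Mutator helpers used above rely on.
#include <iostream>

template <typename T>
class ScopedSetting {
  T& _slot;    // the setting being overridden
  T  _saved;   // value to restore on scope exit
public:
  ScopedSetting(T& slot, const T& temporary) : _slot(slot), _saved(slot) {
    _slot = temporary;
  }
  ~ScopedSetting() { _slot = _saved; }
};

struct RefProcSettings {
  bool        mt_discovery;
  const void* is_alive_non_header;   // NULL means "no filtering closure"
};

int main() {
  int dummy_closure = 0;                            // stands in for an is_alive closure
  RefProcSettings stw = { true, &dummy_closure };
  {
    // Mirrors the full-GC prologue: single-threaded discovery and no
    // _is_alive_non_header closure, for the duration of this scope only.
    ScopedSetting<bool>        single_threaded(stw.mt_discovery, false);
    ScopedSetting<const void*> no_filter(stw.is_alive_non_header, 0);
    std::cout << "during full GC: mt_discovery=" << stw.mt_discovery << "\n";
  }
  std::cout << "after full GC:  mt_discovery=" << stw.mt_discovery << "\n";
  return 0;
}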
+ ReferenceProcessorIsAliveMutator stw_rp_is_alive_null(ref_processor_stw(), NULL); - // Temporarily clear _is_alive_non_header - ReferenceProcessorIsAliveMutator rp_is_alive_null(ref_processor(), NULL); + ref_processor_stw()->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/); + ref_processor_stw()->setup_policy(do_clear_all_soft_refs); - ref_processor()->enable_discovery(); - ref_processor()->setup_policy(do_clear_all_soft_refs); // Do collection work { HandleMark hm; // Discard invalid handles created during gc - G1MarkSweep::invoke_at_safepoint(ref_processor(), do_clear_all_soft_refs); + G1MarkSweep::invoke_at_safepoint(ref_processor_stw(), do_clear_all_soft_refs); } + assert(free_regions() == 0, "we should not have added any free regions"); rebuild_region_lists(); _summary_bytes_used = recalculate_used(); - ref_processor()->enqueue_discovered_references(); + // Enqueue any discovered reference objects that have + // not been removed from the discovered lists. + ref_processor_stw()->enqueue_discovered_references(); COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); @@ -1319,7 +1318,16 @@ /* option */ VerifyOption_G1UsePrevMarking); } - NOT_PRODUCT(ref_processor()->verify_no_references_recorded()); + + assert(!ref_processor_stw()->discovery_enabled(), "Postcondition"); + ref_processor_stw()->verify_no_references_recorded(); + + // Note: since we've just done a full GC, concurrent + // marking is no longer active. Therefore we need not + // re-enable reference discovery for the CM ref processor. + // That will be done at the start of the next marking cycle. + assert(!ref_processor_cm()->discovery_enabled(), "Postcondition"); + ref_processor_cm()->verify_no_references_recorded(); reset_gc_time_stamp(); // Since everything potentially moved, we will clear all remembered @@ -1772,8 +1780,10 @@ _g1_policy(policy_), _dirty_card_queue_set(false), _into_cset_dirty_card_queue_set(false), - _is_alive_closure(this), - _ref_processor(NULL), + _is_alive_closure_cm(this), + _is_alive_closure_stw(this), + _ref_processor_cm(NULL), + _ref_processor_stw(NULL), _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)), _bot_shared(NULL), _objs_with_preserved_marks(NULL), _preserved_marks_of_objs(NULL), @@ -2067,34 +2077,81 @@ void G1CollectedHeap::ref_processing_init() { // Reference processing in G1 currently works as follows: // - // * There is only one reference processor instance that - // 'spans' the entire heap. It is created by the code - // below. - // * Reference discovery is not enabled during an incremental - // pause (see 6484982). - // * Discoverered refs are not enqueued nor are they processed - // during an incremental pause (see 6484982). - // * Reference discovery is enabled at initial marking. - // * Reference discovery is disabled and the discovered - // references processed etc during remarking. - // * Reference discovery is MT (see below). - // * Reference discovery requires a barrier (see below). - // * Reference processing is currently not MT (see 6608385). - // * A full GC enables (non-MT) reference discovery and - // processes any discovered references. + // * There are two reference processor instances. One is + // used to record and process discovered references + // during concurrent marking; the other is used to + // record and process references during STW pauses + // (both full and incremental). + // * Both ref processors need to 'span' the entire heap as + // the regions in the collection set may be dotted around. 
+ // + // * For the concurrent marking ref processor: + // * Reference discovery is enabled at initial marking. + // * Reference discovery is disabled and the discovered + // references processed etc during remarking. + // * Reference discovery is MT (see below). + // * Reference discovery requires a barrier (see below). + // * Reference processing may or may not be MT + // (depending on the value of ParallelRefProcEnabled + // and ParallelGCThreads). + // * A full GC disables reference discovery by the CM + // ref processor and abandons any entries on it's + // discovered lists. + // + // * For the STW processor: + // * Non MT discovery is enabled at the start of a full GC. + // * Processing and enqueueing during a full GC is non-MT. + // * During a full GC, references are processed after marking. + // + // * Discovery (may or may not be MT) is enabled at the start + // of an incremental evacuation pause. + // * References are processed near the end of a STW evacuation pause. + // * For both types of GC: + // * Discovery is atomic - i.e. not concurrent. + // * Reference discovery will not need a barrier. SharedHeap::ref_processing_init(); MemRegion mr = reserved_region(); - _ref_processor = + + // Concurrent Mark ref processor + _ref_processor_cm = + new ReferenceProcessor(mr, // span + ParallelRefProcEnabled && (ParallelGCThreads > 1), + // mt processing + (int) ParallelGCThreads, + // degree of mt processing + (ParallelGCThreads > 1) || (ConcGCThreads > 1), + // mt discovery + (int) MAX2(ParallelGCThreads, ConcGCThreads), + // degree of mt discovery + false, + // Reference discovery is not atomic + &_is_alive_closure_cm, + // is alive closure + // (for efficiency/performance) + true); + // Setting next fields of discovered + // lists requires a barrier. + + // STW ref processor + _ref_processor_stw = new ReferenceProcessor(mr, // span - ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing - (int) ParallelGCThreads, // degree of mt processing - ParallelGCThreads > 1 || ConcGCThreads > 1, // mt discovery - (int) MAX2(ParallelGCThreads, ConcGCThreads), // degree of mt discovery - false, // Reference discovery is not atomic - &_is_alive_closure, // is alive closure for efficiency - true); // Setting next fields of discovered - // lists requires a barrier. + ParallelRefProcEnabled && (ParallelGCThreads > 1), + // mt processing + MAX2((int)ParallelGCThreads, 1), + // degree of mt processing + (ParallelGCThreads > 1), + // mt discovery + MAX2((int)ParallelGCThreads, 1), + // degree of mt discovery + true, + // Reference discovery is atomic + &_is_alive_closure_stw, + // is alive closure + // (for efficiency/performance) + false); + // Setting next fields of discovered + // lists requires a barrier. } size_t G1CollectedHeap::capacity() const { @@ -3117,6 +3174,10 @@ COMPILER2_PRESENT(assert(DerivedPointerTable::is_empty(), "derived pointer present")); // always_do_update_barrier = true; + + // We have just completed a GC. Update the soft reference + // policy with the new heap occupancy + Universe::update_heap_info_at_gc(); } HeapWord* G1CollectedHeap::do_collection_pause(size_t word_size, @@ -3354,230 +3415,241 @@ COMPILER2_PRESENT(DerivedPointerTable::clear()); - // Please see comment in G1CollectedHeap::ref_processing_init() - // to see how reference processing currently works in G1. - // - // We want to turn off ref discovery, if necessary, and turn it back on - // on again later if we do. XXX Dubious: why is discovery disabled? 
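The two ReferenceProcessor constructor calls in ref_processing_init() above differ only in a handful of arguments. The standalone sketch below pulls those arguments out side by side so the contrast is easier to see; RefProcConfig and the local variables are invented for illustration, and the thread counts are example values standing in for ParallelGCThreads and ConcGCThreads.

// Standalone sketch contrasting the CM and STW configurations created in
// ref_processing_init() above. The values mirror the constructor arguments
// in the patch; the names are invented.
#include <algorithm>
#include <cstdio>

struct RefProcConfig {
  bool mt_processing;
  int  mt_processing_degree;
  bool mt_discovery;
  int  mt_discovery_degree;
  bool atomic_discovery;
  bool discovered_list_needs_barrier;
};

int main() {
  // Example stand-ins for the ParallelGCThreads / ConcGCThreads flags.
  int parallel_gc_threads = 4;
  int conc_gc_threads     = 1;
  bool parallel_ref_proc_enabled = true;

  RefProcConfig cm = {
    parallel_ref_proc_enabled && parallel_gc_threads > 1,
    parallel_gc_threads,
    parallel_gc_threads > 1 || conc_gc_threads > 1,
    std::max(parallel_gc_threads, conc_gc_threads),
    false,  // discovery runs concurrently with mutators, so not atomic
    true    // setting the next field of a discovered ref needs a barrier
  };

  RefProcConfig stw = {
    parallel_ref_proc_enabled && parallel_gc_threads > 1,
    std::max(parallel_gc_threads, 1),
    parallel_gc_threads > 1,
    std::max(parallel_gc_threads, 1),
    true,   // discovery happens at a safepoint, so it is atomic
    false   // no barrier needed
  };

  std::printf("CM  discovery degree: %d, atomic: %d\n",
              cm.mt_discovery_degree, cm.atomic_discovery);
  std::printf("STW discovery degree: %d, atomic: %d\n",
              stw.mt_discovery_degree, stw.atomic_discovery);
  return 0;
}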
- bool was_enabled = ref_processor()->discovery_enabled(); - if (was_enabled) ref_processor()->disable_discovery(); - - // Forget the current alloc region (we might even choose it to be part - // of the collection set!). - release_mutator_alloc_region(); - - // We should call this after we retire the mutator alloc - // region(s) so that all the ALLOC / RETIRE events are generated - // before the start GC event. - _hr_printer.start_gc(false /* full */, (size_t) total_collections()); - - // The elapsed time induced by the start time below deliberately elides - // the possible verification above. - double start_time_sec = os::elapsedTime(); - size_t start_used_bytes = used(); + // Please see comment in g1CollectedHeap.hpp and + // G1CollectedHeap::ref_processing_init() to see how + // reference processing currently works in G1. + + // Enable discovery in the STW reference processor + ref_processor_stw()->enable_discovery(true /*verify_disabled*/, + true /*verify_no_refs*/); + + { + // We want to temporarily turn off discovery by the + // CM ref processor, if necessary, and turn it back on + // on again later if we do. Using a scoped + // NoRefDiscovery object will do this. + NoRefDiscovery no_cm_discovery(ref_processor_cm()); + + // Forget the current alloc region (we might even choose it to be part + // of the collection set!). + release_mutator_alloc_region(); + + // We should call this after we retire the mutator alloc + // region(s) so that all the ALLOC / RETIRE events are generated + // before the start GC event. + _hr_printer.start_gc(false /* full */, (size_t) total_collections()); + + // The elapsed time induced by the start time below deliberately elides + // the possible verification above. + double start_time_sec = os::elapsedTime(); + size_t start_used_bytes = used(); #if YOUNG_LIST_VERBOSE - gclog_or_tty->print_cr("\nBefore recording pause start.\nYoung_list:"); - _young_list->print(); - g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty); + gclog_or_tty->print_cr("\nBefore recording pause start.\nYoung_list:"); + _young_list->print(); + g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty); #endif // YOUNG_LIST_VERBOSE - g1_policy()->record_collection_pause_start(start_time_sec, - start_used_bytes); + g1_policy()->record_collection_pause_start(start_time_sec, + start_used_bytes); #if YOUNG_LIST_VERBOSE - gclog_or_tty->print_cr("\nAfter recording pause start.\nYoung_list:"); - _young_list->print(); + gclog_or_tty->print_cr("\nAfter recording pause start.\nYoung_list:"); + _young_list->print(); #endif // YOUNG_LIST_VERBOSE - if (g1_policy()->during_initial_mark_pause()) { - concurrent_mark()->checkpointRootsInitialPre(); - } - perm_gen()->save_marks(); - - // We must do this before any possible evacuation that should propagate - // marks. - if (mark_in_progress()) { - double start_time_sec = os::elapsedTime(); + if (g1_policy()->during_initial_mark_pause()) { + concurrent_mark()->checkpointRootsInitialPre(); + } + perm_gen()->save_marks(); - _cm->drainAllSATBBuffers(); - double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0; - g1_policy()->record_satb_drain_time(finish_mark_ms); - } - // Record the number of elements currently on the mark stack, so we - // only iterate over these. (Since evacuation may add to the mark - // stack, doing more exposes race conditions.) If no mark is in - // progress, this will be zero. 
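The pause prologue above replaces the hand-rolled was_enabled/disable/re-enable dance with a scoped NoRefDiscovery object: it turns CM discovery off only if it was on, and turns it back on when the scope ends. Below is a standalone model of that behaviour; RefProcModel and NoRefDiscoveryModel are invented names, not the real NoRefDiscovery class.

// Standalone sketch of the conditional disable/re-enable behaviour the
// scoped NoRefDiscovery object provides for the CM ref processor.
#include <cassert>
#include <iostream>

class RefProcModel {
  bool _discovery_enabled;
public:
  explicit RefProcModel(bool enabled) : _discovery_enabled(enabled) {}
  bool discovery_enabled() const { return _discovery_enabled; }
  void enable_discovery()  { _discovery_enabled = true; }
  void disable_discovery() { _discovery_enabled = false; }
};

class NoRefDiscoveryModel {
  RefProcModel* _rp;
  bool          _was_enabled;   // remember whether we have to re-enable
public:
  explicit NoRefDiscoveryModel(RefProcModel* rp)
      : _rp(rp), _was_enabled(rp->discovery_enabled()) {
    if (_was_enabled) {
      _rp->disable_discovery();
    }
  }
  ~NoRefDiscoveryModel() {
    if (_was_enabled) {
      _rp->enable_discovery();
    }
  }
};

int main() {
  RefProcModel cm_rp(true);   // marking is in progress, discovery is on
  {
    NoRefDiscoveryModel guard(&cm_rp);   // pause body runs with CM discovery off
    assert(!cm_rp.discovery_enabled());
  }
  // Discovery is re-enabled automatically when the pause scope ends.
  std::cout << "re-enabled: " << cm_rp.discovery_enabled() << std::endl;
  return 0;
}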
- _cm->set_oops_do_bound(); + // We must do this before any possible evacuation that should propagate + // marks. + if (mark_in_progress()) { + double start_time_sec = os::elapsedTime(); + + _cm->drainAllSATBBuffers(); + double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0; + g1_policy()->record_satb_drain_time(finish_mark_ms); + } + // Record the number of elements currently on the mark stack, so we + // only iterate over these. (Since evacuation may add to the mark + // stack, doing more exposes race conditions.) If no mark is in + // progress, this will be zero. + _cm->set_oops_do_bound(); - if (mark_in_progress()) { - concurrent_mark()->newCSet(); - } + if (mark_in_progress()) { + concurrent_mark()->newCSet(); + } #if YOUNG_LIST_VERBOSE - gclog_or_tty->print_cr("\nBefore choosing collection set.\nYoung_list:"); - _young_list->print(); - g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty); + gclog_or_tty->print_cr("\nBefore choosing collection set.\nYoung_list:"); + _young_list->print(); + g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty); #endif // YOUNG_LIST_VERBOSE - g1_policy()->choose_collection_set(target_pause_time_ms); + g1_policy()->choose_collection_set(target_pause_time_ms); - if (_hr_printer.is_active()) { - HeapRegion* hr = g1_policy()->collection_set(); - while (hr != NULL) { - G1HRPrinter::RegionType type; - if (!hr->is_young()) { - type = G1HRPrinter::Old; - } else if (hr->is_survivor()) { - type = G1HRPrinter::Survivor; - } else { - type = G1HRPrinter::Eden; + if (_hr_printer.is_active()) { + HeapRegion* hr = g1_policy()->collection_set(); + while (hr != NULL) { + G1HRPrinter::RegionType type; + if (!hr->is_young()) { + type = G1HRPrinter::Old; + } else if (hr->is_survivor()) { + type = G1HRPrinter::Survivor; + } else { + type = G1HRPrinter::Eden; + } + _hr_printer.cset(hr); + hr = hr->next_in_collection_set(); } - _hr_printer.cset(hr); - hr = hr->next_in_collection_set(); } - } - // We have chosen the complete collection set. If marking is - // active then, we clear the region fields of any of the - // concurrent marking tasks whose region fields point into - // the collection set as these values will become stale. This - // will cause the owning marking threads to claim a new region - // when marking restarts. - if (mark_in_progress()) { - concurrent_mark()->reset_active_task_region_fields_in_cset(); - } + // We have chosen the complete collection set. If marking is + // active then, we clear the region fields of any of the + // concurrent marking tasks whose region fields point into + // the collection set as these values will become stale. This + // will cause the owning marking threads to claim a new region + // when marking restarts. + if (mark_in_progress()) { + concurrent_mark()->reset_active_task_region_fields_in_cset(); + } #ifdef ASSERT - VerifyCSetClosure cl; - collection_set_iterate(&cl); + VerifyCSetClosure cl; + collection_set_iterate(&cl); #endif // ASSERT - setup_surviving_young_words(); + setup_surviving_young_words(); - // Initialize the GC alloc regions. - init_gc_alloc_regions(); + // Initialize the GC alloc regions. + init_gc_alloc_regions(); - // Actually do the work... - evacuate_collection_set(); + // Actually do the work... 
+ evacuate_collection_set(); - free_collection_set(g1_policy()->collection_set()); - g1_policy()->clear_collection_set(); + free_collection_set(g1_policy()->collection_set()); + g1_policy()->clear_collection_set(); - cleanup_surviving_young_words(); + cleanup_surviving_young_words(); - // Start a new incremental collection set for the next pause. - g1_policy()->start_incremental_cset_building(); + // Start a new incremental collection set for the next pause. + g1_policy()->start_incremental_cset_building(); - // Clear the _cset_fast_test bitmap in anticipation of adding - // regions to the incremental collection set for the next - // evacuation pause. - clear_cset_fast_test(); + // Clear the _cset_fast_test bitmap in anticipation of adding + // regions to the incremental collection set for the next + // evacuation pause. + clear_cset_fast_test(); - _young_list->reset_sampled_info(); + _young_list->reset_sampled_info(); - // Don't check the whole heap at this point as the - // GC alloc regions from this pause have been tagged - // as survivors and moved on to the survivor list. - // Survivor regions will fail the !is_young() check. - assert(check_young_list_empty(false /* check_heap */), - "young list should be empty"); + // Don't check the whole heap at this point as the + // GC alloc regions from this pause have been tagged + // as survivors and moved on to the survivor list. + // Survivor regions will fail the !is_young() check. + assert(check_young_list_empty(false /* check_heap */), + "young list should be empty"); #if YOUNG_LIST_VERBOSE - gclog_or_tty->print_cr("Before recording survivors.\nYoung List:"); - _young_list->print(); + gclog_or_tty->print_cr("Before recording survivors.\nYoung List:"); + _young_list->print(); #endif // YOUNG_LIST_VERBOSE - g1_policy()->record_survivor_regions(_young_list->survivor_length(), - _young_list->first_survivor_region(), - _young_list->last_survivor_region()); + g1_policy()->record_survivor_regions(_young_list->survivor_length(), + _young_list->first_survivor_region(), + _young_list->last_survivor_region()); - _young_list->reset_auxilary_lists(); + _young_list->reset_auxilary_lists(); - if (evacuation_failed()) { - _summary_bytes_used = recalculate_used(); - } else { - // The "used" of the the collection set have already been subtracted - // when they were freed. Add in the bytes evacuated. - _summary_bytes_used += g1_policy()->bytes_copied_during_gc(); - } - - if (g1_policy()->during_initial_mark_pause()) { - concurrent_mark()->checkpointRootsInitialPost(); - set_marking_started(); - // CAUTION: after the doConcurrentMark() call below, - // the concurrent marking thread(s) could be running - // concurrently with us. Make sure that anything after - // this point does not assume that we are the only GC thread - // running. Note: of course, the actual marking work will - // not start until the safepoint itself is released in - // ConcurrentGCThread::safepoint_desynchronize(). - doConcurrentMark(); - } + if (evacuation_failed()) { + _summary_bytes_used = recalculate_used(); + } else { + // The "used" of the the collection set have already been subtracted + // when they were freed. Add in the bytes evacuated. + _summary_bytes_used += g1_policy()->bytes_copied_during_gc(); + } + + if (g1_policy()->during_initial_mark_pause()) { + concurrent_mark()->checkpointRootsInitialPost(); + set_marking_started(); + // CAUTION: after the doConcurrentMark() call below, + // the concurrent marking thread(s) could be running + // concurrently with us. 
Make sure that anything after + // this point does not assume that we are the only GC thread + // running. Note: of course, the actual marking work will + // not start until the safepoint itself is released in + // ConcurrentGCThread::safepoint_desynchronize(). + doConcurrentMark(); + } - allocate_dummy_regions(); + allocate_dummy_regions(); #if YOUNG_LIST_VERBOSE - gclog_or_tty->print_cr("\nEnd of the pause.\nYoung_list:"); - _young_list->print(); - g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty); + gclog_or_tty->print_cr("\nEnd of the pause.\nYoung_list:"); + _young_list->print(); + g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty); #endif // YOUNG_LIST_VERBOSE - init_mutator_alloc_region(); + init_mutator_alloc_region(); - { - size_t expand_bytes = g1_policy()->expansion_amount(); - if (expand_bytes > 0) { - size_t bytes_before = capacity(); - if (!expand(expand_bytes)) { - // We failed to expand the heap so let's verify that - // committed/uncommitted amount match the backing store - assert(capacity() == _g1_storage.committed_size(), "committed size mismatch"); - assert(max_capacity() == _g1_storage.reserved_size(), "reserved size mismatch"); + { + size_t expand_bytes = g1_policy()->expansion_amount(); + if (expand_bytes > 0) { + size_t bytes_before = capacity(); + if (!expand(expand_bytes)) { + // We failed to expand the heap so let's verify that + // committed/uncommitted amount match the backing store + assert(capacity() == _g1_storage.committed_size(), "committed size mismatch"); + assert(max_capacity() == _g1_storage.reserved_size(), "reserved size mismatch"); + } } } - } - double end_time_sec = os::elapsedTime(); - double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS; - g1_policy()->record_pause_time_ms(pause_time_ms); - g1_policy()->record_collection_pause_end(); - - MemoryService::track_memory_usage(); - - // In prepare_for_verify() below we'll need to scan the deferred - // update buffers to bring the RSets up-to-date if - // G1HRRSFlushLogBuffersOnVerify has been set. While scanning - // the update buffers we'll probably need to scan cards on the - // regions we just allocated to (i.e., the GC alloc - // regions). However, during the last GC we called - // set_saved_mark() on all the GC alloc regions, so card - // scanning might skip the [saved_mark_word()...top()] area of - // those regions (i.e., the area we allocated objects into - // during the last GC). But it shouldn't. Given that - // saved_mark_word() is conditional on whether the GC time stamp - // on the region is current or not, by incrementing the GC time - // stamp here we invalidate all the GC time stamps on all the - // regions and saved_mark_word() will simply return top() for - // all the regions. This is a nicer way of ensuring this rather - // than iterating over the regions and fixing them. In fact, the - // GC time stamp increment here also ensures that - // saved_mark_word() will return top() between pauses, i.e., - // during concurrent refinement. So we don't need the - // is_gc_active() check to decided which top to use when - // scanning cards (see CR 7039627). 
- increment_gc_time_stamp(); + double end_time_sec = os::elapsedTime(); + double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS; + g1_policy()->record_pause_time_ms(pause_time_ms); + g1_policy()->record_collection_pause_end(); + + MemoryService::track_memory_usage(); + + // In prepare_for_verify() below we'll need to scan the deferred + // update buffers to bring the RSets up-to-date if + // G1HRRSFlushLogBuffersOnVerify has been set. While scanning + // the update buffers we'll probably need to scan cards on the + // regions we just allocated to (i.e., the GC alloc + // regions). However, during the last GC we called + // set_saved_mark() on all the GC alloc regions, so card + // scanning might skip the [saved_mark_word()...top()] area of + // those regions (i.e., the area we allocated objects into + // during the last GC). But it shouldn't. Given that + // saved_mark_word() is conditional on whether the GC time stamp + // on the region is current or not, by incrementing the GC time + // stamp here we invalidate all the GC time stamps on all the + // regions and saved_mark_word() will simply return top() for + // all the regions. This is a nicer way of ensuring this rather + // than iterating over the regions and fixing them. In fact, the + // GC time stamp increment here also ensures that + // saved_mark_word() will return top() between pauses, i.e., + // during concurrent refinement. So we don't need the + // is_gc_active() check to decided which top to use when + // scanning cards (see CR 7039627). + increment_gc_time_stamp(); + + if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) { + HandleMark hm; // Discard invalid handles created during verification + gclog_or_tty->print(" VerifyAfterGC:"); + prepare_for_verify(); + Universe::verify(/* allow dirty */ true, + /* silent */ false, + /* option */ VerifyOption_G1UsePrevMarking); + } - if (VerifyAfterGC && total_collections() >= VerifyGCStartAt) { - HandleMark hm; // Discard invalid handles created during verification - gclog_or_tty->print(" VerifyAfterGC:"); - prepare_for_verify(); - Universe::verify(/* allow dirty */ true, - /* silent */ false, - /* option */ VerifyOption_G1UsePrevMarking); - } + assert(!ref_processor_stw()->discovery_enabled(), "Postcondition"); + ref_processor_stw()->verify_no_references_recorded(); - if (was_enabled) ref_processor()->enable_discovery(); + // CM reference discovery will be re-enabled if necessary. + } { size_t expand_bytes = g1_policy()->expansion_amount(); @@ -3728,34 +3800,6 @@ _evac_failure_scan_stack = NULL; } -// *** Sequential G1 Evacuation - -class G1IsAliveClosure: public BoolObjectClosure { - G1CollectedHeap* _g1; -public: - G1IsAliveClosure(G1CollectedHeap* g1) : _g1(g1) {} - void do_object(oop p) { assert(false, "Do not call."); } - bool do_object_b(oop p) { - // It is reachable if it is outside the collection set, or is inside - // and forwarded. 
- return !_g1->obj_in_cs(p) || p->is_forwarded(); - } -}; - -class G1KeepAliveClosure: public OopClosure { - G1CollectedHeap* _g1; -public: - G1KeepAliveClosure(G1CollectedHeap* g1) : _g1(g1) {} - void do_oop(narrowOop* p) { guarantee(false, "Not needed"); } - void do_oop( oop* p) { - oop obj = *p; - if (_g1->obj_in_cs(obj)) { - assert( obj->is_forwarded(), "invariant" ); - *p = obj->forwardee(); - } - } -}; - class UpdateRSetDeferred : public OopsInHeapRegionClosure { private: G1CollectedHeap* _g1; @@ -4175,12 +4219,17 @@ #endif // ASSERT void G1ParScanThreadState::trim_queue() { + assert(_evac_cl != NULL, "not set"); + assert(_evac_failure_cl != NULL, "not set"); + assert(_partial_scan_cl != NULL, "not set"); + StarTask ref; do { // Drain the overflow stack first, so other threads can steal. while (refs()->pop_overflow(ref)) { deal_with_reference(ref); } + while (refs()->pop_local(ref)) { deal_with_reference(ref); } @@ -4501,35 +4550,42 @@ ResourceMark rm; HandleMark hm; + ReferenceProcessor* rp = _g1h->ref_processor_stw(); + G1ParScanThreadState pss(_g1h, i); - G1ParScanHeapEvacClosure scan_evac_cl(_g1h, &pss); - G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss); - G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss); + G1ParScanHeapEvacClosure scan_evac_cl(_g1h, &pss, rp); + G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp); + G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss, rp); pss.set_evac_closure(&scan_evac_cl); pss.set_evac_failure_closure(&evac_failure_cl); pss.set_partial_scan_closure(&partial_scan_cl); - G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss); - G1ParScanPermClosure only_scan_perm_cl(_g1h, &pss); - G1ParScanHeapRSClosure only_scan_heap_rs_cl(_g1h, &pss); - G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss); - - G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss); - G1ParScanAndMarkPermClosure scan_mark_perm_cl(_g1h, &pss); - G1ParScanAndMarkHeapRSClosure scan_mark_heap_rs_cl(_g1h, &pss); + G1ParScanExtRootClosure only_scan_root_cl(_g1h, &pss, rp); + G1ParScanPermClosure only_scan_perm_cl(_g1h, &pss, rp); + + G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss, rp); + G1ParScanAndMarkPermClosure scan_mark_perm_cl(_g1h, &pss, rp); - OopsInHeapRegionClosure *scan_root_cl; - OopsInHeapRegionClosure *scan_perm_cl; + OopClosure* scan_root_cl = &only_scan_root_cl; + OopsInHeapRegionClosure* scan_perm_cl = &only_scan_perm_cl; if (_g1h->g1_policy()->during_initial_mark_pause()) { + // We also need to mark copied objects. scan_root_cl = &scan_mark_root_cl; scan_perm_cl = &scan_mark_perm_cl; - } else { - scan_root_cl = &only_scan_root_cl; - scan_perm_cl = &only_scan_perm_cl; } + // The following closure is used to scan RSets looking for reference + // fields that point into the collection set. The actual field iteration + // is performed by a FilterIntoCSClosure, whose do_oop method calls the + // do_oop method of the following closure. + // Therefore we want to record the reference processor in the + // FilterIntoCSClosure. To do so we record the STW reference + // processor into the following closure and pass it to the + // FilterIntoCSClosure in HeapRegionDCTOC::walk_mem_region_with_cl. + G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss, rp); + pss.start_strong_roots(); _g1h->g1_process_strong_roots(/* not collecting perm */ false, SharedHeap::SO_AllClasses, @@ -4577,6 +4633,7 @@ OopsInHeapRegionClosure* scan_rs, OopsInGenClosure* scan_perm, int worker_i) { + // First scan the strong roots, including the perm gen. 
double ext_roots_start = os::elapsedTime(); double closure_app_time_sec = 0.0; @@ -4595,12 +4652,13 @@ &eager_scan_code_roots, &buf_scan_perm); - // Now the ref_processor roots. + // Now the CM ref_processor roots. if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) { - // We need to treat the discovered reference lists as roots and - // keep entries (which are added by the marking threads) on them - // live until they can be processed at the end of marking. - ref_processor()->weak_oops_do(&buf_scan_non_heap_roots); + // We need to treat the discovered reference lists of the + // concurrent mark ref processor as roots and keep entries + // (which are added by the marking threads) on them live + // until they can be processed at the end of marking. + ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots); } // Finish up any enqueued closure apps (attributed as object copy time). @@ -4641,6 +4699,524 @@ SharedHeap::process_weak_roots(root_closure, &roots_in_blobs, non_root_closure); } +// Weak Reference Processing support + +// An always "is_alive" closure that is used to preserve referents. +// If the object is non-null then it's alive. Used in the preservation +// of referent objects that are pointed to by reference objects +// discovered by the CM ref processor. +class G1AlwaysAliveClosure: public BoolObjectClosure { + G1CollectedHeap* _g1; +public: + G1AlwaysAliveClosure(G1CollectedHeap* g1) : _g1(g1) {} + void do_object(oop p) { assert(false, "Do not call."); } + bool do_object_b(oop p) { + if (p != NULL) { + return true; + } + return false; + } +}; + +bool G1STWIsAliveClosure::do_object_b(oop p) { + // An object is reachable if it is outside the collection set, + // or is inside and copied. + return !_g1->obj_in_cs(p) || p->is_forwarded(); +} + +// Non Copying Keep Alive closure +class G1KeepAliveClosure: public OopClosure { + G1CollectedHeap* _g1; +public: + G1KeepAliveClosure(G1CollectedHeap* g1) : _g1(g1) {} + void do_oop(narrowOop* p) { guarantee(false, "Not needed"); } + void do_oop( oop* p) { + oop obj = *p; + + if (_g1->obj_in_cs(obj)) { + assert( obj->is_forwarded(), "invariant" ); + *p = obj->forwardee(); + } + } +}; + +// Copying Keep Alive closure - can be called from both +// serial and parallel code as long as different worker +// threads utilize different G1ParScanThreadState instances +// and different queues. + +class G1CopyingKeepAliveClosure: public OopClosure { + G1CollectedHeap* _g1h; + OopClosure* _copy_non_heap_obj_cl; + OopsInHeapRegionClosure* _copy_perm_obj_cl; + G1ParScanThreadState* _par_scan_state; + +public: + G1CopyingKeepAliveClosure(G1CollectedHeap* g1h, + OopClosure* non_heap_obj_cl, + OopsInHeapRegionClosure* perm_obj_cl, + G1ParScanThreadState* pss): + _g1h(g1h), + _copy_non_heap_obj_cl(non_heap_obj_cl), + _copy_perm_obj_cl(perm_obj_cl), + _par_scan_state(pss) + {} + + virtual void do_oop(narrowOop* p) { do_oop_work(p); } + virtual void do_oop( oop* p) { do_oop_work(p); } + + template void do_oop_work(T* p) { + oop obj = oopDesc::load_decode_heap_oop(p); + + if (_g1h->obj_in_cs(obj)) { + // If the referent object has been forwarded (either copied + // to a new location or to itself in the event of an + // evacuation failure) then we need to update the reference + // field and, if both reference and referent are in the G1 + // heap, update the RSet for the referent. + // + // If the referent has not been forwarded then we have to keep + // it alive by policy. Therefore we have copy the referent. 
+ // + // If the reference field is in the G1 heap then we can push + // on the PSS queue. When the queue is drained (after each + // phase of reference processing) the object and it's followers + // will be copied, the reference field set to point to the + // new location, and the RSet updated. Otherwise we need to + // use the the non-heap or perm closures directly to copy + // the refernt object and update the pointer, while avoiding + // updating the RSet. + + if (_g1h->is_in_g1_reserved(p)) { + _par_scan_state->push_on_queue(p); + } else { + // The reference field is not in the G1 heap. + if (_g1h->perm_gen()->is_in(p)) { + _copy_perm_obj_cl->do_oop(p); + } else { + _copy_non_heap_obj_cl->do_oop(p); + } + } + } + } +}; + +// Serial drain queue closure. Called as the 'complete_gc' +// closure for each discovered list in some of the +// reference processing phases. + +class G1STWDrainQueueClosure: public VoidClosure { +protected: + G1CollectedHeap* _g1h; + G1ParScanThreadState* _par_scan_state; + + G1ParScanThreadState* par_scan_state() { return _par_scan_state; } + +public: + G1STWDrainQueueClosure(G1CollectedHeap* g1h, G1ParScanThreadState* pss) : + _g1h(g1h), + _par_scan_state(pss) + { } + + void do_void() { + G1ParScanThreadState* const pss = par_scan_state(); + pss->trim_queue(); + } +}; + +// Parallel Reference Processing closures + +// Implementation of AbstractRefProcTaskExecutor for parallel reference +// processing during G1 evacuation pauses. + +class G1STWRefProcTaskExecutor: public AbstractRefProcTaskExecutor { +private: + G1CollectedHeap* _g1h; + RefToScanQueueSet* _queues; + WorkGang* _workers; + int _active_workers; + +public: + G1STWRefProcTaskExecutor(G1CollectedHeap* g1h, + WorkGang* workers, + RefToScanQueueSet *task_queues, + int n_workers) : + _g1h(g1h), + _queues(task_queues), + _workers(workers), + _active_workers(n_workers) + { + assert(n_workers > 0, "shouldn't call this otherwise"); + } + + // Executes the given task using concurrent marking worker threads. + virtual void execute(ProcessTask& task); + virtual void execute(EnqueueTask& task); +}; + +// Gang task for possibly parallel reference processing + +class G1STWRefProcTaskProxy: public AbstractGangTask { + typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; + ProcessTask& _proc_task; + G1CollectedHeap* _g1h; + RefToScanQueueSet *_task_queues; + ParallelTaskTerminator* _terminator; + +public: + G1STWRefProcTaskProxy(ProcessTask& proc_task, + G1CollectedHeap* g1h, + RefToScanQueueSet *task_queues, + ParallelTaskTerminator* terminator) : + AbstractGangTask("Process reference objects in parallel"), + _proc_task(proc_task), + _g1h(g1h), + _task_queues(task_queues), + _terminator(terminator) + {} + + virtual void work(int i) { + // The reference processing task executed by a single worker. 
+ ResourceMark rm; + HandleMark hm; + + G1STWIsAliveClosure is_alive(_g1h); + + G1ParScanThreadState pss(_g1h, i); + + G1ParScanHeapEvacClosure scan_evac_cl(_g1h, &pss, NULL); + G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL); + G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss, NULL); + + pss.set_evac_closure(&scan_evac_cl); + pss.set_evac_failure_closure(&evac_failure_cl); + pss.set_partial_scan_closure(&partial_scan_cl); + + G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, &pss, NULL); + G1ParScanPermClosure only_copy_perm_cl(_g1h, &pss, NULL); + + G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL); + G1ParScanAndMarkPermClosure copy_mark_perm_cl(_g1h, &pss, NULL); + + OopClosure* copy_non_heap_cl = &only_copy_non_heap_cl; + OopsInHeapRegionClosure* copy_perm_cl = &only_copy_perm_cl; + + if (_g1h->g1_policy()->during_initial_mark_pause()) { + // We also need to mark copied objects. + copy_non_heap_cl = ©_mark_non_heap_cl; + copy_perm_cl = ©_mark_perm_cl; + } + + // Keep alive closure. + G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, copy_perm_cl, &pss); + + // Complete GC closure + G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _task_queues, _terminator); + + // Call the reference processing task's work routine. + _proc_task.work(i, is_alive, keep_alive, drain_queue); + + // Note we cannot assert that the refs array is empty here as not all + // of the processing tasks (specifically phase2 - pp2_work) execute + // the complete_gc closure (which ordinarily would drain the queue) so + // the queue may not be empty. + } +}; + +// Driver routine for parallel reference processing. +// Creates an instance of the ref processing gang +// task and has the worker threads execute it. +void G1STWRefProcTaskExecutor::execute(ProcessTask& proc_task) { + assert(_workers != NULL, "Need parallel worker threads."); + + ParallelTaskTerminator terminator(_active_workers, _queues); + G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _queues, &terminator); + + _g1h->set_par_threads(_active_workers); + _workers->run_task(&proc_task_proxy); + _g1h->set_par_threads(0); +} + +// Gang task for parallel reference enqueueing. + +class G1STWRefEnqueueTaskProxy: public AbstractGangTask { + typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; + EnqueueTask& _enq_task; + +public: + G1STWRefEnqueueTaskProxy(EnqueueTask& enq_task) : + AbstractGangTask("Enqueue reference objects in parallel"), + _enq_task(enq_task) + { } + + virtual void work(int i) { + _enq_task.work(i); + } +}; + +// Driver routine for parallel reference enqueing. +// Creates an instance of the ref enqueueing gang +// task and has the worker threads execute it. + +void G1STWRefProcTaskExecutor::execute(EnqueueTask& enq_task) { + assert(_workers != NULL, "Need parallel worker threads."); + + G1STWRefEnqueueTaskProxy enq_task_proxy(enq_task); + + _g1h->set_par_threads(_active_workers); + _workers->run_task(&enq_task_proxy); + _g1h->set_par_threads(0); +} + +// End of weak reference support closures + +// Abstract task used to preserve (i.e. copy) any referent objects +// that are in the collection set and are pointed to by reference +// objects discovered by the CM ref processor. 
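G1STWRefProcTaskExecutor and its proxy classes follow the same shape as the renamed G1CM* versions earlier in the patch: the reference processor hands the executor a task exposing a per-worker work() method (simplified below to take only the worker id), and the executor wraps it in a gang task and fans it out to the workers. The sketch below models that shape with std::thread instead of HotSpot's WorkGang; all names in it are invented.

// Standalone sketch of the executor/proxy pattern, using std::thread in
// place of a WorkGang. Not the HotSpot classes.
#include <thread>
#include <vector>
#include <cstdio>

struct ProcessTaskModel {                 // stands in for the ref processor's ProcessTask
  virtual void work(int worker_id) = 0;
  virtual ~ProcessTaskModel() {}
};

class TaskExecutorModel {                 // stands in for a *RefProcTaskExecutor
  int _active_workers;
public:
  explicit TaskExecutorModel(int n_workers) : _active_workers(n_workers) {}

  void execute(ProcessTaskModel& task) {  // wrap the task, fan it out, then join
    std::vector<std::thread> gang;
    for (int i = 0; i < _active_workers; i++) {
      gang.emplace_back([&task, i] { task.work(i); });
    }
    for (std::thread& t : gang) {
      t.join();
    }
  }
};

struct PrintTask : ProcessTaskModel {
  void work(int worker_id) override {
    std::printf("worker %d processing its share of the discovered lists\n", worker_id);
  }
};

int main() {
  PrintTask task;
  TaskExecutorModel executor(4);          // analogous to active_workers above
  executor.execute(task);
  return 0;
}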
+ +class G1ParPreserveCMReferentsTask: public AbstractGangTask { +protected: + G1CollectedHeap* _g1h; + RefToScanQueueSet *_queues; + ParallelTaskTerminator _terminator; + int _n_workers; + +public: + G1ParPreserveCMReferentsTask(G1CollectedHeap* g1h,int workers, RefToScanQueueSet *task_queues) : + AbstractGangTask("ParPreserveCMReferents"), + _g1h(g1h), + _queues(task_queues), + _terminator(workers, _queues), + _n_workers(workers) + { } + + void work(int i) { + ResourceMark rm; + HandleMark hm; + + G1ParScanThreadState pss(_g1h, i); + G1ParScanHeapEvacClosure scan_evac_cl(_g1h, &pss, NULL); + G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL); + G1ParScanPartialArrayClosure partial_scan_cl(_g1h, &pss, NULL); + + pss.set_evac_closure(&scan_evac_cl); + pss.set_evac_failure_closure(&evac_failure_cl); + pss.set_partial_scan_closure(&partial_scan_cl); + + assert(pss.refs()->is_empty(), "both queue and overflow should be empty"); + + + G1ParScanExtRootClosure only_copy_non_heap_cl(_g1h, &pss, NULL); + G1ParScanPermClosure only_copy_perm_cl(_g1h, &pss, NULL); + + G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL); + G1ParScanAndMarkPermClosure copy_mark_perm_cl(_g1h, &pss, NULL); + + OopClosure* copy_non_heap_cl = &only_copy_non_heap_cl; + OopsInHeapRegionClosure* copy_perm_cl = &only_copy_perm_cl; + + if (_g1h->g1_policy()->during_initial_mark_pause()) { + // We also need to mark copied objects. + copy_non_heap_cl = ©_mark_non_heap_cl; + copy_perm_cl = ©_mark_perm_cl; + } + + // Is alive closure + G1AlwaysAliveClosure always_alive(_g1h); + + // Copying keep alive closure. Applied to referent objects that need + // to be copied. + G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, copy_perm_cl, &pss); + + ReferenceProcessor* rp = _g1h->ref_processor_cm(); + + int limit = ReferenceProcessor::number_of_subclasses_of_ref() * rp->max_num_q(); + int stride = MIN2(MAX2(_n_workers, 1), limit); + + // limit is set using max_num_q() - which was set using ParallelGCThreads. + // So this must be true - but assert just in case someone decides to + // change the worker ids. + assert(0 <= i && i < limit, "sanity"); + assert(!rp->discovery_is_atomic(), "check this code"); + + // Select discovered lists [i, i+stride, i+2*stride,...,limit) + for (int idx = i; idx < limit; idx += stride) { + DiscoveredList& ref_list = rp->discovered_soft_refs()[idx]; + + DiscoveredListIterator iter(ref_list, &keep_alive, &always_alive); + while (iter.has_next()) { + // Since discovery is not atomic for the CM ref processor, we + // can see some null referent objects. + iter.load_ptrs(DEBUG_ONLY(true)); + oop ref = iter.obj(); + + // This will filter nulls. + if (iter.is_referent_alive()) { + iter.make_referent_alive(); + } + iter.move_to_next(); + } + } + + // Drain the queue - which may cause stealing + G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _queues, &_terminator); + drain_queue.do_void(); + // Allocation buffers were retired at the end of G1ParEvacuateFollowersClosure + assert(pss.refs()->is_empty(), "should be"); + } +}; + +// Weak Reference processing during an evacuation pause (part 1). 
+void G1CollectedHeap::process_discovered_references() { + double ref_proc_start = os::elapsedTime(); + + ReferenceProcessor* rp = _ref_processor_stw; + assert(rp->discovery_enabled(), "should have been enabled"); + + // Any reference objects, in the collection set, that were 'discovered' + // by the CM ref processor should have already been copied (either by + // applying the external root copy closure to the discovered lists, or + // by following an RSet entry). + // + // But some of the referents, that are in the collection set, that these + // reference objects point to may not have been copied: the STW ref + // processor would have seen that the reference object had already + // been 'discovered' and would have skipped discovering the reference, + // but would not have treated the reference object as a regular oop. + // As a reult the copy closure would not have been applied to the + // referent object. + // + // We need to explicitly copy these referent objects - the references + // will be processed at the end of remarking. + // + // We also need to do this copying before we process the reference + // objects discovered by the STW ref processor in case one of these + // referents points to another object which is also referenced by an + // object discovered by the STW ref processor. + + int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? + workers()->total_workers() : 1); + + set_par_threads(n_workers); + G1ParPreserveCMReferentsTask keep_cm_referents(this, n_workers, _task_queues); + + if (G1CollectedHeap::use_parallel_gc_threads()) { + workers()->run_task(&keep_cm_referents); + } else { + keep_cm_referents.work(0); + } + + set_par_threads(0); + + // Closure to test whether a referent is alive. + G1STWIsAliveClosure is_alive(this); + + // Even when parallel reference processing is enabled, the processing + // of JNI refs is serial and performed serially by the current thread + // rather than by a worker. The following PSS will be used for processing + // JNI refs. + + // Use only a single queue for this PSS. + G1ParScanThreadState pss(this, 0); + + // We do not embed a reference processor in the copying/scanning + // closures while we're actually processing the discovered + // reference objects. + G1ParScanHeapEvacClosure scan_evac_cl(this, &pss, NULL); + G1ParScanHeapEvacFailureClosure evac_failure_cl(this, &pss, NULL); + G1ParScanPartialArrayClosure partial_scan_cl(this, &pss, NULL); + + pss.set_evac_closure(&scan_evac_cl); + pss.set_evac_failure_closure(&evac_failure_cl); + pss.set_partial_scan_closure(&partial_scan_cl); + + assert(pss.refs()->is_empty(), "pre-condition"); + + G1ParScanExtRootClosure only_copy_non_heap_cl(this, &pss, NULL); + G1ParScanPermClosure only_copy_perm_cl(this, &pss, NULL); + + G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, &pss, NULL); + G1ParScanAndMarkPermClosure copy_mark_perm_cl(this, &pss, NULL); + + OopClosure* copy_non_heap_cl = &only_copy_non_heap_cl; + OopsInHeapRegionClosure* copy_perm_cl = &only_copy_perm_cl; + + if (_g1h->g1_policy()->during_initial_mark_pause()) { + // We also need to mark copied objects. + copy_non_heap_cl = ©_mark_non_heap_cl; + copy_perm_cl = ©_mark_perm_cl; + } + + // Keep alive closure. + G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, copy_perm_cl, &pss); + + // Serial Complete GC closure + G1STWDrainQueueClosure drain_queue(this, &pss); + + // Setup the soft refs policy... + rp->setup_policy(false); + + if (!rp->processing_is_mt()) { + // Serial reference processing... 
+ rp->process_discovered_references(&is_alive, + &keep_alive, + &drain_queue, + NULL); + } else { + // Parallel reference processing + int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); + assert(rp->num_q() == active_workers, "sanity"); + assert(active_workers <= rp->max_num_q(), "sanity"); + + G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, active_workers); + rp->process_discovered_references(&is_alive, &keep_alive, &drain_queue, &par_task_executor); + } + + // We have completed copying any necessary live referent objects + // (that were not copied during the actual pause) so we can + // retire any active alloc buffers + pss.retire_alloc_buffers(); + assert(pss.refs()->is_empty(), "both queue and overflow should be empty"); + + double ref_proc_time = os::elapsedTime() - ref_proc_start; + g1_policy()->record_ref_proc_time(ref_proc_time * 1000.0); +} + +// Weak Reference processing during an evacuation pause (part 2). +void G1CollectedHeap::enqueue_discovered_references() { + double ref_enq_start = os::elapsedTime(); + + ReferenceProcessor* rp = _ref_processor_stw; + assert(!rp->discovery_enabled(), "should have been disabled as part of processing"); + + // Now enqueue any remaining on the discovered lists on to + // the pending list. + if (!rp->processing_is_mt()) { + // Serial reference processing... + rp->enqueue_discovered_references(); + } else { + // Parallel reference enqueuing + + int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); + assert(rp->num_q() == active_workers, "sanity"); + assert(active_workers <= rp->max_num_q(), "sanity"); + + G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, active_workers); + rp->enqueue_discovered_references(&par_task_executor); + } + + rp->verify_no_references_recorded(); + assert(!rp->discovery_enabled(), "should have been disabled"); + + // FIXME + // CM's reference processing also cleans up the string and symbol tables. + // Should we do that here also? We could, but it is a serial operation + // and could signicantly increase the pause time. + + double ref_enq_time = os::elapsedTime() - ref_enq_start; + g1_policy()->record_ref_enq_time(ref_enq_time * 1000.0); +} + void G1CollectedHeap::evacuate_collection_set() { set_evacuation_failed(false); @@ -4658,6 +5234,7 @@ assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty"); double start_par = os::elapsedTime(); + if (G1CollectedHeap::use_parallel_gc_threads()) { // The individual threads will set their evac-failure closures. StrongRootsScope srs(this); @@ -4672,15 +5249,23 @@ g1_policy()->record_par_time(par_time); set_par_threads(0); + // Process any discovered reference objects - we have + // to do this _before_ we retire the GC alloc regions + // as we may have to copy some 'reachable' referent + // objects (and their reachable sub-graphs) that were + // not copied during the pause. + process_discovered_references(); + // Weak root processing. // Note: when JSR 292 is enabled and code blobs can contain // non-perm oops then we will need to process the code blobs // here too. { - G1IsAliveClosure is_alive(this); + G1STWIsAliveClosure is_alive(this); G1KeepAliveClosure keep_alive(this); JNIHandles::weak_oops_do(&is_alive, &keep_alive); } + release_gc_alloc_regions(); g1_rem_set()->cleanup_after_oops_into_collection_set_do(); @@ -4702,6 +5287,15 @@ } } + // Enqueue any remaining references remaining on the STW + // reference processor's discovered lists. 
We need to do + // this after the card table is cleaned (and verified) as + // the act of enqueuing entries on to the pending list + // will log these updates (and dirty their associated + // cards). We need these updates logged to update any + // RSets. + enqueue_discovered_references(); + if (G1DeferredRSUpdate) { RedirtyLoggedCardTableEntryFastClosure redirty; dirty_card_queue_set().set_closure(&redirty); @@ -4902,7 +5496,7 @@ } double elapsed = os::elapsedTime() - start; - g1_policy()->record_clear_ct_time( elapsed * 1000.0); + g1_policy()->record_clear_ct_time(elapsed * 1000.0); #ifndef PRODUCT if (G1VerifyCTCleanup || VerifyAfterGC) { G1VerifyCardTableCleanup cleanup_verifier(this, ct_bs); --- old/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp 2011-09-14 14:54:11.838199694 -0700 +++ new/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp 2011-09-14 14:54:11.588800763 -0700 @@ -155,6 +155,19 @@ : G1AllocRegion("Mutator Alloc Region", false /* bot_updates */) { } }; +// The G1 STW is alive closure. +// An instance is embedded into the G1CH and used as the +// (optional) _is_alive_non_header closure in the STW +// reference processor. It is also extensively used during +// refence processing during STW evacuation pauses. +class G1STWIsAliveClosure: public BoolObjectClosure { + G1CollectedHeap* _g1; +public: + G1STWIsAliveClosure(G1CollectedHeap* g1) : _g1(g1) {} + void do_object(oop p) { assert(false, "Do not call."); } + bool do_object_b(oop p); +}; + class SurvivorGCAllocRegion : public G1AllocRegion { protected: virtual HeapRegion* allocate_new_region(size_t word_size, bool force); @@ -174,6 +187,7 @@ }; class RefineCardTableEntryClosure; + class G1CollectedHeap : public SharedHeap { friend class VM_G1CollectForAllocation; friend class VM_GenCollectForPermanentAllocation; @@ -573,6 +587,14 @@ // allocated block, or else "NULL". HeapWord* expand_and_allocate(size_t word_size); + // Process any reference objects discovered during + // an incremental evacuation pause. + void process_discovered_references(); + + // Enqueue any remaining discovered references + // after processing. + void enqueue_discovered_references(); + public: G1MonitoringSupport* g1mm() { return _g1mm; } @@ -825,14 +847,83 @@ oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj); void handle_evacuation_failure_common(oop obj, markOop m); - // Instance of the concurrent mark is_alive closure for embedding - // into the reference processor as the is_alive_non_header. This - // prevents unnecessary additions to the discovered lists during - // concurrent discovery. - G1CMIsAliveClosure _is_alive_closure; + // ("Weak") Reference processing support. + // + // G1 has 2 instances of the referece processor class. One + // (_ref_processor_cm) handles reference object discovery + // and subsequent processing during concurrent marking cycles. + // + // The other (_ref_processor_stw) handles reference object + // discovery and processing during full GCs and incremental + // evacuation pauses. + // + // During an incremental pause, reference discovery will be + // temporarily disabled for _ref_processor_cm and will be + // enabled for _ref_processor_stw. At the end of the evacuation + // pause references discovered by _ref_processor_stw will be + // processed and discovery will be disabled. The previous + // setting for reference object discovery for _ref_processor_cm + // will be re-instated. 
+ // + // At the start of marking: + // * Discovery by the CM ref processor is verified to be inactive + // and its discovered lists are empty. + // * Discovery by the CM ref processor is then enabled. + // + // At the end of marking: + // * Any references on the CM ref processor's discovered + // lists are processed (possibly MT). + // + // At the start of full GC we: + // * Disable discovery by the CM ref processor and + // empty the CM ref processor's discovered lists + // (without processing any entries). + // * Verify that the STW ref processor is inactive and its + // discovered lists are empty. + // * Temporarily set STW ref processor discovery as single-threaded. + // * Temporarily clear the STW ref processor's _is_alive_non_header + // field. + // * Finally enable discovery by the STW ref processor. + // + // The STW ref processor is used to record any discovered + // references during the full GC. + // + // At the end of a full GC we: + // * Enqueue any reference objects discovered by the STW ref processor + // that have non-live referents. This has the side-effect of + // making the STW ref processor inactive by disabling discovery. + // * Verify that the CM ref processor is still inactive + // and no references have been placed on its discovered + // lists (also checked as a precondition during initial marking). + + // The (STW) reference processor... + ReferenceProcessor* _ref_processor_stw; + + // During reference object discovery, the _is_alive_non_header + // closure (if non-null) is applied to the referent object to + // determine whether the referent is live. If so, then the + // reference object does not need to be 'discovered' and can + // be treated as a regular oop. This has the benefit of reducing + // the number of 'discovered' reference objects that need to + // be processed. + // + // Instance of the is_alive closure for embedding into the + // STW reference processor as the _is_alive_non_header field. + // Supplying a value for the _is_alive_non_header field is + // optional but doing so prevents unnecessary additions to + // the discovered lists during reference discovery. + G1STWIsAliveClosure _is_alive_closure_stw; - // ("Weak") Reference processing support - ReferenceProcessor* _ref_processor; + // The (concurrent marking) reference processor... + ReferenceProcessor* _ref_processor_cm; + + // Instance of the concurrent mark is_alive closure for embedding + // into the Concurrent Marking reference processor as the + // _is_alive_non_header field. Supplying a value for the + // _is_alive_non_header field is optional but doing so prevents + // unnecessary additions to the discovered lists during reference + // discovery. + G1CMIsAliveClosure _is_alive_closure_cm; enum G1H_process_strong_roots_tasks { G1H_PS_mark_stack_oops_do, @@ -873,6 +964,7 @@ // specified by the policy object. jint initialize(); + // Initialize weak reference processing. virtual void ref_processing_init(); void set_par_threads(int t) { @@ -924,8 +1016,13 @@ // The shared block offset table array. G1BlockOffsetSharedArray* bot_shared() const { return _bot_shared; } - // Reference Processing accessor - ReferenceProcessor* ref_processor() { return _ref_processor; } + // Reference Processing accessors + + // The STW reference processor... + ReferenceProcessor* ref_processor_stw() const { return _ref_processor_stw; } + + // The Concurrent Marking reference processor...
+ ReferenceProcessor* ref_processor_cm() const { return _ref_processor_cm; } virtual size_t capacity() const; virtual size_t used() const; --- old/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp 2011-09-14 14:54:13.341348312 -0700 +++ new/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp 2011-09-14 14:54:13.028244849 -0700 @@ -152,8 +152,12 @@ _summary(new Summary()), -#ifndef PRODUCT _cur_clear_ct_time_ms(0.0), + + _cur_ref_proc_time_ms(0.0), + _cur_ref_enq_time_ms(0.0), + +#ifndef PRODUCT _min_clear_cc_time_ms(-1.0), _max_clear_cc_time_ms(-1.0), _cur_clear_cc_time_ms(0.0), @@ -1479,6 +1483,8 @@ #endif print_stats(1, "Other", other_time_ms); print_stats(2, "Choose CSet", _recorded_young_cset_choice_time_ms); + print_stats(2, "Ref Proc", _cur_ref_proc_time_ms); + print_stats(2, "Ref Enq", _cur_ref_enq_time_ms); for (int i = 0; i < _aux_num; ++i) { if (_cur_aux_times_set[i]) { --- old/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp 2011-09-14 14:54:14.805298363 -0700 +++ new/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp 2011-09-14 14:54:14.543677370 -0700 @@ -119,6 +119,8 @@ double _cur_satb_drain_time_ms; double _cur_clear_ct_time_ms; bool _satb_drain_time_set; + double _cur_ref_proc_time_ms; + double _cur_ref_enq_time_ms; #ifndef PRODUCT // Card Table Count Cache stats @@ -986,6 +988,14 @@ _cur_aux_times_ms[i] += ms; } + void record_ref_proc_time(double ms) { + _cur_ref_proc_time_ms = ms; + } + + void record_ref_enq_time(double ms) { + _cur_ref_enq_time_ms = ms; + } + #ifndef PRODUCT void record_cc_clear_time(double ms) { if (_min_clear_cc_time_ms < 0.0 || ms <= _min_clear_cc_time_ms) --- old/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp 2011-09-14 14:54:16.207305142 -0700 +++ new/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp 2011-09-14 14:54:15.965689280 -0700 @@ -62,6 +62,8 @@ // hook up weak ref data so it can be used during Mark-Sweep assert(GenMarkSweep::ref_processor() == NULL, "no stomping"); assert(rp != NULL, "should be non-NULL"); + assert(rp == G1CollectedHeap::heap()->ref_processor_stw(), "Precondition"); + GenMarkSweep::_ref_processor = rp; rp->setup_policy(clear_all_softrefs); @@ -139,6 +141,8 @@ // Process reference objects found during marking ReferenceProcessor* rp = GenMarkSweep::ref_processor(); + assert(rp == G1CollectedHeap::heap()->ref_processor_stw(), "Sanity"); + rp->setup_policy(clear_all_softrefs); rp->process_discovered_references(&GenMarkSweep::is_alive, &GenMarkSweep::keep_alive, @@ -166,7 +170,6 @@ GenMarkSweep::follow_mdo_weak_refs(); assert(GenMarkSweep::_marking_stack.is_empty(), "just drained"); - // Visit interned string tables and delete unmarked oops StringTable::unlink(&GenMarkSweep::is_alive); // Clean up unreferenced symbols in symbol table. @@ -346,7 +349,8 @@ NULL, // do not touch code cache here &GenMarkSweep::adjust_pointer_closure); - g1h->ref_processor()->weak_oops_do(&GenMarkSweep::adjust_root_pointer_closure); + assert(GenMarkSweep::ref_processor() == g1h->ref_processor_stw(), "Sanity"); + g1h->ref_processor_stw()->weak_oops_do(&GenMarkSweep::adjust_root_pointer_closure); // Now adjust pointers in remaining weak roots. (All of which should // have been cleared if they pointed to non-surviving objects.) 
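The accessor split above (ref_processor_stw() / ref_processor_cm()), together with the new two-argument enable_discovery(), is easiest to see at a call site. The following is a minimal sketch and is not part of the patch: the helper function name is invented, and the exact sequence a G1 full GC uses is assumed from the header comments in g1CollectedHeap.hpp above.

  #include "gc_implementation/g1/g1CollectedHeap.hpp"

  // Hypothetical helper illustrating the start-of-full-GC protocol described
  // in the header comment: shut down any in-progress CM discovery, then turn
  // on discovery by the STW reference processor with explicit verification.
  static void example_start_full_gc_ref_discovery() {
    G1CollectedHeap* g1h = G1CollectedHeap::heap();

    // Concurrent marking has its own processor; a full GC abandons any
    // partially discovered references without processing them.
    ReferenceProcessor* cm_rp = g1h->ref_processor_cm();
    cm_rp->disable_discovery();
    cm_rp->abandon_partial_discovery();
    cm_rp->verify_no_references_recorded();

    // Full GCs and evacuation pauses record their discoveries on the
    // STW processor; the new flags make the entry-state checks explicit.
    ReferenceProcessor* stw_rp = g1h->ref_processor_stw();
    stw_rp->enable_discovery(true /* verify_disabled */, true /* check_no_refs */);
  }
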
--- old/src/share/vm/gc_implementation/g1/g1OopClosures.hpp 2011-09-14 14:54:17.430514500 -0700 +++ new/src/share/vm/gc_implementation/g1/g1OopClosures.hpp 2011-09-14 14:54:17.205275062 -0700 @@ -34,6 +34,7 @@ class CMMarkStack; class G1ParScanThreadState; class CMTask; +class ReferenceProcessor; // A class that scans oops in a given heap region (much as OopsInGenClosure // scans oops in a generation.) @@ -59,8 +60,15 @@ class G1ParPushHeapRSClosure : public G1ParClosureSuper { public: - G1ParPushHeapRSClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : - G1ParClosureSuper(g1, par_scan_state) { } + G1ParPushHeapRSClosure(G1CollectedHeap* g1, + G1ParScanThreadState* par_scan_state, + ReferenceProcessor* rp) : + G1ParClosureSuper(g1, par_scan_state) + { + assert(_ref_processor == NULL, "sanity"); + _ref_processor = rp; + } + template void do_oop_nv(T* p); virtual void do_oop(oop* p) { do_oop_nv(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } @@ -68,8 +76,13 @@ class G1ParScanClosure : public G1ParClosureSuper { public: - G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : - G1ParClosureSuper(g1, par_scan_state) { } + G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state, ReferenceProcessor* rp) : + G1ParClosureSuper(g1, par_scan_state) + { + assert(_ref_processor == NULL, "sanity"); + _ref_processor = rp; + } + template void do_oop_nv(T* p); virtual void do_oop(oop* p) { do_oop_nv(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } @@ -92,9 +105,18 @@ class G1ParScanPartialArrayClosure : public G1ParClosureSuper { G1ParScanClosure _scanner; + public: - G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : - G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state) { } + G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state, ReferenceProcessor* rp) : + G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state, rp) + { + assert(_ref_processor == NULL, "sanity"); + } + + G1ParScanClosure* scanner() { + return &_scanner; + } + template void do_oop_nv(T* p); virtual void do_oop(oop* p) { do_oop_nv(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } @@ -116,10 +138,20 @@ bool do_mark_object> class G1ParCopyClosure : public G1ParCopyHelper { G1ParScanClosure _scanner; + template void do_oop_work(T* p); + public: - G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) : - _scanner(g1, par_scan_state), G1ParCopyHelper(g1, par_scan_state, &_scanner) { } + G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state, + ReferenceProcessor* rp) : + _scanner(g1, par_scan_state, rp), + G1ParCopyHelper(g1, par_scan_state, &_scanner) + { + assert(_ref_processor == NULL, "sanity"); + } + + G1ParScanClosure* scanner() { return &_scanner; } + template void do_oop_nv(T* p) { do_oop_work(p); } @@ -129,21 +161,25 @@ typedef G1ParCopyClosure G1ParScanExtRootClosure; typedef G1ParCopyClosure G1ParScanPermClosure; -typedef G1ParCopyClosure G1ParScanHeapRSClosure; + typedef G1ParCopyClosure G1ParScanAndMarkExtRootClosure; typedef G1ParCopyClosure G1ParScanAndMarkPermClosure; -typedef G1ParCopyClosure G1ParScanAndMarkHeapRSClosure; -// This is the only case when we set skip_cset_test. Basically, this -// closure is (should?) only be called directly while we're draining -// the overflow and task queues. 
In that case we know that the -// reference in question points into the collection set, otherwise we -// would not have pushed it on the queue. The following is defined in -// g1_specialized_oop_closures.hpp. -// typedef G1ParCopyClosure G1ParScanHeapEvacClosure; -// We need a separate closure to handle references during evacuation -// failure processing, as we cannot asume that the reference already -// points into the collection set (like G1ParScanHeapEvacClosure does). +// The following closure types are no longer used but are retained +// for historical reasons: +// typedef G1ParCopyClosure G1ParScanHeapRSClosure; +// typedef G1ParCopyClosure G1ParScanAndMarkHeapRSClosure; + +// The following closure type is defined in g1_specialized_oop_closures.hpp: +// +// typedef G1ParCopyClosure G1ParScanHeapEvacClosure; + +// We use a separate closure to handle references during evacuation +// failure processing. +// We could have used another instance of G1ParScanHeapEvacClosure +// (since that closure no longer assumes that the references it +// handles point into the collection set). + typedef G1ParCopyClosure G1ParScanHeapEvacFailureClosure; class FilterIntoCSClosure: public OopClosure { @@ -152,9 +188,15 @@ DirtyCardToOopClosure* _dcto_cl; public: FilterIntoCSClosure( DirtyCardToOopClosure* dcto_cl, - G1CollectedHeap* g1, OopClosure* oc) : + G1CollectedHeap* g1, + OopClosure* oc, + ReferenceProcessor* rp) : _dcto_cl(dcto_cl), _g1(g1), _oc(oc) - {} + { + assert(_ref_processor == NULL, "sanity"); + _ref_processor = rp; + } + template void do_oop_nv(T* p); virtual void do_oop(oop* p) { do_oop_nv(p); } virtual void do_oop(narrowOop* p) { do_oop_nv(p); } --- old/src/share/vm/gc_implementation/g1/g1RemSet.cpp 2011-09-14 14:54:18.636745451 -0700 +++ new/src/share/vm/gc_implementation/g1/g1RemSet.cpp 2011-09-14 14:54:18.405762409 -0700 @@ -234,6 +234,7 @@ HeapRegion *startRegion = calculateStartRegion(worker_i); ScanRSClosure scanRScl(oc, worker_i); + _g1->collection_set_iterate_from(startRegion, &scanRScl); scanRScl.set_try_claimed(); _g1->collection_set_iterate_from(startRegion, &scanRScl); @@ -283,6 +284,7 @@ double start = os::elapsedTime(); // Apply the given closure to all remaining log entries. RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq); + _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i); // Now there should be no dirty cards. 
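With the extra ReferenceProcessor* parameter added to FilterIntoCSClosure above, call sites now fall into two patterns. The sketch below is illustrative only, not part of the patch; the closure arguments other than the processor are hypothetical placeholders. The NULL form matches the g1RemSet.cpp hunks that follow, while the non-NULL form matches the remembered-set scanning path in heapRegion.cpp.

  #include "gc_implementation/g1/g1CollectedHeap.hpp"
  #include "gc_implementation/g1/g1OopClosures.hpp"

  // Sketch of the two construction patterns for the widened constructor.
  // The filters would subsequently be applied to a MemRegion (omitted here).
  static void example_filter_into_cs(DirtyCardToOopClosure* dcto_cl,
                                     OopClosure* scan_cl,
                                     OopClosure* update_cl) {
    G1CollectedHeap* g1h = G1CollectedHeap::heap();

    // Remembered-set scanning during an evacuation pause: pass the STW
    // reference processor so refs into the collection set get discovered.
    FilterIntoCSClosure discovering_filter(dcto_cl, g1h, scan_cl,
                                           g1h->ref_processor_stw());

    // Verification / immediate RSet update paths: no discovery wanted.
    FilterIntoCSClosure plain_filter(NULL, g1h, update_cl, NULL /* rp */);
  }
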
@@ -466,7 +468,7 @@ MemRegion scanRegion(start, end); UpdateRSetImmediate update_rs_cl(_g1->g1_rem_set()); - FilterIntoCSClosure update_rs_cset_oop_cl(NULL, _g1, &update_rs_cl); + FilterIntoCSClosure update_rs_cset_oop_cl(NULL, _g1, &update_rs_cl, NULL /* rp */); FilterOutOfRegionClosure filter_then_update_rs_cset_oop_cl(r, &update_rs_cset_oop_cl); // We can pass false as the "filter_young" parameter here as: @@ -642,7 +644,7 @@ update_rs_oop_cl.set_from(r); TriggerClosure trigger_cl; - FilterIntoCSClosure into_cs_cl(NULL, _g1, &trigger_cl); + FilterIntoCSClosure into_cs_cl(NULL, _g1, &trigger_cl, NULL /* rp */); InvokeIfNotTriggeredClosure invoke_cl(&trigger_cl, &into_cs_cl); Mux2Closure mux(&invoke_cl, &update_rs_oop_cl); --- old/src/share/vm/gc_implementation/g1/heapRegion.cpp 2011-09-14 14:54:19.853282145 -0700 +++ new/src/share/vm/gc_implementation/g1/heapRegion.cpp 2011-09-14 14:54:19.628941365 -0700 @@ -45,7 +45,7 @@ FilterKind fk) : ContiguousSpaceDCTOC(hr, cl, precision, NULL), _hr(hr), _fk(fk), _g1(g1) -{} +{ } FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r, OopClosure* oc) : @@ -214,8 +214,37 @@ int oop_size; OopClosure* cl2 = cl; - FilterIntoCSClosure intoCSFilt(this, g1h, cl); + + // If we are scanning the remembered sets looking for refs + // into the collection set during an evacuation pause, then + // we will want to 'discover' reference objects that point + // to referents in the collection set. + // + // Unfortunately, it is an instance of FilterIntoCSClosure + // (and not the G1ParPushHeapRSClosure passed in as the cl + // parameter) that is iterated over the reference fields of + // the oops in mr. + // If we set the _ref_processor field in the FilterIntoCSClosure + // instance, all the reference objects that are walked + // (regardless of whether their referent objects are in + // the cset) will be 'discovered'. + // + // The G1STWIsAliveClosure considers a referent object that + // is outside the cset to be alive. The G1CopyingKeepAliveClosure + // skips referents that are not in the cset. + // + // Therefore, reference objects in mr with a referent that is + // outside the cset should be OK.
+ + ReferenceProcessor* rp = _cl->_ref_processor; + if (rp != NULL) { + assert(rp == _g1->ref_processor_stw(), "should be stw"); + assert(_fk == IntoCSFilterKind, "should be looking for refs into CS"); + } + + FilterIntoCSClosure intoCSFilt(this, g1h, cl, rp); FilterOutOfRegionClosure outOfRegionFilt(_hr, cl); + switch (_fk) { case IntoCSFilterKind: cl2 = &intoCSFilt; break; case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break; @@ -239,16 +268,19 @@ case NoFilterKind: bottom = walk_mem_region_loop(cl, g1h, _hr, bottom, top); break; + case IntoCSFilterKind: { - FilterIntoCSClosure filt(this, g1h, cl); + FilterIntoCSClosure filt(this, g1h, cl, rp); bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top); break; } + case OutOfRegionFilterKind: { FilterOutOfRegionClosure filt(_hr, cl); bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top); break; } + default: ShouldNotReachHere(); } @@ -483,7 +515,7 @@ HeapRegion(size_t hrs_index, G1BlockOffsetSharedArray* sharedOffsetArray, MemRegion mr, bool is_zeroed) : G1OffsetTableContigSpace(sharedOffsetArray, mr, is_zeroed), - _next_fk(HeapRegionDCTOC::NoFilterKind), _hrs_index(hrs_index), + _hrs_index(hrs_index), _humongous_type(NotHumongous), _humongous_start_region(NULL), _in_collection_set(false), _next_in_special_set(NULL), _orig_end(NULL), --- old/src/share/vm/gc_implementation/g1/heapRegion.hpp 2011-09-14 14:54:21.117324327 -0700 +++ new/src/share/vm/gc_implementation/g1/heapRegion.hpp 2011-09-14 14:54:20.883890365 -0700 @@ -118,7 +118,6 @@ FilterKind fk); }; - // The complicating factor is that BlockOffsetTable diverged // significantly, and we need functionality that is only in the G1 version. // So I copied that code, which led to an alternate G1 version of @@ -223,10 +222,6 @@ ContinuesHumongous }; - // The next filter kind that should be used for a "new_dcto_cl" call with - // the "traditional" signature. - HeapRegionDCTOC::FilterKind _next_fk; - // Requires that the region "mr" be dense with objects, and begin and end // with an object. void oops_in_mr_iterate(MemRegion mr, OopClosure* cl); @@ -573,40 +568,14 @@ // allocated in the current region before the last call to "save_mark". void oop_before_save_marks_iterate(OopClosure* cl); - // This call determines the "filter kind" argument that will be used for - // the next call to "new_dcto_cl" on this region with the "traditional" - // signature (i.e., the call below.) The default, in the absence of a - // preceding call to this method, is "NoFilterKind", and a call to this - // method is necessary for each such call, or else it reverts to the - // default. - // (This is really ugly, but all other methods I could think of changed a - // lot of main-line code for G1.) - void set_next_filter_kind(HeapRegionDCTOC::FilterKind nfk) { - _next_fk = nfk; - } - DirtyCardToOopClosure* new_dcto_closure(OopClosure* cl, CardTableModRefBS::PrecisionStyle precision, HeapRegionDCTOC::FilterKind fk); -#if WHASSUP - DirtyCardToOopClosure* - new_dcto_closure(OopClosure* cl, - CardTableModRefBS::PrecisionStyle precision, - HeapWord* boundary) { - assert(boundary == NULL, "This arg doesn't make sense here."); - DirtyCardToOopClosure* res = new_dcto_closure(cl, precision, _next_fk); - _next_fk = HeapRegionDCTOC::NoFilterKind; - return res; - } -#endif - - // // Note the start or end of marking. This tells the heap region // that the collector is about to start or has finished (concurrently) // marking the heap. - // // Note the start of a marking phase. 
Record the // start of the unmarked area of the region here. --- old/src/share/vm/gc_implementation/g1/satbQueue.cpp 2011-09-14 14:54:22.336124275 -0700 +++ new/src/share/vm/gc_implementation/g1/satbQueue.cpp 2011-09-14 14:54:22.107821898 -0700 @@ -29,6 +29,7 @@ #include "memory/sharedHeap.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/thread.hpp" +#include "runtime/vmThread.hpp" // This method removes entries from an SATB buffer that will not be // useful to the concurrent marking threads. An entry is removed if it @@ -252,9 +253,18 @@ t->satb_mark_queue().apply_closure(_par_closures[worker]); } } - // We'll have worker 0 do this one. - if (worker == 0) { - shared_satb_queue()->apply_closure(_par_closures[0]); + + // We also need to claim the VMThread so that its parity is updated + // otherwise the next call to Thread::possibly_parallel_oops_do inside + // a StrongRootsScope might skip the VMThread because it has a stale + // parity that matches the parity set by the StrongRootsScope + // + // Whichever worker succeeds in claiming the VMThread gets to do + // the shared queue. + + VMThread* vmt = VMThread::vm_thread(); + if (vmt->claim_oops_do(true, parity)) { + shared_satb_queue()->apply_closure(_par_closures[worker]); } } --- old/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp 2011-09-14 14:54:23.591700613 -0700 +++ new/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp 2011-09-14 14:54:23.363303611 -0700 @@ -198,10 +198,9 @@ allocate_stacks(); - NOT_PRODUCT(ref_processor()->verify_no_references_recorded()); COMPILER2_PRESENT(DerivedPointerTable::clear()); - ref_processor()->enable_discovery(); + ref_processor()->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/); ref_processor()->setup_policy(clear_all_softrefs); mark_sweep_phase1(clear_all_softrefs); --- old/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp 2011-09-14 14:54:24.813499927 -0700 +++ new/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp 2011-09-14 14:54:24.569957989 -0700 @@ -2069,10 +2069,9 @@ CodeCache::gc_prologue(); Threads::gc_prologue(); - NOT_PRODUCT(ref_processor()->verify_no_references_recorded()); COMPILER2_PRESENT(DerivedPointerTable::clear()); - ref_processor()->enable_discovery(); + ref_processor()->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/); ref_processor()->setup_policy(maximum_heap_compaction); bool marked_for_unloading = false; --- old/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp 2011-09-14 14:54:26.174324669 -0700 +++ new/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp 2011-09-14 14:54:25.940550632 -0700 @@ -350,10 +350,9 @@ } save_to_space_top_before_gc(); - NOT_PRODUCT(reference_processor()->verify_no_references_recorded()); COMPILER2_PRESENT(DerivedPointerTable::clear()); - reference_processor()->enable_discovery(); + reference_processor()->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/); reference_processor()->setup_policy(false); // We track how much was promoted to the next generation for --- old/src/share/vm/memory/genCollectedHeap.cpp 2011-09-14 14:54:27.396624951 -0700 +++ new/src/share/vm/memory/genCollectedHeap.cpp 2011-09-14 14:54:27.166473191 -0700 @@ -599,8 +599,7 @@ // atomic wrt other collectors in this configuration, we // are guaranteed to have empty discovered ref lists. 
if (rp->discovery_is_atomic()) { - rp->verify_no_references_recorded(); - rp->enable_discovery(); + rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/); rp->setup_policy(do_clear_all_soft_refs); } else { // collect() below will enable discovery as appropriate --- old/src/share/vm/memory/referenceProcessor.cpp 2011-09-14 14:54:29.942556708 -0700 +++ new/src/share/vm/memory/referenceProcessor.cpp 2011-09-14 14:54:29.652060505 -0700 @@ -35,42 +35,8 @@ ReferencePolicy* ReferenceProcessor::_always_clear_soft_ref_policy = NULL; ReferencePolicy* ReferenceProcessor::_default_soft_ref_policy = NULL; -const int subclasses_of_ref = REF_PHANTOM - REF_OTHER; bool ReferenceProcessor::_pending_list_uses_discovered_field = false; -// List of discovered references. -class DiscoveredList { -public: - DiscoveredList() : _len(0), _compressed_head(0), _oop_head(NULL) { } - oop head() const { - return UseCompressedOops ? oopDesc::decode_heap_oop(_compressed_head) : - _oop_head; - } - HeapWord* adr_head() { - return UseCompressedOops ? (HeapWord*)&_compressed_head : - (HeapWord*)&_oop_head; - } - void set_head(oop o) { - if (UseCompressedOops) { - // Must compress the head ptr. - _compressed_head = oopDesc::encode_heap_oop(o); - } else { - _oop_head = o; - } - } - bool empty() const { return head() == NULL; } - size_t length() { return _len; } - void set_length(size_t len) { _len = len; } - void inc_length(size_t inc) { _len += inc; assert(_len > 0, "Error"); } - void dec_length(size_t dec) { _len -= dec; } -private: - // Set value depending on UseCompressedOops. This could be a template class - // but then we have to fix all the instantiations and declarations that use this class. - oop _oop_head; - narrowOop _compressed_head; - size_t _len; -}; - void referenceProcessor_init() { ReferenceProcessor::init_statics(); } @@ -112,7 +78,8 @@ _discovery_is_mt = mt_discovery; _num_q = MAX2(1, mt_processing_degree); _max_num_q = MAX2(_num_q, mt_discovery_degree); - _discoveredSoftRefs = NEW_C_HEAP_ARRAY(DiscoveredList, _max_num_q * subclasses_of_ref); + _discoveredSoftRefs = NEW_C_HEAP_ARRAY(DiscoveredList, + _max_num_q * number_of_subclasses_of_ref()); if (_discoveredSoftRefs == NULL) { vm_exit_during_initialization("Could not allocated RefProc Array"); } @@ -120,7 +87,7 @@ _discoveredFinalRefs = &_discoveredWeakRefs[_max_num_q]; _discoveredPhantomRefs = &_discoveredFinalRefs[_max_num_q]; // Initialized all entries to NULL - for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { + for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) { _discoveredSoftRefs[i].set_head(NULL); _discoveredSoftRefs[i].set_length(0); } @@ -134,19 +101,22 @@ #ifndef PRODUCT void ReferenceProcessor::verify_no_references_recorded() { guarantee(!_discovering_refs, "Discovering refs?"); - for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { - guarantee(_discoveredSoftRefs[i].empty(), + for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) { + guarantee(_discoveredSoftRefs[i].is_empty(), "Found non-empty discovered list"); } } #endif void ReferenceProcessor::weak_oops_do(OopClosure* f) { - // Should this instead be - // for (int i = 0; i < subclasses_of_ref; i++_ { + // An alternative implementation of this routine + // could use the following nested loop: + // + // for (int i = 0; i < number_of_subclasses_of_ref(); i++_ { // for (int j = 0; j < _num_q; j++) { // int index = i * _max_num_q + j; - for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { + + for (int i = 0; i < _max_num_q 
* number_of_subclasses_of_ref(); i++) { if (UseCompressedOops) { f->do_oop((narrowOop*)_discoveredSoftRefs[i].adr_head()); } else { @@ -404,7 +374,7 @@ // allocated and are indexed into. assert(_n_queues == (int) _ref_processor.max_num_q(), "Different number not expected"); for (int j = 0; - j < subclasses_of_ref; + j < ReferenceProcessor::number_of_subclasses_of_ref(); j++, index += _n_queues) { _ref_processor.enqueue_discovered_reflist( _refs_lists[index], _pending_list_addr); @@ -424,7 +394,7 @@ task_executor->execute(tsk); } else { // Serial code: call the parent class's implementation - for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { + for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) { enqueue_discovered_reflist(_discoveredSoftRefs[i], pending_list_addr); _discoveredSoftRefs[i].set_head(NULL); _discoveredSoftRefs[i].set_length(0); @@ -432,119 +402,7 @@ } } -// Iterator for the list of discovered references. -class DiscoveredListIterator { -public: - inline DiscoveredListIterator(DiscoveredList& refs_list, - OopClosure* keep_alive, - BoolObjectClosure* is_alive); - - // End Of List. - inline bool has_next() const { return _ref != NULL; } - - // Get oop to the Reference object. - inline oop obj() const { return _ref; } - - // Get oop to the referent object. - inline oop referent() const { return _referent; } - - // Returns true if referent is alive. - inline bool is_referent_alive() const; - - // Loads data for the current reference. - // The "allow_null_referent" argument tells us to allow for the possibility - // of a NULL referent in the discovered Reference object. This typically - // happens in the case of concurrent collectors that may have done the - // discovery concurrently, or interleaved, with mutator execution. - inline void load_ptrs(DEBUG_ONLY(bool allow_null_referent)); - - // Move to the next discovered reference. - inline void next(); - - // Remove the current reference from the list - inline void remove(); - - // Make the Reference object active again. - inline void make_active() { java_lang_ref_Reference::set_next(_ref, NULL); } - - // Make the referent alive. - inline void make_referent_alive() { - if (UseCompressedOops) { - _keep_alive->do_oop((narrowOop*)_referent_addr); - } else { - _keep_alive->do_oop((oop*)_referent_addr); - } - } - - // Update the discovered field. - inline void update_discovered() { - // First _prev_next ref actually points into DiscoveredList (gross). - if (UseCompressedOops) { - if (!oopDesc::is_null(*(narrowOop*)_prev_next)) { - _keep_alive->do_oop((narrowOop*)_prev_next); - } - } else { - if (!oopDesc::is_null(*(oop*)_prev_next)) { - _keep_alive->do_oop((oop*)_prev_next); - } - } - } - - // NULL out referent pointer. 
- inline void clear_referent() { oop_store_raw(_referent_addr, NULL); } - - // Statistics - NOT_PRODUCT( - inline size_t processed() const { return _processed; } - inline size_t removed() const { return _removed; } - ) - - inline void move_to_next(); - -private: - DiscoveredList& _refs_list; - HeapWord* _prev_next; - oop _prev; - oop _ref; - HeapWord* _discovered_addr; - oop _next; - HeapWord* _referent_addr; - oop _referent; - OopClosure* _keep_alive; - BoolObjectClosure* _is_alive; - DEBUG_ONLY( - oop _first_seen; // cyclic linked list check - ) - NOT_PRODUCT( - size_t _processed; - size_t _removed; - ) -}; - -inline DiscoveredListIterator::DiscoveredListIterator(DiscoveredList& refs_list, - OopClosure* keep_alive, - BoolObjectClosure* is_alive) - : _refs_list(refs_list), - _prev_next(refs_list.adr_head()), - _prev(NULL), - _ref(refs_list.head()), -#ifdef ASSERT - _first_seen(refs_list.head()), -#endif -#ifndef PRODUCT - _processed(0), - _removed(0), -#endif - _next(NULL), - _keep_alive(keep_alive), - _is_alive(is_alive) -{ } - -inline bool DiscoveredListIterator::is_referent_alive() const { - return _is_alive->do_object_b(_referent); -} - -inline void DiscoveredListIterator::load_ptrs(DEBUG_ONLY(bool allow_null_referent)) { +void DiscoveredListIterator::load_ptrs(DEBUG_ONLY(bool allow_null_referent)) { _discovered_addr = java_lang_ref_Reference::discovered_addr(_ref); oop discovered = java_lang_ref_Reference::discovered(_ref); assert(_discovered_addr && discovered->is_oop_or_null(), @@ -560,13 +418,7 @@ "bad referent"); } -inline void DiscoveredListIterator::next() { - _prev_next = _discovered_addr; - _prev = _ref; - move_to_next(); -} - -inline void DiscoveredListIterator::remove() { +void DiscoveredListIterator::remove() { assert(_ref->is_oop(), "Dropping a bad reference"); oop_store_raw(_discovered_addr, NULL); @@ -592,15 +444,29 @@ _refs_list.dec_length(1); } -inline void DiscoveredListIterator::move_to_next() { - if (_ref == _next) { - // End of the list. - _ref = NULL; +// Make the Reference object active again. +void DiscoveredListIterator::make_active() { + // For G1 we don't want to use set_next - it + // will dirty the card for the next field of + // the reference object and will fail + // CT verification. 
+ if (UseG1GC) { + BarrierSet* bs = oopDesc::bs(); + HeapWord* next_addr = java_lang_ref_Reference::next_addr(_ref); + + if (UseCompressedOops) { + bs->write_ref_field_pre((narrowOop*)next_addr, NULL); + } else { + bs->write_ref_field_pre((oop*)next_addr, NULL); + } + java_lang_ref_Reference::set_next_raw(_ref, NULL); } else { - _ref = _next; + java_lang_ref_Reference::set_next(_ref, NULL); } - assert(_ref != _first_seen, "cyclic ref_list found"); - NOT_PRODUCT(_processed++); +} + +void DiscoveredListIterator::clear_referent() { + oop_store_raw(_referent_addr, NULL); } // NOTE: process_phase*() are largely similar, and at a high level @@ -786,10 +652,9 @@ void ReferenceProcessor::abandon_partial_discovery() { // loop over the lists - for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { + for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) { if (TraceReferenceGC && PrintGCDetails && ((i % _max_num_q) == 0)) { - gclog_or_tty->print_cr("\nAbandoning %s discovered list", - list_name(i)); + gclog_or_tty->print_cr("\nAbandoning %s discovered list", list_name(i)); } abandon_partial_discovered_list(_discoveredSoftRefs[i]); } @@ -858,6 +723,14 @@ bool _clear_referent; }; +void ReferenceProcessor::set_discovered(oop ref, oop value) { + if (_discovered_list_needs_barrier) { + java_lang_ref_Reference::set_discovered(ref, value); + } else { + java_lang_ref_Reference::set_discovered_raw(ref, value); + } +} + // Balances reference queues. // Move entries from all queues[0, 1, ..., _max_num_q-1] to // queues[0, 1, ..., _num_q-1] because only the first _num_q @@ -915,9 +788,9 @@ // Add the chain to the to list. if (ref_lists[to_idx].head() == NULL) { // to list is empty. Make a loop at the end. - java_lang_ref_Reference::set_discovered(move_tail, move_tail); + set_discovered(move_tail, move_tail); } else { - java_lang_ref_Reference::set_discovered(move_tail, ref_lists[to_idx].head()); + set_discovered(move_tail, ref_lists[to_idx].head()); } ref_lists[to_idx].set_head(move_head); ref_lists[to_idx].inc_length(refs_to_move); @@ -1038,11 +911,15 @@ void ReferenceProcessor::clean_up_discovered_references() { // loop over the lists - // Should this instead be - // for (int i = 0; i < subclasses_of_ref; i++_ { + + // An alternative implementation of this routine could + // use the following nested loop: + // + // for (int i = 0; i < number_of_subclasses_of_ref(); i++) { // for (int j = 0; j < _num_q; j++) { // int index = i * _max_num_q + j; - for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { + + for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) { if (TraceReferenceGC && PrintGCDetails && ((i % _max_num_q) == 0)) { gclog_or_tty->print_cr( "\nScrubbing %s discovered list of Null referents", @@ -1260,6 +1137,8 @@ } } + ResourceMark rm; // Needed for tracing. 
+ HeapWord* const discovered_addr = java_lang_ref_Reference::discovered_addr(obj); const oop discovered = java_lang_ref_Reference::discovered(obj); assert(discovered->is_oop_or_null(), "bad discovered field"); @@ -1472,7 +1351,9 @@ } const char* ReferenceProcessor::list_name(int i) { - assert(i >= 0 && i <= _max_num_q * subclasses_of_ref, "Out of bounds index"); + assert(i >= 0 && i <= _max_num_q * number_of_subclasses_of_ref(), + "Out of bounds index"); + int j = i / _max_num_q; switch (j) { case 0: return "SoftRef"; @@ -1493,7 +1374,7 @@ #ifndef PRODUCT void ReferenceProcessor::clear_discovered_references() { guarantee(!_discovering_refs, "Discovering refs?"); - for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { + for (int i = 0; i < _max_num_q * number_of_subclasses_of_ref(); i++) { clear_discovered_references(_discoveredSoftRefs[i]); } } --- old/src/share/vm/memory/referenceProcessor.hpp 2011-09-14 14:54:33.491088488 -0700 +++ new/src/share/vm/memory/referenceProcessor.hpp 2011-09-14 14:54:32.950320210 -0700 @@ -48,18 +48,175 @@ // forward references class ReferencePolicy; class AbstractRefProcTaskExecutor; -class DiscoveredList; + +// List of discovered references. +class DiscoveredList { +public: + DiscoveredList() : _len(0), _compressed_head(0), _oop_head(NULL) { } + oop head() const { + return UseCompressedOops ? oopDesc::decode_heap_oop(_compressed_head) : + _oop_head; + } + HeapWord* adr_head() { + return UseCompressedOops ? (HeapWord*)&_compressed_head : + (HeapWord*)&_oop_head; + } + void set_head(oop o) { + if (UseCompressedOops) { + // Must compress the head ptr. + _compressed_head = oopDesc::encode_heap_oop(o); + } else { + _oop_head = o; + } + } + bool is_empty() const { return head() == NULL; } + size_t length() { return _len; } + void set_length(size_t len) { _len = len; } + void inc_length(size_t inc) { _len += inc; assert(_len > 0, "Error"); } + void dec_length(size_t dec) { _len -= dec; } +private: + // Set value depending on UseCompressedOops. This could be a template class + // but then we have to fix all the instantiations and declarations that use this class. + oop _oop_head; + narrowOop _compressed_head; + size_t _len; +}; + +// Iterator for the list of discovered references. +class DiscoveredListIterator { +private: + DiscoveredList& _refs_list; + HeapWord* _prev_next; + oop _prev; + oop _ref; + HeapWord* _discovered_addr; + oop _next; + HeapWord* _referent_addr; + oop _referent; + OopClosure* _keep_alive; + BoolObjectClosure* _is_alive; + + DEBUG_ONLY( + oop _first_seen; // cyclic linked list check + ) + + NOT_PRODUCT( + size_t _processed; + size_t _removed; + ) + +public: + inline DiscoveredListIterator(DiscoveredList& refs_list, + OopClosure* keep_alive, + BoolObjectClosure* is_alive): + _refs_list(refs_list), + _prev_next(refs_list.adr_head()), + _prev(NULL), + _ref(refs_list.head()), +#ifdef ASSERT + _first_seen(refs_list.head()), +#endif +#ifndef PRODUCT + _processed(0), + _removed(0), +#endif + _next(NULL), + _keep_alive(keep_alive), + _is_alive(is_alive) +{ } + + // End Of List. + inline bool has_next() const { return _ref != NULL; } + + // Get oop to the Reference object. + inline oop obj() const { return _ref; } + + // Get oop to the referent object. + inline oop referent() const { return _referent; } + + // Returns true if referent is alive. + inline bool is_referent_alive() const { + return _is_alive->do_object_b(_referent); + } + + // Loads data for the current reference. 
+ // The "allow_null_referent" argument tells us to allow for the possibility + // of a NULL referent in the discovered Reference object. This typically + // happens in the case of concurrent collectors that may have done the + // discovery concurrently, or interleaved, with mutator execution. + void load_ptrs(DEBUG_ONLY(bool allow_null_referent)); + + // Move to the next discovered reference. + inline void next() { + _prev_next = _discovered_addr; + _prev = _ref; + move_to_next(); + } + + // Remove the current reference from the list + void remove(); + + // Make the Reference object active again. + void make_active(); + + // Make the referent alive. + inline void make_referent_alive() { + if (UseCompressedOops) { + _keep_alive->do_oop((narrowOop*)_referent_addr); + } else { + _keep_alive->do_oop((oop*)_referent_addr); + } + } + + // Update the discovered field. + inline void update_discovered() { + // First _prev_next ref actually points into DiscoveredList (gross). + if (UseCompressedOops) { + if (!oopDesc::is_null(*(narrowOop*)_prev_next)) { + _keep_alive->do_oop((narrowOop*)_prev_next); + } + } else { + if (!oopDesc::is_null(*(oop*)_prev_next)) { + _keep_alive->do_oop((oop*)_prev_next); + } + } + } + + // NULL out referent pointer. + void clear_referent(); + + // Statistics + NOT_PRODUCT( + inline size_t processed() const { return _processed; } + inline size_t removed() const { return _removed; } + ) + + inline void move_to_next() { + if (_ref == _next) { + // End of the list. + _ref = NULL; + } else { + _ref = _next; + } + assert(_ref != _first_seen, "cyclic ref_list found"); + NOT_PRODUCT(_processed++); + } + +}; class ReferenceProcessor : public CHeapObj { protected: // Compatibility with pre-4965777 JDK's static bool _pending_list_uses_discovered_field; - MemRegion _span; // (right-open) interval of heap - // subject to wkref discovery - bool _discovering_refs; // true when discovery enabled - bool _discovery_is_atomic; // if discovery is atomic wrt - // other collectors in configuration - bool _discovery_is_mt; // true if reference discovery is MT. + + MemRegion _span; // (right-open) interval of heap + // subject to wkref discovery + + bool _discovering_refs; // true when discovery enabled + bool _discovery_is_atomic; // if discovery is atomic wrt + // other collectors in configuration + bool _discovery_is_mt; // true if reference discovery is MT. + // If true, setting "next" field of a discovered refs list requires // write barrier(s). (Must be true if used in a collector in which // elements of a discovered list may be moved during discovery: for @@ -67,18 +224,19 @@ // long-term concurrent marking phase that does weak reference // discovery.) bool _discovered_list_needs_barrier; - BarrierSet* _bs; // Cached copy of BarrierSet. - bool _enqueuing_is_done; // true if all weak references enqueued - bool _processing_is_mt; // true during phases when - // reference processing is MT. - int _next_id; // round-robin mod _num_q counter in - // support of work distribution - // For collectors that do not keep GC marking information + BarrierSet* _bs; // Cached copy of BarrierSet. + bool _enqueuing_is_done; // true if all weak references enqueued + bool _processing_is_mt; // true during phases when + // reference processing is MT. 
+ int _next_id; // round-robin mod _num_q counter in + // support of work distribution + + // For collectors that do not keep GC liveness information // in the object header, this field holds a closure that // helps the reference processor determine the reachability - // of an oop (the field is currently initialized to NULL for - // all collectors but the CMS collector). + // of an oop. It is currently initialized to NULL for all + // collectors except for CMS and G1. BoolObjectClosure* _is_alive_non_header; // Soft ref clearing policies @@ -102,10 +260,13 @@ DiscoveredList* _discoveredPhantomRefs; public: - int num_q() { return _num_q; } - int max_num_q() { return _max_num_q; } - void set_active_mt_degree(int v) { _num_q = v; } - DiscoveredList* discovered_soft_refs() { return _discoveredSoftRefs; } + static int number_of_subclasses_of_ref() { return (REF_PHANTOM - REF_OTHER); } + + int num_q() { return _num_q; } + int max_num_q() { return _max_num_q; } + void set_active_mt_degree(int v) { _num_q = v; } + DiscoveredList* discovered_soft_refs() { return _discoveredSoftRefs; } + ReferencePolicy* setup_policy(bool always_clear) { _current_soft_ref_policy = always_clear ? _always_clear_soft_ref_policy : _default_soft_ref_policy; @@ -205,6 +366,11 @@ void enqueue_discovered_reflists(HeapWord* pending_list_addr, AbstractRefProcTaskExecutor* task_executor); protected: + // Set the 'discovered' field of the given reference to + // the given value - emitting barriers depending upon + // the value of _discovered_list_needs_barrier. + void set_discovered(oop ref, oop value); + // "Preclean" the given discovered reference list // by removing references with strongly reachable referents. // Currently used in support of CMS only. @@ -290,7 +456,19 @@ void set_span(MemRegion span) { _span = span; } // start and stop weak ref discovery - void enable_discovery() { _discovering_refs = true; } + void enable_discovery(bool verify_disabled, bool check_no_refs) { +#ifdef ASSERT + // Verify that we're not currently discovering refs + assert(!verify_disabled || !_discovering_refs, "nested call?"); + + if (check_no_refs) { + // Verify that the discovered lists are empty + verify_no_references_recorded(); + } +#endif // ASSERT + _discovering_refs = true; + } + void disable_discovery() { _discovering_refs = false; } bool discovery_enabled() { return _discovering_refs; } @@ -365,7 +543,7 @@ ~NoRefDiscovery() { if (_was_discovering_refs) { - _rp->enable_discovery(); + _rp->enable_discovery(true /*verify_disabled*/, false /*check_no_refs*/); } } }; --- old/src/share/vm/runtime/thread.cpp 2011-09-14 14:54:35.610893543 -0700 +++ new/src/share/vm/runtime/thread.cpp 2011-09-14 14:54:35.280389497 -0700 @@ -749,8 +749,9 @@ jint thread_parity = _oops_do_parity; if (thread_parity != strong_roots_parity) { jint res = Atomic::cmpxchg(strong_roots_parity, &_oops_do_parity, thread_parity); - if (res == thread_parity) return true; - else { + if (res == thread_parity) { + return true; + } else { guarantee(res == strong_roots_parity, "Or else what?"); assert(SharedHeap::heap()->n_par_threads() > 0, "Should only fail when parallel."); @@ -3905,8 +3906,9 @@ } } VMThread* vmt = VMThread::vm_thread(); - if (vmt->claim_oops_do(is_par, cp)) + if (vmt->claim_oops_do(is_par, cp)) { vmt->oops_do(f, cf); + } } #ifndef SERIALGC
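Since DiscoveredListIterator is now declared in referenceProcessor.hpp rather than being private to referenceProcessor.cpp, other collectors can reuse the same scan-loop idiom as the processor's own phase routines. Below is a minimal sketch under that assumption; the helper function and its choice of allowing NULL referents are illustrative only and are not taken from the patch.

  #include "memory/referenceProcessor.hpp"

  // Hypothetical helper: prune a discovered list, dropping Reference objects
  // whose referents are still alive and keeping those referents alive.
  static void example_prune_discovered_list(DiscoveredList&    refs_list,
                                            BoolObjectClosure* is_alive,
                                            OopClosure*        keep_alive) {
    DiscoveredListIterator iter(refs_list, keep_alive, is_alive);
    while (iter.has_next()) {
      iter.load_ptrs(DEBUG_ONLY(true /* allow_null_referent */));
      if (iter.is_referent_alive()) {
        // The referent is reachable after all: unlink this Reference from
        // the discovered list and make sure the referent itself survives.
        iter.remove();
        iter.make_referent_alive();
        iter.move_to_next();
      } else {
        iter.next();
      }
    }
  }
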