--- old/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Mon Mar 14 15:56:14 2011 +++ new/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Mon Mar 14 15:56:13 2011 @@ -292,13 +292,15 @@ void CMSCollector::ref_processor_init() { if (_ref_processor == NULL) { // Allocate and initialize a reference processor - _ref_processor = ReferenceProcessor::create_ref_processor( - _span, // span - _cmsGen->refs_discovery_is_atomic(), // atomic_discovery - _cmsGen->refs_discovery_is_mt(), // mt_discovery - &_is_alive_closure, - ParallelGCThreads, - ParallelRefProcEnabled); + _ref_processor = + new ReferenceProcessor(_span, // span + (ParallelGCThreads > 1) && ParallelRefProcEnabled, // mt processing + ParallelGCThreads, // mt processing degree + _cmsGen->refs_discovery_is_mt(), // mt discovery + MAX2(ConcGCThreads, ParallelGCThreads), // mt discovery degree + _cmsGen->refs_discovery_is_atomic(), // discovery is not atomic + &_is_alive_closure, // closure for liveness info + false); // next field updates do not need write barrier // Initialize the _ref_processor field of CMSGen _cmsGen->set_ref_processor(_ref_processor); @@ -641,7 +643,7 @@ } // Support for multi-threaded concurrent phases - if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) { + if (CMSConcurrentMTEnabled) { if (FLAG_IS_DEFAULT(ConcGCThreads)) { // just for now FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4); @@ -2000,6 +2002,9 @@ // Temporarily make refs discovery atomic ReferenceProcessorAtomicMutator w(ref_processor(), true); + // Temporarily make refs discovery ST; the mutator must be a named local so it is not destroyed (and discovery restored to MT) until the end of this scope + ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(ref_processor(), false); + ref_processor()->set_enqueuing_is_done(false); ref_processor()->enable_discovery(); ref_processor()->setup_policy(clear_all_soft_refs); @@ -4263,9 +4268,7 @@ // Refs discovery is already non-atomic. 
assert(!ref_processor()->discovery_is_atomic(), "Should be non-atomic"); - // Mutate the Refs discovery so it is MT during the - // multi-threaded marking phase. - ReferenceProcessorMTMutator mt(ref_processor(), num_workers > 1); + assert(num_workers <= 1 || ref_processor()->discovery_is_mt(), "Discovery should be MT"); DEBUG_ONLY(RememberKlassesChecker cmx(should_unload_classes());) conc_workers()->start_task(&tsk); while (tsk.yielded()) { @@ -5576,8 +5579,8 @@ // in the multi-threaded case, but we special-case n=1 here to get // repeatable measurements of the 1-thread overhead of the parallel code. if (n_workers > 1) { - // Make refs discovery MT-safe - ReferenceProcessorMTMutator mt(ref_processor(), true); + // Make refs discovery MT-safe, if it isn't already + ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), true); GenCollectedHeap::StrongRootsScope srs(gch); workers->run_task(&tsk); } else { @@ -5703,14 +5706,19 @@ CMSBitMap* mark_bit_map, AbstractWorkGang* workers, OopTaskQueueSet* task_queues): + // XXX Should superclass AGTWOQ also know about AWG since it knows + // about the task_queues used by the AWG? Then it could initialize + // the terminator() object. See 6984287. The set_for_termination() + // below is a temporary band-aid for the regression in 6984287. AbstractGangTaskWOopQueues("Process referents by policy in parallel", task_queues), _task(task), _collector(collector), _span(span), _mark_bit_map(mark_bit_map) - { - assert(_collector->_span.equals(_span) && !_span.is_empty(), - "Inconsistency in _span"); - } + { + assert(_collector->_span.equals(_span) && !_span.is_empty(), + "Inconsistency in _span"); + set_for_termination(workers->active_workers()); + } OopTaskQueueSet* task_queues() { return queues(); } @@ -5872,8 +5880,7 @@ // That is OK as long as the Reference lists are balanced (see // balance_all_queues() and balance_queues()). 
- - rp->set_mt_degree(ParallelGCThreads); + rp->set_active_mt_degree(ParallelGCThreads); CMSRefProcTaskExecutor task_executor(*this); rp->process_discovered_references(&_is_alive_closure, &cmsKeepAliveClosure, --- old/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Mon Mar 14 15:56:16 2011 +++ new/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp Mon Mar 14 15:56:16 2011 @@ -1133,7 +1133,7 @@ // rare that the cost of the CAS's involved is in the // noise. That's a measurement that should be done, and // the code simplified if that turns out to be the case. - return false; + return ConcGCThreads > 1; } // Override --- old/src/share/vm/gc_implementation/g1/concurrentMark.cpp Mon Mar 14 15:56:18 2011 +++ new/src/share/vm/gc_implementation/g1/concurrentMark.cpp Mon Mar 14 15:56:17 2011 @@ -2141,14 +2141,15 @@ G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap()); G1CMDrainMarkingStackClosure g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive); - // We use the work gang from the G1CollectedHeap and we utilize all // the worker threads. - int active_workers = MAX2(MIN2(g1h->workers()->total_workers(), (int)_max_task_num), 1); + int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1; + active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1); G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(), g1h->workers(), active_workers); + if (rp->processing_is_mt()) { // Set the degree of MT here. If the discovery is done MT, there // may have been a different number of threads doing the discovery @@ -2155,7 +2156,7 @@ // and a different number of discovered lists may have Ref objects. // That is OK as long as the Reference lists are balanced (see // balance_all_queues() and balance_queues()). 
- rp->set_mt_degree(active_workers); + rp->set_active_mt_degree(active_workers); rp->process_discovered_references(&g1_is_alive, &g1_keep_alive, --- old/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Mar 14 15:56:19 2011 +++ new/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Mar 14 15:56:19 2011 @@ -1462,7 +1462,7 @@ // how reference processing currently works in G1. // Temporarily make reference _discovery_ single threaded (non-MT). - ReferenceProcessorMTMutator rp_disc_ser(ref_processor(), false); + ReferenceProcessorMTDiscoveryMutator rp_disc_ser(ref_processor(), false); // Temporarily make refs discovery atomic ReferenceProcessorAtomicMutator rp_disc_atomic(ref_processor(), true); @@ -2219,16 +2219,16 @@ SharedHeap::ref_processing_init(); MemRegion mr = reserved_region(); - _ref_processor = ReferenceProcessor::create_ref_processor( - mr, // span - false, // Reference discovery is not atomic - true, // mt_discovery - &_is_alive_closure, // is alive closure - // for efficiency - ParallelGCThreads, - ParallelRefProcEnabled, - true); // Setting next fields of discovered - // lists requires a barrier. + _ref_processor = + new ReferenceProcessor(mr, // span + ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing + ParallelGCThreads, // degree of mt processing + ParallelGCThreads > 1 || ConcGCThreads > 1, // mt discovery + MAX2(ParallelGCThreads, ConcGCThreads), // degree of mt discovery + false, // Reference discovery is not atomic + &_is_alive_closure, // is alive closure for efficiency + true); // Setting next fields of discovered + // lists requires a barrier. 
} size_t G1CollectedHeap::capacity() const { --- old/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Mon Mar 14 15:56:21 2011 +++ new/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Mon Mar 14 15:56:21 2011 @@ -1530,13 +1530,15 @@ { if (_ref_processor == NULL) { // Allocate and initialize a reference processor - _ref_processor = ReferenceProcessor::create_ref_processor( - _reserved, // span - refs_discovery_is_atomic(), // atomic_discovery - refs_discovery_is_mt(), // mt_discovery - NULL, // is_alive_non_header - ParallelGCThreads, - ParallelRefProcEnabled); + _ref_processor = + new ReferenceProcessor(_reserved, // span + ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing + ParallelGCThreads, // mt processing degree + refs_discovery_is_mt(), // mt discovery + ParallelGCThreads, // mt discovery degree + refs_discovery_is_atomic(), // atomic_discovery + NULL, // is_alive_non_header + false); // write barrier for next field updates } } --- old/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Mon Mar 14 15:56:23 2011 +++ new/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp Mon Mar 14 15:56:23 2011 @@ -58,9 +58,7 @@ void PSMarkSweep::initialize() { MemRegion mr = Universe::heap()->reserved_region(); - _ref_processor = new ReferenceProcessor(mr, - true, // atomic_discovery - false); // mt_discovery + _ref_processor = new ReferenceProcessor(mr); // a vanilla ref proc _counters = new CollectorCounters("PSMarkSweep", 1); } --- old/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Mon Mar 14 15:56:24 2011 +++ new/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Mon Mar 14 15:56:24 2011 @@ -827,13 +827,15 @@ assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); MemRegion mr = heap->reserved_region(); - _ref_processor = ReferenceProcessor::create_ref_processor( - mr, // span - true, // atomic_discovery - true, // mt_discovery - &_is_alive_closure, - 
ParallelGCThreads, - ParallelRefProcEnabled); + _ref_processor = + new ReferenceProcessor(mr, // span + ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing + ParallelGCThreads, // mt processing degree + true, // mt discovery + ParallelGCThreads, // mt discovery degree + true, // atomic_discovery + &_is_alive_closure, // non-header is alive closure + false); // write barrier for next field updates _counters = new CollectorCounters("PSParallelCompact", 1); // Initialize static fields in ParCompactionManager. --- old/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Mon Mar 14 15:56:26 2011 +++ new/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Mon Mar 14 15:56:26 2011 @@ -796,13 +796,15 @@ // Initialize ref handling object for scavenging. MemRegion mr = young_gen->reserved(); - _ref_processor = ReferenceProcessor::create_ref_processor( - mr, // span - true, // atomic_discovery - true, // mt_discovery - NULL, // is_alive_non_header - ParallelGCThreads, - ParallelRefProcEnabled); + _ref_processor = + new ReferenceProcessor(mr, // span + ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing + ParallelGCThreads, // mt processing degree + true, // mt discovery + ParallelGCThreads, // mt discovery degree + true, // atomic_discovery + NULL, // header provides liveness info + false); // next field updates do not need write barrier // Cache the cardtable BarrierSet* bs = Universe::heap()->barrier_set(); --- old/src/share/vm/memory/generation.cpp Mon Mar 14 15:56:27 2011 +++ new/src/share/vm/memory/generation.cpp Mon Mar 14 15:56:27 2011 @@ -83,14 +83,11 @@ } // By default we get a single threaded default reference processor; -// generations needing multi-threaded refs discovery override this method. +// generations needing multi-threaded refs processing or discovery override this method. 
void Generation::ref_processor_init() { assert(_ref_processor == NULL, "a reference processor already exists"); assert(!_reserved.is_empty(), "empty generation?"); - _ref_processor = - new ReferenceProcessor(_reserved, // span - refs_discovery_is_atomic(), // atomic_discovery - refs_discovery_is_mt()); // mt_discovery + _ref_processor = new ReferenceProcessor(_reserved); // a vanilla reference processor if (_ref_processor == NULL) { vm_exit_during_initialization("Could not allocate ReferenceProcessor object"); } --- old/src/share/vm/memory/referenceProcessor.cpp Mon Mar 14 15:56:29 2011 +++ new/src/share/vm/memory/referenceProcessor.cpp Mon Mar 14 15:56:28 2011 @@ -102,40 +102,17 @@ "Unrecongnized RefDiscoveryPolicy"); } -ReferenceProcessor* -ReferenceProcessor::create_ref_processor(MemRegion span, - bool atomic_discovery, - bool mt_discovery, - BoolObjectClosure* is_alive_non_header, - int parallel_gc_threads, - bool mt_processing, - bool dl_needs_barrier) { - int mt_degree = 1; - if (parallel_gc_threads > 1) { - mt_degree = parallel_gc_threads; - } - ReferenceProcessor* rp = - new ReferenceProcessor(span, atomic_discovery, - mt_discovery, mt_degree, - mt_processing && (parallel_gc_threads > 0), - dl_needs_barrier); - if (rp == NULL) { - vm_exit_during_initialization("Could not allocate ReferenceProcessor object"); - } - rp->set_is_alive_non_header(is_alive_non_header); - rp->setup_policy(false /* default soft ref policy */); - return rp; -} - ReferenceProcessor::ReferenceProcessor(MemRegion span, - bool atomic_discovery, - bool mt_discovery, - int mt_degree, bool mt_processing, + int mt_processing_degree, + bool mt_discovery, + int mt_discovery_degree, + bool atomic_discovery, + BoolObjectClosure* is_alive_non_header, bool discovered_list_needs_barrier) : _discovering_refs(false), _enqueuing_is_done(false), - _is_alive_non_header(NULL), + _is_alive_non_header(is_alive_non_header), _discovered_list_needs_barrier(discovered_list_needs_barrier), _bs(NULL), 
_processing_is_mt(mt_processing), @@ -144,8 +121,8 @@ _span = span; _discovery_is_atomic = atomic_discovery; _discovery_is_mt = mt_discovery; - _num_q = mt_degree; - _max_num_q = mt_degree; + _num_q = MAX2(1, mt_processing_degree); + _max_num_q = MAX2(_num_q, mt_discovery_degree); _discoveredSoftRefs = NEW_C_HEAP_ARRAY(DiscoveredList, _max_num_q * subclasses_of_ref); if (_discoveredSoftRefs == NULL) { vm_exit_during_initialization("Could not allocated RefProc Array"); @@ -163,6 +140,7 @@ if (discovered_list_needs_barrier) { _bs = Universe::heap()->barrier_set(); } + setup_policy(false /* default soft ref policy */); } #ifndef PRODUCT @@ -405,15 +383,14 @@ { } virtual void work(unsigned int work_id) { - assert(work_id < (unsigned int)_ref_processor.num_q(), "Index out-of-bounds"); + assert(work_id < (unsigned int)_ref_processor.max_num_q(), "Index out-of-bounds"); // Simplest first cut: static partitioning. int index = work_id; // The increment on "index" must correspond to the maximum number of queues // (n_queues) with which that ReferenceProcessor was created. That // is because of the "clever" way the discovered references lists were - // allocated and are indexed into. That number is ParallelGCThreads - // currently. Assert that. - assert(_n_queues == (int) ParallelGCThreads, "Different number not expected"); + // allocated and are indexed into. 
+ assert(_n_queues == (int) _ref_processor.max_num_q(), "Different number not expected"); for (int j = 0; j < subclasses_of_ref; j++, index += _n_queues) { @@ -951,7 +928,7 @@ } if (PrintReferenceGC && PrintGCDetails) { size_t total = 0; - for (int i = 0; i < _num_q; ++i) { + for (int i = 0; i < _max_num_q; ++i) { total += refs_lists[i].length(); } gclog_or_tty->print(", %u refs", total); @@ -967,7 +944,7 @@ RefProcPhase1Task phase1(*this, refs_lists, policy, true /*marks_oops_alive*/); task_executor->execute(phase1); } else { - for (int i = 0; i < _num_q; i++) { + for (int i = 0; i < _max_num_q; i++) { process_phase1(refs_lists[i], policy, is_alive, keep_alive, complete_gc); } @@ -983,7 +960,7 @@ RefProcPhase2Task phase2(*this, refs_lists, !discovery_is_atomic() /*marks_oops_alive*/); task_executor->execute(phase2); } else { - for (int i = 0; i < _num_q; i++) { + for (int i = 0; i < _max_num_q; i++) { process_phase2(refs_lists[i], is_alive, keep_alive, complete_gc); } } @@ -994,7 +971,7 @@ RefProcPhase3Task phase3(*this, refs_lists, clear_referent, true /*marks_oops_alive*/); task_executor->execute(phase3); } else { - for (int i = 0; i < _num_q; i++) { + for (int i = 0; i < _max_num_q; i++) { process_phase3(refs_lists[i], clear_referent, is_alive, keep_alive, complete_gc); } @@ -1008,7 +985,7 @@ // for (int j = 0; j < _num_q; j++) { // int index = i * _max_num_q + j; for (int i = 0; i < _max_num_q * subclasses_of_ref; i++) { - if (TraceReferenceGC && PrintGCDetails && ((i % _num_q) == 0)) { + if (TraceReferenceGC && PrintGCDetails && ((i % _max_num_q) == 0)) { gclog_or_tty->print_cr( "\nScrubbing %s discovered list of Null referents", list_name(i)); @@ -1350,7 +1327,7 @@ { TraceTime tt("Preclean WeakReferences", PrintGCDetails && PrintReferenceGC, false, gclog_or_tty); - for (int i = 0; i < _num_q; i++) { + for (int i = 0; i < _max_num_q; i++) { if (yield->should_return()) { return; } @@ -1363,7 +1340,7 @@ { TraceTime tt("Preclean FinalReferences", PrintGCDetails 
&& PrintReferenceGC, false, gclog_or_tty); - for (int i = 0; i < _num_q; i++) { + for (int i = 0; i < _max_num_q; i++) { if (yield->should_return()) { return; } @@ -1376,7 +1353,7 @@ { TraceTime tt("Preclean PhantomReferences", PrintGCDetails && PrintReferenceGC, false, gclog_or_tty); - for (int i = 0; i < _num_q; i++) { + for (int i = 0; i < _max_num_q; i++) { if (yield->should_return()) { return; } --- old/src/share/vm/memory/referenceProcessor.hpp Mon Mar 14 15:56:30 2011 +++ new/src/share/vm/memory/referenceProcessor.hpp Mon Mar 14 15:56:30 2011 @@ -71,7 +71,7 @@ bool _enqueuing_is_done; // true if all weak references enqueued bool _processing_is_mt; // true during phases when // reference processing is MT. - int _next_id; // round-robin counter in + int _next_id; // round-robin mod _num_q counter in // support of work distribution // For collectors that do not keep GC marking information @@ -103,7 +103,8 @@ public: int num_q() { return _num_q; } - void set_mt_degree(int v) { _num_q = v; } + int max_num_q() { return _max_num_q; } + void set_active_mt_degree(int v) { _num_q = v; } DiscoveredList* discovered_soft_refs() { return _discoveredSoftRefs; } static oop sentinel_ref() { return _sentinelRef; } static oop* adr_sentinel_ref() { return &_sentinelRef; } @@ -216,6 +217,7 @@ VoidClosure* complete_gc, YieldClosure* yield); + // round-robin mod _num_q (note: _not_ mod _max_num_q) int next_id() { int id = _next_id; if (++_next_id == _num_q) { @@ -256,24 +258,16 @@ _max_num_q(0), _processing_is_mt(false), _next_id(0) - {} + { } - ReferenceProcessor(MemRegion span, bool atomic_discovery, - bool mt_discovery, - int mt_degree = 1, - bool mt_processing = false, + // Default parameters give you a vanilla reference processor. 
+ ReferenceProcessor(MemRegion span, + bool mt_processing = false, int mt_processing_degree = 1, + bool mt_discovery = false, int mt_discovery_degree = 1, + bool atomic_discovery = true, + BoolObjectClosure* is_alive_non_header = NULL, bool discovered_list_needs_barrier = false); - // Allocates and initializes a reference processor. - static ReferenceProcessor* create_ref_processor( - MemRegion span, - bool atomic_discovery, - bool mt_discovery, - BoolObjectClosure* is_alive_non_header = NULL, - int parallel_gc_threads = 1, - bool mt_processing = false, - bool discovered_list_needs_barrier = false); - // RefDiscoveryPolicy values enum DiscoveryPolicy { ReferenceBasedDiscovery = 0, @@ -397,20 +391,20 @@ // A utility class to temporarily change the MT'ness of // reference discovery for the given ReferenceProcessor // in the scope that contains it. -class ReferenceProcessorMTMutator: StackObj { +class ReferenceProcessorMTDiscoveryMutator: StackObj { private: ReferenceProcessor* _rp; bool _saved_mt; public: - ReferenceProcessorMTMutator(ReferenceProcessor* rp, - bool mt): + ReferenceProcessorMTDiscoveryMutator(ReferenceProcessor* rp, + bool mt): _rp(rp) { _saved_mt = _rp->discovery_is_mt(); _rp->set_mt_discovery(mt); } - ~ReferenceProcessorMTMutator() { + ~ReferenceProcessorMTDiscoveryMutator() { _rp->set_mt_discovery(_saved_mt); } }; --- old/src/share/vm/utilities/workgroup.cpp Mon Mar 14 15:56:32 2011 +++ new/src/share/vm/utilities/workgroup.cpp Mon Mar 14 15:56:31 2011 @@ -156,7 +156,7 @@ tty->print_cr("/nFinished work gang %s: %d/%d sequence %d", name(), finished_workers(), total_workers(), _sequence_number); - } + } } void AbstractWorkGang::stop() { --- old/src/share/vm/utilities/workgroup.hpp Mon Mar 14 15:56:34 2011 +++ new/src/share/vm/utilities/workgroup.hpp Mon Mar 14 15:56:33 2011 @@ -36,6 +36,20 @@ # include "thread_windows.inline.hpp" #endif +// Task class hierarchy: +// AbstractGangTask +// AbstractGangTaskWOopQueues +// +// Gang/Group class hierarchy: 
+// AbstractWorkGang +// WorkGang +// FlexibleWorkGang +// YieldingFlexibleWorkGang (defined in another file) +// +// Worker class hierarchy: +// GangWorker (subclass of WorkerThread) +// YieldingFlexibleGangWorker (defined in another file) + // Forward declarations of classes defined here class WorkGang;