# HG changeset patch # User rkennke # Date 1498591788 -7200 # Tue Jun 27 21:29:48 2017 +0200 # Node ID c681b0ce1fe72cf4f380224809d561473dad62c5 # Parent 7d3478491210390556a9f34210bc9bc8d9f5ebd1 8180932: Parallelize safepoint cleanup Summary: Provide infrastructure to do safepoint cleanup tasks using parallel worker threads Reviewed-by: dholmes, rehn diff --git a/src/share/vm/gc/shared/collectedHeap.hpp b/src/share/vm/gc/shared/collectedHeap.hpp --- a/src/share/vm/gc/shared/collectedHeap.hpp +++ b/src/share/vm/gc/shared/collectedHeap.hpp @@ -50,6 +50,7 @@ class Thread; class ThreadClosure; class VirtualSpaceSummary; +class WorkGang; class nmethod; class GCMessage : public FormatBuffer<1024> { @@ -606,6 +607,16 @@ // unknown phase. The default implementation returns false. virtual bool request_concurrent_phase(const char* phase); + // Provides a thread pool to SafepointSynchronize to use + // for parallel safepoint cleanup. + // GCs that use a GC worker thread pool may want to share + // it for use during safepoint cleanup. This is only possible + // if the GC can pause and resume concurrent work (e.g. G1 + // concurrent marking) for an intermittent non-GC safepoint. + // If this method returns NULL, SafepointSynchronize will + // perform cleanup tasks serially in the VMThread. + virtual WorkGang* get_safepoint_workers() { return NULL; } + // Non product verification and debugging. #ifndef PRODUCT // Support for PromotionFailureALot. 
Return true if it's time to cause a diff --git a/src/share/vm/runtime/safepoint.cpp b/src/share/vm/runtime/safepoint.cpp --- a/src/share/vm/runtime/safepoint.cpp +++ b/src/share/vm/runtime/safepoint.cpp @@ -540,64 +540,126 @@ } } +class ParallelSPCleanupThreadClosure : public ThreadClosure { +private: + CodeBlobClosure* _nmethod_cl; + DeflateMonitorCounters* _counters; + +public: + ParallelSPCleanupThreadClosure(DeflateMonitorCounters* counters) : + _nmethod_cl(NMethodSweeper::prepare_mark_active_nmethods()), + _counters(counters) {} + + void do_thread(Thread* thread) { + ObjectSynchronizer::deflate_thread_local_monitors(thread, _counters); + if (_nmethod_cl != NULL && thread->is_Java_thread() && + ! thread->is_Code_cache_sweeper_thread()) { + JavaThread* jt = (JavaThread*) thread; + jt->nmethods_do(_nmethod_cl); + } + } +}; + +class ParallelSPCleanupTask : public AbstractGangTask { +private: + SubTasksDone _subtasks; + ParallelSPCleanupThreadClosure _cleanup_threads_cl; + uint _num_workers; + DeflateMonitorCounters* _counters; +public: + ParallelSPCleanupTask(uint num_workers, DeflateMonitorCounters* counters) : + AbstractGangTask("Parallel Safepoint Cleanup"), + _subtasks(SubTasksDone(SafepointSynchronize::SAFEPOINT_CLEANUP_NUM_TASKS)), + _cleanup_threads_cl(ParallelSPCleanupThreadClosure(counters)), + _num_workers(num_workers), + _counters(counters) {} + + void work(uint worker_id) { + // All threads deflate monitors and mark nmethods (if necessary). + Threads::parallel_java_threads_do(&_cleanup_threads_cl); + + if (! _subtasks.is_task_claimed(SafepointSynchronize::SAFEPOINT_CLEANUP_DEFLATE_MONITORS)) { + const char* name = "deflating idle monitors"; + EventSafepointCleanupTask event; + TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); + ObjectSynchronizer::deflate_idle_monitors(_counters); + event_safepoint_cleanup_task_commit(event, name); + } + + if (! 
_subtasks.is_task_claimed(SafepointSynchronize::SAFEPOINT_CLEANUP_UPDATE_INLINE_CACHES)) { + const char* name = "updating inline caches"; + EventSafepointCleanupTask event; + TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); + InlineCacheBuffer::update_inline_caches(); + event_safepoint_cleanup_task_commit(event, name); + } + + if (! _subtasks.is_task_claimed(SafepointSynchronize::SAFEPOINT_CLEANUP_COMPILATION_POLICY)) { + const char* name = "compilation policy safepoint handler"; + EventSafepointCleanupTask event; + TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); + CompilationPolicy::policy()->do_safepoint_work(); + event_safepoint_cleanup_task_commit(event, name); + } + + if (! _subtasks.is_task_claimed(SafepointSynchronize::SAFEPOINT_CLEANUP_SYMBOL_TABLE_REHASH)) { + if (SymbolTable::needs_rehashing()) { + const char* name = "rehashing symbol table"; + EventSafepointCleanupTask event; + TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); + SymbolTable::rehash_table(); + event_safepoint_cleanup_task_commit(event, name); + } + } + + if (! _subtasks.is_task_claimed(SafepointSynchronize::SAFEPOINT_CLEANUP_STRING_TABLE_REHASH)) { + if (StringTable::needs_rehashing()) { + const char* name = "rehashing string table"; + EventSafepointCleanupTask event; + TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); + StringTable::rehash_table(); + event_safepoint_cleanup_task_commit(event, name); + } + } + + if (! 
_subtasks.is_task_claimed(SafepointSynchronize::SAFEPOINT_CLEANUP_CLD_PURGE)) { + // CMS delays purging the CLDG until the beginning of the next safepoint and to + // make sure concurrent sweep is done + const char* name = "purging class loader data graph"; + EventSafepointCleanupTask event; + TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); + ClassLoaderDataGraph::purge_if_needed(); + event_safepoint_cleanup_task_commit(event, name); + } + _subtasks.all_tasks_completed(_num_workers); + } +}; + // Various cleaning tasks that should be done periodically at safepoints void SafepointSynchronize::do_cleanup_tasks() { - { - const char* name = "deflating idle monitors"; - EventSafepointCleanupTask event; - TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); - ObjectSynchronizer::deflate_idle_monitors(); - event_safepoint_cleanup_task_commit(event, name); + + // Prepare for monitor deflation + DeflateMonitorCounters deflate_counters; + ObjectSynchronizer::prepare_deflate_idle_monitors(&deflate_counters); + + CollectedHeap* heap = Universe::heap(); + assert(heap != NULL, "heap not initialized yet?"); + WorkGang* cleanup_workers = heap->get_safepoint_workers(); + if (cleanup_workers != NULL) { + // Parallel cleanup using GC provided thread pool. + uint num_cleanup_workers = cleanup_workers->active_workers(); + ParallelSPCleanupTask cleanup(num_cleanup_workers, &deflate_counters); + StrongRootsScope srs(num_cleanup_workers); + cleanup_workers->run_task(&cleanup); + } else { + // Serial cleanup using VMThread. 
+ ParallelSPCleanupTask cleanup(1, &deflate_counters); + StrongRootsScope srs(1); + cleanup.work(0); } - { - const char* name = "updating inline caches"; - EventSafepointCleanupTask event; - TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); - InlineCacheBuffer::update_inline_caches(); - event_safepoint_cleanup_task_commit(event, name); - } - { - const char* name = "compilation policy safepoint handler"; - EventSafepointCleanupTask event; - TraceTime timer("compilation policy safepoint handler", TRACETIME_LOG(Info, safepoint, cleanup)); - CompilationPolicy::policy()->do_safepoint_work(); - event_safepoint_cleanup_task_commit(event, name); - } - - { - const char* name = "mark nmethods"; - EventSafepointCleanupTask event; - TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); - NMethodSweeper::mark_active_nmethods(); - event_safepoint_cleanup_task_commit(event, name); - } - - if (SymbolTable::needs_rehashing()) { - const char* name = "rehashing symbol table"; - EventSafepointCleanupTask event; - TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); - SymbolTable::rehash_table(); - event_safepoint_cleanup_task_commit(event, name); - } - - if (StringTable::needs_rehashing()) { - const char* name = "rehashing string table"; - EventSafepointCleanupTask event; - TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); - StringTable::rehash_table(); - event_safepoint_cleanup_task_commit(event, name); - } - - { - // CMS delays purging the CLDG until the beginning of the next safepoint and to - // make sure concurrent sweep is done - const char* name = "purging class loader data graph"; - EventSafepointCleanupTask event; - TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); - ClassLoaderDataGraph::purge_if_needed(); - event_safepoint_cleanup_task_commit(event, name); - } + // Finish monitor deflation. 
+ ObjectSynchronizer::finish_deflate_idle_monitors(&deflate_counters); } diff --git a/src/share/vm/runtime/safepoint.hpp b/src/share/vm/runtime/safepoint.hpp --- a/src/share/vm/runtime/safepoint.hpp +++ b/src/share/vm/runtime/safepoint.hpp @@ -75,6 +75,17 @@ _blocking_timeout = 1 }; + enum SafepointCleanupTasks { + SAFEPOINT_CLEANUP_DEFLATE_MONITORS, + SAFEPOINT_CLEANUP_UPDATE_INLINE_CACHES, + SAFEPOINT_CLEANUP_COMPILATION_POLICY, + SAFEPOINT_CLEANUP_SYMBOL_TABLE_REHASH, + SAFEPOINT_CLEANUP_STRING_TABLE_REHASH, + SAFEPOINT_CLEANUP_CLD_PURGE, + // Leave this one last. + SAFEPOINT_CLEANUP_NUM_TASKS + }; + typedef struct { float _time_stamp; // record when the current safepoint occurs in seconds int _vmop_type; // type of VM operation triggers the safepoint diff --git a/src/share/vm/runtime/sweeper.cpp b/src/share/vm/runtime/sweeper.cpp --- a/src/share/vm/runtime/sweeper.cpp +++ b/src/share/vm/runtime/sweeper.cpp @@ -199,11 +199,20 @@ * safepoint. */ void NMethodSweeper::mark_active_nmethods() { + CodeBlobClosure* cl = prepare_mark_active_nmethods(); + if (cl != NULL) { + Threads::nmethods_do(cl); + // TODO: Is this really needed? + OrderAccess::storestore(); + } +} + +CodeBlobClosure* NMethodSweeper::prepare_mark_active_nmethods() { assert(SafepointSynchronize::is_at_safepoint(), "must be executed at a safepoint"); // If we do not want to reclaim not-entrant or zombie methods there is no need // to scan stacks if (!MethodFlushing) { - return; + return NULL; } // Increase time so that we can estimate when to invoke the sweeper again. 
@@ -231,14 +240,13 @@ if (PrintMethodFlushing) { tty->print_cr("### Sweep: stack traversal %ld", _traversals); } - Threads::nmethods_do(&mark_activation_closure); + return &mark_activation_closure; } else { // Only set hotness counter - Threads::nmethods_do(&set_hotness_closure); + return &set_hotness_closure; } - OrderAccess::storestore(); } /** diff --git a/src/share/vm/runtime/sweeper.hpp b/src/share/vm/runtime/sweeper.hpp --- a/src/share/vm/runtime/sweeper.hpp +++ b/src/share/vm/runtime/sweeper.hpp @@ -30,6 +30,8 @@ #include "code/codeCache.hpp" #include "utilities/ticks.hpp" +class CodeBlobClosure; + // An NmethodSweeper is an incremental cleaner for: // - cleanup inline caches // - reclamation of nmethods @@ -114,6 +116,7 @@ #endif static void mark_active_nmethods(); // Invoked at the end of each safepoint + static CodeBlobClosure* prepare_mark_active_nmethods(); static void sweeper_loop(); static void notify(int code_blob_type); // Possibly start the sweeper thread. static void force_sweep(); diff --git a/src/share/vm/runtime/synchronizer.cpp b/src/share/vm/runtime/synchronizer.cpp --- a/src/share/vm/runtime/synchronizer.cpp +++ b/src/share/vm/runtime/synchronizer.cpp @@ -1693,11 +1693,14 @@ return deflated_count; } -void ObjectSynchronizer::deflate_idle_monitors() { +void ObjectSynchronizer::prepare_deflate_idle_monitors(DeflateMonitorCounters* counters) { + counters->nInuse = 0; // currently associated with objects + counters->nInCirculation = 0; // extant + counters->nScavenged = 0; // reclaimed +} + +void ObjectSynchronizer::deflate_idle_monitors(DeflateMonitorCounters* counters) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); - int nInuse = 0; // currently associated with objects - int nInCirculation = 0; // extant - int nScavenged = 0; // reclaimed bool deflated = false; ObjectMonitor * freeHeadp = NULL; // Local SLL of scavenged monitors @@ -1710,25 +1713,16 @@ Thread::muxAcquire(&gListLock, "scavenge - return"); if 
(MonitorInUseLists) { - int inUse = 0; - for (JavaThread* cur = Threads::first(); cur != NULL; cur = cur->next()) { - nInCirculation+= cur->omInUseCount; - int deflated_count = deflate_monitor_list(cur->omInUseList_addr(), &freeHeadp, &freeTailp); - cur->omInUseCount-= deflated_count; - if (ObjectMonitor::Knob_VerifyInUse) { - verifyInUse(cur); - } - nScavenged += deflated_count; - nInuse += cur->omInUseCount; - } + // Note: the thread-local monitors lists get deflated in + // a separate pass. See deflate_thread_local_monitors(). // For moribund threads, scan gOmInUseList if (gOmInUseList) { - nInCirculation += gOmInUseCount; + counters->nInCirculation += gOmInUseCount; int deflated_count = deflate_monitor_list((ObjectMonitor **)&gOmInUseList, &freeHeadp, &freeTailp); gOmInUseCount-= deflated_count; - nScavenged += deflated_count; - nInuse += gOmInUseCount; + counters->nScavenged += deflated_count; + counters->nInuse += gOmInUseCount; } } else { @@ -1737,7 +1731,7 @@ for (; block != NULL; block = (PaddedEnd *)next(block)) { // Iterate over all extant monitors - Scavenge all idle monitors. assert(block->object() == CHAINMARKER, "must be a block header"); - nInCirculation += _BLOCKSIZE; + counters->nInCirculation += _BLOCKSIZE; for (int i = 1; i < _BLOCKSIZE; i++) { ObjectMonitor* mid = (ObjectMonitor*)&block[i]; oop obj = (oop)mid->object(); @@ -1754,31 +1748,17 @@ if (deflated) { mid->FreeNext = NULL; - nScavenged++; + counters->nScavenged++; } else { - nInuse++; + counters->nInuse++; } } } } - gMonitorFreeCount += nScavenged; - - // Consider: audit gFreeList to ensure that gMonitorFreeCount and list agree. 
- - if (ObjectMonitor::Knob_Verbose) { - tty->print_cr("INFO: Deflate: InCirc=%d InUse=%d Scavenged=%d " - "ForceMonitorScavenge=%d : pop=%d free=%d", - nInCirculation, nInuse, nScavenged, ForceMonitorScavenge, - gMonitorPopulation, gMonitorFreeCount); - tty->flush(); - } - - ForceMonitorScavenge = 0; // Reset - // Move the scavenged monitors back to the global free list. if (freeHeadp != NULL) { - guarantee(freeTailp != NULL && nScavenged > 0, "invariant"); + guarantee(freeTailp != NULL && counters->nScavenged > 0, "invariant"); assert(freeTailp->FreeNext == NULL, "invariant"); // constant-time list splice - prepend scavenged segment to gFreeList freeTailp->FreeNext = gFreeList; @@ -1786,8 +1766,25 @@ } Thread::muxRelease(&gListLock); - OM_PERFDATA_OP(Deflations, inc(nScavenged)); - OM_PERFDATA_OP(MonExtant, set_value(nInCirculation)); +} + +void ObjectSynchronizer::finish_deflate_idle_monitors(DeflateMonitorCounters* counters) { + gMonitorFreeCount += counters->nScavenged; + + // Consider: audit gFreeList to ensure that gMonitorFreeCount and list agree. + + if (ObjectMonitor::Knob_Verbose) { + tty->print_cr("INFO: Deflate: InCirc=%d InUse=%d Scavenged=%d " + "ForceMonitorScavenge=%d : pop=%d free=%d", + counters->nInCirculation, counters->nInuse, counters->nScavenged, ForceMonitorScavenge, + gMonitorPopulation, gMonitorFreeCount); + tty->flush(); + } + + ForceMonitorScavenge = 0; // Reset + + OM_PERFDATA_OP(Deflations, inc(counters->nScavenged)); + OM_PERFDATA_OP(MonExtant, set_value(counters->nInCirculation)); // TODO: Add objectMonitor leak detection. // Audit/inventory the objectMonitors -- make sure they're all accounted for. @@ -1795,6 +1792,38 @@ GVars.stwCycle++; } +void ObjectSynchronizer::deflate_thread_local_monitors(Thread* thread, DeflateMonitorCounters* counters) { + assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + if (! 
MonitorInUseLists) return; + + ObjectMonitor * freeHeadp = NULL; // Local SLL of scavenged monitors + ObjectMonitor * freeTailp = NULL; + + int deflated_count = deflate_monitor_list(thread->omInUseList_addr(), &freeHeadp, &freeTailp); + + Thread::muxAcquire(&gListLock, "scavenge - return"); + + // Adjust counters + counters->nInCirculation += thread->omInUseCount; + thread->omInUseCount-= deflated_count; + if (ObjectMonitor::Knob_VerifyInUse) { + verifyInUse(thread); + } + counters->nScavenged += deflated_count; + counters->nInuse += thread->omInUseCount; + + // Move the scavenged monitors back to the global free list. + if (freeHeadp != NULL) { + guarantee(freeTailp != NULL && deflated_count > 0, "invariant"); + assert(freeTailp->FreeNext == NULL, "invariant"); + + // constant-time list splice - prepend scavenged segment to gFreeList + freeTailp->FreeNext = gFreeList; + gFreeList = freeHeadp; + } + Thread::muxRelease(&gListLock); +} + // Monitor cleanup on JavaThread::exit // Iterate through monitor cache and attempt to release thread's monitors diff --git a/src/share/vm/runtime/synchronizer.hpp b/src/share/vm/runtime/synchronizer.hpp --- a/src/share/vm/runtime/synchronizer.hpp +++ b/src/share/vm/runtime/synchronizer.hpp @@ -32,6 +32,12 @@ class ObjectMonitor; +struct DeflateMonitorCounters { + int nInuse; // currently associated with objects + int nInCirculation; // extant + int nScavenged; // reclaimed +}; + class ObjectSynchronizer : AllStatic { friend class VMStructs; public: @@ -127,7 +133,11 @@ // GC: we current use aggressive monitor deflation policy // Basically we deflate all monitors that are not busy. 
// An adaptive profile-based deflation policy could be used if needed - static void deflate_idle_monitors(); + static void deflate_idle_monitors(DeflateMonitorCounters* counters); + static void deflate_thread_local_monitors(Thread* thread, DeflateMonitorCounters* counters); + static void prepare_deflate_idle_monitors(DeflateMonitorCounters* counters); + static void finish_deflate_idle_monitors(DeflateMonitorCounters* counters); + // For a given monitor list: global or per-thread, deflate idle monitors static int deflate_monitor_list(ObjectMonitor** listheadp, ObjectMonitor** freeHeadp, diff --git a/src/share/vm/runtime/thread.cpp b/src/share/vm/runtime/thread.cpp --- a/src/share/vm/runtime/thread.cpp +++ b/src/share/vm/runtime/thread.cpp @@ -3378,6 +3378,15 @@ // If CompilerThreads ever become non-JavaThreads, add them here } +void Threads::parallel_java_threads_do(ThreadClosure* tc) { + int cp = Threads::thread_claim_parity(); + ALL_JAVA_THREADS(p) { + if (p->claim_oops_do(true, cp)) { + tc->do_thread(p); + } + } +} + // The system initialization in the library has three phases. // // Phase 1: java.lang.System class initialization diff --git a/src/share/vm/runtime/thread.hpp b/src/share/vm/runtime/thread.hpp --- a/src/share/vm/runtime/thread.hpp +++ b/src/share/vm/runtime/thread.hpp @@ -2068,6 +2068,7 @@ static bool includes(JavaThread* p); static JavaThread* first() { return _thread_list; } static void threads_do(ThreadClosure* tc); + static void parallel_java_threads_do(ThreadClosure* tc); // Initializes the vm and creates the vm thread static jint create_vm(JavaVMInitArgs* args, bool* canTryAgain);