--- old/src/share/vm/runtime/safepoint.cpp 2016-02-10 16:20:45.269135629 +0100 +++ new/src/share/vm/runtime/safepoint.cpp 2016-02-10 16:20:45.209135331 +0100 @@ -55,6 +55,8 @@ #include "runtime/synchronizer.hpp" #include "runtime/thread.inline.hpp" #include "services/runtimeService.hpp" +#include "trace/tracing.hpp" +#include "trace/traceMacros.hpp" #include "utilities/events.hpp" #include "utilities/macros.hpp" #if INCLUDE_ALL_GCS @@ -79,10 +81,9 @@ // Roll all threads forward to a safepoint and suspend them all void SafepointSynchronize::begin() { - + EventSafepointBegin begin_event; Thread* myThread = Thread::current(); assert(myThread->is_VM_thread(), "Only VM thread may execute a safepoint"); - if (PrintSafepointStatistics || PrintSafepointStatisticsTimeout > 0) { _safepoint_begin_time = os::javaTimeNanos(); _ts_of_current_safepoint = tty->time_stamp().seconds(); @@ -169,191 +170,217 @@ // between states, the safepointing code will wait for the thread to // block itself when it attempts transitions to a new state. // - _state = _synchronizing; - OrderAccess::fence(); + { + EventSafepointStateSync sync_event; + int initial_running = 0; - // Flush all thread states to memory - if (!UseMembar) { - os::serialize_thread_states(); - } + _state = _synchronizing; + OrderAccess::fence(); - // Make interpreter safepoint aware - Interpreter::notice_safepoints(); + // Flush all thread states to memory + if (!UseMembar) { + os::serialize_thread_states(); + } - if (DeferPollingPageLoopCount < 0) { - // Make polling safepoint aware - guarantee (PageArmed == 0, "invariant") ; - PageArmed = 1 ; - os::make_polling_page_unreadable(); - } + // Make interpreter safepoint aware + Interpreter::notice_safepoints(); - // Consider using active_processor_count() ... but that call is expensive. - int ncpus = os::processor_count() ; + if (DeferPollingPageLoopCount < 0) { + // Make polling safepoint aware + guarantee (PageArmed == 0, "invariant") ; + PageArmed = 1 ; + os::make_polling_page_unreadable(); + } + + // Consider using active_processor_count() ... but that call is expensive. + int ncpus = os::processor_count() ; #ifdef ASSERT - for (JavaThread *cur = Threads::first(); cur != NULL; cur = cur->next()) { - assert(cur->safepoint_state()->is_running(), "Illegal initial state"); - // Clear the visited flag to ensure that the critical counts are collected properly. - cur->set_visited_for_critical_count(false); - } + for (JavaThread *cur = Threads::first(); cur != NULL; cur = cur->next()) { + assert(cur->safepoint_state()->is_running(), "Illegal initial state"); + // Clear the visited flag to ensure that the critical counts are collected properly. + cur->set_visited_for_critical_count(false); + } #endif // ASSERT - if (SafepointTimeout) - safepoint_limit_time = os::javaTimeNanos() + (jlong)SafepointTimeoutDelay * MICROUNITS; + if (SafepointTimeout) + safepoint_limit_time = os::javaTimeNanos() + (jlong)SafepointTimeoutDelay * MICROUNITS; - // Iterate through all threads until it have been determined how to stop them all at a safepoint - unsigned int iterations = 0; - int steps = 0 ; - while(still_running > 0) { - for (JavaThread *cur = Threads::first(); cur != NULL; cur = cur->next()) { - assert(!cur->is_ConcurrentGC_thread(), "A concurrent GC thread is unexpectly being suspended"); - ThreadSafepointState *cur_state = cur->safepoint_state(); - if (cur_state->is_running()) { - cur_state->examine_state_of_thread(); - if (!cur_state->is_running()) { - still_running--; - // consider adjusting steps downward: - // steps = 0 - // steps -= NNN - // steps >>= 1 - // steps = MIN(steps, 2000-100) - // if (iterations != 0) steps -= NNN + // Iterate through all threads until it have been determined how to stop them all at a safepoint + unsigned int iterations = 0; + int steps = 0 ; + while(still_running > 0) { + for (JavaThread *cur = Threads::first(); cur != NULL; cur = cur->next()) { + assert(!cur->is_ConcurrentGC_thread(), "A concurrent GC thread is unexpectly being suspended"); + ThreadSafepointState *cur_state = cur->safepoint_state(); + if (cur_state->is_running()) { + cur_state->examine_state_of_thread(); + if (!cur_state->is_running()) { + still_running--; + // consider adjusting steps downward: + // steps = 0 + // steps -= NNN + // steps >>= 1 + // steps = MIN(steps, 2000-100) + // if (iterations != 0) steps -= NNN + } + if (log_is_enabled(Trace, safepoint)) { + ResourceMark rm; + cur_state->print_on(LogHandle(safepoint)::debug_stream()); + } } - if (log_is_enabled(Trace, safepoint)) { - ResourceMark rm; - cur_state->print_on(LogHandle(safepoint)::debug_stream()); + } + + if (iterations == 0) { + initial_running = still_running; + if (PrintSafepointStatistics) { + begin_statistics(nof_threads, still_running); } } - } - if (PrintSafepointStatistics && iterations == 0) { - begin_statistics(nof_threads, still_running); - } + if (still_running > 0) { + // Check for if it takes to long + if (SafepointTimeout && safepoint_limit_time < os::javaTimeNanos()) { + print_safepoint_timeout(_spinning_timeout); + } - if (still_running > 0) { - // Check for if it takes to long - if (SafepointTimeout && safepoint_limit_time < os::javaTimeNanos()) { - print_safepoint_timeout(_spinning_timeout); - } + // Spin to avoid context switching. + // There's a tension between allowing the mutators to run (and rendezvous) + // vs spinning. As the VM thread spins, wasting cycles, it consumes CPU that + // a mutator might otherwise use profitably to reach a safepoint. Excessive + // spinning by the VM thread on a saturated system can increase rendezvous latency. + // Blocking or yielding incur their own penalties in the form of context switching + // and the resultant loss of $ residency. + // + // Further complicating matters is that yield() does not work as naively expected + // on many platforms -- yield() does not guarantee that any other ready threads + // will run. As such we revert to naked_short_sleep() after some number of iterations. + // nakes_short_sleep() is implemented as a short unconditional sleep. + // Typical operating systems round a "short" sleep period up to 10 msecs, so sleeping + // can actually increase the time it takes the VM thread to detect that a system-wide + // stop-the-world safepoint has been reached. In a pathological scenario such as that + // described in CR6415670 the VMthread may sleep just before the mutator(s) become safe. + // In that case the mutators will be stalled waiting for the safepoint to complete and the + // the VMthread will be sleeping, waiting for the mutators to rendezvous. The VMthread + // will eventually wake up and detect that all mutators are safe, at which point + // we'll again make progress. + // + // Beware too that that the VMThread typically runs at elevated priority. + // Its default priority is higher than the default mutator priority. + // Obviously, this complicates spinning. + // + // Note too that on Windows XP SwitchThreadTo() has quite different behavior than Sleep(0). + // Sleep(0) will _not yield to lower priority threads, while SwitchThreadTo() will. + // + // See the comments in synchronizer.cpp for additional remarks on spinning. + // + // In the future we might: + // 1. Modify the safepoint scheme to avoid potentially unbounded spinning. + // This is tricky as the path used by a thread exiting the JVM (say on + // on JNI call-out) simply stores into its state field. The burden + // is placed on the VM thread, which must poll (spin). + // 2. Find something useful to do while spinning. If the safepoint is GC-related + // we might aggressively scan the stacks of threads that are already safe. + // 3. Use Solaris schedctl to examine the state of the still-running mutators. + // If all the mutators are ONPROC there's no reason to sleep or yield. + // 4. YieldTo() any still-running mutators that are ready but OFFPROC. + // 5. Check system saturation. If the system is not fully saturated then + // simply spin and avoid sleep/yield. + // 6. As still-running mutators rendezvous they could unpark the sleeping + // VMthread. This works well for still-running mutators that become + // safe. The VMthread must still poll for mutators that call-out. + // 7. Drive the policy on time-since-begin instead of iterations. + // 8. Consider making the spin duration a function of the # of CPUs: + // Spin = (((ncpus-1) * M) + K) + F(still_running) + // Alternately, instead of counting iterations of the outer loop + // we could count the # of threads visited in the inner loop, above. + // 9. On windows consider using the return value from SwitchThreadTo() + // to drive subsequent spin/SwitchThreadTo()/Sleep(N) decisions. + + if (int(iterations) == DeferPollingPageLoopCount) { + guarantee (PageArmed == 0, "invariant") ; + PageArmed = 1 ; + os::make_polling_page_unreadable(); + } - // Spin to avoid context switching. - // There's a tension between allowing the mutators to run (and rendezvous) - // vs spinning. As the VM thread spins, wasting cycles, it consumes CPU that - // a mutator might otherwise use profitably to reach a safepoint. Excessive - // spinning by the VM thread on a saturated system can increase rendezvous latency. - // Blocking or yielding incur their own penalties in the form of context switching - // and the resultant loss of $ residency. - // - // Further complicating matters is that yield() does not work as naively expected - // on many platforms -- yield() does not guarantee that any other ready threads - // will run. As such we revert to naked_short_sleep() after some number of iterations. - // nakes_short_sleep() is implemented as a short unconditional sleep. - // Typical operating systems round a "short" sleep period up to 10 msecs, so sleeping - // can actually increase the time it takes the VM thread to detect that a system-wide - // stop-the-world safepoint has been reached. In a pathological scenario such as that - // described in CR6415670 the VMthread may sleep just before the mutator(s) become safe. - // In that case the mutators will be stalled waiting for the safepoint to complete and the - // the VMthread will be sleeping, waiting for the mutators to rendezvous. The VMthread - // will eventually wake up and detect that all mutators are safe, at which point - // we'll again make progress. - // - // Beware too that that the VMThread typically runs at elevated priority. - // Its default priority is higher than the default mutator priority. - // Obviously, this complicates spinning. - // - // Note too that on Windows XP SwitchThreadTo() has quite different behavior than Sleep(0). - // Sleep(0) will _not yield to lower priority threads, while SwitchThreadTo() will. - // - // See the comments in synchronizer.cpp for additional remarks on spinning. - // - // In the future we might: - // 1. Modify the safepoint scheme to avoid potentially unbounded spinning. - // This is tricky as the path used by a thread exiting the JVM (say on - // on JNI call-out) simply stores into its state field. The burden - // is placed on the VM thread, which must poll (spin). - // 2. Find something useful to do while spinning. If the safepoint is GC-related - // we might aggressively scan the stacks of threads that are already safe. - // 3. Use Solaris schedctl to examine the state of the still-running mutators. - // If all the mutators are ONPROC there's no reason to sleep or yield. - // 4. YieldTo() any still-running mutators that are ready but OFFPROC. - // 5. Check system saturation. If the system is not fully saturated then - // simply spin and avoid sleep/yield. - // 6. As still-running mutators rendezvous they could unpark the sleeping - // VMthread. This works well for still-running mutators that become - // safe. The VMthread must still poll for mutators that call-out. - // 7. Drive the policy on time-since-begin instead of iterations. - // 8. Consider making the spin duration a function of the # of CPUs: - // Spin = (((ncpus-1) * M) + K) + F(still_running) - // Alternately, instead of counting iterations of the outer loop - // we could count the # of threads visited in the inner loop, above. - // 9. On windows consider using the return value from SwitchThreadTo() - // to drive subsequent spin/SwitchThreadTo()/Sleep(N) decisions. - - if (int(iterations) == DeferPollingPageLoopCount) { - guarantee (PageArmed == 0, "invariant") ; - PageArmed = 1 ; - os::make_polling_page_unreadable(); - } + // Instead of (ncpus > 1) consider either (still_running < (ncpus + EPSILON)) or + // ((still_running + _waiting_to_block - TryingToBlock)) < ncpus) + ++steps ; + if (ncpus > 1 && steps < SafepointSpinBeforeYield) { + SpinPause() ; // MP-Polite spin + } else + if (steps < DeferThrSuspendLoopCount) { + os::naked_yield() ; + } else { + os::naked_short_sleep(1); + } - // Instead of (ncpus > 1) consider either (still_running < (ncpus + EPSILON)) or - // ((still_running + _waiting_to_block - TryingToBlock)) < ncpus) - ++steps ; - if (ncpus > 1 && steps < SafepointSpinBeforeYield) { - SpinPause() ; // MP-Polite spin - } else - if (steps < DeferThrSuspendLoopCount) { - os::naked_yield() ; - } else { - os::naked_short_sleep(1); + iterations ++ ; } + assert(iterations < (uint)max_jint, "We have been iterating in the safepoint loop too long"); + } + assert(still_running == 0, "sanity check"); - iterations ++ ; + if (PrintSafepointStatistics) { + update_statistics_on_spin_end(); } - assert(iterations < (uint)max_jint, "We have been iterating in the safepoint loop too long"); - } - assert(still_running == 0, "sanity check"); - if (PrintSafepointStatistics) { - update_statistics_on_spin_end(); - } + if (sync_event.should_commit()) { + sync_event.set_safepointId(safepoint_counter()); + sync_event.set_initialThreadCount(initial_running); + sync_event.set_runningThreadCount(_waiting_to_block); + sync_event.set_iterations(iterations); + sync_event.commit(); + } + } //EventSafepointStateSync // wait until all threads are stopped - while (_waiting_to_block > 0) { - log_debug(safepoint)("Waiting for %d thread(s) to block", _waiting_to_block); - if (!SafepointTimeout || timeout_error_printed) { - Safepoint_lock->wait(true); // true, means with no safepoint checks - } else { - // Compute remaining time - jlong remaining_time = safepoint_limit_time - os::javaTimeNanos(); - - // If there is no remaining time, then there is an error - if (remaining_time < 0 || Safepoint_lock->wait(true, remaining_time / MICROUNITS)) { - print_safepoint_timeout(_blocking_timeout); + { + EventSafepointWaitBlocked wait_blocked_event; + int initial_waiting_to_block = _waiting_to_block; + + while (_waiting_to_block > 0) { + log_debug(safepoint)("Waiting for %d thread(s) to block", _waiting_to_block); + if (!SafepointTimeout || timeout_error_printed) { + Safepoint_lock->wait(true); // true, means with no safepoint checks + } else { + // Compute remaining time + jlong remaining_time = safepoint_limit_time - os::javaTimeNanos(); + + // If there is no remaining time, then there is an error + if (remaining_time < 0 || Safepoint_lock->wait(true, remaining_time / MICROUNITS)) { + print_safepoint_timeout(_blocking_timeout); + } } } - } - assert(_waiting_to_block == 0, "sanity check"); + assert(_waiting_to_block == 0, "sanity check"); #ifndef PRODUCT - if (SafepointTimeout) { - jlong current_time = os::javaTimeNanos(); - if (safepoint_limit_time < current_time) { - tty->print_cr("# SafepointSynchronize: Finished after " - INT64_FORMAT_W(6) " ms", - ((current_time - safepoint_limit_time) / MICROUNITS + - (jlong)SafepointTimeoutDelay)); + if (SafepointTimeout) { + jlong current_time = os::javaTimeNanos(); + if (safepoint_limit_time < current_time) { + tty->print_cr("# SafepointSynchronize: Finished after " + INT64_FORMAT_W(6) " ms", + ((current_time - safepoint_limit_time) / MICROUNITS + + (jlong)SafepointTimeoutDelay)); + } } - } #endif - assert((_safepoint_counter & 0x1) == 0, "must be even"); - assert(Threads_lock->owned_by_self(), "must hold Threads_lock"); - _safepoint_counter ++; + assert((_safepoint_counter & 0x1) == 0, "must be even"); + assert(Threads_lock->owned_by_self(), "must hold Threads_lock"); + _safepoint_counter ++; - // Record state - _state = _synchronized; + // Record state + _state = _synchronized; - OrderAccess::fence(); + OrderAccess::fence(); + if (wait_blocked_event.should_commit()) { + wait_blocked_event.set_safepointId(safepoint_counter()); + wait_blocked_event.set_runningThreadCount(initial_waiting_to_block); + wait_blocked_event.commit(); + } + } // EventSafepointWaitBlocked #ifdef ASSERT for (JavaThread *cur = Threads::first(); cur != NULL; cur = cur->next()) { @@ -377,17 +404,32 @@ } // Call stuff that needs to be run when a safepoint is just about to be completed - do_cleanup_tasks(); + { + EventSafepointCleanup cleanup_event; + do_cleanup_tasks(); + if (cleanup_event.should_commit()) { + cleanup_event.set_safepointId(safepoint_counter()); + cleanup_event.commit(); + } + } if (PrintSafepointStatistics) { // Record how much time spend on the above cleanup tasks update_statistics_on_cleanup_end(os::javaTimeNanos()); } + if (begin_event.should_commit()) { + begin_event.set_safepointId(safepoint_counter()); + begin_event.set_totalThreadCount(nof_threads); + begin_event.set_jniCriticalThreadCount(_current_jni_active_count); + begin_event.commit(); + } } // Wake up all threads, so they are ready to resume execution after the safepoint // operation has been carried out void SafepointSynchronize::end() { + EventSafepointEnd event; + int safepoint_id = safepoint_counter(); // Keep the odd counter as "id" assert(Threads_lock->owned_by_self(), "must hold Threads_lock"); assert((_safepoint_counter & 0x1) == 1, "must be odd"); @@ -474,6 +516,11 @@ // record this time so VMThread can keep track how much time has elapsed // since last safepoint. _end_of_last_safepoint = os::javaTimeMillis(); + + if (event.should_commit()) { + event.set_safepointId(safepoint_id); + event.commit(); + } } bool SafepointSynchronize::is_cleanup_needed() { @@ -487,39 +534,88 @@ // Various cleaning tasks that should be done periodically at safepoints void SafepointSynchronize::do_cleanup_tasks() { { - TraceTime t1("deflating idle monitors", TraceSafepointCleanupTime); + const char* name = "deflating idle monitors"; + EventSafepointCleanupTask event; + TraceTime t1(name, TraceSafepointCleanupTime); ObjectSynchronizer::deflate_idle_monitors(); + if (event.should_commit()) { + event.set_safepointId(safepoint_counter()); + event.set_name(name); + event.commit(); + } } { - TraceTime t2("updating inline caches", TraceSafepointCleanupTime); + const char* name = "updating inline caches"; + EventSafepointCleanupTask event; + TraceTime t2(name, TraceSafepointCleanupTime); InlineCacheBuffer::update_inline_caches(); + if (event.should_commit()) { + event.set_safepointId(safepoint_counter()); + event.set_name(name); + event.commit(); + } } { - TraceTime t3("compilation policy safepoint handler", TraceSafepointCleanupTime); + const char* name = "compilation policy safepoint handler"; + EventSafepointCleanupTask event; + TraceTime t3(name, TraceSafepointCleanupTime); CompilationPolicy::policy()->do_safepoint_work(); + if (event.should_commit()) { + event.set_safepointId(safepoint_counter()); + event.set_name(name); + event.commit(); + } } { - TraceTime t4("mark nmethods", TraceSafepointCleanupTime); + const char* name = "mark nmethods"; + EventSafepointCleanupTask event; + TraceTime t4(name, TraceSafepointCleanupTime); NMethodSweeper::mark_active_nmethods(); + if (event.should_commit()) { + event.set_safepointId(safepoint_counter()); + event.set_name(name); + event.commit(); + } } if (SymbolTable::needs_rehashing()) { - TraceTime t5("rehashing symbol table", TraceSafepointCleanupTime); + const char* name = "rehashing symbol table"; + EventSafepointCleanupTask event; + TraceTime t5(name, TraceSafepointCleanupTime); SymbolTable::rehash_table(); + if (event.should_commit()) { + event.set_safepointId(safepoint_counter()); + event.set_name(name); + event.commit(); + } } if (StringTable::needs_rehashing()) { - TraceTime t6("rehashing string table", TraceSafepointCleanupTime); + const char* name = "rehashing string table"; + EventSafepointCleanupTask event; + TraceTime t6(name, TraceSafepointCleanupTime); StringTable::rehash_table(); + if (event.should_commit()) { + event.set_safepointId(safepoint_counter()); + event.set_name(name); + event.commit(); + } } { // CMS delays purging the CLDG until the beginning of the next safepoint and to // make sure concurrent sweep is done - TraceTime t7("purging class loader data graph", TraceSafepointCleanupTime); + const char* name = "purging class loader data graph"; + EventSafepointCleanupTask event; + TraceTime t7(name, TraceSafepointCleanupTime); ClassLoaderDataGraph::purge_if_needed(); + if (event.should_commit()) { + event.set_safepointId(safepoint_counter()); + event.set_name(name); + event.commit(); + } } } --- old/src/share/vm/runtime/safepoint.hpp 2016-02-10 16:20:45.497136759 +0100 +++ new/src/share/vm/runtime/safepoint.hpp 2016-02-10 16:20:45.437136462 +0100 @@ -145,6 +145,7 @@ // Query inline static bool is_at_safepoint() { return _state == _synchronized; } inline static bool is_synchronizing() { return _state == _synchronizing; } + inline static int safepoint_counter() { return _safepoint_counter; } inline static bool do_call_back() { return (_state != _not_synchronized); --- old/src/share/vm/trace/trace.xml 2016-02-10 16:20:45.709137811 +0100 +++ new/src/share/vm/trace/trace.xml 2016-02-10 16:20:45.649137513 +0100 @@ -37,6 +37,7 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +