// platforms provide strong ST-ST order, so the issue is moot on IA32,
// x64, and SPARC.
//
// As a general policy we use "volatile" to control compiler-based reordering
// and explicit fences (barriers) to control for architectural reordering
// performed by the CPU(s) or platform.
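//
// Illustrative sketch of that policy (added for exposition; not part of the
// original sources, and the example_* names are hypothetical). "volatile"
// pins the compiler's ordering, while OrderAccess::storestore()/loadload()
// supply the architectural ST-ST and LD-LD ordering on weaker platforms.
static volatile int example_payload   = 0;
static volatile int example_published = 0;

static void example_publish(int v) {
  example_payload = v;          // ST payload
  OrderAccess::storestore();    // order payload store before flag store
  example_published = 1;        // readers that observe 1 must observe payload
}

static int example_consume() {
  while (example_published == 0) { /* spin until published */ }
  OrderAccess::loadload();      // order flag load before payload load
  return example_payload;
}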

struct SharedGlobals {
  char _pad_prefix[DEFAULT_CACHE_LINE_SIZE];
  // These are highly shared mostly-read variables.
  // To avoid false-sharing they need to be the sole occupants of a cache line.
  volatile int stw_random;
  volatile int stw_cycle;
  DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int) * 2);
  // Hot RW variable -- Sequester to avoid false-sharing
  volatile int hc_sequence;
  DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int));
};
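// Roughly what the layout above expands to (a sketch; DEFINE_PAD_MINUS_SIZE
// lives in globalDefinitions.hpp and the generated field names differ):
//
//   struct SharedGlobals {
//     char _pad_prefix[DEFAULT_CACHE_LINE_SIZE];                      // guard line
//     volatile int stw_random;
//     volatile int stw_cycle;
//     char _pad1[DEFAULT_CACHE_LINE_SIZE - sizeof(volatile int) * 2]; // fill line
//     volatile int hc_sequence;
//     char _pad2[DEFAULT_CACHE_LINE_SIZE - sizeof(volatile int)];     // fill line
//   };
//
// Each padded group is the sole occupant of its cache line, so writes to the
// hot hc_sequence cannot invalidate the line holding stw_random/stw_cycle.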

static SharedGlobals GVars;
static int MonitorScavengeThreshold = 1000000;
static volatile int ForceMonitorScavenge = 0; // Scavenge required and pending

static markWord read_stable_mark(oop obj) {
  markWord mark = obj->mark();
  if (!mark.is_being_inflated()) {
    return mark;       // normal fast-path return
  }
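
  // is_being_inflated() reports that the mark word currently holds the
  // transient INFLATING (zero) value that the inflating thread CASes in
  // before publishing the real ObjectMonitor*, so the mark cannot be
  // interpreted until inflation completes.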
  int its = 0;
  for (;;) {
    markWord mark = obj->mark();
    if (!mark.is_being_inflated()) {
      return mark;     // normal fast-path return
    }

    // The object is being inflated by some other thread.
    // The caller of read_stable_mark() must wait for inflation to complete.
    // Avoid live-lock
    // TODO: consider calling SafepointSynchronize::do_call_back() while
    // spinning to see if there's a safepoint pending. If so, immediately
    // yielding or blocking would be appropriate. Avoid spinning while
    // there is a safepoint pending.
    // ...

static bool monitors_used_above_threshold() {
  if (g_om_population == 0) {
    return false;
  }
  int monitors_used = g_om_population - g_om_free_count;
  int monitor_usage = (monitors_used * 100LL) / g_om_population;
  return monitor_usage > MonitorUsedDeflationThreshold;
}
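
// Worked example for the threshold check above (numbers are illustrative;
// e.g. with MonitorUsedDeflationThreshold = 90):
//   g_om_population = 1000, g_om_free_count = 50
//   monitors_used   = 1000 - 50 = 950
//   monitor_usage   = (950 * 100LL) / 1000 = 95   // 100LL widens the multiply
//                                                 // to 64 bits before dividing
//   95 > 90, so a deflation cycle is requested.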

bool ObjectSynchronizer::is_cleanup_needed() {
  if (MonitorUsedDeflationThreshold > 0) {
    if (monitors_used_above_threshold()) {
      return true;
    }
  }
  return needs_monitor_scavenge();
}

bool ObjectSynchronizer::needs_monitor_scavenge() {
  if (Atomic::load(&ForceMonitorScavenge) == 1) {
    log_info(monitorinflation)("Monitor scavenge needed, triggering safepoint cleanup.");
    return true;
  }
  return false;
}

void ObjectSynchronizer::oops_do(OopClosure* f) {
  // We only scan the global used list here (for moribund threads), and
  // the thread-local monitors in Thread::oops_do().
  global_used_oops_do(f);
}

void ObjectSynchronizer::global_used_oops_do(OopClosure* f) {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
  list_oops_do(g_om_in_use_list, f);
}

void ObjectSynchronizer::thread_local_used_oops_do(Thread* thread, OopClosure* f) {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
  list_oops_do(thread->om_in_use_list, f);
}

// ...

// Generally we favor time in space-time tradeoffs, but as there's no
// natural back-pressure on the # of extant monitors we need to impose some
// type of limit. Beware that if MonitorBound is set too low we could
// simply loop, and a low bound also incurs more safepoints, which are
// harmful to performance.
// See also: GuaranteedSafepointInterval
//
// If MonitorBound is set, the boundary applies to
//   (g_om_population - g_om_free_count)
// i.e., if there are not enough ObjectMonitors on the global free list,
// then a safepoint deflation is induced. Picking a good MonitorBound value
// is non-trivial.
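//
// Sketch of how the bound is consulted (hedged: the exact call site in
// om_alloc() may be shaped differently):
//
//   if (MonitorBound > 0 &&
//       g_om_population - g_om_free_count > MonitorBound) {
//     InduceScavenge(self, "om_alloc");
//   }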

static void InduceScavenge(Thread* self, const char* Whence) {
  // Induce STW safepoint to trim monitors
  // Ultimately, this results in a call to deflate_idle_monitors() in the near future.
  // More precisely, trigger a cleanup safepoint as the number
  // of active monitors passes the specified threshold.
  // TODO: assert thread state is reasonable

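  // The check-then-xchg below is a test-and-test-and-set: the cheap
  // Atomic::load() filters out the common already-requested case, and the
  // Atomic::xchg() guarantees that at most one racing thread observes the
  // 0 -> 1 transition and asks the VMThread for a cleanup.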
  if (Atomic::load(&ForceMonitorScavenge) == 0 && Atomic::xchg(1, &ForceMonitorScavenge) == 0) {
    VMThread::check_cleanup();
  }
}

ObjectMonitor* ObjectSynchronizer::om_alloc(Thread* self) {
  // A large MAXPRIVATE value reduces both list lock contention
  // and list coherency traffic, but also tends to increase the
  // number of ObjectMonitors in circulation as well as the STW
  // scavenge costs. As usual, we lean toward time in space-time
  // tradeoffs.
  const int MAXPRIVATE = 1024;
  stringStream ss;
  for (;;) {
    ObjectMonitor* m;

    // 1: try to allocate from the thread's local om_free_list.
    // Threads will attempt to allocate first from their local list, then
    // from the global list, and only after those attempts fail will the thread
    // attempt to instantiate new monitors. Thread-local free lists take
    // heat off the gListLock and improve allocation latency, as well as reducing
    // coherency traffic on the shared global list.
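    //
    // Condensed shape of the tiers described above (a sketch, not the
    // verbatim continuation of this function):
    //   1: pop an ObjectMonitor off self->om_free_list          (no lock)
    //   2: else refill the local list from the global free list (gListLock)
    //   3: else 'new' a block of ObjectMonitors, keep one, and donate the
    //      rest to the global free list, then retry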

// ...

  // Report the cumulative time for deflating each thread's idle
  // monitors. Note: if the work is split among more than one
  // worker thread, then the reported time will likely be more
  // than a beginning to end measurement of the phase.
  log_info(safepoint, cleanup)("deflating per-thread idle monitors, %3.7f secs, monitors=%d", counters->per_thread_times, counters->per_thread_scavenged);

  g_om_free_count += counters->n_scavenged;

  if (log_is_enabled(Debug, monitorinflation)) {
    // exit_globals()'s call to audit_and_print_stats() is done
    // at the Info level.
    ObjectSynchronizer::audit_and_print_stats(false /* on_exit */);
  } else if (log_is_enabled(Info, monitorinflation)) {
    Thread::muxAcquire(&gListLock, "finish_deflate_idle_monitors");
    log_info(monitorinflation)("g_om_population=%d, g_om_in_use_count=%d, "
                               "g_om_free_count=%d", g_om_population,
                               g_om_in_use_count, g_om_free_count);
    Thread::muxRelease(&gListLock);
  }

  Atomic::store(0, &ForceMonitorScavenge); // Reset

  OM_PERFDATA_OP(Deflations, inc(counters->n_scavenged));
  OM_PERFDATA_OP(MonExtant, set_value(counters->n_in_circulation));

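  // Refresh the per-cycle randomness. stw_random appears to feed consumers
  // such as the hash code generation modes that mix in per-safepoint
  // randomness (an observation about usage elsewhere, not a contract).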
  GVars.stw_random = os::random();
  GVars.stw_cycle++;
}

void ObjectSynchronizer::deflate_thread_local_monitors(Thread* thread, DeflateMonitorCounters* counters) {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");

  ObjectMonitor* free_head_p = NULL;  // Local SLL of scavenged monitors
  ObjectMonitor* free_tail_p = NULL;
  elapsedTimer timer;

  if (log_is_enabled(Info, safepoint, cleanup) ||
      log_is_enabled(Info, monitorinflation)) {
    timer.start();
  }

  // ...