101 }
102
103 #else // ndef DTRACE_ENABLED
104
105 #define DTRACE_MONITOR_WAIT_PROBE(obj, thread, millis, mon) {;}
106 #define DTRACE_MONITOR_PROBE(probe, obj, thread, mon) {;}
107
108 #endif // ndef DTRACE_ENABLED
109
110 // This exists only as a workaround of dtrace bug 6254741
int dtrace_waited_probe(ObjectMonitor* monitor, Handle obj, Thread* thr) {
  // Fire the "waited" monitor probe. When DTRACE_ENABLED is not defined
  // the macro expands to a no-op, so this function is always safe to call.
  DTRACE_MONITOR_PROBE(waited, monitor, obj(), thr);
  // The return value is unused; it exists only so this call can appear in
  // expression contexts (see the dtrace bug 6254741 workaround note).
  return 0;
}
115
// Array of spin locks used to throttle threads that are spinning on an
// object whose mark word is currently being inflated by another thread.
#define NINFLATIONLOCKS 256
static volatile intptr_t gInflationLocks[NINFLATIONLOCKS];

// global list of blocks of monitors
PaddedObjectMonitor* volatile ObjectSynchronizer::g_block_list = NULL;
// Global ObjectMonitor free list. Newly allocated and deflated
// ObjectMonitors are prepended here.
ObjectMonitor* volatile ObjectSynchronizer::g_free_list = NULL;
// Global ObjectMonitor in-use list. When a JavaThread is exiting,
// ObjectMonitors on its per-thread in-use list are prepended here.
ObjectMonitor* volatile ObjectSynchronizer::g_om_in_use_list = NULL;
int ObjectSynchronizer::g_om_in_use_count = 0; // # on g_om_in_use_list
// Flags set to request async/special deflation passes; cleared by the
// code that services the request.
bool volatile ObjectSynchronizer::_is_async_deflation_requested = false;
bool volatile ObjectSynchronizer::_is_special_deflation_requested = false;
jlong ObjectSynchronizer::_last_async_deflation_time_ns = 0;

static volatile intptr_t gListLock = 0; // protects global monitor lists
static volatile int g_om_free_count = 0; // # on g_free_list
static volatile int g_om_population = 0; // # Extant -- in circulation

// Sentinel stored in the object field of the first ObjectMonitor in each
// block to mark it as a block-list header rather than a usable monitor.
#define CHAINMARKER (cast_to_oop<intptr_t>(-1))
138
139 // =====================> Quick functions
140
141 // The quick_* forms are special fast-path variants used to improve
142 // performance. In the simplest case, a "quick_*" implementation could
143 // simply return false, in which case the caller will perform the necessary
144 // state transitions and call the slow-path form.
145 // The fast-path is designed to handle frequently arising cases in an efficient
146 // manner and is just a degenerate "optimistic" variant of the slow-path.
147 // returns true -- to indicate the call was satisfied.
148 // returns false -- to indicate the call needs the services of the slow-path.
149 // A no-loitering ordinance is in effect for code in the quick_* family
150 // operators: safepoints or indefinite blocking (blocking that might span a
151 // safepoint) are forbidden. Generally the thread_state() is _in_Java upon
152 // entry.
153 //
565 //
566 // Performance concern:
567 // OrderAccess::storestore() calls release() which at one time stored 0
568 // into the global volatile OrderAccess::dummy variable. This store was
569 // unnecessary for correctness. Many threads storing into a common location
570 // causes considerable cache migration or "sloshing" on large SMP systems.
571 // As such, I avoided using OrderAccess::storestore(). In some cases
572 // OrderAccess::fence() -- which incurs local latency on the executing
573 // processor -- is a better choice as it scales on SMP systems.
574 //
575 // See http://blogs.oracle.com/dave/entry/biased_locking_in_hotspot for
576 // a discussion of coherency costs. Note that all our current reference
577 // platforms provide strong ST-ST order, so the issue is moot on IA32,
578 // x64, and SPARC.
579 //
580 // As a general policy we use "volatile" to control compiler-based reordering
581 // and explicit fences (barriers) to control for architectural reordering
582 // performed by the CPU(s) or platform.
583
// Process-wide shared state for the synchronization subsystem. The explicit
// padding isolates each group of fields on its own cache line so that
// frequently-written fields do not false-share with the mostly-read ones.
struct SharedGlobals {
  char _pad_prefix[DEFAULT_CACHE_LINE_SIZE];
  // These are highly shared mostly-read variables.
  // To avoid false-sharing they need to be the sole occupants of a cache line.
  volatile int stw_random;
  volatile int stw_cycle;
  DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int) * 2);
  // Hot RW variable -- Sequester to avoid false-sharing
  volatile int hc_sequence;
  DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int));
};
595
// Single instance of the shared globals defined above.
static SharedGlobals GVars;
// Threshold of extant monitors above which a scavenge may be induced.
static int MonitorScavengeThreshold = 1000000;
static volatile int ForceMonitorScavenge = 0; // Scavenge required and pending
599
600 static markWord read_stable_mark(oop obj) {
601 markWord mark = obj->mark();
602 if (!mark.is_being_inflated()) {
603 return mark; // normal fast-path return
604 }
605
606 int its = 0;
607 for (;;) {
608 markWord mark = obj->mark();
609 if (!mark.is_being_inflated()) {
610 return mark; // normal fast-path return
611 }
612
613 // The object is being inflated by some other thread.
981 // Cannot have assertion since this object may have been
982 // locked by another thread when reaching here.
983 // assert(mark.is_neutral(), "sanity check");
984
985 return NULL;
986 }
987 }
988
989 // Visitors ...
990
// Iterate over all extant ObjectMonitors in the global block list and
// apply the closure to each active monitor that has an associated object.
// Walks blocks via the header's _next_om linkage; slot 0 of each block is
// the CHAINMARKER header and is therefore skipped by the i > 0 loop bound.
void ObjectSynchronizer::monitors_iterate(MonitorClosure* closure) {
  PaddedObjectMonitor* block = OrderAccess::load_acquire(&g_block_list);
  while (block != NULL) {
    assert(block->object() == CHAINMARKER, "must be a block header");
    for (int i = _BLOCKSIZE - 1; i > 0; i--) {
      ObjectMonitor* mid = (ObjectMonitor *)(block + i);
      if (mid->is_active()) {
        // Taking an ObjectMonitorHandle bumps mid's ref_count for the
        // duration of the closure call.
        ObjectMonitorHandle omh(mid);

        if (mid->object() == NULL ||
            (AsyncDeflateIdleMonitors && mid->_owner == DEFLATER_MARKER)) {
          // Only process with closure if the object is set.
          // For async deflation, race here if monitor is not owned!
          // The above ref_count bump (in ObjectMonitorHandle ctr)
          // will cause subsequent async deflation to skip it.
          // However, previous or concurrent async deflation is a race.
          continue;
        }
        closure->do_monitor(mid);
      }
    }
    block = (PaddedObjectMonitor*)block->_next_om;
  }
}
1015
1016 static bool monitors_used_above_threshold() {
1017 if (g_om_population == 0) {
1018 return false;
1019 }
1020 if (MonitorUsedDeflationThreshold > 0) {
1021 int monitors_used = g_om_population - g_om_free_count;
1022 int monitor_usage = (monitors_used * 100LL) / g_om_population;
1023 return monitor_usage > MonitorUsedDeflationThreshold;
1024 }
1025 return false;
1026 }
1081 // We only scan the global used list here (for moribund threads), and
1082 // the thread-local monitors in Thread::oops_do().
1083 global_used_oops_do(f);
1084 }
1085
// Apply the oop closure to every object referenced from the global
// in-use monitor list (monitors flushed from moribund threads).
// Must run at a safepoint so the list cannot change underneath us.
void ObjectSynchronizer::global_used_oops_do(OopClosure* f) {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
  list_oops_do(g_om_in_use_list, f);
}
1090
// Apply the oop closure to every object referenced from the given
// thread's per-thread in-use monitor list.
// Must run at a safepoint so the list cannot change underneath us.
void ObjectSynchronizer::thread_local_used_oops_do(Thread* thread, OopClosure* f) {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
  list_oops_do(thread->om_in_use_list, f);
}
1095
1096 void ObjectSynchronizer::list_oops_do(ObjectMonitor* list, OopClosure* f) {
1097 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
1098 // The oops_do() phase does not overlap with monitor deflation
1099 // so no need to update the ObjectMonitor's ref_count for this
1100 // ObjectMonitor* use.
1101 ObjectMonitor* mid;
1102 for (mid = list; mid != NULL; mid = mid->_next_om) {
1103 if (mid->object() != NULL) {
1104 f->do_oop((oop*)mid->object_addr());
1105 }
1106 }
1107 }
1108
1109
1110 // -----------------------------------------------------------------------------
1111 // ObjectMonitor Lifecycle
1112 // -----------------------
1113 // Inflation unlinks monitors from the global g_free_list and
1114 // associates them with objects. Deflation -- which occurs at
1115 // STW-time -- disassociates idle monitors from objects. Such
1116 // scavenged monitors are returned to the g_free_list.
1117 //
1118 // The global list is protected by gListLock. All the critical sections
1119 // are short and operate in constant-time.
1120 //
1121 // ObjectMonitors reside in type-stable memory (TSM) and are immortal.
1122 //
1185 const InflateCause cause) {
1186 // A large MAXPRIVATE value reduces both list lock contention
1187 // and list coherency traffic, but also tends to increase the
1188 // number of ObjectMonitors in circulation as well as the STW
1189 // scavenge costs. As usual, we lean toward time in space-time
1190 // tradeoffs.
1191 const int MAXPRIVATE = 1024;
1192
1193 if (AsyncDeflateIdleMonitors) {
1194 JavaThread* jt = (JavaThread *)self;
1195 if (jt->om_request_deflation && jt->om_in_use_count > 0 &&
1196 cause != inflate_cause_vm_internal) {
1197 // Deflate any per-thread idle monitors for this JavaThread if
1198 // this is not an internal inflation; internal inflations can
1199 // occur in places where it is not safe to pause for a safepoint.
1200 // Clean up your own mess (Gibbs Rule 45). Otherwise, skip this
1201 // deflation. deflate_global_idle_monitors_using_JT() is called
1202 // by the ServiceThread. Per-thread async deflation is triggered
1203 // by the ServiceThread via om_request_deflation.
1204 debug_only(jt->check_for_valid_safepoint_state(false);)
1205 ObjectSynchronizer::deflate_per_thread_idle_monitors_using_JT();
1206 }
1207 }
1208
1209 stringStream ss;
1210 for (;;) {
1211 ObjectMonitor* m;
1212
1213 // 1: try to allocate from the thread's local om_free_list.
1214 // Threads will attempt to allocate first from their local list, then
1215 // from the global list, and only after those attempts fail will the thread
1216 // attempt to instantiate new monitors. Thread-local free lists take
1217 // heat off the gListLock and improve allocation latency, as well as reducing
1218 // coherency traffic on the shared global list.
1219 m = self->om_free_list;
1220 if (m != NULL) {
1221 self->om_free_list = m->_next_om;
1222 self->om_free_count--;
1223 guarantee(m->object() == NULL, "invariant");
1224 m->set_allocation_state(ObjectMonitor::New);
1225 m->_next_om = self->om_in_use_list;
1272 // Not enough ObjectMonitors on the global free list.
1273 // We can't safely induce a STW safepoint from om_alloc() as our thread
1274 // state may not be appropriate for such activities and callers may hold
1275 // naked oops, so instead we defer the action.
1276 InduceScavenge(self, "om_alloc");
1277 }
1278 continue;
1279 }
1280
1281 // 3: allocate a block of new ObjectMonitors
1282 // Both the local and global free lists are empty -- resort to malloc().
1283 // In the current implementation ObjectMonitors are TSM - immortal.
1284 // Ideally, we'd write "new ObjectMonitor[_BLOCKSIZE], but we want
1285 // each ObjectMonitor to start at the beginning of a cache line,
1286 // so we use align_up().
1287 // A better solution would be to use C++ placement-new.
1288 // BEWARE: As it stands currently, we don't run the ctors!
1289 assert(_BLOCKSIZE > 1, "invariant");
1290 size_t neededsize = sizeof(PaddedObjectMonitor) * _BLOCKSIZE;
1291 PaddedObjectMonitor* temp;
1292 size_t aligned_size = neededsize + (DEFAULT_CACHE_LINE_SIZE - 1);
1293 void* real_malloc_addr = (void*)NEW_C_HEAP_ARRAY(char, aligned_size,
1294 mtInternal);
1295 temp = (PaddedObjectMonitor*)align_up(real_malloc_addr, DEFAULT_CACHE_LINE_SIZE);
1296
1297 // NOTE: (almost) no way to recover if allocation failed.
1298 // We might be able to induce a STW safepoint and scavenge enough
1299 // ObjectMonitors to permit progress.
1300 if (temp == NULL) {
1301 vm_exit_out_of_memory(neededsize, OOM_MALLOC_ERROR,
1302 "Allocate ObjectMonitors");
1303 }
1304 (void)memset((void *) temp, 0, neededsize);
1305
1306 // Format the block.
1307 // initialize the linked list, each monitor points to its next
1308 // forming the single linked free list, the very first monitor
1309 // will points to next block, which forms the block list.
1310 // The trick of using the 1st element in the block as g_block_list
1311 // linkage should be reconsidered. A better implementation would
1312 // look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; }
1313
1314 for (int i = 1; i < _BLOCKSIZE; i++) {
1315 temp[i]._next_om = (ObjectMonitor *)&temp[i+1];
1397 }
1398
1399 // Return ObjectMonitors on a moribund thread's free and in-use
1400 // lists to the appropriate global lists. The ObjectMonitors on the
1401 // per-thread in-use list may still be in use by other threads.
1402 //
1403 // We currently call om_flush() from Threads::remove() before the
1404 // thread has been excised from the thread list and is no longer a
1405 // mutator. This means that om_flush() cannot run concurrently with
1406 // a safepoint and interleave with deflate_idle_monitors(). In
1407 // particular, this ensures that the thread's in-use monitors are
1408 // scanned by a GC safepoint, either via Thread::oops_do() (before
1409 // om_flush() is called) or via ObjectSynchronizer::oops_do() (after
1410 // om_flush() is called).
1411 //
1412 // With AsyncDeflateIdleMonitors, deflate_global_idle_monitors_using_JT()
1413 // and deflate_per_thread_idle_monitors_using_JT() (in another thread) can
1414 // run at the same time as om_flush() so we have to be careful.
1415
1416 void ObjectSynchronizer::om_flush(Thread* self) {
1417 ObjectMonitor* free_list = self->om_free_list;
1418 ObjectMonitor* free_tail = NULL;
1419 int free_count = 0;
1420 if (free_list != NULL) {
1421 ObjectMonitor* s;
1422 // The thread is going away. Set 'free_tail' to the last per-thread free
1423 // monitor which will be linked to g_free_list below under the gListLock.
1424 stringStream ss;
1425 for (s = free_list; s != NULL; s = s->_next_om) {
1426 free_count++;
1427 free_tail = s;
1428 guarantee(s->object() == NULL, "invariant");
1429 guarantee(!s->is_busy(), "must be !is_busy: %s", s->is_busy_to_string(&ss));
1430 }
1431 guarantee(free_tail != NULL, "invariant");
1432 ADIM_guarantee(self->om_free_count == free_count, "free-count off");
1433 self->om_free_list = NULL;
1434 self->om_free_count = 0;
1435 }
1436
1437 ObjectMonitor* in_use_list = self->om_in_use_list;
1438 ObjectMonitor* in_use_tail = NULL;
1439 int in_use_count = 0;
1440 if (in_use_list != NULL) {
1441 // The thread is going away, however the ObjectMonitors on the
1442 // om_in_use_list may still be in-use by other threads. Link
1443 // them to in_use_tail, which will be linked into the global
1444 // in-use list g_om_in_use_list below, under the gListLock.
1445 ObjectMonitor *cur_om;
1446 for (cur_om = in_use_list; cur_om != NULL; cur_om = cur_om->_next_om) {
1447 in_use_tail = cur_om;
1448 in_use_count++;
1449 ADIM_guarantee(cur_om->is_active(), "invariant");
1450 }
1451 guarantee(in_use_tail != NULL, "invariant");
1452 ADIM_guarantee(self->om_in_use_count == in_use_count, "in-use count off");
1453 self->om_in_use_list = NULL;
1454 self->om_in_use_count = 0;
1455 }
1456
1457 Thread::muxAcquire(&gListLock, "om_flush");
1458 if (free_tail != NULL) {
1459 free_tail->_next_om = g_free_list;
1460 g_free_list = free_list;
1461 g_om_free_count += free_count;
1462 }
1463
1464 if (in_use_tail != NULL) {
1465 in_use_tail->_next_om = g_om_in_use_list;
1466 g_om_in_use_list = in_use_list;
1467 g_om_in_use_count += in_use_count;
1468 }
1469
1470 Thread::muxRelease(&gListLock);
1471
1472 LogStreamHandle(Debug, monitorinflation) lsh_debug;
1473 LogStreamHandle(Info, monitorinflation) lsh_info;
1474 LogStream* ls = NULL;
1475 if (log_is_enabled(Debug, monitorinflation)) {
1476 ls = &lsh_debug;
1838 if (AsyncDeflateIdleMonitors) {
1839 // clear() expects the owner field to be NULL and we won't race
1840 // with the simple C2 ObjectMonitor enter optimization since
1841 // we're at a safepoint.
1842 mid->set_owner(NULL);
1843 }
1844 mid->clear();
1845
1846 assert(mid->object() == NULL, "invariant: object=" INTPTR_FORMAT,
1847 p2i(mid->object()));
1848 assert(mid->is_free(), "invariant");
1849
1850 // Move the deflated ObjectMonitor to the working free list
1851 // defined by free_head_p and free_tail_p.
1852 if (*free_head_p == NULL) *free_head_p = mid;
1853 if (*free_tail_p != NULL) {
1854 // We append to the list so the caller can use mid->_next_om
1855 // to fix the linkages in its context.
1856 ObjectMonitor* prevtail = *free_tail_p;
1857 // Should have been cleaned up by the caller:
1858 assert(prevtail->_next_om == NULL, "cleaned up deflated?");
1859 prevtail->_next_om = mid;
1860 }
1861 *free_tail_p = mid;
1862 // At this point, mid->_next_om still refers to its current
1863 // value and another ObjectMonitor's _next_om field still
1864 // refers to this ObjectMonitor. Those linkages have to be
1865 // cleaned up by the caller who has the complete context.
1866 deflated = true;
1867 }
1868 return deflated;
1869 }
1870
1871 // Deflate the specified ObjectMonitor if not in-use using a JavaThread.
1872 // Returns true if it was deflated and false otherwise.
1873 //
1874 // The async deflation protocol sets owner to DEFLATER_MARKER and
1875 // makes ref_count negative as signals to contending threads that
1876 // an async deflation is in progress. There are a number of checks
1877 // as part of the protocol to make sure that the calling thread has
1878 // not lost the race to a contending thread or to a thread that just
2005
2006 // The owner field is no longer NULL so we lost the race since the
2007 // ObjectMonitor is now busy.
2008 return false;
2009 }
2010
// Walk a given monitor list, and deflate idle monitors.
// The given list could be a per-thread list or a global list.
// Caller acquires gListLock as needed.
//
// In the case of parallel processing of thread local monitor lists,
// work is done by Threads::parallel_threads_do() which ensures that
// each Java thread is processed by exactly one worker thread, and
// thus avoids conflicts that would arise when worker threads would
// process the same monitor lists concurrently.
//
// See also ParallelSPCleanupTask and
// SafepointSynchronize::do_cleanup_tasks() in safepoint.cpp and
// Threads::parallel_java_threads_do() in thread.cpp.
//
// list_p      - in/out: head of the in-use list to scan; updated in place
//               when the head node itself is deflated.
// free_head_p - in/out: head of the working free list that receives the
//               deflated ObjectMonitors (updated by deflate_monitor()).
// free_tail_p - in/out: tail of that working free list.
// Returns the number of ObjectMonitors deflated by this call.
int ObjectSynchronizer::deflate_monitor_list(ObjectMonitor** list_p,
                                             ObjectMonitor** free_head_p,
                                             ObjectMonitor** free_tail_p) {
  ObjectMonitor* mid;
  ObjectMonitor* next;
  ObjectMonitor* cur_mid_in_use = NULL;  // last node kept on the in-use list
  int deflated_count = 0;

  for (mid = *list_p; mid != NULL;) {
    oop obj = (oop) mid->object();
    if (obj != NULL && deflate_monitor(mid, obj, free_head_p, free_tail_p)) {
      // Deflation succeeded and already updated free_head_p and
      // free_tail_p as needed. Finish the move to the local free list
      // by unlinking mid from the global or per-thread in-use list.
      if (mid == *list_p) {
        // mid was the list head: advance the head past it.
        *list_p = mid->_next_om;
      } else if (cur_mid_in_use != NULL) {
        cur_mid_in_use->_next_om = mid->_next_om; // maintain the current thread in-use list
      }
      next = mid->_next_om;
      mid->_next_om = NULL; // This mid is current tail in the free_head_p list
      mid = next;
      deflated_count++;
    } else {
      // mid stays on the in-use list; remember it so a later deflation
      // can splice around it.
      cur_mid_in_use = mid;
      mid = mid->_next_om;
    }
  }
  return deflated_count;
}
2054
2055 // Walk a given ObjectMonitor list and deflate idle ObjectMonitors using
2056 // a JavaThread. Returns the number of deflated ObjectMonitors. The given
2057 // list could be a per-thread in-use list or the global in-use list.
2058 // Caller acquires gListLock as appropriate. If a safepoint has started,
2059 // then we save state via saved_mid_in_use_p and return to the caller to
2060 // honor the safepoint.
2061 //
2062 int ObjectSynchronizer::deflate_monitor_list_using_JT(ObjectMonitor** list_p,
2063 ObjectMonitor** free_head_p,
2064 ObjectMonitor** free_tail_p,
2065 ObjectMonitor** saved_mid_in_use_p) {
2066 assert(AsyncDeflateIdleMonitors, "sanity check");
2067 assert(Thread::current()->is_Java_thread(), "precondition");
2068
2069 ObjectMonitor* mid;
2070 ObjectMonitor* next;
2071 ObjectMonitor* cur_mid_in_use = NULL;
2072 int deflated_count = 0;
2073
2074 if (*saved_mid_in_use_p == NULL) {
2075 // No saved state so start at the beginning.
2076 mid = *list_p;
2077 } else {
2078 // We're restarting after a safepoint so restore the necessary state
2079 // before we resume.
2080 cur_mid_in_use = *saved_mid_in_use_p;
2081 mid = cur_mid_in_use->_next_om;
2082 }
2083 while (mid != NULL) {
2084 // Only try to deflate if there is an associated Java object and if
2085 // mid is old (is not newly allocated and is not newly freed).
2086 if (mid->object() != NULL && mid->is_old() &&
2087 deflate_monitor_using_JT(mid, free_head_p, free_tail_p)) {
2088 // Deflation succeeded and already updated free_head_p and
2089 // free_tail_p as needed. Finish the move to the local free list
2090 // by unlinking mid from the global or per-thread in-use list.
2091 if (mid == *list_p) {
2092 *list_p = mid->_next_om;
2093 } else if (cur_mid_in_use != NULL) {
2094 // Maintain the current in-use list.
2095 cur_mid_in_use->_next_om = mid->_next_om;
2096 }
2097 next = mid->_next_om;
2098 mid->_next_om = NULL;
2099 // At this point mid is disconnected from the in-use list
2100 // and is the current tail in the free_head_p list.
2101 mid = next;
2102 deflated_count++;
2103 } else {
2104 // mid is considered in-use if it does not have an associated
2105 // Java object or mid is not old or deflation did not succeed.
2106 // A mid->is_new() node can be seen here when it is freshly
2107 // returned by om_alloc() (and skips the deflation code path).
2108 // A mid->is_old() node can be seen here when deflation failed.
2109 // A mid->is_free() node can be seen here when a fresh node from
2110 // om_alloc() is released by om_release() due to losing the race
2111 // in inflate().
2112
2113 cur_mid_in_use = mid;
2114 mid = mid->_next_om;
2115
2116 if (SafepointSynchronize::is_synchronizing() &&
2117 cur_mid_in_use != *list_p && cur_mid_in_use->is_old()) {
2118 // If a safepoint has started and cur_mid_in_use is not the list
2119 // head and is old, then it is safe to use as saved state. Return
2120 // to the caller so gListLock can be dropped as appropriate
2121 // before blocking.
2122 *saved_mid_in_use_p = cur_mid_in_use;
2152 bool deflated = false;
2153
2154 ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors
2155 ObjectMonitor* free_tail_p = NULL;
2156 elapsedTimer timer;
2157
2158 if (log_is_enabled(Info, monitorinflation)) {
2159 timer.start();
2160 }
2161
2162 // Prevent om_flush from changing mids in Thread dtor's during deflation
2163 // And in case the vm thread is acquiring a lock during a safepoint
2164 // See e.g. 6320749
2165 Thread::muxAcquire(&gListLock, "deflate_idle_monitors");
2166
2167 // Note: the thread-local monitors lists get deflated in
2168 // a separate pass. See deflate_thread_local_monitors().
2169
2170 // For moribund threads, scan g_om_in_use_list
2171 int deflated_count = 0;
2172 if (g_om_in_use_list) {
2173 counters->n_in_circulation += g_om_in_use_count;
2174 deflated_count = deflate_monitor_list((ObjectMonitor **)&g_om_in_use_list, &free_head_p, &free_tail_p);
2175 g_om_in_use_count -= deflated_count;
2176 counters->n_scavenged += deflated_count;
2177 counters->n_in_use += g_om_in_use_count;
2178 }
2179
2180 if (free_head_p != NULL) {
2181 // Move the deflated ObjectMonitors back to the global free list.
2182 guarantee(free_tail_p != NULL && counters->n_scavenged > 0, "invariant");
2183 assert(free_tail_p->_next_om == NULL, "invariant");
2184 // constant-time list splice - prepend scavenged segment to g_free_list
2185 free_tail_p->_next_om = g_free_list;
2186 g_free_list = free_head_p;
2187 }
2188 Thread::muxRelease(&gListLock);
2189 timer.stop();
2190
2191 LogStreamHandle(Debug, monitorinflation) lsh_debug;
2192 LogStreamHandle(Info, monitorinflation) lsh_info;
2193 LogStream* ls = NULL;
2194 if (log_is_enabled(Debug, monitorinflation)) {
2195 ls = &lsh_debug;
2196 } else if (deflated_count != 0 && log_is_enabled(Info, monitorinflation)) {
2197 ls = &lsh_info;
2198 }
2199 if (ls != NULL) {
2200 ls->print_cr("deflating global idle monitors, %3.7f secs, %d monitors", timer.seconds(), deflated_count);
2201 }
2202 }
2203
2204 // Deflate global idle ObjectMonitors using a JavaThread.
2205 //
2206 void ObjectSynchronizer::deflate_global_idle_monitors_using_JT() {
2207 assert(AsyncDeflateIdleMonitors, "sanity check");
2208 assert(Thread::current()->is_Java_thread(), "precondition");
2209 JavaThread* self = JavaThread::current();
2210
2211 deflate_common_idle_monitors_using_JT(true /* is_global */, self);
2212 }
2213
2214 // Deflate per-thread idle ObjectMonitors using a JavaThread.
2215 //
2216 void ObjectSynchronizer::deflate_per_thread_idle_monitors_using_JT() {
2217 assert(AsyncDeflateIdleMonitors, "sanity check");
2218 assert(Thread::current()->is_Java_thread(), "precondition");
2219 JavaThread* self = JavaThread::current();
2220
2221 self->om_request_deflation = false;
2222
2223 deflate_common_idle_monitors_using_JT(false /* !is_global */, self);
2224 }
2225
2226 // Deflate global or per-thread idle ObjectMonitors using a JavaThread.
2227 //
// Deflate global or per-thread idle ObjectMonitors using a JavaThread.
//
// is_global - true: process the global in-use list (held under gListLock
//             for the duration of each pass);
//             false: process self's per-thread in-use list (gListLock is
//             only taken briefly to splice onto the global free list).
// self      - the calling JavaThread; also the target of per-thread
//             deflation when is_global is false.
//
// If deflate_monitor_list_using_JT() detects that a safepoint is starting,
// it returns with saved_mid_in_use_p set; we then drop gListLock (global
// case), block at the safepoint via ThreadBlockInVM, and resume the walk
// from the saved position in the next loop iteration.
void ObjectSynchronizer::deflate_common_idle_monitors_using_JT(bool is_global, JavaThread* self) {
  int deflated_count = 0;
  ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged ObjectMonitors
  ObjectMonitor* free_tail_p = NULL;
  ObjectMonitor* saved_mid_in_use_p = NULL;
  elapsedTimer timer;

  // Only pay for timing when it will actually be logged.
  if (log_is_enabled(Info, monitorinflation)) {
    timer.start();
  }

  if (is_global) {
    Thread::muxAcquire(&gListLock, "deflate_global_idle_monitors_using_JT(1)");
    OM_PERFDATA_OP(MonExtant, set_value(g_om_in_use_count));
  } else {
    OM_PERFDATA_OP(MonExtant, inc(self->om_in_use_count));
  }

  do {
    int local_deflated_count;
    if (is_global) {
      local_deflated_count = deflate_monitor_list_using_JT((ObjectMonitor **)&g_om_in_use_list, &free_head_p, &free_tail_p, &saved_mid_in_use_p);
      g_om_in_use_count -= local_deflated_count;
    } else {
      local_deflated_count = deflate_monitor_list_using_JT(self->om_in_use_list_addr(), &free_head_p, &free_tail_p, &saved_mid_in_use_p);
      self->om_in_use_count -= local_deflated_count;
    }
    deflated_count += local_deflated_count;

    if (free_head_p != NULL) {
      // Move the deflated ObjectMonitors to the global free list.
      guarantee(free_tail_p != NULL && local_deflated_count > 0, "free_tail_p=" INTPTR_FORMAT ", local_deflated_count=%d", p2i(free_tail_p), local_deflated_count);
      assert(free_tail_p->_next_om == NULL, "invariant");

      // In the per-thread case gListLock is not yet held; take it just
      // for the splice below.
      if (!is_global) {
        Thread::muxAcquire(&gListLock, "deflate_per_thread_idle_monitors_using_JT(2)");
      }
      // Constant-time list splice - prepend scavenged segment to g_free_list.
      free_tail_p->_next_om = g_free_list;
      g_free_list = free_head_p;

      g_om_free_count += local_deflated_count;
      OM_PERFDATA_OP(Deflations, inc(local_deflated_count));
      if (!is_global) {
        Thread::muxRelease(&gListLock);
      }
    }

    if (saved_mid_in_use_p != NULL) {
      // deflate_monitor_list_using_JT() detected a safepoint starting.
      if (is_global) {
        Thread::muxRelease(&gListLock);
      }
      timer.stop();
      {
        if (is_global) {
          log_debug(monitorinflation)("pausing deflation of global idle monitors for a safepoint.");
        } else {
          log_debug(monitorinflation)("jt=" INTPTR_FORMAT ": pausing deflation of per-thread idle monitors for a safepoint.", p2i(self));
        }
        assert(SafepointSynchronize::is_synchronizing(), "sanity check");
        // Inner scope: blocker participates in the safepoint and is
        // destroyed (leaving the blocked state) before we resume work.
        ThreadBlockInVM blocker(self);
      }
      // Prepare for another loop after the safepoint.
      free_head_p = NULL;
      free_tail_p = NULL;
      if (log_is_enabled(Info, monitorinflation)) {
        timer.start();
      }
      if (is_global) {
        Thread::muxAcquire(&gListLock, "deflate_global_idle_monitors_using_JT(3)");
      }
    }
  } while (saved_mid_in_use_p != NULL);
  if (is_global) {
    Thread::muxRelease(&gListLock);
  }
  timer.stop();

  // Log at Debug unconditionally, or at Info when something was deflated.
  LogStreamHandle(Debug, monitorinflation) lsh_debug;
  LogStreamHandle(Info, monitorinflation) lsh_info;
  LogStream* ls = NULL;
  if (log_is_enabled(Debug, monitorinflation)) {
    ls = &lsh_debug;
  } else if (deflated_count != 0 && log_is_enabled(Info, monitorinflation)) {
    ls = &lsh_info;
  }
  if (ls != NULL) {
    if (is_global) {
      ls->print_cr("async-deflating global idle monitors, %3.7f secs, %d monitors", timer.seconds(), deflated_count);
    } else {
      ls->print_cr("jt=" INTPTR_FORMAT ": async-deflating per-thread idle monitors, %3.7f secs, %d monitors", p2i(self), timer.seconds(), deflated_count);
    }
  }
}
2323
2324 void ObjectSynchronizer::finish_deflate_idle_monitors(DeflateMonitorCounters* counters) {
2325 // Report the cumulative time for deflating each thread's idle
2326 // monitors. Note: if the work is split among more than one
2327 // worker thread, then the reported time will likely be more
2328 // than a beginning to end measurement of the phase.
2329 // Note: AsyncDeflateIdleMonitors only deflates per-thread idle
2330 // monitors at a safepoint when a special deflation has been requested.
2331 log_info(safepoint, cleanup)("deflating per-thread idle monitors, %3.7f secs, monitors=%d", counters->per_thread_times, counters->per_thread_scavenged);
2332
2333 bool needs_special_deflation = is_special_deflation_requested();
2334 if (!AsyncDeflateIdleMonitors || needs_special_deflation) {
2335 // AsyncDeflateIdleMonitors does not use these counters unless
2336 // there is a special deflation request.
2337
2338 g_om_free_count += counters->n_scavenged;
2339
2340 OM_PERFDATA_OP(Deflations, inc(counters->n_scavenged));
2341 OM_PERFDATA_OP(MonExtant, set_value(counters->n_in_circulation));
2342 }
2343
2344 if (log_is_enabled(Debug, monitorinflation)) {
2345 // exit_globals()'s call to audit_and_print_stats() is done
2346 // at the Info level.
2347 ObjectSynchronizer::audit_and_print_stats(false /* on_exit */);
2348 } else if (log_is_enabled(Info, monitorinflation)) {
2349 Thread::muxAcquire(&gListLock, "finish_deflate_idle_monitors");
2350 log_info(monitorinflation)("g_om_population=%d, g_om_in_use_count=%d, "
2351 "g_om_free_count=%d", g_om_population,
2352 g_om_in_use_count, g_om_free_count);
2353 Thread::muxRelease(&gListLock);
2354 }
2355
2356 ForceMonitorScavenge = 0; // Reset
2357 GVars.stw_random = os::random();
2358 GVars.stw_cycle++;
2359 if (needs_special_deflation) {
2368 if (!is_special_deflation_requested()) {
2369 // Mark the JavaThread for idle monitor deflation if a special
2370 // deflation has NOT been requested.
2371 if (thread->om_in_use_count > 0) {
2372 // This JavaThread is using monitors so mark it.
2373 thread->om_request_deflation = true;
2374 }
2375 return;
2376 }
2377 }
2378
2379 ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors
2380 ObjectMonitor* free_tail_p = NULL;
2381 elapsedTimer timer;
2382
2383 if (log_is_enabled(Info, safepoint, cleanup) ||
2384 log_is_enabled(Info, monitorinflation)) {
2385 timer.start();
2386 }
2387
2388 int deflated_count = deflate_monitor_list(thread->om_in_use_list_addr(), &free_head_p, &free_tail_p);
2389
2390 Thread::muxAcquire(&gListLock, "deflate_thread_local_monitors");
2391
2392 // Adjust counters
2393 counters->n_in_circulation += thread->om_in_use_count;
2394 thread->om_in_use_count -= deflated_count;
2395 counters->n_scavenged += deflated_count;
2396 counters->n_in_use += thread->om_in_use_count;
2397 counters->per_thread_scavenged += deflated_count;
2398
2399 if (free_head_p != NULL) {
2400 // Move the deflated ObjectMonitors back to the global free list.
2401 guarantee(free_tail_p != NULL && deflated_count > 0, "invariant");
2402 assert(free_tail_p->_next_om == NULL, "invariant");
2403
2404 // constant-time list splice - prepend scavenged segment to g_free_list
2405 free_tail_p->_next_om = g_free_list;
2406 g_free_list = free_head_p;
2407 }
2408
2409 timer.stop();
2410 // Safepoint logging cares about cumulative per_thread_times and
2411 // we'll capture most of the cost, but not the muxRelease() which
2412 // should be cheap.
2413 counters->per_thread_times += timer.seconds();
2414
2415 Thread::muxRelease(&gListLock);
2416
2417 LogStreamHandle(Debug, monitorinflation) lsh_debug;
2418 LogStreamHandle(Info, monitorinflation) lsh_info;
2419 LogStream* ls = NULL;
2420 if (log_is_enabled(Debug, monitorinflation)) {
2421 ls = &lsh_debug;
2422 } else if (deflated_count != 0 && log_is_enabled(Info, monitorinflation)) {
2423 ls = &lsh_info;
2424 }
2425 if (ls != NULL) {
2426 ls->print_cr("jt=" INTPTR_FORMAT ": deflating per-thread idle monitors, %3.7f secs, %d monitors", p2i(thread), timer.seconds(), deflated_count);
|
101 }
102
103 #else // ndef DTRACE_ENABLED
104
105 #define DTRACE_MONITOR_WAIT_PROBE(obj, thread, millis, mon) {;}
106 #define DTRACE_MONITOR_PROBE(probe, obj, thread, mon) {;}
107
108 #endif // ndef DTRACE_ENABLED
109
// This exists only as a workaround of dtrace bug 6254741 (presumably the
// probe cannot be expanded directly at the wait() call site — see the bug).
// Fires the DTRACE "waited" monitor probe for the given
// monitor/object/thread triple. Always returns 0; the return value
// carries no information and exists only so the call site can consume it.
int dtrace_waited_probe(ObjectMonitor* monitor, Handle obj, Thread* thr) {
  DTRACE_MONITOR_PROBE(waited, monitor, obj(), thr);
  return 0;
}
115
// Number of striped lock words available to threads that must wait for
// an in-progress inflation to complete.
// NOTE(review): usage is in read_stable_mark() — confirm striping scheme there.
#define NINFLATIONLOCKS 256
static volatile intptr_t gInflationLocks[NINFLATIONLOCKS];

// global list of blocks of monitors
PaddedObjectMonitor* volatile ObjectSynchronizer::g_block_list = NULL;
// True while an async deflation pass has been requested but not yet run.
bool volatile ObjectSynchronizer::_is_async_deflation_requested = false;
// True while a special (safepoint-time) deflation has been requested;
// checked via is_special_deflation_requested() in the deflation paths below.
bool volatile ObjectSynchronizer::_is_special_deflation_requested = false;
// Timestamp of the last async deflation pass; presumably nanoseconds
// per the _ns suffix — the writer is outside this chunk, verify there.
jlong ObjectSynchronizer::_last_async_deflation_time_ns = 0;

// Global ObjectMonitor free list. Newly allocated and deflated
// ObjectMonitors are prepended here.
static ObjectMonitor* volatile g_free_list = NULL;
// Global ObjectMonitor in-use list. When a JavaThread is exiting,
// ObjectMonitors on its per-thread in-use list are prepended here.
static ObjectMonitor* volatile g_om_in_use_list = NULL;

static volatile intptr_t gListLock = 0;          // protects global monitor lists
static volatile int g_om_free_count = 0;         // # on g_free_list
static volatile int g_om_in_use_count = 0;       // # on g_om_in_use_list
static volatile int g_om_population = 0;         // # Extant -- in circulation

// Sentinel stored as the "object" of element 0 of every monitor block;
// marks that element as the block-chain header (asserted in
// monitors_iterate()).
#define CHAINMARKER (cast_to_oop<intptr_t>(-1))
138
139
140 // =====================> Quick functions
141
142 // The quick_* forms are special fast-path variants used to improve
143 // performance. In the simplest case, a "quick_*" implementation could
144 // simply return false, in which case the caller will perform the necessary
145 // state transitions and call the slow-path form.
146 // The fast-path is designed to handle frequently arising cases in an efficient
147 // manner and is just a degenerate "optimistic" variant of the slow-path.
148 // returns true -- to indicate the call was satisfied.
149 // returns false -- to indicate the call needs the services of the slow-path.
150 // A no-loitering ordinance is in effect for code in the quick_* family
151 // operators: safepoints or indefinite blocking (blocking that might span a
152 // safepoint) are forbidden. Generally the thread_state() is _in_Java upon
153 // entry.
154 //
566 //
567 // Performance concern:
568 // OrderAccess::storestore() calls release() which at one time stored 0
569 // into the global volatile OrderAccess::dummy variable. This store was
570 // unnecessary for correctness. Many threads storing into a common location
571 // causes considerable cache migration or "sloshing" on large SMP systems.
572 // As such, I avoided using OrderAccess::storestore(). In some cases
573 // OrderAccess::fence() -- which incurs local latency on the executing
574 // processor -- is a better choice as it scales on SMP systems.
575 //
576 // See http://blogs.oracle.com/dave/entry/biased_locking_in_hotspot for
577 // a discussion of coherency costs. Note that all our current reference
578 // platforms provide strong ST-ST order, so the issue is moot on IA32,
579 // x64, and SPARC.
580 //
581 // As a general policy we use "volatile" to control compiler-based reordering
582 // and explicit fences (barriers) to control for architectural reordering
583 // performed by the CPU(s) or platform.
584
// Process-wide synchronization subsystem state. Field order and the
// DEFINE_PAD_MINUS_SIZE() spacers are deliberate: each group of fields
// is padded out to OM_CACHE_LINE_SIZE so the mostly-read pair and the
// hot read-write counter live on separate cache lines (no false sharing).
struct SharedGlobals {
  char _pad_prefix[OM_CACHE_LINE_SIZE];
  // These are highly shared mostly-read variables.
  // To avoid false-sharing they need to be the sole occupants of a cache line.
  volatile int stw_random;
  volatile int stw_cycle;
  DEFINE_PAD_MINUS_SIZE(1, OM_CACHE_LINE_SIZE, sizeof(volatile int) * 2);
  // Hot RW variable -- Sequester to avoid false-sharing
  volatile int hc_sequence;
  DEFINE_PAD_MINUS_SIZE(2, OM_CACHE_LINE_SIZE, sizeof(volatile int));
};
596
// Single process-wide instance of the padded globals above.
static SharedGlobals GVars;
// Threshold used when deciding to induce a monitor scavenge.
// NOTE(review): consumer (InduceScavenge?) is outside this chunk — verify.
static int MonitorScavengeThreshold = 1000000;
static volatile int ForceMonitorScavenge = 0; // Scavenge required and pending
600
601 static markWord read_stable_mark(oop obj) {
602 markWord mark = obj->mark();
603 if (!mark.is_being_inflated()) {
604 return mark; // normal fast-path return
605 }
606
607 int its = 0;
608 for (;;) {
609 markWord mark = obj->mark();
610 if (!mark.is_being_inflated()) {
611 return mark; // normal fast-path return
612 }
613
614 // The object is being inflated by some other thread.
982 // Cannot have assertion since this object may have been
983 // locked by another thread when reaching here.
984 // assert(mark.is_neutral(), "sanity check");
985
986 return NULL;
987 }
988 }
989
990 // Visitors ...
991
// Apply the closure to every active ObjectMonitor that still has an
// associated object. Walks the global chain of monitor blocks; element 0
// of each block is the chain header (its "object" is CHAINMARKER) and is
// therefore excluded from the index loop.
void ObjectSynchronizer::monitors_iterate(MonitorClosure* closure) {
  PaddedObjectMonitor* block = OrderAccess::load_acquire(&g_block_list);
  while (block != NULL) {
    assert(block->object() == CHAINMARKER, "must be a block header");
    // Visit slots [_BLOCKSIZE-1 .. 1]; slot 0 is the block header.
    for (int i = _BLOCKSIZE - 1; i > 0; i--) {
      ObjectMonitor* mid = (ObjectMonitor *)(block + i);
      if (mid->is_active()) {
        // The handle's constructor bumps mid's ref_count for the
        // duration of the visit (see the comment below).
        ObjectMonitorHandle omh(mid);

        if (mid->object() == NULL ||
            (AsyncDeflateIdleMonitors && mid->ref_count() < 0)) {
          // Only process with closure if the object is set.
          // For async deflation, race here if monitor is not owned!
          // The above ref_count bump (in ObjectMonitorHandle ctr)
          // will cause subsequent async deflation to skip it.
          // However, previous or concurrent async deflation is a race
          // so skip this ObjectMonitor if it is being async deflated.
          continue;
        }
        closure->do_monitor(mid);
      }
    }
    block = (PaddedObjectMonitor*)block->_next_om;
  }
}
1017
1018 static bool monitors_used_above_threshold() {
1019 if (g_om_population == 0) {
1020 return false;
1021 }
1022 if (MonitorUsedDeflationThreshold > 0) {
1023 int monitors_used = g_om_population - g_om_free_count;
1024 int monitor_usage = (monitors_used * 100LL) / g_om_population;
1025 return monitor_usage > MonitorUsedDeflationThreshold;
1026 }
1027 return false;
1028 }
1083 // We only scan the global used list here (for moribund threads), and
1084 // the thread-local monitors in Thread::oops_do().
1085 global_used_oops_do(f);
1086 }
1087
// Apply the oop closure to the object of every ObjectMonitor on the
// global in-use list. Safepoint-only; per-thread in-use lists are
// handled by thread_local_used_oops_do().
void ObjectSynchronizer::global_used_oops_do(OopClosure* f) {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
  list_oops_do(g_om_in_use_list, f);
}
1092
// Apply the oop closure to the object of every ObjectMonitor on the
// given thread's in-use list. Safepoint-only.
void ObjectSynchronizer::thread_local_used_oops_do(Thread* thread, OopClosure* f) {
  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
  list_oops_do(thread->om_in_use_list, f);
}
1097
1098 void ObjectSynchronizer::list_oops_do(ObjectMonitor* list, OopClosure* f) {
1099 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
1100 // The oops_do() phase does not overlap with monitor deflation
1101 // so no need to update the ObjectMonitor's ref_count for this
1102 // ObjectMonitor* use.
1103 for (ObjectMonitor* mid = list; mid != NULL; mid = mid->_next_om) {
1104 if (mid->object() != NULL) {
1105 f->do_oop((oop*)mid->object_addr());
1106 }
1107 }
1108 }
1109
1110
1111 // -----------------------------------------------------------------------------
1112 // ObjectMonitor Lifecycle
1113 // -----------------------
1114 // Inflation unlinks monitors from the global g_free_list and
1115 // associates them with objects. Deflation -- which occurs at
1116 // STW-time -- disassociates idle monitors from objects. Such
1117 // scavenged monitors are returned to the g_free_list.
1118 //
1119 // The global list is protected by gListLock. All the critical sections
1120 // are short and operate in constant-time.
1121 //
1122 // ObjectMonitors reside in type-stable memory (TSM) and are immortal.
1123 //
1186 const InflateCause cause) {
1187 // A large MAXPRIVATE value reduces both list lock contention
1188 // and list coherency traffic, but also tends to increase the
1189 // number of ObjectMonitors in circulation as well as the STW
1190 // scavenge costs. As usual, we lean toward time in space-time
1191 // tradeoffs.
1192 const int MAXPRIVATE = 1024;
1193
1194 if (AsyncDeflateIdleMonitors) {
1195 JavaThread* jt = (JavaThread *)self;
1196 if (jt->om_request_deflation && jt->om_in_use_count > 0 &&
1197 cause != inflate_cause_vm_internal) {
1198 // Deflate any per-thread idle monitors for this JavaThread if
1199 // this is not an internal inflation; internal inflations can
1200 // occur in places where it is not safe to pause for a safepoint.
1201 // Clean up your own mess (Gibbs Rule 45). Otherwise, skip this
1202 // deflation. deflate_global_idle_monitors_using_JT() is called
1203 // by the ServiceThread. Per-thread async deflation is triggered
1204 // by the ServiceThread via om_request_deflation.
1205 debug_only(jt->check_for_valid_safepoint_state(false);)
1206 ObjectSynchronizer::deflate_per_thread_idle_monitors_using_JT(jt);
1207 }
1208 }
1209
1210 stringStream ss;
1211 for (;;) {
1212 ObjectMonitor* m;
1213
1214 // 1: try to allocate from the thread's local om_free_list.
1215 // Threads will attempt to allocate first from their local list, then
1216 // from the global list, and only after those attempts fail will the thread
1217 // attempt to instantiate new monitors. Thread-local free lists take
1218 // heat off the gListLock and improve allocation latency, as well as reducing
1219 // coherency traffic on the shared global list.
1220 m = self->om_free_list;
1221 if (m != NULL) {
1222 self->om_free_list = m->_next_om;
1223 self->om_free_count--;
1224 guarantee(m->object() == NULL, "invariant");
1225 m->set_allocation_state(ObjectMonitor::New);
1226 m->_next_om = self->om_in_use_list;
1273 // Not enough ObjectMonitors on the global free list.
1274 // We can't safely induce a STW safepoint from om_alloc() as our thread
1275 // state may not be appropriate for such activities and callers may hold
1276 // naked oops, so instead we defer the action.
1277 InduceScavenge(self, "om_alloc");
1278 }
1279 continue;
1280 }
1281
1282 // 3: allocate a block of new ObjectMonitors
1283 // Both the local and global free lists are empty -- resort to malloc().
1284 // In the current implementation ObjectMonitors are TSM - immortal.
    // Ideally, we'd write "new ObjectMonitor[_BLOCKSIZE]", but we want
1286 // each ObjectMonitor to start at the beginning of a cache line,
1287 // so we use align_up().
1288 // A better solution would be to use C++ placement-new.
1289 // BEWARE: As it stands currently, we don't run the ctors!
1290 assert(_BLOCKSIZE > 1, "invariant");
1291 size_t neededsize = sizeof(PaddedObjectMonitor) * _BLOCKSIZE;
1292 PaddedObjectMonitor* temp;
1293 size_t aligned_size = neededsize + (OM_CACHE_LINE_SIZE - 1);
1294 void* real_malloc_addr = (void*)NEW_C_HEAP_ARRAY(char, aligned_size,
1295 mtInternal);
1296 temp = (PaddedObjectMonitor*)align_up(real_malloc_addr, OM_CACHE_LINE_SIZE);
1297
1298 // NOTE: (almost) no way to recover if allocation failed.
1299 // We might be able to induce a STW safepoint and scavenge enough
1300 // ObjectMonitors to permit progress.
1301 if (temp == NULL) {
1302 vm_exit_out_of_memory(neededsize, OOM_MALLOC_ERROR,
1303 "Allocate ObjectMonitors");
1304 }
1305 (void)memset((void *) temp, 0, neededsize);
1306
    // Format the block.
    // Initialize the linked list: each monitor points to its next,
    // forming the singly linked free list; the very first monitor
    // will point to the next block, which forms the block list.
1311 // The trick of using the 1st element in the block as g_block_list
1312 // linkage should be reconsidered. A better implementation would
1313 // look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; }
1314
1315 for (int i = 1; i < _BLOCKSIZE; i++) {
1316 temp[i]._next_om = (ObjectMonitor *)&temp[i+1];
1398 }
1399
1400 // Return ObjectMonitors on a moribund thread's free and in-use
1401 // lists to the appropriate global lists. The ObjectMonitors on the
1402 // per-thread in-use list may still be in use by other threads.
1403 //
1404 // We currently call om_flush() from Threads::remove() before the
1405 // thread has been excised from the thread list and is no longer a
1406 // mutator. This means that om_flush() cannot run concurrently with
1407 // a safepoint and interleave with deflate_idle_monitors(). In
1408 // particular, this ensures that the thread's in-use monitors are
1409 // scanned by a GC safepoint, either via Thread::oops_do() (before
1410 // om_flush() is called) or via ObjectSynchronizer::oops_do() (after
1411 // om_flush() is called).
1412 //
1413 // With AsyncDeflateIdleMonitors, deflate_global_idle_monitors_using_JT()
1414 // and deflate_per_thread_idle_monitors_using_JT() (in another thread) can
1415 // run at the same time as om_flush() so we have to be careful.
1416
1417 void ObjectSynchronizer::om_flush(Thread* self) {
1418 int in_use_count = 0;
1419 ObjectMonitor* in_use_list = self->om_in_use_list;
1420 ObjectMonitor* in_use_tail = NULL;
1421 if (in_use_list != NULL) {
1422 // The thread is going away, however the ObjectMonitors on the
1423 // om_in_use_list may still be in-use by other threads. Link
1424 // them to in_use_tail, which will be linked into the global
1425 // in-use list g_om_in_use_list below, under the gListLock.
1426 for (ObjectMonitor* cur_om = in_use_list; cur_om != NULL; cur_om = cur_om->_next_om) {
1427 in_use_tail = cur_om;
1428 in_use_count++;
1429 ADIM_guarantee(cur_om->is_active(), "invariant");
1430 }
1431 guarantee(in_use_tail != NULL, "invariant");
1432 ADIM_guarantee(self->om_in_use_count == in_use_count, "in-use count off");
1433 self->om_in_use_list = NULL;
1434 self->om_in_use_count = 0;
1435 }
1436
1437 int free_count = 0;
1438 ObjectMonitor* free_list = self->om_free_list;
1439 ObjectMonitor* free_tail = NULL;
1440 if (free_list != NULL) {
1441 // The thread is going away. Set 'free_tail' to the last per-thread free
1442 // monitor which will be linked to g_free_list below under the gListLock.
1443 stringStream ss;
1444 for (ObjectMonitor* s = free_list; s != NULL; s = s->_next_om) {
1445 free_count++;
1446 free_tail = s;
1447 guarantee(s->object() == NULL, "invariant");
1448 guarantee(!s->is_busy(), "must be !is_busy: %s", s->is_busy_to_string(&ss));
1449 }
1450 guarantee(free_tail != NULL, "invariant");
1451 ADIM_guarantee(self->om_free_count == free_count, "free-count off");
1452 self->om_free_list = NULL;
1453 self->om_free_count = 0;
1454 }
1455
1456 Thread::muxAcquire(&gListLock, "om_flush");
1457 if (free_tail != NULL) {
1458 free_tail->_next_om = g_free_list;
1459 g_free_list = free_list;
1460 g_om_free_count += free_count;
1461 }
1462
1463 if (in_use_tail != NULL) {
1464 in_use_tail->_next_om = g_om_in_use_list;
1465 g_om_in_use_list = in_use_list;
1466 g_om_in_use_count += in_use_count;
1467 }
1468
1469 Thread::muxRelease(&gListLock);
1470
1471 LogStreamHandle(Debug, monitorinflation) lsh_debug;
1472 LogStreamHandle(Info, monitorinflation) lsh_info;
1473 LogStream* ls = NULL;
1474 if (log_is_enabled(Debug, monitorinflation)) {
1475 ls = &lsh_debug;
1837 if (AsyncDeflateIdleMonitors) {
1838 // clear() expects the owner field to be NULL and we won't race
1839 // with the simple C2 ObjectMonitor enter optimization since
1840 // we're at a safepoint.
1841 mid->set_owner(NULL);
1842 }
1843 mid->clear();
1844
1845 assert(mid->object() == NULL, "invariant: object=" INTPTR_FORMAT,
1846 p2i(mid->object()));
1847 assert(mid->is_free(), "invariant");
1848
1849 // Move the deflated ObjectMonitor to the working free list
1850 // defined by free_head_p and free_tail_p.
1851 if (*free_head_p == NULL) *free_head_p = mid;
1852 if (*free_tail_p != NULL) {
1853 // We append to the list so the caller can use mid->_next_om
1854 // to fix the linkages in its context.
1855 ObjectMonitor* prevtail = *free_tail_p;
1856 // Should have been cleaned up by the caller:
1857 assert(prevtail->_next_om == NULL, "must be NULL: _next_om="
1858 INTPTR_FORMAT, p2i(prevtail->_next_om));
1859 prevtail->_next_om = mid;
1860 }
1861 *free_tail_p = mid;
1862 // At this point, mid->_next_om still refers to its current
1863 // value and another ObjectMonitor's _next_om field still
1864 // refers to this ObjectMonitor. Those linkages have to be
1865 // cleaned up by the caller who has the complete context.
1866 deflated = true;
1867 }
1868 return deflated;
1869 }
1870
1871 // Deflate the specified ObjectMonitor if not in-use using a JavaThread.
1872 // Returns true if it was deflated and false otherwise.
1873 //
1874 // The async deflation protocol sets owner to DEFLATER_MARKER and
1875 // makes ref_count negative as signals to contending threads that
1876 // an async deflation is in progress. There are a number of checks
1877 // as part of the protocol to make sure that the calling thread has
1878 // not lost the race to a contending thread or to a thread that just
2005
2006 // The owner field is no longer NULL so we lost the race since the
2007 // ObjectMonitor is now busy.
2008 return false;
2009 }
2010
// Walk a given monitor list, and deflate idle monitors.
// The given list could be a per-thread list or a global list.
// Caller acquires gListLock as needed.
//
// In the case of parallel processing of thread local monitor lists,
// work is done by Threads::parallel_threads_do() which ensures that
// each Java thread is processed by exactly one worker thread, and
// thus avoids conflicts that would arise when worker threads would
// process the same monitor lists concurrently.
//
// See also ParallelSPCleanupTask and
// SafepointSynchronize::do_cleanup_tasks() in safepoint.cpp and
// Threads::parallel_java_threads_do() in thread.cpp.
//
// Returns the number of monitors moved onto the caller-local free list
// (free_head_p/free_tail_p). *count_p is decremented by the same amount,
// and *list_p is updated in place as deflated monitors are unlinked.
int ObjectSynchronizer::deflate_monitor_list(ObjectMonitor** list_p,
                                             int* count_p,
                                             ObjectMonitor** free_head_p,
                                             ObjectMonitor** free_tail_p) {
  ObjectMonitor* cur_mid_in_use = NULL;  // last node known to remain on the in-use list
  ObjectMonitor* mid;                    // node currently being examined
  ObjectMonitor* next;                   // saved successor of mid
  int deflated_count = 0;

  for (mid = *list_p; mid != NULL;) {
    oop obj = (oop) mid->object();
    if (obj != NULL && deflate_monitor(mid, obj, free_head_p, free_tail_p)) {
      // Deflation succeeded and already updated free_head_p and
      // free_tail_p as needed. Finish the move to the local free list
      // by unlinking mid from the global or per-thread in-use list.
      if (mid == *list_p) {
        // mid is the list head: advance the head pointer past it.
        *list_p = mid->_next_om;
      } else if (cur_mid_in_use != NULL) {
        cur_mid_in_use->_next_om = mid->_next_om; // maintain the current thread in-use list
      }
      next = mid->_next_om;
      mid->_next_om = NULL; // This mid is current tail in the free_head_p list
      mid = next;
      deflated_count++;
      *count_p = *count_p - 1;
    } else {
      // Monitor stays in use (no object, or deflate_monitor() declined);
      // remember it as the predecessor for a possible later unlink.
      cur_mid_in_use = mid;
      mid = mid->_next_om;
    }
  }
  return deflated_count;
}
2056
2057 // Walk a given ObjectMonitor list and deflate idle ObjectMonitors using
2058 // a JavaThread. Returns the number of deflated ObjectMonitors. The given
2059 // list could be a per-thread in-use list or the global in-use list.
2060 // Caller acquires gListLock as appropriate. If a safepoint has started,
2061 // then we save state via saved_mid_in_use_p and return to the caller to
2062 // honor the safepoint.
2063 //
2064 int ObjectSynchronizer::deflate_monitor_list_using_JT(ObjectMonitor** list_p,
2065 int* count_p,
2066 ObjectMonitor** free_head_p,
2067 ObjectMonitor** free_tail_p,
2068 ObjectMonitor** saved_mid_in_use_p) {
2069 assert(AsyncDeflateIdleMonitors, "sanity check");
2070 assert(Thread::current()->is_Java_thread(), "precondition");
2071
2072 ObjectMonitor* cur_mid_in_use = NULL;
2073 ObjectMonitor* mid;
2074 ObjectMonitor* next;
2075 int deflated_count = 0;
2076
2077 if (*saved_mid_in_use_p == NULL) {
2078 // No saved state so start at the beginning.
2079 mid = *list_p;
2080 } else {
2081 // We're restarting after a safepoint so restore the necessary state
2082 // before we resume.
2083 cur_mid_in_use = *saved_mid_in_use_p;
2084 mid = cur_mid_in_use->_next_om;
2085 }
2086 while (mid != NULL) {
2087 // Only try to deflate if there is an associated Java object and if
2088 // mid is old (is not newly allocated and is not newly freed).
2089 if (mid->object() != NULL && mid->is_old() &&
2090 deflate_monitor_using_JT(mid, free_head_p, free_tail_p)) {
2091 // Deflation succeeded and already updated free_head_p and
2092 // free_tail_p as needed. Finish the move to the local free list
2093 // by unlinking mid from the global or per-thread in-use list.
2094 if (mid == *list_p) {
2095 *list_p = mid->_next_om;
2096 } else if (cur_mid_in_use != NULL) {
2097 // Maintain the current in-use list.
2098 cur_mid_in_use->_next_om = mid->_next_om;
2099 }
2100 next = mid->_next_om;
2101 mid->_next_om = NULL;
2102 // At this point mid is disconnected from the in-use list
2103 // and is the current tail in the free_head_p list.
2104 mid = next;
2105 deflated_count++;
2106 *count_p = *count_p - 1;
2107 } else {
2108 // mid is considered in-use if it does not have an associated
2109 // Java object or mid is not old or deflation did not succeed.
2110 // A mid->is_new() node can be seen here when it is freshly
2111 // returned by om_alloc() (and skips the deflation code path).
2112 // A mid->is_old() node can be seen here when deflation failed.
2113 // A mid->is_free() node can be seen here when a fresh node from
2114 // om_alloc() is released by om_release() due to losing the race
2115 // in inflate().
2116
2117 cur_mid_in_use = mid;
2118 mid = mid->_next_om;
2119
2120 if (SafepointSynchronize::is_synchronizing() &&
2121 cur_mid_in_use != *list_p && cur_mid_in_use->is_old()) {
2122 // If a safepoint has started and cur_mid_in_use is not the list
2123 // head and is old, then it is safe to use as saved state. Return
2124 // to the caller so gListLock can be dropped as appropriate
2125 // before blocking.
2126 *saved_mid_in_use_p = cur_mid_in_use;
2156 bool deflated = false;
2157
2158 ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors
2159 ObjectMonitor* free_tail_p = NULL;
2160 elapsedTimer timer;
2161
2162 if (log_is_enabled(Info, monitorinflation)) {
2163 timer.start();
2164 }
2165
2166 // Prevent om_flush from changing mids in Thread dtor's during deflation
2167 // And in case the vm thread is acquiring a lock during a safepoint
2168 // See e.g. 6320749
2169 Thread::muxAcquire(&gListLock, "deflate_idle_monitors");
2170
2171 // Note: the thread-local monitors lists get deflated in
2172 // a separate pass. See deflate_thread_local_monitors().
2173
2174 // For moribund threads, scan g_om_in_use_list
2175 int deflated_count = 0;
2176 if (g_om_in_use_list != NULL) {
2177 // Update n_in_circulation before g_om_in_use_count is updated by deflation.
2178 counters->n_in_circulation += g_om_in_use_count;
2179 deflated_count = deflate_monitor_list((ObjectMonitor**)&g_om_in_use_list, (int*)&g_om_in_use_count, &free_head_p, &free_tail_p);
2180 counters->n_in_use += g_om_in_use_count;
2181 }
2182
2183 if (free_head_p != NULL) {
2184 // Move the deflated ObjectMonitors back to the global free list.
2185 guarantee(free_tail_p != NULL && deflated_count > 0, "invariant");
2186 assert(free_tail_p->_next_om == NULL, "must be NULL: _next_om="
2187 INTPTR_FORMAT, p2i(free_tail_p->_next_om));
2188 // constant-time list splice - prepend scavenged segment to g_free_list
2189 free_tail_p->_next_om = g_free_list;
2190 g_free_list = free_head_p;
2191 counters->n_scavenged += deflated_count;
2192 }
2193 Thread::muxRelease(&gListLock);
2194 timer.stop();
2195
2196 LogStreamHandle(Debug, monitorinflation) lsh_debug;
2197 LogStreamHandle(Info, monitorinflation) lsh_info;
2198 LogStream* ls = NULL;
2199 if (log_is_enabled(Debug, monitorinflation)) {
2200 ls = &lsh_debug;
2201 } else if (deflated_count != 0 && log_is_enabled(Info, monitorinflation)) {
2202 ls = &lsh_info;
2203 }
2204 if (ls != NULL) {
2205 ls->print_cr("deflating global idle monitors, %3.7f secs, %d monitors", timer.seconds(), deflated_count);
2206 }
2207 }
2208
// Deflate global idle ObjectMonitors using a JavaThread.
//
// Thin wrapper: requires AsyncDeflateIdleMonitors and a calling
// JavaThread, then delegates to deflate_common_idle_monitors_using_JT()
// with is_global=true (the "target" is the current thread).
void ObjectSynchronizer::deflate_global_idle_monitors_using_JT() {
  assert(AsyncDeflateIdleMonitors, "sanity check");
  assert(Thread::current()->is_Java_thread(), "precondition");
  JavaThread* self = JavaThread::current();

  deflate_common_idle_monitors_using_JT(true /* is_global */, self);
}
2218
// Deflate the specified JavaThread's idle ObjectMonitors using a JavaThread.
//
// Clears the target's pending om_request_deflation flag first, then
// delegates to deflate_common_idle_monitors_using_JT() with
// is_global=false. Requires AsyncDeflateIdleMonitors and a calling
// JavaThread.
void ObjectSynchronizer::deflate_per_thread_idle_monitors_using_JT(JavaThread* target) {
  assert(AsyncDeflateIdleMonitors, "sanity check");
  assert(Thread::current()->is_Java_thread(), "precondition");

  target->om_request_deflation = false;

  deflate_common_idle_monitors_using_JT(false /* !is_global */, target);
}
2229
// Deflate global or per-thread idle ObjectMonitors using a JavaThread.
//
// When is_global is true, the global g_om_in_use_list is scanned with
// gListLock held across the deflate_monitor_list_using_JT() calls;
// otherwise the target JavaThread's om_in_use_list is scanned and
// gListLock is taken only briefly to splice scavenged monitors onto
// g_free_list. If a safepoint starts mid-scan, the scan pauses
// (dropping gListLock in the global case), blocks in ThreadBlockInVM,
// and then resumes from the position saved in saved_mid_in_use_p.
void ObjectSynchronizer::deflate_common_idle_monitors_using_JT(bool is_global, JavaThread* target) {
  JavaThread* self = JavaThread::current();

  int deflated_count = 0;
  ObjectMonitor* free_head_p = NULL;  // Local SLL of scavenged ObjectMonitors
  ObjectMonitor* free_tail_p = NULL;
  ObjectMonitor* saved_mid_in_use_p = NULL;  // resume point after a safepoint pause
  elapsedTimer timer;

  if (log_is_enabled(Info, monitorinflation)) {
    timer.start();
  }

  if (is_global) {
    Thread::muxAcquire(&gListLock, "deflate_global_idle_monitors_using_JT(1)");
    OM_PERFDATA_OP(MonExtant, set_value(g_om_in_use_count));
  } else {
    OM_PERFDATA_OP(MonExtant, inc(target->om_in_use_count));
  }

  do {
    int local_deflated_count;
    if (is_global) {
      local_deflated_count = deflate_monitor_list_using_JT((ObjectMonitor**)&g_om_in_use_list, (int*)&g_om_in_use_count, &free_head_p, &free_tail_p, &saved_mid_in_use_p);
    } else {
      local_deflated_count = deflate_monitor_list_using_JT(&target->om_in_use_list, &target->om_in_use_count, &free_head_p, &free_tail_p, &saved_mid_in_use_p);
    }
    deflated_count += local_deflated_count;

    if (free_head_p != NULL) {
      // Move the deflated ObjectMonitors to the global free list.
      guarantee(free_tail_p != NULL && local_deflated_count > 0, "free_tail_p=" INTPTR_FORMAT ", local_deflated_count=%d", p2i(free_tail_p), local_deflated_count);
      assert(free_tail_p->_next_om == NULL, "must be NULL: _next_om="
             INTPTR_FORMAT, p2i(free_tail_p->_next_om));

      // In the per-thread case gListLock is not yet held; take it just
      // for the splice. In the global case it is already held.
      if (!is_global) {
        Thread::muxAcquire(&gListLock, "deflate_per_thread_idle_monitors_using_JT(2)");
      }
      // Constant-time list splice - prepend scavenged segment to g_free_list.
      free_tail_p->_next_om = g_free_list;
      g_free_list = free_head_p;

      OM_PERFDATA_OP(Deflations, inc(local_deflated_count));
      if (!is_global) {
        Thread::muxRelease(&gListLock);
      }
    }

    if (saved_mid_in_use_p != NULL) {
      // deflate_monitor_list_using_JT() detected a safepoint starting.
      if (is_global) {
        Thread::muxRelease(&gListLock);
      }
      timer.stop();
      {
        // Honor the safepoint: block here, then resume the scan.
        if (is_global) {
          log_debug(monitorinflation)("pausing deflation of global idle monitors for a safepoint.");
        } else {
          log_debug(monitorinflation)("jt=" INTPTR_FORMAT ": pausing deflation of per-thread idle monitors for a safepoint.", p2i(target));
        }
        assert(SafepointSynchronize::is_synchronizing(), "sanity check");
        ThreadBlockInVM blocker(self);
      }
      // Prepare for another loop after the safepoint.
      free_head_p = NULL;
      free_tail_p = NULL;
      if (log_is_enabled(Info, monitorinflation)) {
        timer.start();
      }
      if (is_global) {
        Thread::muxAcquire(&gListLock, "deflate_global_idle_monitors_using_JT(3)");
      }
    }
  } while (saved_mid_in_use_p != NULL);
  if (is_global) {
    Thread::muxRelease(&gListLock);
  }
  timer.stop();

  LogStreamHandle(Debug, monitorinflation) lsh_debug;
  LogStreamHandle(Info, monitorinflation) lsh_info;
  LogStream* ls = NULL;
  if (log_is_enabled(Debug, monitorinflation)) {
    ls = &lsh_debug;
  } else if (deflated_count != 0 && log_is_enabled(Info, monitorinflation)) {
    // At Info level only report passes that actually deflated something.
    ls = &lsh_info;
  }
  if (ls != NULL) {
    if (is_global) {
      ls->print_cr("async-deflating global idle monitors, %3.7f secs, %d monitors", timer.seconds(), deflated_count);
    } else {
      ls->print_cr("jt=" INTPTR_FORMAT ": async-deflating per-thread idle monitors, %3.7f secs, %d monitors", p2i(target), timer.seconds(), deflated_count);
    }
  }
}
2327
2328 void ObjectSynchronizer::finish_deflate_idle_monitors(DeflateMonitorCounters* counters) {
2329 // Report the cumulative time for deflating each thread's idle
2330 // monitors. Note: if the work is split among more than one
2331 // worker thread, then the reported time will likely be more
2332 // than a beginning to end measurement of the phase.
2333 // Note: AsyncDeflateIdleMonitors only deflates per-thread idle
2334 // monitors at a safepoint when a special deflation has been requested.
2335 log_info(safepoint, cleanup)("deflating per-thread idle monitors, %3.7f secs, monitors=%d", counters->per_thread_times, counters->per_thread_scavenged);
2336
2337 bool needs_special_deflation = is_special_deflation_requested();
2338 if (!AsyncDeflateIdleMonitors || needs_special_deflation) {
2339 // AsyncDeflateIdleMonitors does not use these counters unless
2340 // there is a special deflation request.
2341
2342 OM_PERFDATA_OP(Deflations, inc(counters->n_scavenged));
2343 OM_PERFDATA_OP(MonExtant, set_value(counters->n_in_circulation));
2344 }
2345
2346 if (log_is_enabled(Debug, monitorinflation)) {
2347 // exit_globals()'s call to audit_and_print_stats() is done
2348 // at the Info level.
2349 ObjectSynchronizer::audit_and_print_stats(false /* on_exit */);
2350 } else if (log_is_enabled(Info, monitorinflation)) {
2351 Thread::muxAcquire(&gListLock, "finish_deflate_idle_monitors");
2352 log_info(monitorinflation)("g_om_population=%d, g_om_in_use_count=%d, "
2353 "g_om_free_count=%d", g_om_population,
2354 g_om_in_use_count, g_om_free_count);
2355 Thread::muxRelease(&gListLock);
2356 }
2357
2358 ForceMonitorScavenge = 0; // Reset
2359 GVars.stw_random = os::random();
2360 GVars.stw_cycle++;
2361 if (needs_special_deflation) {
2370 if (!is_special_deflation_requested()) {
2371 // Mark the JavaThread for idle monitor deflation if a special
2372 // deflation has NOT been requested.
2373 if (thread->om_in_use_count > 0) {
2374 // This JavaThread is using monitors so mark it.
2375 thread->om_request_deflation = true;
2376 }
2377 return;
2378 }
2379 }
2380
2381 ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors
2382 ObjectMonitor* free_tail_p = NULL;
2383 elapsedTimer timer;
2384
2385 if (log_is_enabled(Info, safepoint, cleanup) ||
2386 log_is_enabled(Info, monitorinflation)) {
2387 timer.start();
2388 }
2389
2390 // Update n_in_circulation before om_in_use_count is updated by deflation.
2391 counters->n_in_circulation += thread->om_in_use_count;
2392
2393 int deflated_count = deflate_monitor_list(&thread->om_in_use_list, &thread->om_in_use_count, &free_head_p, &free_tail_p);
2394 counters->n_in_use += thread->om_in_use_count;
2395
2396 Thread::muxAcquire(&gListLock, "deflate_thread_local_monitors");
2397
2398 if (free_head_p != NULL) {
2399 // Move the deflated ObjectMonitors back to the global free list.
2400 guarantee(free_tail_p != NULL && deflated_count > 0, "invariant");
2401 assert(free_tail_p->_next_om == NULL, "must be NULL: _next_om="
2402 INTPTR_FORMAT, p2i(free_tail_p->_next_om));
2403
2404 // constant-time list splice - prepend scavenged segment to g_free_list
2405 free_tail_p->_next_om = g_free_list;
2406 g_free_list = free_head_p;
2407 counters->n_scavenged += deflated_count;
2408 counters->per_thread_scavenged += deflated_count;
2409 }
2410
2411 timer.stop();
2412 // Safepoint logging cares about cumulative per_thread_times and
2413 // we'll capture most of the cost, but not the muxRelease() which
2414 // should be cheap.
2415 counters->per_thread_times += timer.seconds();
2416
2417 Thread::muxRelease(&gListLock);
2418
2419 LogStreamHandle(Debug, monitorinflation) lsh_debug;
2420 LogStreamHandle(Info, monitorinflation) lsh_info;
2421 LogStream* ls = NULL;
2422 if (log_is_enabled(Debug, monitorinflation)) {
2423 ls = &lsh_debug;
2424 } else if (deflated_count != 0 && log_is_enabled(Info, monitorinflation)) {
2425 ls = &lsh_info;
2426 }
2427 if (ls != NULL) {
2428 ls->print_cr("jt=" INTPTR_FORMAT ": deflating per-thread idle monitors, %3.7f secs, %d monitors", p2i(thread), timer.seconds(), deflated_count);
|