238 m->_recursions++;
239 return true;
240 }
241
242 // This Java Monitor is inflated so obj's header will never be
243 // displaced to this thread's BasicLock. Make the displaced header
244 // non-NULL so this BasicLock is not seen as recursive nor as
245 // being locked. We do this unconditionally so that this thread's
246 // BasicLock cannot be mis-interpreted by any stack walkers. For
247 // performance reasons, stack walkers generally first check for
248 // Biased Locking in the object's header, the second check is for
249 // stack-locking in the object's header, the third check is for
250 // recursive stack-locking in the displaced header in the BasicLock,
251 // and last are the inflated Java Monitor (ObjectMonitor) checks.
252 lock->set_displaced_header(markOopDesc::unused_mark());
253
254 if (owner == NULL && Atomic::replace_if_null(Self, &(m->_owner))) {
255 assert(m->_recursions == 0, "invariant");
256 return true;
257 }
258 }
259 break;
260 }
261
262 // Note that we could inflate in quick_enter.
263 // This is likely a useful optimization
264 // Critically, in quick_enter() we must not:
265 // -- perform bias revocation, or
266 // -- block indefinitely, or
267 // -- reach a safepoint
268
269 return false; // revert to slow-path
270 }
271
272 // -----------------------------------------------------------------------------
273 // Fast Monitor Enter/Exit
274 // This is the fast monitor enter. The interpreter and compiler use
275 // some assembly copies of this code. Make sure to update that code
276 // if the following function is changed. The implementation is
277 // extremely sensitive to race conditions. Be careful.
1004 // Get the next block in the block list.
1005 static inline PaddedEnd<ObjectMonitor>* next(PaddedEnd<ObjectMonitor>* block) {
1006 assert(block->object() == CHAINMARKER, "must be a block header");
1007 block = (PaddedEnd<ObjectMonitor>*) block->FreeNext;
1008 assert(block == NULL || block->object() == CHAINMARKER, "must be a block header");
1009 return block;
1010 }
1011
1012 static bool monitors_used_above_threshold() {
1013 if (gMonitorPopulation == 0) {
1014 return false;
1015 }
1016 if (MonitorUsedDeflationThreshold > 0) {
1017 int monitors_used = gMonitorPopulation - gMonitorFreeCount;
1018 int monitor_usage = (monitors_used * 100LL) / gMonitorPopulation;
1019 return monitor_usage > MonitorUsedDeflationThreshold;
1020 }
1021 return false;
1022 }
1023
1024 bool ObjectSynchronizer::is_async_deflation_needed() {
1025 if (!AsyncDeflateIdleMonitors) {
1026 return false;
1027 }
1028 if (is_async_deflation_requested()) {
1029 // Async deflation request.
1030 return true;
1031 }
1032 if (AsyncDeflationInterval > 0 &&
1033 time_since_last_async_deflation_ms() > AsyncDeflationInterval &&
1034 monitors_used_above_threshold()) {
1035 // It's been longer than our specified deflate interval and there
1036 // are too many monitors in use. We don't deflate more frequently
1037 // than AsyncDeflationInterval (unless is_async_deflation_requested)
1038 // in order to not swamp the ServiceThread.
1039 _last_async_deflation_time_ns = os::javaTimeNanos();
1040 return true;
1041 }
1042 return false;
1043 }
1044
1045 bool ObjectSynchronizer::is_safepoint_deflation_needed() {
1046 if (!AsyncDeflateIdleMonitors) {
1047 if (monitors_used_above_threshold()) {
1048 // Too many monitors in use.
1049 return true;
1050 }
1051 return false;
1052 }
1053 if (is_special_deflation_requested()) {
1054 // For AsyncDeflateIdleMonitors only do a safepoint deflation
1055 // if there is a special deflation request.
1056 return true;
1057 }
1058 return false;
1059 }
1060
1061 jlong ObjectSynchronizer::time_since_last_async_deflation_ms() {
1094 // -----------------------
1095 // Inflation unlinks monitors from the global gFreeList and
1096 // associates them with objects. Deflation -- which occurs at
1097 // STW-time -- disassociates idle monitors from objects. Such
1098 // scavenged monitors are returned to the gFreeList.
1099 //
1100 // The global list is protected by gListLock. All the critical sections
1101 // are short and operate in constant-time.
1102 //
1103 // ObjectMonitors reside in type-stable memory (TSM) and are immortal.
1104 //
1105 // Lifecycle:
1106 // -- unassigned and on the global free list
1107 // -- unassigned and on a thread's private omFreeList
1108 // -- assigned to an object. The object is inflated and the mark refers
1109 // to the objectmonitor.
1110
1111
1112 // Constraining monitor pool growth via MonitorBound ...
1113 //
1114 // The monitor pool is grow-only. We scavenge at STW safepoint-time, but
1115 // the rate of scavenging is driven primarily by GC. As such, we can find
1116 // an inordinate number of monitors in circulation.
1117 // To avoid that scenario we can artificially induce a STW safepoint
1118 // if the pool appears to be growing past some reasonable bound.
1119 // Generally we favor time in space-time tradeoffs, but as there's no
1120 // natural back-pressure on the # of extant monitors we need to impose some
1121 // type of limit. Beware that if MonitorBound is set to too low a value
1122 // we could just loop. In addition, if MonitorBound is set to a low value
1123 // we'll incur more safepoints, which are harmful to performance.
1124 // See also: GuaranteedSafepointInterval
1125 //
1126 // The current implementation uses asynchronous VM operations.
1127
1128 static void InduceScavenge(Thread * Self, const char * Whence) {
1129 // Induce STW safepoint to trim monitors
1130 // Ultimately, this results in a call to deflate_idle_monitors() in the near future.
1131 // More precisely, trigger an asynchronous STW safepoint as the number
1132 // of active monitors passes the specified threshold.
1133 // TODO: assert thread state is reasonable
1134
1135 if (ForceMonitorScavenge == 0 && Atomic::xchg (1, &ForceMonitorScavenge) == 0) {
1136 // Induce a 'null' safepoint to scavenge monitors
1137 // Must VM_Operation instance be heap allocated as the op will be enqueue and posted
1138 // to the VMthread and have a lifespan longer than that of this activation record.
1139 // The VMThread will delete the op when completed.
1140 VMThread::execute(new VM_ScavengeMonitors());
1141 }
1142 }
1143
1144 ObjectMonitor* ObjectSynchronizer::omAlloc(Thread * Self,
1145 const InflateCause cause) {
1146 // A large MAXPRIVATE value reduces both list lock contention
1147 // and list coherency traffic, but also tends to increase the
1148 // number of objectMonitors in circulation as well as the STW
1149 // scavenge costs. As usual, we lean toward time in space-time
1150 // tradeoffs.
1151 const int MAXPRIVATE = 1024;
1152
1153 if (AsyncDeflateIdleMonitors) {
1154 JavaThread * jt = (JavaThread *)Self;
1155 if (jt->omShouldDeflateIdleMonitors && jt->omInUseCount > 0 &&
1156 cause != inflate_cause_vm_internal) {
1157 // Deflate any per-thread idle monitors for this JavaThread if
1158 // this is not an internal inflation; internal inflations can
1159 // occur in places where it is not safe to pause for a safepoint.
1160 // Clean up your own mess. (Gibbs Rule 45) Otherwise, skip this
1161 // deflation. deflate_global_idle_monitors_using_JT() is called
1162 // by the ServiceThread.
1163 debug_only(jt->check_for_valid_safepoint_state(false);)
1164 ObjectSynchronizer::deflate_per_thread_idle_monitors_using_JT();
1165 }
1166 }
1167
1168 stringStream ss;
1169 for (;;) {
1170 ObjectMonitor * m;
1171
1172 // 1: try to allocate from the thread's local omFreeList.
1173 // Threads will attempt to allocate first from their local list, then
1174 // from the global list, and only after those attempts fail will the thread
1175 // attempt to instantiate new monitors. Thread-local free lists take
1176 // heat off the gListLock and improve allocation latency, as well as reducing
1177 // coherency traffic on the shared global list.
1178 m = Self->omFreeList;
1179 if (m != NULL) {
1180 Self->omFreeList = m->FreeNext;
1181 Self->omFreeCount--;
1182 guarantee(m->object() == NULL, "invariant");
1186 Self->omInUseCount++;
1187 return m;
1188 }
1189
1190 // 2: try to allocate from the global gFreeList
1191 // CONSIDER: use muxTry() instead of muxAcquire().
1192 // If the muxTry() fails then drop immediately into case 3.
1193 // If we're using thread-local free lists then try
1194 // to reprovision the caller's free list.
1195 if (gFreeList != NULL) {
1196 // Reprovision the thread's omFreeList.
1197 // Use bulk transfers to reduce the allocation rate and heat
1198 // on various locks.
1199 Thread::muxAcquire(&gListLock, "omAlloc(1)");
1200 for (int i = Self->omFreeProvision; --i >= 0 && gFreeList != NULL;) {
1201 gMonitorFreeCount--;
1202 ObjectMonitor * take = gFreeList;
1203 gFreeList = take->FreeNext;
1204 guarantee(take->object() == NULL, "invariant");
1205 if (AsyncDeflateIdleMonitors) {
1206 // Clear any values we allowed to linger during async deflation.
1207 take->_header = NULL;
1208 take->set_owner(NULL);
1209
1210 if (take->ref_count() < 0) {
1211 // Add back max_jint to restore the ref_count field to its
1212 // proper value.
1213 Atomic::add(max_jint, &take->_ref_count);
1214
1215 assert(take->ref_count() >= 0, "must not be negative: ref_count=%d",
1216 take->ref_count());
1217 }
1218 }
1219 take->Recycle();
1220 assert(take->is_free(), "invariant");
1221 omRelease(Self, take, false);
1222 }
1223 Thread::muxRelease(&gListLock);
1224 Self->omFreeProvision += 1 + (Self->omFreeProvision/2);
1225 if (Self->omFreeProvision > MAXPRIVATE) Self->omFreeProvision = MAXPRIVATE;
1226
1227 const int mx = MonitorBound;
1228 if (mx > 0 && (gMonitorPopulation-gMonitorFreeCount) > mx) {
1229 // We can't safely induce a STW safepoint from omAlloc() as our thread
1230 // state may not be appropriate for such activities and callers may hold
1231 // naked oops, so instead we defer the action.
1232 InduceScavenge(Self, "omAlloc");
1233 }
1234 continue;
1235 }
1236
1237 // 3: allocate a block of new ObjectMonitors
1238 // Both the local and global free lists are empty -- resort to malloc().
1239 // In the current implementation objectMonitors are TSM - immortal.
1240 // Ideally, we'd write "new ObjectMonitor[_BLOCKSIZE], but we want
1241 // each ObjectMonitor to start at the beginning of a cache line,
1242 // so we use align_up().
1243 // A better solution would be to use C++ placement-new.
1244 // BEWARE: As it stands currently, we don't run the ctors!
1245 assert(_BLOCKSIZE > 1, "invariant");
1246 size_t neededsize = sizeof(PaddedEnd<ObjectMonitor>) * _BLOCKSIZE;
1247 PaddedEnd<ObjectMonitor> * temp;
1248 size_t aligned_size = neededsize + (DEFAULT_CACHE_LINE_SIZE - 1);
1656 // would be useful.
1657
1658 // Catch if the object's header is not neutral (not locked and
1659 // not marked is what we care about here).
1660 ADIM_guarantee(mark->is_neutral(), "invariant: header=" INTPTR_FORMAT, p2i(mark));
1661 ObjectMonitor * m;
1662 if (!AsyncDeflateIdleMonitors || cause == inflate_cause_vm_internal) {
1663 // If !AsyncDeflateIdleMonitors or if an internal inflation, then
1664 // we won't stop for a potential safepoint in omAlloc.
1665 m = omAlloc(Self, cause);
1666 } else {
1667 // If AsyncDeflateIdleMonitors and not an internal inflation, then
1668 // we may stop for a safepoint in omAlloc() so protect object.
1669 Handle h_obj(Self, object);
1670 m = omAlloc(Self, cause);
1671 object = h_obj(); // Refresh object.
1672 }
1673 // prepare m for installation - set monitor to initial state
1674 m->Recycle();
1675 m->set_header(mark);
1676 m->set_object(object);
1677 m->_Responsible = NULL;
1678 m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // consider: keep metastats by type/class
1679
1680 omh_p->set_om_ptr(m);
1681 assert(m->is_new(), "freshly allocated monitor must be new");
1682 m->set_allocation_state(ObjectMonitor::Old);
1683
1684 if (object->cas_set_mark(markOopDesc::encode(m), mark) != mark) {
1685 m->set_header(NULL);
1686 m->set_object(NULL);
1687 m->Recycle();
1688 omh_p->set_om_ptr(NULL);
1689 // omRelease() will reset the allocation state
1690 omRelease(Self, m, true);
1691 m = NULL;
1692 continue;
1693 // interference - the markword changed - just retry.
1694 // The state-transitions are one-way, so there's no chance of
1695 // live-lock -- "Inflated" is an absorbing state.
1698 // Hopefully the performance counters are allocated on distinct
1699 // cache lines to avoid false sharing on MP systems ...
1700 OM_PERFDATA_OP(Inflations, inc());
1701 if (log_is_enabled(Trace, monitorinflation)) {
1702 ResourceMark rm(Self);
1703 lsh.print_cr("inflate(neutral): object=" INTPTR_FORMAT ", mark="
1704 INTPTR_FORMAT ", type='%s'", p2i(object),
1705 p2i(object->mark()), object->klass()->external_name());
1706 }
1707 if (event.should_commit()) {
1708 post_monitor_inflate_event(&event, object, cause);
1709 }
1710 ADIM_guarantee(!m->is_free(), "inflated monitor to be returned cannot be free");
1711 return;
1712 }
1713 }
1714
1715
1716 // We maintain a list of in-use monitors for each thread.
1717 //
1718 // deflate_thread_local_monitors() scans a single thread's in-use list, while
1719 // deflate_idle_monitors() scans only a global list of in-use monitors which
1720 // is populated only as a thread dies (see omFlush()).
1721 //
1722 // These operations are called at all safepoints, immediately after mutators
1723 // are stopped, but before any objects have moved. Collectively they traverse
1724 // the population of in-use monitors, deflating where possible. The scavenged
1725 // monitors are returned to the global monitor free list.
1726 //
1727 // Beware that we scavenge at *every* stop-the-world point. Having a large
1728 // number of monitors in-use could negatively impact performance. We also want
1729 // to minimize the total # of monitors in circulation, as they incur a small
1730 // footprint penalty.
1731 //
1732 // Perversely, the heap size -- and thus the STW safepoint rate --
1733 // typically drives the scavenge rate. Large heaps can mean infrequent GC,
1734 // which in turn can mean large(r) numbers of ObjectMonitors in circulation.
1735 // This is an unfortunate aspect of this design.
1736
1737 void ObjectSynchronizer::do_safepoint_work(DeflateMonitorCounters* _counters) {
1738 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
1739
1740 // The per-thread in-use lists are handled in
1741 // ParallelSPCleanupThreadClosure::do_thread().
1742
1743 if (!AsyncDeflateIdleMonitors || is_special_deflation_requested()) {
1744 // Use the older mechanism for the global in-use list or if a
1745 // special deflation has been requested before the safepoint.
1746 ObjectSynchronizer::deflate_idle_monitors(_counters);
1747 return;
1748 }
1749
1750 log_debug(monitorinflation)("requesting async deflation of idle monitors.");
1751 // Request deflation of idle monitors by the ServiceThread:
1752 set_is_async_deflation_requested(true);
1753 MonitorLocker ml(Service_lock, Mutex::_no_safepoint_check_flag);
1754 ml.notify_all();
1755 }
1756
// Deflate a single monitor if not in-use
// Return true if deflated, false if in-use
//
// On success, obj's header is restored to its pre-inflation (neutral)
// value and the scrubbed ObjectMonitor is appended to the working free
// list defined by freeHeadp/freeTailp; the caller later transfers that
// list to the global free list.
bool ObjectSynchronizer::deflate_monitor(ObjectMonitor* mid, oop obj,
                                         ObjectMonitor** freeHeadp,
                                         ObjectMonitor** freeTailp) {
  bool deflated;
  // Normal case ... The monitor is associated with obj.
  const markOop mark = obj->mark();
  guarantee(mark == markOopDesc::encode(mid), "should match: mark="
            INTPTR_FORMAT ", encoded mid=" INTPTR_FORMAT, p2i(mark),
            p2i(markOopDesc::encode(mid)));
  // Make sure that mark->monitor() and markOopDesc::encode() agree:
  guarantee(mark->monitor() == mid, "should match: monitor()=" INTPTR_FORMAT
            ", mid=" INTPTR_FORMAT, p2i(mark->monitor()), p2i(mid));
  // The displaced header stashed in the monitor must be the object's
  // original neutral header so it can be restored below.
  const markOop dmw = mid->header();
  guarantee(dmw->is_neutral(), "invariant: header=" INTPTR_FORMAT, p2i(dmw));

  if (mid->is_busy()) {
    // Owned, contended, or waited-on: cannot deflate.
    deflated = false;
  } else {
    // Deflate the monitor if it is no longer being used
    // It's idle - scavenge and return to the global free list
    // plain old deflation ...
    if (log_is_enabled(Trace, monitorinflation)) {
      ResourceMark rm;
      log_trace(monitorinflation)("deflate_monitor: "
                                  "object=" INTPTR_FORMAT ", mark="
                                  INTPTR_FORMAT ", type='%s'", p2i(obj),
                                  p2i(mark), obj->klass()->external_name());
    }

    // Restore the header back to obj; the release store publishes the
    // neutral mark before the monitor is scrubbed by clear() below.
    obj->release_set_mark(dmw);
    mid->clear();

    assert(mid->object() == NULL, "invariant: object=" INTPTR_FORMAT,
           p2i(mid->object()));
    assert(mid->is_free(), "invariant");

    // Move the object to the working free list defined by freeHeadp, freeTailp
    if (*freeHeadp == NULL) *freeHeadp = mid;
    if (*freeTailp != NULL) {
      // Append to the tail of the working list.
      ObjectMonitor * prevtail = *freeTailp;
      assert(prevtail->FreeNext == NULL, "cleaned up deflated?");
      prevtail->FreeNext = mid;
    }
    *freeTailp = mid;
    deflated = true;
  }
  return deflated;
}
1808
1809 // Deflate the specified ObjectMonitor if not in-use using a JavaThread.
1840 if (Atomic::replace_if_null(DEFLATER_MARKER, &(mid->_owner))) {
1841 // ObjectMonitor is not owned by another thread. Our setting
1842 // owner to DEFLATER_MARKER forces any contending thread through
1843 // the slow path. This is just the first part of the async
1844 // deflation dance.
1845
1846 if (mid->_contentions != 0 || mid->_waiters != 0) {
1847 // Another thread has raced to enter the ObjectMonitor after
1848 // mid->is_busy() above or has already entered and waited on
1849 // it which makes it busy so no deflation. Restore owner to
1850 // NULL if it is still DEFLATER_MARKER.
1851 Atomic::cmpxchg((void*)NULL, &mid->_owner, DEFLATER_MARKER);
1852 return false;
1853 }
1854
1855 if (Atomic::cmpxchg(-max_jint, &mid->_ref_count, (jint)0) == 0) {
1856 // Make ref_count negative to force any contending threads or
1857 // ObjectMonitor* using threads to retry. This is the second
1858 // part of the async deflation dance.
1859
1860 if (mid->_owner == DEFLATER_MARKER) {
1861 // If owner is still DEFLATER_MARKER, then we have successfully
1862 // signaled any contending threads to retry. If it is not, then we
1863 // have lost the race to an entering thread and the ObjectMonitor
1864 // is now busy. This is the third and final part of the async
1865 // deflation dance.
1866 // Note: This owner check solves the ABA problem with ref_count
1867 // where another thread acquired the ObjectMonitor, finished
1868 // using it and restored the ref_count to zero.
1869
1870 // Sanity checks for the races:
1871 guarantee(mid->_contentions == 0, "must be 0: contentions=%d",
1872 mid->_contentions);
1873 guarantee(mid->_waiters == 0, "must be 0: waiters=%d", mid->_waiters);
1874 guarantee(mid->_cxq == NULL, "must be no contending threads: cxq="
1875 INTPTR_FORMAT, p2i(mid->_cxq));
1876 guarantee(mid->_EntryList == NULL,
1877 "must be no entering threads: EntryList=" INTPTR_FORMAT,
1878 p2i(mid->_EntryList));
1879
1880 const oop obj = (oop) mid->object();
2503 log_error(monitorinflation)("found monitor list errors: error_cnt=%d", error_cnt);
2504 }
2505
2506 if ((on_exit && log_is_enabled(Info, monitorinflation)) ||
2507 (!on_exit && log_is_enabled(Trace, monitorinflation))) {
2508 // When exiting this log output is at the Info level. When called
2509 // at a safepoint, this log output is at the Trace level since
2510 // there can be a lot of it.
2511 log_in_use_monitor_details(ls, on_exit);
2512 }
2513
2514 ls->flush();
2515
2516 guarantee(error_cnt == 0, "ERROR: found monitor list errors: error_cnt=%d", error_cnt);
2517 }
2518
2519 // Check a free monitor entry; log any errors.
2520 void ObjectSynchronizer::chk_free_entry(JavaThread * jt, ObjectMonitor * n,
2521 outputStream * out, int *error_cnt_p) {
2522 stringStream ss;
2523 if ((!AsyncDeflateIdleMonitors && n->is_busy()) ||
2524 (AsyncDeflateIdleMonitors && n->is_busy_async())) {
2525 if (jt != NULL) {
2526 out->print_cr("ERROR: jt=" INTPTR_FORMAT ", monitor=" INTPTR_FORMAT
2527 ": free per-thread monitor must not be busy: %s", p2i(jt),
2528 p2i(n), n->is_busy_to_string(&ss));
2529 } else {
2530 out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": free global monitor "
2531 "must not be busy: %s", p2i(n), n->is_busy_to_string(&ss));
2532 }
2533 *error_cnt_p = *error_cnt_p + 1;
2534 }
2535 if (n->header() != NULL) {
2536 if (jt != NULL) {
2537 out->print_cr("ERROR: jt=" INTPTR_FORMAT ", monitor=" INTPTR_FORMAT
2538 ": free per-thread monitor must have NULL _header "
2539 "field: _header=" INTPTR_FORMAT, p2i(jt), p2i(n),
2540 p2i(n->header()));
2541 *error_cnt_p = *error_cnt_p + 1;
2542 } else if (!AsyncDeflateIdleMonitors) {
2543 out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": free global monitor "
2544 "must have NULL _header field: _header=" INTPTR_FORMAT,
|
238 m->_recursions++;
239 return true;
240 }
241
242 // This Java Monitor is inflated so obj's header will never be
243 // displaced to this thread's BasicLock. Make the displaced header
244 // non-NULL so this BasicLock is not seen as recursive nor as
245 // being locked. We do this unconditionally so that this thread's
246 // BasicLock cannot be mis-interpreted by any stack walkers. For
247 // performance reasons, stack walkers generally first check for
248 // Biased Locking in the object's header, the second check is for
249 // stack-locking in the object's header, the third check is for
250 // recursive stack-locking in the displaced header in the BasicLock,
251 // and last are the inflated Java Monitor (ObjectMonitor) checks.
252 lock->set_displaced_header(markOopDesc::unused_mark());
253
254 if (owner == NULL && Atomic::replace_if_null(Self, &(m->_owner))) {
255 assert(m->_recursions == 0, "invariant");
256 return true;
257 }
258
259 if (AsyncDeflateIdleMonitors &&
260 Atomic::cmpxchg(Self, &m->_owner, DEFLATER_MARKER) == DEFLATER_MARKER) {
261 // The deflation protocol finished the first part (setting owner),
262 // but it failed the second part (making ref_count negative) and
263 // bailed. Or the ObjectMonitor was async deflated and reused.
264 // Acquired the monitor.
265 assert(m->_recursions == 0, "invariant");
266 return true;
267 }
268 }
269 break;
270 }
271
272 // Note that we could inflate in quick_enter.
273 // This is likely a useful optimization
274 // Critically, in quick_enter() we must not:
275 // -- perform bias revocation, or
276 // -- block indefinitely, or
277 // -- reach a safepoint
278
279 return false; // revert to slow-path
280 }
281
282 // -----------------------------------------------------------------------------
283 // Fast Monitor Enter/Exit
284 // This is the fast monitor enter. The interpreter and compiler use
285 // some assembly copies of this code. Make sure to update that code
286 // if the following function is changed. The implementation is
287 // extremely sensitive to race conditions. Be careful.
1014 // Get the next block in the block list.
1015 static inline PaddedEnd<ObjectMonitor>* next(PaddedEnd<ObjectMonitor>* block) {
1016 assert(block->object() == CHAINMARKER, "must be a block header");
1017 block = (PaddedEnd<ObjectMonitor>*) block->FreeNext;
1018 assert(block == NULL || block->object() == CHAINMARKER, "must be a block header");
1019 return block;
1020 }
1021
1022 static bool monitors_used_above_threshold() {
1023 if (gMonitorPopulation == 0) {
1024 return false;
1025 }
1026 if (MonitorUsedDeflationThreshold > 0) {
1027 int monitors_used = gMonitorPopulation - gMonitorFreeCount;
1028 int monitor_usage = (monitors_used * 100LL) / gMonitorPopulation;
1029 return monitor_usage > MonitorUsedDeflationThreshold;
1030 }
1031 return false;
1032 }
1033
1034 // Returns true if MonitorBound is set (> 0) and if the specified
1035 // cnt is > MonitorBound. Otherwise returns false.
1036 static bool is_MonitorBound_exceeded(const int cnt) {
1037 const int mx = MonitorBound;
1038 return mx > 0 && cnt > mx;
1039 }
1040
1041 bool ObjectSynchronizer::is_async_deflation_needed() {
1042 if (!AsyncDeflateIdleMonitors) {
1043 return false;
1044 }
1045 if (is_async_deflation_requested()) {
1046 // Async deflation request.
1047 return true;
1048 }
1049 if (AsyncDeflationInterval > 0 &&
1050 time_since_last_async_deflation_ms() > AsyncDeflationInterval &&
1051 monitors_used_above_threshold()) {
1052 // It's been longer than our specified deflate interval and there
1053 // are too many monitors in use. We don't deflate more frequently
1054 // than AsyncDeflationInterval (unless is_async_deflation_requested)
1055 // in order to not swamp the ServiceThread.
1056 _last_async_deflation_time_ns = os::javaTimeNanos();
1057 return true;
1058 }
1059 if (is_MonitorBound_exceeded(gMonitorPopulation - gMonitorFreeCount)) {
1060 // Not enough ObjectMonitors on the global free list.
1061 return true;
1062 }
1063 return false;
1064 }
1065
1066 bool ObjectSynchronizer::is_safepoint_deflation_needed() {
1067 if (!AsyncDeflateIdleMonitors) {
1068 if (monitors_used_above_threshold()) {
1069 // Too many monitors in use.
1070 return true;
1071 }
1072 return false;
1073 }
1074 if (is_special_deflation_requested()) {
1075 // For AsyncDeflateIdleMonitors only do a safepoint deflation
1076 // if there is a special deflation request.
1077 return true;
1078 }
1079 return false;
1080 }
1081
1082 jlong ObjectSynchronizer::time_since_last_async_deflation_ms() {
1115 // -----------------------
1116 // Inflation unlinks monitors from the global gFreeList and
1117 // associates them with objects. Deflation -- which occurs at
1118 // STW-time -- disassociates idle monitors from objects. Such
1119 // scavenged monitors are returned to the gFreeList.
1120 //
1121 // The global list is protected by gListLock. All the critical sections
1122 // are short and operate in constant-time.
1123 //
1124 // ObjectMonitors reside in type-stable memory (TSM) and are immortal.
1125 //
1126 // Lifecycle:
1127 // -- unassigned and on the global free list
1128 // -- unassigned and on a thread's private omFreeList
1129 // -- assigned to an object. The object is inflated and the mark refers
1130 // to the objectmonitor.
1131
1132
1133 // Constraining monitor pool growth via MonitorBound ...
1134 //
1135 // If MonitorBound is not set (<= 0), MonitorBound checks are disabled.
1136 //
1137 // When safepoint deflation is being used (!AsyncDeflateIdleMonitors):
1138 // The monitor pool is grow-only. We scavenge at STW safepoint-time, but
1139 // the rate of scavenging is driven primarily by GC. As such, we can find
1140 // an inordinate number of monitors in circulation.
1141 // To avoid that scenario we can artificially induce a STW safepoint
1142 // if the pool appears to be growing past some reasonable bound.
1143 // Generally we favor time in space-time tradeoffs, but as there's no
1144 // natural back-pressure on the # of extant monitors we need to impose some
1145 // type of limit. Beware that if MonitorBound is set to too low a value
1146 // we could just loop. In addition, if MonitorBound is set to a low value
1147 // we'll incur more safepoints, which are harmful to performance.
1148 // See also: GuaranteedSafepointInterval
1149 //
1150 // The current implementation uses asynchronous VM operations.
1151 //
1152 // When safepoint deflation is being used and MonitorBound is set, the
1153 // boundary applies to (gMonitorPopulation - gMonitorFreeCount), i.e.,
1154 // if there are not enough ObjectMonitors on the global free list, then
1155 // a safepoint deflation is induced. Picking a good MonitorBound value
1156 // is non-trivial.
1157 //
1158 // When async deflation is being used:
1159 // The monitor pool is still grow-only. Async deflation is requested
1160 // by a safepoint's cleanup phase or by the ServiceThread at periodic
1161 // intervals when is_async_deflation_needed() returns true. In
1162 // addition to other policies that are checked, if there are not
1163 // enough ObjectMonitors on the global free list, then
1164 // is_async_deflation_needed() will return true. The ServiceThread
1165 // calls deflate_global_idle_monitors_using_JT() and also sets the
1166 // per-thread omShouldDeflateIdleMonitors flag as needed.
1167
1168 static void InduceScavenge(Thread * Self, const char * Whence) {
1169 assert(!AsyncDeflateIdleMonitors, "is not used by async deflation");
1170
1171 // Induce STW safepoint to trim monitors
1172 // Ultimately, this results in a call to deflate_idle_monitors() in the near future.
1173 // More precisely, trigger an asynchronous STW safepoint as the number
1174 // of active monitors passes the specified threshold.
1175 // TODO: assert thread state is reasonable
1176
1177 if (ForceMonitorScavenge == 0 && Atomic::xchg (1, &ForceMonitorScavenge) == 0) {
1178 // Induce a 'null' safepoint to scavenge monitors
1179 // Must VM_Operation instance be heap allocated as the op will be enqueue and posted
1180 // to the VMthread and have a lifespan longer than that of this activation record.
1181 // The VMThread will delete the op when completed.
1182 VMThread::execute(new VM_ScavengeMonitors());
1183 }
1184 }
1185
1186 ObjectMonitor* ObjectSynchronizer::omAlloc(Thread * Self,
1187 const InflateCause cause) {
1188 // A large MAXPRIVATE value reduces both list lock contention
1189 // and list coherency traffic, but also tends to increase the
1190 // number of objectMonitors in circulation as well as the STW
1191 // scavenge costs. As usual, we lean toward time in space-time
1192 // tradeoffs.
1193 const int MAXPRIVATE = 1024;
1194
1195 if (AsyncDeflateIdleMonitors) {
1196 JavaThread * jt = (JavaThread *)Self;
1197 if (jt->omShouldDeflateIdleMonitors && jt->omInUseCount > 0 &&
1198 cause != inflate_cause_vm_internal) {
1199 // Deflate any per-thread idle monitors for this JavaThread if
1200 // this is not an internal inflation; internal inflations can
1201 // occur in places where it is not safe to pause for a safepoint.
1202 // Clean up your own mess (Gibbs Rule 45). Otherwise, skip this
1203 // deflation. deflate_global_idle_monitors_using_JT() is called
1204 // by the ServiceThread. Per-thread async deflation is triggered
1205 // by the ServiceThread via omShouldDeflateIdleMonitors.
1206 debug_only(jt->check_for_valid_safepoint_state(false);)
1207 ObjectSynchronizer::deflate_per_thread_idle_monitors_using_JT();
1208 }
1209 }
1210
1211 stringStream ss;
1212 for (;;) {
1213 ObjectMonitor * m;
1214
1215 // 1: try to allocate from the thread's local omFreeList.
1216 // Threads will attempt to allocate first from their local list, then
1217 // from the global list, and only after those attempts fail will the thread
1218 // attempt to instantiate new monitors. Thread-local free lists take
1219 // heat off the gListLock and improve allocation latency, as well as reducing
1220 // coherency traffic on the shared global list.
1221 m = Self->omFreeList;
1222 if (m != NULL) {
1223 Self->omFreeList = m->FreeNext;
1224 Self->omFreeCount--;
1225 guarantee(m->object() == NULL, "invariant");
1229 Self->omInUseCount++;
1230 return m;
1231 }
1232
1233 // 2: try to allocate from the global gFreeList
1234 // CONSIDER: use muxTry() instead of muxAcquire().
1235 // If the muxTry() fails then drop immediately into case 3.
1236 // If we're using thread-local free lists then try
1237 // to reprovision the caller's free list.
1238 if (gFreeList != NULL) {
1239 // Reprovision the thread's omFreeList.
1240 // Use bulk transfers to reduce the allocation rate and heat
1241 // on various locks.
1242 Thread::muxAcquire(&gListLock, "omAlloc(1)");
1243 for (int i = Self->omFreeProvision; --i >= 0 && gFreeList != NULL;) {
1244 gMonitorFreeCount--;
1245 ObjectMonitor * take = gFreeList;
1246 gFreeList = take->FreeNext;
1247 guarantee(take->object() == NULL, "invariant");
1248 if (AsyncDeflateIdleMonitors) {
1249 // We allowed 3 field values to linger during async deflation.
1250 // We clear header and restore ref_count here, but we leave
1251 // owner == DEFLATER_MARKER so the simple C2 ObjectMonitor
1252 // enter optimization can no longer race with async deflation
1253 // and reuse.
1254 take->_header = NULL;
1255 if (take->ref_count() < 0) {
1256 // Add back max_jint to restore the ref_count field to its
1257 // proper value.
1258 Atomic::add(max_jint, &take->_ref_count);
1259
1260 assert(take->ref_count() >= 0, "must not be negative: ref_count=%d",
1261 take->ref_count());
1262 }
1263 }
1264 take->Recycle();
1265 assert(take->is_free(), "invariant");
1266 omRelease(Self, take, false);
1267 }
1268 Thread::muxRelease(&gListLock);
1269 Self->omFreeProvision += 1 + (Self->omFreeProvision/2);
1270 if (Self->omFreeProvision > MAXPRIVATE) Self->omFreeProvision = MAXPRIVATE;
1271
1272 if (!AsyncDeflateIdleMonitors &&
1273 is_MonitorBound_exceeded(gMonitorPopulation - gMonitorFreeCount)) {
1274 // Not enough ObjectMonitors on the global free list.
1275 // We can't safely induce a STW safepoint from omAlloc() as our thread
1276 // state may not be appropriate for such activities and callers may hold
1277 // naked oops, so instead we defer the action.
1278 InduceScavenge(Self, "omAlloc");
1279 }
1280 continue;
1281 }
1282
1283 // 3: allocate a block of new ObjectMonitors
1284 // Both the local and global free lists are empty -- resort to malloc().
1285 // In the current implementation objectMonitors are TSM - immortal.
    // Ideally, we'd write "new ObjectMonitor[_BLOCKSIZE]", but we want
1287 // each ObjectMonitor to start at the beginning of a cache line,
1288 // so we use align_up().
1289 // A better solution would be to use C++ placement-new.
1290 // BEWARE: As it stands currently, we don't run the ctors!
1291 assert(_BLOCKSIZE > 1, "invariant");
1292 size_t neededsize = sizeof(PaddedEnd<ObjectMonitor>) * _BLOCKSIZE;
1293 PaddedEnd<ObjectMonitor> * temp;
1294 size_t aligned_size = neededsize + (DEFAULT_CACHE_LINE_SIZE - 1);
1702 // would be useful.
1703
1704 // Catch if the object's header is not neutral (not locked and
1705 // not marked is what we care about here).
1706 ADIM_guarantee(mark->is_neutral(), "invariant: header=" INTPTR_FORMAT, p2i(mark));
1707 ObjectMonitor * m;
1708 if (!AsyncDeflateIdleMonitors || cause == inflate_cause_vm_internal) {
1709 // If !AsyncDeflateIdleMonitors or if an internal inflation, then
1710 // we won't stop for a potential safepoint in omAlloc.
1711 m = omAlloc(Self, cause);
1712 } else {
1713 // If AsyncDeflateIdleMonitors and not an internal inflation, then
1714 // we may stop for a safepoint in omAlloc() so protect object.
1715 Handle h_obj(Self, object);
1716 m = omAlloc(Self, cause);
1717 object = h_obj(); // Refresh object.
1718 }
1719 // prepare m for installation - set monitor to initial state
1720 m->Recycle();
1721 m->set_header(mark);
1722 // If we leave _owner == DEFLATER_MARKER here, then the simple C2
1723 // ObjectMonitor enter optimization can no longer race with async
1724 // deflation and reuse.
1725 m->set_object(object);
1726 m->_Responsible = NULL;
1727 m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // consider: keep metastats by type/class
1728
1729 omh_p->set_om_ptr(m);
1730 assert(m->is_new(), "freshly allocated monitor must be new");
1731 m->set_allocation_state(ObjectMonitor::Old);
1732
1733 if (object->cas_set_mark(markOopDesc::encode(m), mark) != mark) {
1734 m->set_header(NULL);
1735 m->set_object(NULL);
1736 m->Recycle();
1737 omh_p->set_om_ptr(NULL);
1738 // omRelease() will reset the allocation state
1739 omRelease(Self, m, true);
1740 m = NULL;
1741 continue;
1742 // interference - the markword changed - just retry.
1743 // The state-transitions are one-way, so there's no chance of
1744 // live-lock -- "Inflated" is an absorbing state.
1747 // Hopefully the performance counters are allocated on distinct
1748 // cache lines to avoid false sharing on MP systems ...
1749 OM_PERFDATA_OP(Inflations, inc());
1750 if (log_is_enabled(Trace, monitorinflation)) {
1751 ResourceMark rm(Self);
1752 lsh.print_cr("inflate(neutral): object=" INTPTR_FORMAT ", mark="
1753 INTPTR_FORMAT ", type='%s'", p2i(object),
1754 p2i(object->mark()), object->klass()->external_name());
1755 }
1756 if (event.should_commit()) {
1757 post_monitor_inflate_event(&event, object, cause);
1758 }
1759 ADIM_guarantee(!m->is_free(), "inflated monitor to be returned cannot be free");
1760 return;
1761 }
1762 }
1763
1764
1765 // We maintain a list of in-use monitors for each thread.
1766 //
1767 // For safepoint based deflation:
1768 // deflate_thread_local_monitors() scans a single thread's in-use list, while
1769 // deflate_idle_monitors() scans only a global list of in-use monitors which
1770 // is populated only as a thread dies (see omFlush()).
1771 //
1772 // These operations are called at all safepoints, immediately after mutators
1773 // are stopped, but before any objects have moved. Collectively they traverse
1774 // the population of in-use monitors, deflating where possible. The scavenged
1775 // monitors are returned to the global monitor free list.
1776 //
1777 // Beware that we scavenge at *every* stop-the-world point. Having a large
1778 // number of monitors in-use could negatively impact performance. We also want
1779 // to minimize the total # of monitors in circulation, as they incur a small
1780 // footprint penalty.
1781 //
1782 // Perversely, the heap size -- and thus the STW safepoint rate --
1783 // typically drives the scavenge rate. Large heaps can mean infrequent GC,
1784 // which in turn can mean large(r) numbers of ObjectMonitors in circulation.
1785 // This is an unfortunate aspect of this design.
1786 //
1787 // For async deflation:
1788 // If a special deflation request is made, then the safepoint based
1789 // deflation mechanism is used. Otherwise, an async deflation request
1790 // is registered with the ServiceThread and it is notified.
1791
1792 void ObjectSynchronizer::do_safepoint_work(DeflateMonitorCounters* _counters) {
1793 assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
1794
1795 // The per-thread in-use lists are handled in
1796 // ParallelSPCleanupThreadClosure::do_thread().
1797
1798 if (!AsyncDeflateIdleMonitors || is_special_deflation_requested()) {
1799 // Use the older mechanism for the global in-use list or if a
1800 // special deflation has been requested before the safepoint.
1801 ObjectSynchronizer::deflate_idle_monitors(_counters);
1802 return;
1803 }
1804
1805 log_debug(monitorinflation)("requesting async deflation of idle monitors.");
1806 // Request deflation of idle monitors by the ServiceThread:
1807 set_is_async_deflation_requested(true);
1808 MonitorLocker ml(Service_lock, Mutex::_no_safepoint_check_flag);
1809 ml.notify_all();
1810 }
1811
// Deflate a single monitor if not in-use
// Return true if deflated, false if in-use
// Part of the safepoint-based deflation mechanism: a deflated monitor is
// appended to the caller's working free list (freeHeadp/freeTailp) rather
// than being returned to the global free list directly.
bool ObjectSynchronizer::deflate_monitor(ObjectMonitor* mid, oop obj,
                                         ObjectMonitor** freeHeadp,
                                         ObjectMonitor** freeTailp) {
  bool deflated;
  // Normal case ... The monitor is associated with obj.
  const markOop mark = obj->mark();
  guarantee(mark == markOopDesc::encode(mid), "should match: mark="
            INTPTR_FORMAT ", encoded mid=" INTPTR_FORMAT, p2i(mark),
            p2i(markOopDesc::encode(mid)));
  // Make sure that mark->monitor() and markOopDesc::encode() agree:
  guarantee(mark->monitor() == mid, "should match: monitor()=" INTPTR_FORMAT
            ", mid=" INTPTR_FORMAT, p2i(mark->monitor()), p2i(mid));
  // The displaced markword saved in the monitor must be neutral so it
  // can be restored into obj's header below.
  const markOop dmw = mid->header();
  guarantee(dmw->is_neutral(), "invariant: header=" INTPTR_FORMAT, p2i(dmw));

  if (mid->is_busy() || mid->ref_count() != 0) {
    // Easy checks are first - the ObjectMonitor is busy or ObjectMonitor*
    // is in use so no deflation.
    deflated = false;
  } else {
    // Deflate the monitor if it is no longer being used
    // It's idle - scavenge and return to the global free list
    // plain old deflation ...
    if (log_is_enabled(Trace, monitorinflation)) {
      ResourceMark rm;
      log_trace(monitorinflation)("deflate_monitor: "
                                  "object=" INTPTR_FORMAT ", mark="
                                  INTPTR_FORMAT ", type='%s'", p2i(obj),
                                  p2i(mark), obj->klass()->external_name());
    }

    // Restore the header back to obj
    obj->release_set_mark(dmw);
    if (AsyncDeflateIdleMonitors) {
      // clear() expects the owner field to be NULL and we won't race
      // with the simple C2 ObjectMonitor enter optimization since
      // we're at a safepoint.
      mid->set_owner(NULL);
    }
    mid->clear();

    assert(mid->object() == NULL, "invariant: object=" INTPTR_FORMAT,
           p2i(mid->object()));
    assert(mid->is_free(), "invariant");

    // Move the object to the working free list defined by freeHeadp, freeTailp
    if (*freeHeadp == NULL) *freeHeadp = mid;
    if (*freeTailp != NULL) {
      // Append mid after the current tail via the FreeNext link.
      ObjectMonitor * prevtail = *freeTailp;
      assert(prevtail->FreeNext == NULL, "cleaned up deflated?");
      prevtail->FreeNext = mid;
    }
    *freeTailp = mid;
    deflated = true;
  }
  return deflated;
}
1871
1872 // Deflate the specified ObjectMonitor if not in-use using a JavaThread.
1903 if (Atomic::replace_if_null(DEFLATER_MARKER, &(mid->_owner))) {
1904 // ObjectMonitor is not owned by another thread. Our setting
1905 // owner to DEFLATER_MARKER forces any contending thread through
1906 // the slow path. This is just the first part of the async
1907 // deflation dance.
1908
1909 if (mid->_contentions != 0 || mid->_waiters != 0) {
1910 // Another thread has raced to enter the ObjectMonitor after
1911 // mid->is_busy() above or has already entered and waited on
1912 // it which makes it busy so no deflation. Restore owner to
1913 // NULL if it is still DEFLATER_MARKER.
1914 Atomic::cmpxchg((void*)NULL, &mid->_owner, DEFLATER_MARKER);
1915 return false;
1916 }
1917
1918 if (Atomic::cmpxchg(-max_jint, &mid->_ref_count, (jint)0) == 0) {
1919 // Make ref_count negative to force any contending threads or
1920 // ObjectMonitor* using threads to retry. This is the second
1921 // part of the async deflation dance.
1922
1923 if (mid->owner_is_DEFLATER_MARKER()) {
1924 // If owner is still DEFLATER_MARKER, then we have successfully
1925 // signaled any contending threads to retry. If it is not, then we
1926 // have lost the race to an entering thread and the ObjectMonitor
1927 // is now busy. This is the third and final part of the async
1928 // deflation dance.
1929 // Note: This owner check solves the ABA problem with ref_count
1930 // where another thread acquired the ObjectMonitor, finished
1931 // using it and restored the ref_count to zero.
1932
1933 // Sanity checks for the races:
1934 guarantee(mid->_contentions == 0, "must be 0: contentions=%d",
1935 mid->_contentions);
1936 guarantee(mid->_waiters == 0, "must be 0: waiters=%d", mid->_waiters);
1937 guarantee(mid->_cxq == NULL, "must be no contending threads: cxq="
1938 INTPTR_FORMAT, p2i(mid->_cxq));
1939 guarantee(mid->_EntryList == NULL,
1940 "must be no entering threads: EntryList=" INTPTR_FORMAT,
1941 p2i(mid->_EntryList));
1942
1943 const oop obj = (oop) mid->object();
2566 log_error(monitorinflation)("found monitor list errors: error_cnt=%d", error_cnt);
2567 }
2568
2569 if ((on_exit && log_is_enabled(Info, monitorinflation)) ||
2570 (!on_exit && log_is_enabled(Trace, monitorinflation))) {
2571 // When exiting this log output is at the Info level. When called
2572 // at a safepoint, this log output is at the Trace level since
2573 // there can be a lot of it.
2574 log_in_use_monitor_details(ls, on_exit);
2575 }
2576
2577 ls->flush();
2578
2579 guarantee(error_cnt == 0, "ERROR: found monitor list errors: error_cnt=%d", error_cnt);
2580 }
2581
2582 // Check a free monitor entry; log any errors.
2583 void ObjectSynchronizer::chk_free_entry(JavaThread * jt, ObjectMonitor * n,
2584 outputStream * out, int *error_cnt_p) {
2585 stringStream ss;
2586 if (n->is_busy()) {
2587 if (jt != NULL) {
2588 out->print_cr("ERROR: jt=" INTPTR_FORMAT ", monitor=" INTPTR_FORMAT
2589 ": free per-thread monitor must not be busy: %s", p2i(jt),
2590 p2i(n), n->is_busy_to_string(&ss));
2591 } else {
2592 out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": free global monitor "
2593 "must not be busy: %s", p2i(n), n->is_busy_to_string(&ss));
2594 }
2595 *error_cnt_p = *error_cnt_p + 1;
2596 }
2597 if (n->header() != NULL) {
2598 if (jt != NULL) {
2599 out->print_cr("ERROR: jt=" INTPTR_FORMAT ", monitor=" INTPTR_FORMAT
2600 ": free per-thread monitor must have NULL _header "
2601 "field: _header=" INTPTR_FORMAT, p2i(jt), p2i(n),
2602 p2i(n->header()));
2603 *error_cnt_p = *error_cnt_p + 1;
2604 } else if (!AsyncDeflateIdleMonitors) {
2605 out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": free global monitor "
2606 "must have NULL _header field: _header=" INTPTR_FORMAT,
|