--- old/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2019-07-11 14:36:27.000000000 -0400
+++ new/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2019-07-11 14:36:26.000000000 -0400
@@ -1839,8 +1839,28 @@
   // Unconditionally set box->_displaced_header = markOopDesc::unused_mark().
   // Without cast to int32_t movptr will destroy r10 which is typically obj.
   movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
+  // The following code to verify that the object field still refers
+  // to the object we are trying to lock is not needed with safepoint
+  // based deflation. It is also not needed with async deflation when
+  // the DEFLATER_MARKER is allowed to linger in the owner field in an
+  // async deflated ObjectMonitor until replaced by the next owner value.
+  // We keep this code as a sanity check against bugs in other parts
+  // of the async deflation mechanism.
+  //
+  // If we weren't able to swing _owner from NULL to r15_thread
+  // then take the slow path.
+  jccb(Assembler::notZero, DONE_LABEL);
+  // r15_thread is now the owner so verify that the ObjectMonitor
+  // still refers to the same object.
+  cmpptr(objReg, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(object)));
+  // The ObjectMonitor still refers to the same object so
+  // r15_thread's ownership is valid.
+  jccb(Assembler::zero, DONE_LABEL);
+  // The ObjectMonitor does not refer to the same object so
+  // drop ownership.
+  movptr(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
   // Intentional fall-through into DONE_LABEL ...
-  // Propagate ICC.ZF from CAS above into DONE_LABEL.
+  // Propagate ICC.ZF from cmpptr() above into DONE_LABEL.
 #endif // _LP64
 #if INCLUDE_RTM_OPT
   } // use_rtm()
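
The fast-lock sequence above is easier to audit in C++ form. The sketch below mirrors its control flow with std::atomic standing in for the JIT-emitted CAS; MonitorSketch, try_fast_enter, and their fields are names invented here for illustration (self plays the role of r15_thread, m the monitor addressed via scrReg), not HotSpot code:

    #include <atomic>

    // Hypothetical, simplified monitor for illustration -- not ObjectMonitor.
    struct MonitorSketch {
      std::atomic<void*> owner{nullptr};   // plays the role of _owner
      std::atomic<void*> object{nullptr};  // object this monitor covers
    };

    // CAS owner NULL -> self, then verify the monitor still refers to the
    // object we are trying to lock (the cmpptr()/jccb() pair above).
    bool try_fast_enter(MonitorSketch* m, void* self, void* obj) {
      void* expected = nullptr;
      if (!m->owner.compare_exchange_strong(expected, self)) {
        return false;                  // lost the CAS: take the slow path
      }
      if (m->object.load() == obj) {
        return true;                   // ownership is valid
      }
      m->owner.store(nullptr);         // monitor was reused: drop ownership
      return false;
    }

As the new comment notes, this re-check becomes redundant once DEFLATER_MARKER is allowed to linger in a deflated monitor's owner field; it survives as a sanity check on the rest of the async deflation machinery.
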
--- old/src/hotspot/share/runtime/objectMonitor.cpp 2019-07-11 14:36:29.000000000 -0400
+++ new/src/hotspot/share/runtime/objectMonitor.cpp 2019-07-11 14:36:28.000000000 -0400
@@ -239,7 +239,7 @@
 // Enter support

 void ObjectMonitor::enter(TRAPS) {
-  ADIM_guarantee(_ref_count > 0, "must be positive: ref_count=%d", _ref_count);
+  ADIM_guarantee(ref_count() > 0, "must be positive: ref_count=%d", ref_count());

   // The following code is ordered to check the most common cases first
   // and to reduce RTS->RTO cache line upgrades on SPARC and IA32 processors.
@@ -266,6 +266,16 @@
     return;
   }

+  if (AsyncDeflateIdleMonitors &&
+      Atomic::cmpxchg(Self, &_owner, DEFLATER_MARKER) == DEFLATER_MARKER) {
+    // The deflation protocol finished the first part (setting owner),
+    // but it failed the second part (making ref_count negative) and
+    // bailed. Or the ObjectMonitor was async deflated and reused.
+    // Acquired the monitor.
+    assert(_recursions == 0, "invariant");
+    return;
+  }
+
   // We've encountered genuine contention.
   assert(Self->_Stalled == 0, "invariant");
   Self->_Stalled = intptr_t(this);
@@ -437,8 +447,8 @@

   markOop dmw = header();
   if (dmw == NULL) {
-    // ObjectMonitor's header/dmw has been cleared by the deflating
-    // thread so the object's header has already been restored.
+    // ObjectMonitor's header/dmw has been cleared so the object's
+    // header has already been restored.
     return;
   }

@@ -458,8 +468,8 @@
   // other trying to update the _header field.
   dmw = (markOop) Atomic::cmpxchg(marked_dmw, &_header, dmw);
   if (dmw == NULL) {
-    // ObjectMonitor's header/dmw has been cleared by the deflating
-    // thread so the object's header has already been restored.
+    // ObjectMonitor's header/dmw has been cleared so the object's
+    // header has already been restored.
     return;
   }
   // The _header field is now marked. The winner's 'dmw' variable
@@ -499,16 +509,23 @@
 // Convert the fields used by is_busy() to a string that can be
 // used for diagnostic output.
 const char* ObjectMonitor::is_busy_to_string(stringStream* ss) {
-  ss->print("is_busy: contentions=%d, waiters=%d, owner=" INTPTR_FORMAT
-            ", cxq=" INTPTR_FORMAT ", EntryList=" INTPTR_FORMAT, _contentions,
-            _waiters, p2i(_owner), p2i(_cxq), p2i(_EntryList));
+  ss->print("is_busy: contentions=%d, waiters=%d, ", _contentions, _waiters);
+  if (!AsyncDeflateIdleMonitors) {
+    ss->print("owner=" INTPTR_FORMAT, p2i(_owner));
+  } else if (_owner != DEFLATER_MARKER) {
+    ss->print("owner=" INTPTR_FORMAT, p2i(_owner));
+  } else {
+    ss->print("owner=" INTPTR_FORMAT, NULL);
+  }
+  ss->print(", cxq=" INTPTR_FORMAT ", EntryList=" INTPTR_FORMAT, p2i(_cxq),
+            p2i(_EntryList));
   return ss->base();
 }

 #define MAX_RECHECK_INTERVAL 1000

 void ObjectMonitor::EnterI(TRAPS) {
-  ADIM_guarantee(_ref_count > 0, "must be positive: ref_count=%d", _ref_count);
+  ADIM_guarantee(ref_count() > 0, "must be positive: ref_count=%d", ref_count());

   Thread * const Self = THREAD;
   assert(Self->is_Java_thread(), "invariant");
@@ -522,15 +539,15 @@
     return;
   }

-  if (_owner == DEFLATER_MARKER) {
-    // The deflation protocol finished the first part (setting owner), but
-    // it failed the second part (making ref_count negative) and bailed.
-    if (Atomic::cmpxchg(Self, &_owner, DEFLATER_MARKER) == DEFLATER_MARKER) {
-      // Acquired the monitor.
-      assert(_succ != Self, "invariant");
-      assert(_Responsible != Self, "invariant");
-      return;
-    }
+  if (AsyncDeflateIdleMonitors &&
+      Atomic::cmpxchg(Self, &_owner, DEFLATER_MARKER) == DEFLATER_MARKER) {
+    // The deflation protocol finished the first part (setting owner),
+    // but it failed the second part (making ref_count negative) and
+    // bailed. Or the ObjectMonitor was async deflated and reused.
+    // Acquired the monitor.
+    assert(_succ != Self, "invariant");
+    assert(_Responsible != Self, "invariant");
+    return;
   }

   assert(InitDone, "Unexpectedly not initialized");
@@ -649,13 +666,13 @@

     if (TryLock(Self) > 0) break;

-    if (_owner == DEFLATER_MARKER) {
-      // The deflation protocol finished the first part (setting owner), but
-      // it failed the second part (making ref_count negative) and bailed.
-      if (Atomic::cmpxchg(Self, &_owner, DEFLATER_MARKER) == DEFLATER_MARKER) {
-        // Acquired the monitor.
-        break;
-      }
+    if (AsyncDeflateIdleMonitors &&
+        Atomic::cmpxchg(Self, &_owner, DEFLATER_MARKER) == DEFLATER_MARKER) {
+      // The deflation protocol finished the first part (setting owner),
+      // but it failed the second part (making ref_count negative) and
+      // bailed. Or the ObjectMonitor was async deflated and reused.
+      // Acquired the monitor.
+      break;
     }

     // The lock is still contested.
@@ -762,7 +779,7 @@
 // In the future we should reconcile EnterI() and ReenterI().

 void ObjectMonitor::ReenterI(Thread * Self, ObjectWaiter * SelfNode) {
-  ADIM_guarantee(_ref_count > 0, "must be positive: ref_count=%d", _ref_count);
+  ADIM_guarantee(ref_count() > 0, "must be positive: ref_count=%d", ref_count());

   assert(Self != NULL, "invariant");
   assert(SelfNode != NULL, "invariant");
@@ -781,13 +798,13 @@
     if (TryLock(Self) > 0) break;
     if (TrySpin(Self) > 0) break;

-    if (_owner == DEFLATER_MARKER) {
-      // The deflation protocol finished the first part (setting owner), but
-      // it failed the second part (making ref_count negative) and bailed.
-      if (Atomic::cmpxchg(Self, &_owner, DEFLATER_MARKER) == DEFLATER_MARKER) {
-        // Acquired the monitor.
-        break;
-      }
+    if (AsyncDeflateIdleMonitors &&
+        Atomic::cmpxchg(Self, &_owner, DEFLATER_MARKER) == DEFLATER_MARKER) {
+      // The deflation protocol finished the first part (setting owner),
+      // but it failed the second part (making ref_count negative) and
+      // bailed. Or the ObjectMonitor was async deflated and reused.
+      // Acquired the monitor.
+      break;
     }

 // State transition wrappers around park() ...

@@ -2083,7 +2100,7 @@
     // Race here if monitor is not owned! The above ref_count bump
     // will cause subsequent async deflation to skip it. However,
     // previous or concurrent async deflation is a race.
-    if (om_ptr->_owner == DEFLATER_MARKER && om_ptr->ref_count() <= 0) {
+    if (om_ptr->owner_is_DEFLATER_MARKER() && om_ptr->ref_count() <= 0) {
       // Async deflation is in progress and our ref_count increment
       // above lost the race to async deflation. Attempt to restore
       // the header/dmw to the object's header so that we only retry
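
The guarded cmpxchg introduced above now appears in four places (enter(), EnterI(), ReenterI(), and the synchronizer fast path). A minimal stand-alone model of the pattern, with std::atomic standing in for Atomic::cmpxchg and all names other than DEFLATER_MARKER invented for illustration:

    #include <atomic>

    // Sentinel with the same role as HotSpot's DEFLATER_MARKER.
    static void* const DEFLATER_MARKER = reinterpret_cast<void*>(-1);

    struct MonitorSketch {
      std::atomic<void*> owner{nullptr};
    };

    // If the deflater published DEFLATER_MARKER but then bailed, or the
    // monitor was async deflated and reused (the marker lingers), a
    // single CAS claims ownership outright.
    bool try_take_from_deflater(MonitorSketch* m, void* self,
                                bool async_deflation_enabled) {
      if (!async_deflation_enabled) {
        return false;  // safepoint deflation never leaves the marker behind
      }
      void* expected = DEFLATER_MARKER;
      return m->owner.compare_exchange_strong(expected, self);
    }

Note the shape change from the old code: the preliminary _owner == DEFLATER_MARKER load is gone and the cmpxchg is attempted directly, guarded only by AsyncDeflateIdleMonitors, which also covers the deflated-and-reused case.
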
--- old/src/hotspot/share/runtime/objectMonitor.hpp 2019-07-11 14:36:31.000000000 -0400
+++ new/src/hotspot/share/runtime/objectMonitor.hpp 2019-07-11 14:36:31.000000000 -0400
@@ -249,13 +249,6 @@
   // _ref_count is for indicating that the ObjectMonitor* is in
   // use which is orthogonal to whether the ObjectMonitor itself
   // is in use for a locking operation.
-    return _contentions|_waiters|intptr_t(_owner)|intptr_t(_cxq)|intptr_t(_EntryList);
-  }
-  const char* is_busy_to_string(stringStream* ss);
-
-  // Version of is_busy() that accounts for the special value in
-  // _owner when AsyncDeflateIdleMonitors is enabled.
-  intptr_t is_busy_async() const {
     intptr_t ret_code = _contentions | _waiters | intptr_t(_cxq) | intptr_t(_EntryList);
     if (!AsyncDeflateIdleMonitors) {
       ret_code |= intptr_t(_owner);
@@ -266,10 +259,13 @@
     }
     return ret_code;
   }
+  const char* is_busy_to_string(stringStream* ss);

   intptr_t  is_entered(Thread* current) const;

   void*     owner() const;  // Returns NULL if DEFLATER_MARKER is observed.
+  // Returns true if owner field == DEFLATER_MARKER and false otherwise.
+  bool      owner_is_DEFLATER_MARKER();
   void      set_owner(void* owner);

   jint      waiters() const;
--- old/src/hotspot/share/runtime/objectMonitor.inline.hpp 2019-07-11 14:36:33.000000000 -0400
+++ new/src/hotspot/share/runtime/objectMonitor.inline.hpp 2019-07-11 14:36:33.000000000 -0400
@@ -55,10 +55,17 @@
   return owner != DEFLATER_MARKER ? owner : NULL;
 }

+// Returns true if owner field == DEFLATER_MARKER and false otherwise.
+// This accessor is called when we really need to know if the owner
+// field == DEFLATER_MARKER and any non-NULL value won't do the trick.
+inline bool ObjectMonitor::owner_is_DEFLATER_MARKER() {
+  return OrderAccess::load_acquire(&_owner) == DEFLATER_MARKER;
+}
+
 inline void ObjectMonitor::clear() {
   assert(_header != NULL, "must be non-NULL");
   assert(_owner == NULL, "must be NULL: owner=" INTPTR_FORMAT, p2i(_owner));
-  assert(_ref_count == 0, "must be 0: ref_count=%d", _ref_count);
+  assert(ref_count() == 0, "must be 0: ref_count=%d", ref_count());

   _header = NULL;

@@ -81,7 +88,7 @@
     guarantee(_owner == NULL || _owner == DEFLATER_MARKER,
               "must be NULL or DEFLATER_MARKER: owner=" INTPTR_FORMAT,
               p2i(_owner));
-    guarantee(_ref_count <= 0, "must be <= 0: ref_count=%d", _ref_count);
+    guarantee(ref_count() <= 0, "must be <= 0: ref_count=%d", ref_count());
   }
   assert(_contentions == 0, "must be 0: contentions=%d", _contentions);
   assert(_waiters == 0, "must be 0: waiters=%d", _waiters);
@@ -155,8 +162,8 @@
   // counter is volatile.
   Atomic::dec(&_ref_count);
   // Can be negative as part of async deflation protocol.
-  guarantee(AsyncDeflateIdleMonitors || _ref_count >= 0,
-            "sanity check: ref_count=%d", _ref_count);
+  ADIM_guarantee(AsyncDeflateIdleMonitors || ref_count() >= 0,
+                 "sanity check: ref_count=%d", ref_count());
 }

 inline void ObjectMonitor::inc_ref_count() {
@@ -165,8 +172,8 @@
   // async deflation protocol.
   Atomic::inc(&_ref_count);
   // Can be negative as part of async deflation protocol.
-  guarantee(AsyncDeflateIdleMonitors || _ref_count > 0,
-            "sanity check: ref_count=%d", _ref_count);
+  ADIM_guarantee(AsyncDeflateIdleMonitors || ref_count() > 0,
+                 "sanity check: ref_count=%d", ref_count());
 }

 inline jint ObjectMonitor::ref_count() const {
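
The relaxed checks above exist because the async deflation protocol deliberately drives _ref_count negative. That code is not part of this patch; based on its comments ("making ref_count negative", "Add back max_jint to restore the ref_count field to its proper value"), the protocol can be modeled roughly as below. The struct and both method names are assumptions made for illustration:

    #include <atomic>
    #include <climits>

    struct RefCountSketch {
      std::atomic<int> ref_count{0};  // jint in HotSpot; INT_MAX ~ max_jint

      // Deflater's second step (assumed shape): succeed only when no thread
      // holds an ObjectMonitor*, leaving the count so negative that racing
      // increments still observe a value < 0.
      bool try_mark_deflating() {
        int expected = 0;
        return ref_count.compare_exchange_strong(expected, -INT_MAX);
      }

      // omAlloc()'s reuse path: add back max_jint to restore the count.
      void restore_after_deflation() {
        if (ref_count.load() < 0) {
          ref_count.fetch_add(INT_MAX);
        }
      }
    };
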
--- old/src/hotspot/share/runtime/serviceThread.cpp 2019-07-11 14:36:35.000000000 -0400
+++ new/src/hotspot/share/runtime/serviceThread.cpp 2019-07-11 14:36:35.000000000 -0400
@@ -202,7 +202,12 @@
       if (count > 0) {
         log_debug(monitorinflation)("requesting async deflation of idle monitors for %d thread(s).", count);
       }
-      ObjectSynchronizer::set_is_async_deflation_requested(false);  // async deflation has been requested
+      // The ServiceThread's async deflation request has been processed.
+      ObjectSynchronizer::set_is_async_deflation_requested(false);
+
+      // The global in-use list was handled above, but the request won't
+      // be complete until the JavaThreads have handled their in-use
+      // lists. This is the nature of an async deflation request.
     }
   }
 }
--- old/src/hotspot/share/runtime/synchronizer.cpp 2019-07-11 14:36:37.000000000 -0400
+++ new/src/hotspot/share/runtime/synchronizer.cpp 2019-07-11 14:36:37.000000000 -0400
@@ -255,6 +255,16 @@
         assert(m->_recursions == 0, "invariant");
         return true;
       }
+
+      if (AsyncDeflateIdleMonitors &&
+          Atomic::cmpxchg(Self, &m->_owner, DEFLATER_MARKER) == DEFLATER_MARKER) {
+        // The deflation protocol finished the first part (setting owner),
+        // but it failed the second part (making ref_count negative) and
+        // bailed. Or the ObjectMonitor was async deflated and reused.
+        // Acquired the monitor.
+        assert(m->_recursions == 0, "invariant");
+        return true;
+      }
     }
     break;
   }
@@ -1021,6 +1031,13 @@
   return false;
 }

+// Returns true if MonitorBound is set (> 0) and if the specified
+// cnt is > MonitorBound. Otherwise returns false.
+static bool is_MonitorBound_exceeded(const int cnt) {
+  const int mx = MonitorBound;
+  return mx > 0 && cnt > mx;
+}
+
 bool ObjectSynchronizer::is_async_deflation_needed() {
   if (!AsyncDeflateIdleMonitors) {
     return false;
@@ -1039,6 +1056,10 @@
     _last_async_deflation_time_ns = os::javaTimeNanos();
     return true;
   }
+  if (is_MonitorBound_exceeded(gMonitorPopulation - gMonitorFreeCount)) {
+    // Not enough ObjectMonitors on the global free list.
+    return true;
+  }
   return false;
 }
@@ -1111,6 +1132,9 @@

 // Constraining monitor pool growth via MonitorBound ...
 //
+// If MonitorBound is not set (<= 0), MonitorBound checks are disabled.
+//
+// When safepoint deflation is being used (!AsyncDeflateIdleMonitors):
 // The monitor pool is grow-only. We scavenge at STW safepoint-time, but
 // the rate of scavenging is driven primarily by GC. As such, we can find
 // an inordinate number of monitors in circulation.
@@ -1124,8 +1148,26 @@
 // See also: GuaranteedSafepointInterval
 //
 // The current implementation uses asynchronous VM operations.
+//
+// When safepoint deflation is being used and MonitorBound is set, the
+// boundary applies to (gMonitorPopulation - gMonitorFreeCount), i.e.,
+// if there are not enough ObjectMonitors on the global free list, then
+// a safepoint deflation is induced. Picking a good MonitorBound value
+// is non-trivial.
+//
+// When async deflation is being used:
+// The monitor pool is still grow-only. Async deflation is requested
+// by a safepoint's cleanup phase or by the ServiceThread at periodic
+// intervals when is_async_deflation_needed() returns true. In
+// addition to other policies that are checked, if there are not
+// enough ObjectMonitors on the global free list, then
+// is_async_deflation_needed() will return true. The ServiceThread
+// calls deflate_global_idle_monitors_using_JT() and also sets the
+// per-thread omShouldDeflateIdleMonitors flag as needed.

 static void InduceScavenge(Thread * Self, const char * Whence) {
+  assert(!AsyncDeflateIdleMonitors, "is not used by async deflation");
+
   // Induce STW safepoint to trim monitors
   // Ultimately, this results in a call to deflate_idle_monitors() in the near future.
   // More precisely, trigger an asynchronous STW safepoint as the number
@@ -1157,9 +1199,10 @@
     // Deflate any per-thread idle monitors for this JavaThread if
     // this is not an internal inflation; internal inflations can
     // occur in places where it is not safe to pause for a safepoint.
-    // Clean up your own mess. (Gibbs Rule 45) Otherwise, skip this
+    // Clean up your own mess (Gibbs Rule 45). Otherwise, skip this
     // deflation. deflate_global_idle_monitors_using_JT() is called
-    // by the ServiceThread.
+    // by the ServiceThread. Per-thread async deflation is triggered
+    // by the ServiceThread via omShouldDeflateIdleMonitors.
     debug_only(jt->check_for_valid_safepoint_state(false);)
     ObjectSynchronizer::deflate_per_thread_idle_monitors_using_JT();
   }
@@ -1203,10 +1246,12 @@
       gFreeList = take->FreeNext;
       guarantee(take->object() == NULL, "invariant");
       if (AsyncDeflateIdleMonitors) {
-        // Clear any values we allowed to linger during async deflation.
+        // We allowed 3 field values to linger during async deflation.
+        // We clear header and restore ref_count here, but we leave
+        // owner == DEFLATER_MARKER so the simple C2 ObjectMonitor
+        // enter optimization can no longer race with async deflation
+        // and reuse.
         take->_header = NULL;
-        take->set_owner(NULL);
-
         if (take->ref_count() < 0) {
           // Add back max_jint to restore the ref_count field to its
           // proper value.
@@ -1224,8 +1269,9 @@
     Self->omFreeProvision += 1 + (Self->omFreeProvision/2);
     if (Self->omFreeProvision > MAXPRIVATE) Self->omFreeProvision = MAXPRIVATE;

-    const int mx = MonitorBound;
-    if (mx > 0 && (gMonitorPopulation-gMonitorFreeCount) > mx) {
+    if (!AsyncDeflateIdleMonitors &&
+        is_MonitorBound_exceeded(gMonitorPopulation - gMonitorFreeCount)) {
+      // Not enough ObjectMonitors on the global free list.
       // We can't safely induce a STW safepoint from omAlloc() as our thread
       // state may not be appropriate for such activities and callers may hold
       // naked oops, so instead we defer the action.
@@ -1673,6 +1719,9 @@
   // prepare m for installation - set monitor to initial state
   m->Recycle();
   m->set_header(mark);
+  // If we leave _owner == DEFLATER_MARKER here, then the simple C2
+  // ObjectMonitor enter optimization can no longer race with async
+  // deflation and reuse.
   m->set_object(object);
   m->_Responsible  = NULL;
   m->_SpinDuration = ObjectMonitor::Knob_SpinLimit;       // consider: keep metastats by type/class
@@ -1715,6 +1764,7 @@

 // We maintain a list of in-use monitors for each thread.
 //
+// For safepoint based deflation:
 // deflate_thread_local_monitors() scans a single thread's in-use list, while
 // deflate_idle_monitors() scans only a global list of in-use monitors which
 // is populated only as a thread dies (see omFlush()).
@@ -1733,6 +1783,11 @@
 // typically drives the scavenge rate. Large heaps can mean infrequent GC,
 // which in turn can mean large(r) numbers of ObjectMonitors in circulation.
 // This is an unfortunate aspect of this design.
+//
+// For async deflation:
+// If a special deflation request is made, then the safepoint based
+// deflation mechanism is used. Otherwise, an async deflation request
+// is registered with the ServiceThread and it is notified.

 void ObjectSynchronizer::do_safepoint_work(DeflateMonitorCounters* _counters) {
   assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
@@ -1771,7 +1826,9 @@
   const markOop dmw = mid->header();
   guarantee(dmw->is_neutral(), "invariant: header=" INTPTR_FORMAT, p2i(dmw));

-  if (mid->is_busy()) {
+  if (mid->is_busy() || mid->ref_count() != 0) {
+    // Easy checks are first - the ObjectMonitor is busy or ObjectMonitor*
+    // is in use so no deflation.
     deflated = false;
   } else {
     // Deflate the monitor if it is no longer being used
@@ -1787,6 +1844,12 @@

     // Restore the header back to obj
     obj->release_set_mark(dmw);
+    if (AsyncDeflateIdleMonitors) {
+      // clear() expects the owner field to be NULL and we won't race
+      // with the simple C2 ObjectMonitor enter optimization since
+      // we're at a safepoint.
+      mid->set_owner(NULL);
+    }
     mid->clear();

     assert(mid->object() == NULL, "invariant: object=" INTPTR_FORMAT,
@@ -1857,7 +1920,7 @@
   // ObjectMonitor* using threads to retry. This is the second
   // part of the async deflation dance.

-  if (mid->_owner == DEFLATER_MARKER) {
+  if (mid->owner_is_DEFLATER_MARKER()) {
     // If owner is still DEFLATER_MARKER, then we have successfully
     // signaled any contending threads to retry. If it is not, then we
     // have lost the race to an entering thread and the ObjectMonitor
@@ -2520,8 +2583,7 @@
 void ObjectSynchronizer::chk_free_entry(JavaThread * jt, ObjectMonitor * n,
                                         outputStream * out, int *error_cnt_p) {
   stringStream ss;
-  if ((!AsyncDeflateIdleMonitors && n->is_busy()) ||
-      (AsyncDeflateIdleMonitors && n->is_busy_async())) {
+  if (n->is_busy()) {
     if (jt != NULL) {
       out->print_cr("ERROR: jt=" INTPTR_FORMAT ", monitor=" INTPTR_FORMAT
                     ": free per-thread monitor must not be busy: %s", p2i(jt),
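
The new is_MonitorBound_exceeded() helper is small enough to exercise on its own. The snippet below reproduces it verbatim, with a local stand-in for the MonitorBound VM flag and a hypothetical wrapper showing the in-use count passed at both call sites:

    static int MonitorBound = 0;  // stand-in for the VM flag; <= 0 disables the check

    // Returns true if MonitorBound is set (> 0) and cnt is > MonitorBound.
    static bool is_MonitorBound_exceeded(const int cnt) {
      const int mx = MonitorBound;
      return mx > 0 && cnt > mx;
    }

    // Both call sites pass the number of in-use monitors: the global
    // population minus the global free count.
    static bool free_list_running_low(int population, int free_count) {
      return is_MonitorBound_exceeded(population - free_count);
    }

Under safepoint deflation an exceeded bound defers a scavenge request from omAlloc(); under async deflation the same condition instead makes is_async_deflation_needed() return true.
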
--- old/src/hotspot/share/runtime/vmOperations.cpp 2019-07-11 14:36:40.000000000 -0400
+++ new/src/hotspot/share/runtime/vmOperations.cpp 2019-07-11 14:36:39.000000000 -0400
@@ -473,11 +473,11 @@
 }

 bool VM_Exit::doit_prologue() {
-  if (AsyncDeflateIdleMonitors) {
+  if (AsyncDeflateIdleMonitors && log_is_enabled(Info, monitorinflation)) {
     // AsyncDeflateIdleMonitors does a special deflation at the VM_Exit
     // safepoint in order to reduce the in-use monitor population that
-    // is reported ObjectSynchronizer::log_in_use_monitor_details() at
-    // VM exit.
+    // is reported by ObjectSynchronizer::log_in_use_monitor_details()
+    // at VM exit.
     ObjectSynchronizer::set_is_special_deflation_requested(true);
   }
   return true;
 }
--- old/src/hotspot/share/runtime/vmThread.cpp 2019-07-11 14:36:42.000000000 -0400
+++ new/src/hotspot/share/runtime/vmThread.cpp 2019-07-11 14:36:41.000000000 -0400
@@ -311,11 +311,11 @@
     assert(should_terminate(), "termination flag must be set");
   }

-  if (AsyncDeflateIdleMonitors) {
+  if (AsyncDeflateIdleMonitors && log_is_enabled(Info, monitorinflation)) {
     // AsyncDeflateIdleMonitors does a special deflation at the final
     // safepoint in order to reduce the in-use monitor population that
-    // is reported ObjectSynchronizer::log_in_use_monitor_details() at
-    // VM exit.
+    // is reported by ObjectSynchronizer::log_in_use_monitor_details()
+    // at VM exit.
     ObjectSynchronizer::set_is_special_deflation_requested(true);
   }
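
The two shutdown paths above add the same guard; condensed, the change in both is:

    // Request the exit-time special deflation only when the Info-level
    // monitorinflation report that motivates it will actually be logged.
    if (AsyncDeflateIdleMonitors && log_is_enabled(Info, monitorinflation)) {
      ObjectSynchronizer::set_is_special_deflation_requested(true);
    }

The special deflation pass exists only to shrink the in-use population reported by ObjectSynchronizer::log_in_use_monitor_details() at VM exit, so there is no reason to pay for it when that logging is disabled.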