--- old/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp 2019-08-28 15:05:46.482953676 -0400 +++ new/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp 2019-08-28 15:05:46.310953682 -0400 @@ -40,12 +40,14 @@ #if defined(TIERED) // tiered, 64-bit, large machine #define DEFAULT_CACHE_LINE_SIZE 128 + #define OM_CACHE_LINE_SIZE 64 #elif defined(COMPILER1) // pure C1, 32-bit, small machine #define DEFAULT_CACHE_LINE_SIZE 16 #elif defined(COMPILER2) // pure C2, 64-bit, large machine #define DEFAULT_CACHE_LINE_SIZE 128 + #define OM_CACHE_LINE_SIZE 64 #endif #if defined(SOLARIS) --- old/src/hotspot/cpu/x86/globalDefinitions_x86.hpp 2019-08-28 15:05:47.110953654 -0400 +++ new/src/hotspot/cpu/x86/globalDefinitions_x86.hpp 2019-08-28 15:05:46.938953660 -0400 @@ -38,6 +38,7 @@ #ifdef _LP64 // tiered, 64-bit, large machine #define DEFAULT_CACHE_LINE_SIZE 128 + #define OM_CACHE_LINE_SIZE 64 #else // tiered, 32-bit, medium machine #define DEFAULT_CACHE_LINE_SIZE 64 @@ -50,6 +51,7 @@ #ifdef _LP64 // pure C2, 64-bit, large machine #define DEFAULT_CACHE_LINE_SIZE 128 + #define OM_CACHE_LINE_SIZE 64 #else // pure C2, 32-bit, medium machine #define DEFAULT_CACHE_LINE_SIZE 64 --- old/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2019-08-28 15:05:47.710953633 -0400 +++ new/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2019-08-28 15:05:47.546953639 -0400 @@ -1839,8 +1839,28 @@ // Unconditionally set box->_displaced_header = markWord::unused_mark(). // Without cast to int32_t movptr will destroy r10 which is typically obj. movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value())); + // The following code to verify that the object field still refers + // to the object we are trying to lock is not needed with safepoint + // based deflation. It is also not needed with async deflation when + // the DEFLATER_MARKER is allowed to linger in the owner field in an + // async deflated ObjectMonitor until replaced by the next owner value. 
+ // We keep this code as a sanity check against bugs in other parts + // of the async deflation mechanism. + // + // If we weren't able to swing _owner from NULL to r15_thread + // then take the slow path. + jccb(Assembler::notZero, DONE_LABEL); + // r15_thread is now the owner so verify that the ObjectMonitor + // still refers to the same object. + cmpptr(objReg, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(object))); + // The ObjectMonitor still refers to the same object so + // r15_thread's ownership is valid. + jccb(Assembler::zero, DONE_LABEL); + // The ObjectMonitor does not refer to the same object so + // drop ownership. + movptr(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); // Intentional fall-through into DONE_LABEL ... - // Propagate ICC.ZF from CAS above into DONE_LABEL. + // Propagate ICC.ZF from cmpptr() above into DONE_LABEL. #endif // _LP64 #if INCLUDE_RTM_OPT } // use_rtm() --- old/src/hotspot/share/oops/markWord.hpp 2019-08-28 15:05:48.414953609 -0400 +++ new/src/hotspot/share/oops/markWord.hpp 2019-08-28 15:05:48.242953615 -0400 @@ -234,6 +234,10 @@ bool is_unlocked() const { return (mask_bits(value(), biased_lock_mask_in_place) == unlocked_value); } + // ObjectMonitor::install_displaced_markword_in_object() uses + // is_marked() on ObjectMonitor::_header as part of the restoration + // protocol for an object's header. In this usage, the mark bits are + // only ever set (and cleared) on the ObjectMonitor::_header field. 
bool is_marked() const { return (mask_bits(value(), lock_mask_in_place) == marked_value); } --- old/src/hotspot/share/prims/jvm.cpp 2019-08-28 15:05:49.046953587 -0400 +++ new/src/hotspot/share/prims/jvm.cpp 2019-08-28 15:05:48.874953593 -0400 @@ -73,6 +73,7 @@ #include "runtime/os.inline.hpp" #include "runtime/perfData.hpp" #include "runtime/reflection.hpp" +#include "runtime/synchronizer.hpp" #include "runtime/thread.inline.hpp" #include "runtime/threadSMR.hpp" #include "runtime/vframe.inline.hpp" @@ -484,6 +485,11 @@ JVM_ENTRY_NO_ENV(void, JVM_GC(void)) JVMWrapper("JVM_GC"); if (!DisableExplicitGC) { + if (AsyncDeflateIdleMonitors) { + // AsyncDeflateIdleMonitors needs to know when System.gc() is + // called so any special deflation can be done at a safepoint. + ObjectSynchronizer::set_is_special_deflation_requested(true); + } Universe::heap()->collect(GCCause::_java_lang_system_gc); } JVM_END --- old/src/hotspot/share/prims/whitebox.cpp 2019-08-28 15:05:49.774953561 -0400 +++ new/src/hotspot/share/prims/whitebox.cpp 2019-08-28 15:05:49.590953568 -0400 @@ -70,6 +70,7 @@ #include "runtime/jniHandles.inline.hpp" #include "runtime/os.hpp" #include "runtime/sweeper.hpp" +#include "runtime/synchronizer.hpp" #include "runtime/thread.hpp" #include "runtime/threadSMR.hpp" #include "runtime/vm_version.hpp" @@ -462,6 +463,12 @@ WB_ENTRY(jboolean, WB_G1StartMarkCycle(JNIEnv* env, jobject o)) if (UseG1GC) { + if (AsyncDeflateIdleMonitors) { + // AsyncDeflateIdleMonitors needs to know when System.gc() or + // the equivalent is called so any special clean up can be done + // at a safepoint, e.g., TestHumongousClassLoader.java. 
+ ObjectSynchronizer::set_is_special_deflation_requested(true); + } G1CollectedHeap* g1h = G1CollectedHeap::heap(); if (!g1h->concurrent_mark()->cm_thread()->during_cycle()) { g1h->collect(GCCause::_wb_conc_mark); @@ -1381,6 +1388,12 @@ WB_END WB_ENTRY(void, WB_FullGC(JNIEnv* env, jobject o)) + if (AsyncDeflateIdleMonitors) { + // AsyncDeflateIdleMonitors needs to know when System.gc() or + // the equivalent is called so any special clean up can be done + // at a safepoint, e.g., TestHumongousClassLoader.java. + ObjectSynchronizer::set_is_special_deflation_requested(true); + } Universe::heap()->soft_ref_policy()->set_should_clear_all_soft_refs(true); Universe::heap()->collect(GCCause::_wb_full_gc); #if INCLUDE_G1GC --- old/src/hotspot/share/runtime/basicLock.cpp 2019-08-28 15:05:50.462953537 -0400 +++ new/src/hotspot/share/runtime/basicLock.cpp 2019-08-28 15:05:50.270953544 -0400 @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "runtime/basicLock.hpp" +#include "runtime/objectMonitor.hpp" #include "runtime/synchronizer.hpp" void BasicLock::print_on(outputStream* st) const { @@ -62,8 +63,11 @@ // is small (given the support for inflated fast-path locking in the fast_lock, etc) // we'll leave that optimization for another time. + // Disallow async deflation of the inflated monitor so the + // displaced header stays stable until we've copied it. + ObjectMonitorHandle omh; if (displaced_header().is_neutral()) { - ObjectSynchronizer::inflate_helper(obj); + ObjectSynchronizer::inflate_helper(&omh, obj); // WARNING: We can not put check here, because the inflation // will not update the displaced header. Once BasicLock is inflated, // no one should ever look at its content. 
--- old/src/hotspot/share/runtime/globals.hpp 2019-08-28 15:05:51.078953516 -0400 +++ new/src/hotspot/share/runtime/globals.hpp 2019-08-28 15:05:50.898953522 -0400 @@ -716,11 +716,24 @@ product(intx, MonitorBound, 0, "Bound Monitor population") \ range(0, max_jint) \ \ + diagnostic(bool, AsyncDeflateIdleMonitors, true, \ + "Deflate idle monitors using JavaThreads and the ServiceThread.") \ + \ + /* notice: the max range value here is max_jint, not max_intx */ \ + /* because of overflow issue */ \ + diagnostic(intx, AsyncDeflationInterval, 250, \ + "Async deflate idle monitors every so many milliseconds when " \ + "MonitorUsedDeflationThreshold is exceeded (0 is off).") \ + range(0, max_jint) \ + \ + diagnostic(bool, CheckMonitorLists, false, \ + "Sanity check the lock free ObjectMonitor lists.") \ + \ experimental(intx, MonitorUsedDeflationThreshold, 90, \ - "Percentage of used monitors before triggering cleanup " \ - "safepoint which deflates monitors (0 is off). " \ - "The check is performed on GuaranteedSafepointInterval.") \ - range(0, 100) \ + "Percentage of used monitors before triggering deflation (0 is " \ + "off). The check is performed on GuaranteedSafepointInterval " \ + "or AsyncDeflationInterval.") \ + range(0, 100) \ \ experimental(intx, hashCode, 5, \ "(Unstable) select hashCode generation algorithm") \ --- old/src/hotspot/share/runtime/objectMonitor.cpp 2019-08-28 15:05:51.770953492 -0400 +++ new/src/hotspot/share/runtime/objectMonitor.cpp 2019-08-28 15:05:51.574953498 -0400 @@ -239,6 +239,8 @@ // Enter support void ObjectMonitor::enter(TRAPS) { + ADIM_guarantee(ref_count() > 0, "must be positive: ref_count=%d", ref_count()); + // The following code is ordered to check the most common cases first // and to reduce RTS->RTO cache line upgrades on SPARC and IA32 processors. 
Thread * const Self = THREAD; @@ -264,6 +266,16 @@ return; } + if (AsyncDeflateIdleMonitors && + Atomic::cmpxchg(Self, &_owner, DEFLATER_MARKER) == DEFLATER_MARKER) { + // The deflation protocol finished the first part (setting owner), + // but it failed the second part (making ref_count negative) and + // bailed. Or the ObjectMonitor was async deflated and reused. + // Acquired the monitor. + assert(_recursions == 0, "invariant"); + return; + } + // We've encountered genuine contention. assert(Self->_Stalled == 0, "invariant"); Self->_Stalled = intptr_t(this); @@ -291,12 +303,14 @@ JavaThread * jt = (JavaThread *) Self; assert(!SafepointSynchronize::is_at_safepoint(), "invariant"); assert(jt->thread_state() != _thread_blocked, "invariant"); - assert(this->object() != NULL, "invariant"); - assert(_contentions >= 0, "invariant"); + assert(AsyncDeflateIdleMonitors || this->object() != NULL, "invariant"); + assert(_contentions >= 0, "must not be negative: contentions=%d", _contentions); - // Prevent deflation at STW-time. See deflate_idle_monitors() and is_busy(). - // Ensure the object-monitor relationship remains stable while there's contention. - Atomic::inc(&_contentions); + // Prevent deflation. See ObjectSynchronizer::deflate_monitor(), + // ObjectSynchronizer::deflate_monitor_using_JT() and is_busy(). + // Ensure the object <-> monitor relationship remains stable while + // there's contention. + Atomic::add(1, &_contentions); JFR_ONLY(JfrConditionalFlushWithStacktrace flush(jt);) EventJavaMonitorEnter event; @@ -358,7 +372,7 @@ } Atomic::dec(&_contentions); - assert(_contentions >= 0, "invariant"); + assert(_contentions >= 0, "must not be negative: contentions=%d", _contentions); Self->_Stalled = 0; // Must either set _recursions = 0 or ASSERT _recursions == 0. @@ -413,18 +427,106 @@ return -1; } +// Install the displaced mark word (dmw) of a deflating ObjectMonitor +// into the header of the object associated with the monitor. 
This +// idempotent method is called by a thread that is deflating a +// monitor and by other threads that have detected a race with the +// deflation process. +void ObjectMonitor::install_displaced_markword_in_object(const oop obj) { + // This function must only be called when (owner == DEFLATER_MARKER + // && ref_count <= 0), but we can't guarantee that here because + // those values could change when the ObjectMonitor gets moved from + // the global free list to a per-thread free list. + + guarantee(obj != NULL, "must be non-NULL"); + if (object() != obj) { + // ObjectMonitor's object ref no longer refers to the target object + // so the object's header has already been restored. + return; + } + + markWord dmw = header(); + if (dmw.value() == 0) { + // ObjectMonitor's header/dmw has been cleared so the object's + // header has already been restored. + return; + } + + // A non-NULL dmw has to be either neutral (not locked and not marked) + // or is already participating in this restoration protocol. + assert(dmw.is_neutral() || (dmw.is_marked() && dmw.hash() == 0), + "failed precondition: dmw=" INTPTR_FORMAT, dmw.value()); + + markWord marked_dmw = markWord::zero(); + if (!dmw.is_marked() && dmw.hash() == 0) { + // This dmw has not yet started the restoration protocol so we + // mark a copy of the dmw to begin the protocol. + // Note: A dmw with a hashcode does not take this code path. + marked_dmw = dmw.set_marked(); + + // All of the callers to this function can be racing with each + // other trying to update the _header field. + dmw = (markWord) Atomic::cmpxchg(marked_dmw, &_header, dmw); + if (dmw.value() == 0) { + // ObjectMonitor's header/dmw has been cleared so the object's + // header has already been restored. + return; + } + // The _header field is now marked. The winner's 'dmw' variable + // contains the original, unmarked header/dmw value and any + // losers have a marked header/dmw value that will be cleaned + // up below. 
+ } + + if (dmw.is_marked()) { + // Clear the mark from the header/dmw copy in preparation for + // possible restoration from this thread. + assert(dmw.hash() == 0, "hashcode must be 0: dmw=" INTPTR_FORMAT, + dmw.value()); + dmw = dmw.set_unmarked(); + } + assert(dmw.is_neutral(), "must be neutral: dmw=" INTPTR_FORMAT, dmw.value()); + + // Install displaced mark word if the object's header still points + // to this ObjectMonitor. All racing callers to this function will + // reach this point, but only one can win. + obj->cas_set_mark(dmw, markWord::encode(this)); + + // Note: It does not matter which thread restored the header/dmw + // into the object's header. The thread deflating the monitor just + // wanted the object's header restored and it is. The threads that + // detected a race with the deflation process also wanted the + // object's header restored before they retry their operation and + // because it is restored they will only retry once. + + if (marked_dmw.value() != 0) { + // Clear _header to NULL if it is still marked_dmw so a racing + // install_displaced_markword_in_object() can bail out sooner. + Atomic::cmpxchg(markWord::zero(), &_header, marked_dmw); + } +} + // Convert the fields used by is_busy() to a string that can be // used for diagnostic output. 
const char* ObjectMonitor::is_busy_to_string(stringStream* ss) { - ss->print("is_busy: contentions=%d, waiters=%d, owner=" INTPTR_FORMAT - ", cxq=" INTPTR_FORMAT ", EntryList=" INTPTR_FORMAT, _contentions, - _waiters, p2i(_owner), p2i(_cxq), p2i(_EntryList)); + ss->print("is_busy: contentions=%d, waiters=%d, ", _contentions, _waiters); + if (!AsyncDeflateIdleMonitors) { + ss->print("owner=" INTPTR_FORMAT, p2i(_owner)); + } else if (_owner != DEFLATER_MARKER) { + ss->print("owner=" INTPTR_FORMAT, p2i(_owner)); + } else { + ss->print("owner=" INTPTR_FORMAT, NULL); + } + ss->print(", cxq=" INTPTR_FORMAT ", EntryList=" INTPTR_FORMAT, p2i(_cxq), + p2i(_EntryList)); return ss->base(); } #define MAX_RECHECK_INTERVAL 1000 void ObjectMonitor::EnterI(TRAPS) { + ADIM_guarantee(ref_count() > 0, "must be positive: ref_count=%d", ref_count()); + Thread * const Self = THREAD; assert(Self->is_Java_thread(), "invariant"); assert(((JavaThread *) Self)->thread_state() == _thread_blocked, "invariant"); @@ -437,6 +539,17 @@ return; } + if (AsyncDeflateIdleMonitors && + Atomic::cmpxchg(Self, &_owner, DEFLATER_MARKER) == DEFLATER_MARKER) { + // The deflation protocol finished the first part (setting owner), + // but it failed the second part (making ref_count negative) and + // bailed. Or the ObjectMonitor was async deflated and reused. + // Acquired the monitor. + assert(_succ != Self, "invariant"); + assert(_Responsible != Self, "invariant"); + return; + } + assert(InitDone, "Unexpectedly not initialized"); // We try one round of spinning *before* enqueueing Self. @@ -553,6 +666,15 @@ if (TryLock(Self) > 0) break; + if (AsyncDeflateIdleMonitors && + Atomic::cmpxchg(Self, &_owner, DEFLATER_MARKER) == DEFLATER_MARKER) { + // The deflation protocol finished the first part (setting owner), + // but it failed the second part (making ref_count negative) and + // bailed. Or the ObjectMonitor was async deflated and reused. + // Acquired the monitor. 
+ break; + } + // The lock is still contested. // Keep a tally of the # of futile wakeups. // Note that the counter is not protected by a lock or updated by atomics. @@ -657,6 +779,8 @@ // In the future we should reconcile EnterI() and ReenterI(). void ObjectMonitor::ReenterI(Thread * Self, ObjectWaiter * SelfNode) { + ADIM_guarantee(ref_count() > 0, "must be positive: ref_count=%d", ref_count()); + assert(Self != NULL, "invariant"); assert(SelfNode != NULL, "invariant"); assert(SelfNode->_thread == Self, "invariant"); @@ -674,6 +798,15 @@ if (TryLock(Self) > 0) break; if (TrySpin(Self) > 0) break; + if (AsyncDeflateIdleMonitors && + Atomic::cmpxchg(Self, &_owner, DEFLATER_MARKER) == DEFLATER_MARKER) { + // The deflation protocol finished the first part (setting owner), + // but it failed the second part (making ref_count negative) and + // bailed. Or the ObjectMonitor was async deflated and reused. + // Acquired the monitor. + break; + } + // State transition wrappers around park() ... // ReenterI() wisely defers state transitions until // it's clear we must park the thread. @@ -881,7 +1014,8 @@ // way we should encounter this situation is in the presence of // unbalanced JNI locking. TODO: CheckJNICalls. // See also: CR4414101 - assert(false, "Non-balanced monitor enter/exit! Likely JNI locking"); + assert(false, "Non-balanced monitor enter/exit! Likely JNI locking: " + "owner=" INTPTR_FORMAT, p2i(_owner)); return; } } @@ -1137,10 +1271,10 @@ JavaThread *jt = (JavaThread *)THREAD; guarantee(_owner != Self, "reenter already owner"); - enter(THREAD); // enter the monitor + enter(THREAD); + // Entered the monitor. guarantee(_recursions == 0, "reenter recursion"); _recursions = recursions; - return; } // Checks that the current THREAD owns this monitor and causes an @@ -1934,6 +2068,87 @@ DEBUG_ONLY(InitDone = true;) } +// For internal use by ObjectSynchronizer::monitors_iterate(). 
+ObjectMonitorHandle::ObjectMonitorHandle(ObjectMonitor * om_ptr) { + om_ptr->inc_ref_count(); + _om_ptr = om_ptr; +} + +ObjectMonitorHandle::~ObjectMonitorHandle() { + if (_om_ptr != NULL) { + _om_ptr->dec_ref_count(); + _om_ptr = NULL; + } +} + +// Save the ObjectMonitor* associated with the specified markWord and +// increment the ref_count. This function should only be called if +// the caller has verified mark.has_monitor() == true. The object +// parameter is needed to verify that ObjectMonitor* has not been +// deflated and reused for another object. +// +// This function returns true if the ObjectMonitor* has been safely +// saved. This function returns false if we have lost a race with +// async deflation; the caller should retry as appropriate. +// +bool ObjectMonitorHandle::save_om_ptr(oop object, markWord mark) { + guarantee(mark.has_monitor(), "sanity check: mark=" INTPTR_FORMAT, + mark.value()); + + ObjectMonitor * om_ptr = mark.monitor(); + om_ptr->inc_ref_count(); + + if (AsyncDeflateIdleMonitors) { + // Race here if monitor is not owned! The above ref_count bump + // will cause subsequent async deflation to skip it. However, + // previous or concurrent async deflation is a race. + if (om_ptr->owner_is_DEFLATER_MARKER() && om_ptr->ref_count() <= 0) { + // Async deflation is in progress and our ref_count increment + // above lost the race to async deflation. Attempt to restore + // the header/dmw to the object's header so that we only retry + // once if the deflater thread happens to be slow. + om_ptr->install_displaced_markword_in_object(object); + om_ptr->dec_ref_count(); + return false; + } + if (om_ptr->ref_count() <= 0) { + // Async deflation is in the process of bailing out, but has not + // yet restored the ref_count field so we return false to force + // a retry. We want a positive ref_count value for a true return. 
+ om_ptr->dec_ref_count(); + return false; + } + // The ObjectMonitor could have been deflated and reused for + // another object before we bumped the ref_count so make sure + // our object still refers to this ObjectMonitor. + const markWord tmp = object->mark(); + if (!tmp.has_monitor() || tmp.monitor() != om_ptr) { + // Async deflation and reuse won the race so we have to retry. + // Skip object header restoration since that's already done. + om_ptr->dec_ref_count(); + return false; + } + } + + ADIM_guarantee(_om_ptr == NULL, "sanity check: _om_ptr=" INTPTR_FORMAT, + p2i(_om_ptr)); + _om_ptr = om_ptr; + return true; +} + +// For internal use by ObjectSynchronizer::inflate(). +void ObjectMonitorHandle::set_om_ptr(ObjectMonitor * om_ptr) { + if (_om_ptr == NULL) { + ADIM_guarantee(om_ptr != NULL, "cannot clear an unset om_ptr"); + om_ptr->inc_ref_count(); + _om_ptr = om_ptr; + } else { + ADIM_guarantee(om_ptr == NULL, "can only clear a set om_ptr"); + _om_ptr->dec_ref_count(); + _om_ptr = NULL; + } +} + void ObjectMonitor::print_on(outputStream* st) const { // The minimal things to print for markWord printing, more can be added for debugging and logging. st->print("{contentions=0x%08x,waiters=0x%08x" --- old/src/hotspot/share/runtime/objectMonitor.hpp 2019-08-28 15:05:52.430953469 -0400 +++ new/src/hotspot/share/runtime/objectMonitor.hpp 2019-08-28 15:05:52.266953474 -0400 @@ -126,6 +126,12 @@ // intptr_t. There's no reason to use a 64-bit type for this field // in a 64-bit JVM. +#ifndef OM_CACHE_LINE_SIZE +// Use DEFAULT_CACHE_LINE_SIZE if not already specified for +// the current build platform. +#define OM_CACHE_LINE_SIZE DEFAULT_CACHE_LINE_SIZE +#endif + class ObjectMonitor { public: enum { @@ -137,6 +143,7 @@ }; private: + friend class ObjectMonitorHandle; friend class ObjectSynchronizer; friend class ObjectWaiter; friend class VMStructs; @@ -146,22 +153,39 @@ // Enforced by the assert() in header_addr(). 
volatile markWord _header; // displaced object header word - mark void* volatile _object; // backward object pointer - strong root - public: - ObjectMonitor* _next_om; // Next ObjectMonitor* linkage - private: + typedef enum { + Free = 0, // Free must be 0 for monitor to be free after memset(..,0,..). + New, + Old + } AllocationState; + AllocationState _allocation_state; // Separate _header and _owner on different cache lines since both can - // have busy multi-threaded access. _header and _object are set at - // initial inflation and _object doesn't change until deflation so - // _object is a good choice to share the cache line with _header. - // _next_om shares _header's cache line for pre-monitor list historical - // reasons. _next_om only changes if the next ObjectMonitor is deflated. - DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE, - sizeof(volatile markWord) + sizeof(void* volatile) + - sizeof(ObjectMonitor *)); + // have busy multi-threaded access. _header, _object and _allocation_state + // are set at initial inflation. _object and _allocation_state don't + // change until deflation so _object and _allocation_state are good + // choices to share the cache line with _header. + DEFINE_PAD_MINUS_SIZE(0, OM_CACHE_LINE_SIZE, sizeof(volatile markWord) + + sizeof(void* volatile) + sizeof(AllocationState)); + // Used by async deflation as a marker in the _owner field: + #define DEFLATER_MARKER reinterpret_cast<void*>(-1) protected: // protected for JvmtiRawMonitor void* volatile _owner; // pointer to owning thread OR BasicLock private: volatile jlong _previous_owner_tid; // thread id of the previous owner of the monitor + // Separate _owner and _ref_count on different cache lines since both + // can have busy multi-threaded access. _previous_owner_tid is only + // changed by ObjectMonitor::exit() so it is a good choice to share the + // cache line with _owner. 
+ DEFINE_PAD_MINUS_SIZE(1, OM_CACHE_LINE_SIZE, sizeof(void* volatile) + + sizeof(volatile jlong)); + volatile jint _ref_count; // ref count for ObjectMonitor* and used by the async deflation + // protocol. See ObjectSynchronizer::deflate_monitor_using_JT(). + private: + // Separate _ref_count and _next_om on different cache lines since + // both can have busy multi-threaded access. + DEFINE_PAD_MINUS_SIZE(2, OM_CACHE_LINE_SIZE, sizeof(volatile jint)); + public: // for static synchronizer.cpp access: + ObjectMonitor* volatile _next_om; // Next ObjectMonitor* linkage protected: // protected for JvmtiRawMonitor volatile intptr_t _recursions; // recursion count, 0 for first entry ObjectWaiter* volatile _EntryList; // Threads blocked on entry or reentry. @@ -177,7 +201,8 @@ volatile jint _contentions; // Number of active contentions in enter(). It is used by is_busy() // along with other fields to determine if an ObjectMonitor can be - // deflated. See ObjectSynchronizer::deflate_monitor(). + // deflated. See ObjectSynchronizer::deflate_monitor() and + // ObjectSynchronizer::deflate_monitor_using_JT(). protected: ObjectWaiter* volatile _WaitSet; // LL of threads wait()ing on the monitor volatile jint _waiters; // number of waiting threads @@ -185,6 +210,7 @@ volatile int _WaitSetLock; // protects Wait Queue - simple spinlock public: + volatile int visit_marker; static void Initialize(); // Only perform a PerfData operation if the PerfData object has been @@ -244,13 +270,27 @@ intptr_t is_busy() const { // TODO-FIXME: assert _owner == null implies _recursions = 0 - return _contentions|_waiters|intptr_t(_owner)|intptr_t(_cxq)|intptr_t(_EntryList); + // We do not include _ref_count in the is_busy() check because + // _ref_count is for indicating that the ObjectMonitor* is in + // use which is orthogonal to whether the ObjectMonitor itself + // is in use for a locking operation. 
+ intptr_t ret_code = _contentions | _waiters | intptr_t(_cxq) | intptr_t(_EntryList); + if (!AsyncDeflateIdleMonitors) { + ret_code |= intptr_t(_owner); + } else { + if (_owner != DEFLATER_MARKER) { + ret_code |= intptr_t(_owner); + } + } + return ret_code; } const char* is_busy_to_string(stringStream* ss); intptr_t is_entered(Thread* current) const; - void* owner() const; + void* owner() const; // Returns NULL if DEFLATER_MARKER is observed. + // Returns true if owner field == DEFLATER_MARKER and false otherwise. + bool owner_is_DEFLATER_MARKER(); void set_owner(void* owner); jint waiters() const; @@ -295,11 +335,21 @@ void* object() const; void* object_addr(); void set_object(void* obj); + void set_allocation_state(AllocationState s); + AllocationState allocation_state() const; + bool is_free() const; + bool is_active() const; + bool is_old() const; + bool is_new() const; + void dec_ref_count(); + void inc_ref_count(); + jint ref_count() const; // Returns true if the specified thread owns the ObjectMonitor. Otherwise // returns false and throws IllegalMonitorStateException (IMSE). bool check_owner(Thread* THREAD); void clear(); + void clear_using_JT(); void enter(TRAPS); void exit(bool not_suspended, TRAPS); @@ -323,10 +373,42 @@ void ReenterI(Thread* self, ObjectWaiter* self_node); void UnlinkAfterAcquire(Thread* self, ObjectWaiter* self_node); int TryLock(Thread* self); - int NotRunnable(Thread* self, Thread * Owner); + int NotRunnable(Thread* self, Thread* Owner); int TrySpin(Thread* self); void ExitEpilog(Thread* self, ObjectWaiter* Wakee); bool ExitSuspendEquivalent(JavaThread* self); + void install_displaced_markword_in_object(const oop obj); +}; + +// A helper object for managing an ObjectMonitor*'s ref_count. There +// are special safety considerations when async deflation is used. 
+class ObjectMonitorHandle : public StackObj { + private: + ObjectMonitor* _om_ptr; + public: + ObjectMonitorHandle() { _om_ptr = NULL; } + ~ObjectMonitorHandle(); + + ObjectMonitor* om_ptr() const { return _om_ptr; } + // Save the ObjectMonitor* associated with the specified markWord and + // increment the ref_count. + bool save_om_ptr(oop object, markWord mark); + + // For internal use by ObjectSynchronizer::monitors_iterate(). + ObjectMonitorHandle(ObjectMonitor* _om_ptr); + // For internal use by ObjectSynchronizer::inflate(). + void set_om_ptr(ObjectMonitor* om_ptr); }; +// Macro to use guarantee() for more strict AsyncDeflateIdleMonitors +// checks and assert() otherwise. +#define ADIM_guarantee(p, ...) \ + do { \ + if (AsyncDeflateIdleMonitors) { \ + guarantee(p, __VA_ARGS__); \ + } else { \ + assert(p, __VA_ARGS__); \ + } \ + } while (0) + #endif // SHARE_RUNTIME_OBJECTMONITOR_HPP --- old/src/hotspot/share/runtime/objectMonitor.inline.hpp 2019-08-28 15:05:53.090953446 -0400 +++ new/src/hotspot/share/runtime/objectMonitor.inline.hpp 2019-08-28 15:05:52.906953452 -0400 @@ -51,19 +51,53 @@ return _waiters; } +// Returns NULL if DEFLATER_MARKER is observed. inline void* ObjectMonitor::owner() const { - return _owner; + void* owner = _owner; + return owner != DEFLATER_MARKER ? owner : NULL; +} + +// Returns true if owner field == DEFLATER_MARKER and false otherwise. +// This accessor is called when we really need to know if the owner +// field == DEFLATER_MARKER and any non-NULL value won't do the trick. 
+inline bool ObjectMonitor::owner_is_DEFLATER_MARKER() { + return OrderAccess::load_acquire(&_owner) == DEFLATER_MARKER; } inline void ObjectMonitor::clear() { assert(Atomic::load(&_header).value() != 0, "must be non-zero"); + assert(_owner == NULL, "must be NULL: owner=" INTPTR_FORMAT, p2i(_owner)); + assert(ref_count() == 0, "must be 0: ref_count=%d", ref_count()); + + Atomic::store(markWord::zero(), &_header); + + clear_using_JT(); +} + +inline void ObjectMonitor::clear_using_JT() { + // Unlike other *_using_JT() functions, we cannot assert + // AsyncDeflateIdleMonitors or Thread::current()->is_Java_thread() + // because clear() calls this function for the rest of its checks. + + if (AsyncDeflateIdleMonitors) { + // Async deflation protocol uses the header, owner and ref_count + // fields. While the ObjectMonitor being deflated is on the global free + // list, we leave those three fields alone; owner == DEFLATER_MARKER + // and ref_count < 0 will force any racing threads to retry. The + // header field is used by install_displaced_markword_in_object() + // in the last part of the deflation protocol so we cannot check + // its value here. 
+ guarantee(_owner == NULL || _owner == DEFLATER_MARKER, + "must be NULL or DEFLATER_MARKER: owner=" INTPTR_FORMAT, + p2i(_owner)); + guarantee(ref_count() <= 0, "must be <= 0: ref_count=%d", ref_count()); + } assert(_contentions == 0, "must be 0: contentions=%d", _contentions); assert(_waiters == 0, "must be 0: waiters=%d", _waiters); assert(_recursions == 0, "must be 0: recursions=" INTPTR_FORMAT, _recursions); assert(_object != NULL, "must be non-NULL"); - assert(_owner == NULL, "must be NULL: owner=" INTPTR_FORMAT, p2i(_owner)); - - Atomic::store(markWord::zero(), &_header); + + set_allocation_state(Free); _object = NULL; } @@ -88,4 +122,51 @@ _owner = owner; } +inline void ObjectMonitor::set_allocation_state(ObjectMonitor::AllocationState s) { + _allocation_state = s; +} + +inline ObjectMonitor::AllocationState ObjectMonitor::allocation_state() const { + return _allocation_state; +} + +inline bool ObjectMonitor::is_free() const { + return _allocation_state == Free; +} + +inline bool ObjectMonitor::is_active() const { + return !is_free(); +} + +inline bool ObjectMonitor::is_old() const { + return _allocation_state == Old; +} + +inline bool ObjectMonitor::is_new() const { + return _allocation_state == New; +} + +inline void ObjectMonitor::dec_ref_count() { + // The decrement only needs to be MO_ACQ_REL since the reference + // counter is volatile. + Atomic::dec(&_ref_count); + // Can be negative as part of async deflation protocol. + ADIM_guarantee(AsyncDeflateIdleMonitors || ref_count() >= 0, + "sanity check: ref_count=%d", ref_count()); +} + +inline void ObjectMonitor::inc_ref_count() { + // The increment needs to be MO_SEQ_CST so that the reference + // counter update is seen as soon as possible in a race with the + // async deflation protocol. + Atomic::inc(&_ref_count); + // Can be negative as part of async deflation protocol. 
+ ADIM_guarantee(AsyncDeflateIdleMonitors || ref_count() > 0, + "sanity check: ref_count=%d", ref_count()); +} + +inline jint ObjectMonitor::ref_count() const { + return OrderAccess::load_acquire(&_ref_count); +} + #endif // SHARE_RUNTIME_OBJECTMONITOR_INLINE_HPP --- old/src/hotspot/share/runtime/safepoint.cpp 2019-08-28 15:05:53.782953422 -0400 +++ new/src/hotspot/share/runtime/safepoint.cpp 2019-08-28 15:05:53.614953427 -0400 @@ -523,8 +523,9 @@ } bool SafepointSynchronize::is_cleanup_needed() { - // Need a safepoint if there are many monitors to deflate. - if (ObjectSynchronizer::is_cleanup_needed()) return true; + // Need a cleanup safepoint if there are too many monitors in use + // and the monitor deflation needs to be done at a safepoint. + if (ObjectSynchronizer::is_safepoint_deflation_needed()) return true; // Need a safepoint if some inline cache buffers is non-empty if (!InlineCacheBuffer::is_empty()) return true; if (StringTable::needs_rehashing()) return true; @@ -543,6 +544,10 @@ _counters(counters) {} void do_thread(Thread* thread) { + // deflate_thread_local_monitors() handles or requests deflation of + // this thread's idle monitors. If !AsyncDeflateIdleMonitors or if + // there is a special cleanup request, deflation is handled now. + // Otherwise, async deflation is requested via a flag. ObjectSynchronizer::deflate_thread_local_monitors(thread, _counters); if (_nmethod_cl != NULL && thread->is_Java_thread() && ! thread->is_Code_cache_sweeper_thread()) { @@ -575,7 +580,11 @@ const char* name = "deflating global idle monitors"; EventSafepointCleanupTask event; TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); - ObjectSynchronizer::deflate_idle_monitors(_counters); + // AsyncDeflateIdleMonitors only uses DeflateMonitorCounters + // when a special cleanup has been requested. + // Note: This logging output will include global idle monitor + // elapsed times, but not global idle monitor deflation count. 
+ ObjectSynchronizer::do_safepoint_work(_counters); post_safepoint_cleanup_task_event(event, safepoint_id, name); } --- old/src/hotspot/share/runtime/serviceThread.cpp 2019-08-28 15:05:54.422953399 -0400 +++ new/src/hotspot/share/runtime/serviceThread.cpp 2019-08-28 15:05:54.250953405 -0400 @@ -109,6 +109,7 @@ bool resolved_method_table_work = false; bool protection_domain_table_work = false; bool oopstorage_work = false; + bool deflate_idle_monitors = false; JvmtiDeferredEvent jvmti_event; { // Need state transition ThreadBlockInVM so that this thread @@ -134,10 +135,14 @@ (symboltable_work = SymbolTable::has_work()) | (resolved_method_table_work = ResolvedMethodTable::has_work()) | (protection_domain_table_work = SystemDictionary::pd_cache_table()->has_work()) | - (oopstorage_work = OopStorage::has_cleanup_work_and_reset()) + (oopstorage_work = OopStorage::has_cleanup_work_and_reset()) | + (deflate_idle_monitors = ObjectSynchronizer::is_async_deflation_needed()) ) == 0) { // Wait until notified that there is some work to do. - ml.wait(); + // If AsyncDeflateIdleMonitors, then we wait for + // GuaranteedSafepointInterval so that is_async_deflation_needed() + // is checked at the same interval. + ml.wait(AsyncDeflateIdleMonitors ? GuaranteedSafepointInterval : 0); } if (has_jvmti_events) { @@ -180,6 +185,27 @@ if (oopstorage_work) { cleanup_oopstorages(oopstorages, oopstorage_count); } + + if (deflate_idle_monitors) { + // Deflate any global idle monitors. + ObjectSynchronizer::deflate_global_idle_monitors_using_JT(); + + int count = 0; + for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { + if (jt->om_in_use_count > 0 && !jt->is_exiting()) { + // This JavaThread is using ObjectMonitors so deflate any that + // are idle unless this JavaThread is exiting; do not race with + // ObjectSynchronizer::om_flush(). 
+ ObjectSynchronizer::deflate_per_thread_idle_monitors_using_JT(jt); + count++; + } + } + if (count > 0) { + log_debug(monitorinflation)("did async deflation of idle monitors for %d thread(s).", count); + } + // The ServiceThread's async deflation request has been processed. + ObjectSynchronizer::set_is_async_deflation_requested(false); + } } } --- old/src/hotspot/share/runtime/sharedRuntime.cpp 2019-08-28 15:05:55.090953376 -0400 +++ new/src/hotspot/share/runtime/sharedRuntime.cpp 2019-08-28 15:05:54.906953382 -0400 @@ -64,8 +64,10 @@ #include "runtime/interfaceSupport.inline.hpp" #include "runtime/java.hpp" #include "runtime/javaCalls.hpp" +#include "runtime/objectMonitor.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" #include "runtime/vframe.inline.hpp" #include "runtime/vframeArray.hpp" #include "utilities/copy.hpp" @@ -3111,9 +3113,13 @@ kptr2 = fr.next_monitor_in_interpreter_frame(kptr2) ) { if (kptr2->obj() != NULL) { // Avoid 'holes' in the monitor array BasicLock *lock = kptr2->lock(); + // Disallow async deflation of the inflated monitor so the + // displaced header stays stable until we've copied it. 
+ ObjectMonitorHandle omh; // Inflate so the displaced header becomes position-independent - if (lock->displaced_header().is_unlocked()) - ObjectSynchronizer::inflate_helper(kptr2->obj()); + if (lock->displaced_header().is_unlocked()) { + ObjectSynchronizer::inflate_helper(&omh, kptr2->obj()); + } // Now the displaced header is free to move buf[i++] = (intptr_t)lock->displaced_header().value(); buf[i++] = cast_from_oop(kptr2->obj()); --- old/src/hotspot/share/runtime/synchronizer.cpp 2019-08-28 15:05:55.770953352 -0400 +++ new/src/hotspot/share/runtime/synchronizer.cpp 2019-08-28 15:05:55.582953359 -0400 @@ -118,21 +118,345 @@ // global list of blocks of monitors PaddedObjectMonitor* volatile ObjectSynchronizer::g_block_list = NULL; +bool volatile ObjectSynchronizer::_is_async_deflation_requested = false; +bool volatile ObjectSynchronizer::_is_special_deflation_requested = false; +jlong ObjectSynchronizer::_last_async_deflation_time_ns = 0; + // Global ObjectMonitor free list. Newly allocated and deflated // ObjectMonitors are prepended here. -ObjectMonitor* volatile ObjectSynchronizer::g_free_list = NULL; +static ObjectMonitor* volatile g_free_list = NULL; // Global ObjectMonitor in-use list. When a JavaThread is exiting, // ObjectMonitors on its per-thread in-use list are prepended here. 
-ObjectMonitor* volatile ObjectSynchronizer::g_om_in_use_list = NULL; -int ObjectSynchronizer::g_om_in_use_count = 0; // # on g_om_in_use_list +static ObjectMonitor* volatile g_om_in_use_list = NULL; -static volatile intptr_t gListLock = 0; // protects global monitor lists -static volatile int g_om_free_count = 0; // # on g_free_list -static volatile int g_om_population = 0; // # Extant -- in circulation +static volatile int g_om_free_count = 0; // # on g_free_list +static volatile int g_om_in_use_count = 0; // # on g_om_in_use_list +static volatile int g_om_population = 0; // # Extant -- in circulation #define CHAINMARKER (cast_to_oop(-1)) +// =====================> List Management functions + +// Return true if the ObjectMonitor's next field is marked. +// Otherwise returns false. +static bool is_next_marked(ObjectMonitor* om) { + return ((intptr_t)OrderAccess::load_acquire(&om->_next_om) & 0x1) != 0; +} + +// Mark an ObjectMonitor* and return it. Note: the om parameter +// may or may not have been marked originally. +static ObjectMonitor* mark_om_ptr(ObjectMonitor* om) { + return (ObjectMonitor*)((intptr_t)om | 0x1); +} + +// Mark the next field in an ObjectMonitor. If marking was successful, +// then the unmarked next field is returned via parameter and true is +// returned. Otherwise false is returned. +static bool mark_next(ObjectMonitor* om, ObjectMonitor** next_p) { + // Get current next field without any marking value. + ObjectMonitor* next = (ObjectMonitor*) + ((intptr_t)OrderAccess::load_acquire(&om->_next_om) & ~0x1); + if (Atomic::cmpxchg(mark_om_ptr(next), &om->_next_om, next) != next) { + return false; // Could not mark the next field or it was already marked. + } + *next_p = next; + return true; +} + +// Loop until we mark the next field in an ObjectMonitor. The unmarked +// next field is returned. 
+static ObjectMonitor* mark_next_loop(ObjectMonitor* om) { + ObjectMonitor* next; + while (true) { + if (mark_next(om, &next)) { + // Marked om's next field so return the unmarked value. + return next; + } + } +} + +// Set the next field in an ObjectMonitor to the specified value. +// The caller of set_next() must be the same thread that marked the +// ObjectMonitor. +static void set_next(ObjectMonitor* om, ObjectMonitor* value) { + OrderAccess::release_store(&om->_next_om, value); +} + +// Mark the next field in the list head ObjectMonitor. If marking was +// successful, then the mid and the unmarked next field are returned +// via parameter and true is returned. Otherwise false is returned. +static bool mark_list_head(ObjectMonitor* volatile * list_p, + ObjectMonitor** mid_p, ObjectMonitor** next_p) { + while (true) { + ObjectMonitor* mid = OrderAccess::load_acquire(list_p); + if (mid == NULL) { + return false; // The list is empty so nothing to mark. + } + if (mark_next(mid, next_p)) { + if (OrderAccess::load_acquire(list_p) != mid) { + // The list head changed so we have to retry. + set_next(mid, *next_p); // unmark mid + continue; + } + // We marked next field to guard against races. + *mid_p = mid; + return true; + } + } +} + +// Return the unmarked next field in an ObjectMonitor. Note: the next +// field may or may not have been marked originally. +static ObjectMonitor* unmarked_next(ObjectMonitor* om) { + return (ObjectMonitor*)((intptr_t)OrderAccess::load_acquire(&om->_next_om) & ~0x1); +} + +#if 0 +// XXX - this is unused +// Unmark the next field in an ObjectMonitor. Requires that the next +// field be marked. 
+static void unmark_next(ObjectMonitor* om) { + ADIM_guarantee(is_next_marked(om), "next field must be marked: next=" INTPTR_FORMAT, p2i(om->_next_om)); + + ObjectMonitor* next = unmarked_next(om); + set_next(om, next); +} +#endif + +volatile int visit_counter = 42; +static void chk_for_list_loop(ObjectMonitor* list, int count) { + if (!CheckMonitorLists) { + return; + } + int l_visit_counter = Atomic::add(1, &visit_counter); + int l_count = 0; + ObjectMonitor* prev = NULL; + for (ObjectMonitor* mid = list; mid != NULL; mid = unmarked_next(mid)) { + if (mid->visit_marker == l_visit_counter) { + log_error(monitorinflation)("ERROR: prev=" INTPTR_FORMAT ", l_count=%d" + " refers to an ObjectMonitor that has" + " already been visited: mid=" INTPTR_FORMAT, + p2i(prev), l_count, p2i(mid)); + fatal("list=" INTPTR_FORMAT " of %d items has a loop.", p2i(list), count); + } + mid->visit_marker = l_visit_counter; + prev = mid; + if (++l_count > count + 1024 * 1024) { + fatal("list=" INTPTR_FORMAT " of %d items may have a loop; l_count=%d", + p2i(list), count, l_count); + } + } +} + +static void chk_om_not_on_list(ObjectMonitor* om, ObjectMonitor* list, int count) { + if (!CheckMonitorLists) { + return; + } + guarantee(list != om, "ERROR: om=" INTPTR_FORMAT " must not be head of the " + "list=" INTPTR_FORMAT ", count=%d", p2i(om), p2i(list), count); + int l_count = 0; + for (ObjectMonitor* mid = list; mid != NULL; mid = unmarked_next(mid)) { + if (unmarked_next(mid) == om) { + log_error(monitorinflation)("ERROR: mid=" INTPTR_FORMAT ", l_count=%d" + " next_om refers to om=" INTPTR_FORMAT, + p2i(mid), l_count, p2i(om)); + fatal("list=" INTPTR_FORMAT " of %d items has bad next_om value.", + p2i(list), count); + } + if (++l_count > count + 1024 * 1024) { + fatal("list=" INTPTR_FORMAT " of %d items may have a loop; l_count=%d", + p2i(list), count, l_count); + } + } +} + +static void chk_om_elems_not_on_list(ObjectMonitor* elems, int elems_count, + ObjectMonitor* list, int 
list_count) { + if (!CheckMonitorLists) { + return; + } + chk_for_list_loop(elems, elems_count); + for (ObjectMonitor* mid = elems; mid != NULL; mid = unmarked_next(mid)) { + chk_om_not_on_list(mid, list, list_count); + } +} + +// Prepend a list of ObjectMonitors to the specified *list_p. 'tail' is +// the last ObjectMonitor in the list and there are 'count' on the list. +// Also updates the specified *count_p. +static void prepend_list_to_common(ObjectMonitor* list, ObjectMonitor* tail, + int count, ObjectMonitor* volatile* list_p, + volatile int* count_p) { + chk_for_list_loop(OrderAccess::load_acquire(list_p), + OrderAccess::load_acquire(count_p)); + chk_om_elems_not_on_list(list, count, OrderAccess::load_acquire(list_p), + OrderAccess::load_acquire(count_p)); + while (true) { + ObjectMonitor* cur = OrderAccess::load_acquire(list_p); + // Prepend list to *list_p. + ObjectMonitor* next = NULL; + if (!mark_next(tail, &next)) { + continue; // failed to mark next field so try it all again + } + set_next(tail, cur); // tail now points to cur (and unmarks tail) + if (cur == NULL) { + // No potential race with takers or other prependers since + // *list_p is empty. + if (Atomic::cmpxchg(list, list_p, cur) == cur) { + // Successfully switched *list_p to the list value. + Atomic::add(count, count_p); + break; + } + // Implied else: try it all again + } else { + // Try to mark next field to guard against races: + if (!mark_next(cur, &next)) { + continue; // failed to mark next field so try it all again + } + // We marked the next field so try to switch *list_p to the list value. + if (Atomic::cmpxchg(list, list_p, cur) != cur) { + // The list head has changed so unmark the next field and try again: + set_next(cur, next); + continue; + } + Atomic::add(count, count_p); + set_next(cur, next); // unmark next field + break; + } + } +} + +// Prepend a newly allocated block of ObjectMonitors to g_block_list and +// g_free_list. Also updates g_om_population and g_om_free_count. 
+void ObjectSynchronizer::prepend_block_to_lists(PaddedObjectMonitor* new_blk) { + // First we handle g_block_list: + while (true) { + PaddedObjectMonitor* cur = OrderAccess::load_acquire(&g_block_list); + // Prepend new_blk to g_block_list. The first ObjectMonitor in + // a block is reserved for use as linkage to the next block. + OrderAccess::release_store(&new_blk[0]._next_om, cur); + if (Atomic::cmpxchg(new_blk, &g_block_list, cur) == cur) { + // Successfully switched g_block_list to the new_blk value. + Atomic::add(_BLOCKSIZE - 1, &g_om_population); + break; + } + // Implied else: try it all again + } + + // Second we handle g_free_list: + prepend_list_to_common(new_blk + 1, &new_blk[_BLOCKSIZE - 1], _BLOCKSIZE - 1, + &g_free_list, &g_om_free_count); +} + +// Prepend a list of ObjectMonitors to g_free_list. 'tail' is the last +// ObjectMonitor in the list and there are 'count' on the list. Also +// updates g_om_free_count. +static void prepend_list_to_g_free_list(ObjectMonitor* list, + ObjectMonitor* tail, int count) { + prepend_list_to_common(list, tail, count, &g_free_list, &g_om_free_count); +} + +// Prepend a list of ObjectMonitors to g_om_in_use_list. 'tail' is the last +// ObjectMonitor in the list and there are 'count' on the list. Also +// updates g_om_in_use_count. +static void prepend_list_to_g_om_in_use_list(ObjectMonitor* list, + ObjectMonitor* tail, int count) { + prepend_list_to_common(list, tail, count, &g_om_in_use_list, &g_om_in_use_count); +} + +// Prepend an ObjectMonitor to the specified list. Also updates +// the specified counter. 
+static void prepend_to_common(ObjectMonitor* m, ObjectMonitor* volatile * list_p, + int volatile * count_p) { + chk_for_list_loop(OrderAccess::load_acquire(list_p), + OrderAccess::load_acquire(count_p)); + chk_om_not_on_list(m, OrderAccess::load_acquire(list_p), + OrderAccess::load_acquire(count_p)); + + while (true) { + ObjectMonitor* cur = OrderAccess::load_acquire(list_p); + // Prepend ObjectMonitor to *list_p. + ObjectMonitor* next = NULL; + if (!mark_next(m, &next)) { + continue; // failed to mark next field so try it all again + } + set_next(m, cur); // m now points to cur (and unmarks m) + if (cur == NULL) { + // No potential race with other prependers since *list_p is empty. + if (Atomic::cmpxchg(m, list_p, cur) == cur) { + // Successfully switched *list_p to 'm'. + Atomic::inc(count_p); + break; + } + // Implied else: try it all again + } else { + // Try to mark next field to guard against races: + if (!mark_next(cur, &next)) { + continue; // failed to mark next field so try it all again + } + // We marked the next field so try to switch *list_p to 'm'. + if (Atomic::cmpxchg(m, list_p, cur) != cur) { + // The list head has changed so unmark the next field and try again: + set_next(cur, next); + continue; + } + Atomic::inc(count_p); + set_next(cur, next); // unmark next field + break; + } + } +} + +// Prepend an ObjectMonitor to a per-thread om_free_list. +// Also updates the per-thread om_free_count. +static void prepend_to_om_free_list(Thread* self, ObjectMonitor* m) { + prepend_to_common(m, &self->om_free_list, &self->om_free_count); +} + +// Prepend an ObjectMonitor to a per-thread om_in_use_list. +// Also updates the per-thread om_in_use_count. +static void prepend_to_om_in_use_list(Thread* self, ObjectMonitor* m) { + prepend_to_common(m, &self->om_in_use_list, &self->om_in_use_count); +} + +// Take an ObjectMonitor from the start of the specified list. Also +// decrements the specified counter. Returns NULL if none are available. 
+static ObjectMonitor* take_from_start_of_common(ObjectMonitor* volatile * list_p, + int volatile * count_p) { + chk_for_list_loop(OrderAccess::load_acquire(list_p), + OrderAccess::load_acquire(count_p)); + + ObjectMonitor* next = NULL; + ObjectMonitor* take = NULL; + // Mark the list head to guard against A-B-A race: + if (!mark_list_head(list_p, &take, &next)) { + return NULL; // None are available. + } + // Switch marked list head to next (which unmarks the list head, but + // leaves take marked): + OrderAccess::release_store(list_p, next); + Atomic::dec(count_p); + // Unmark take, but leave the next value for any lagging list + // walkers. It will get cleaned up when take is prepended to + // the in-use list: + set_next(take, next); + return take; +} + +// Take an ObjectMonitor from the start of the global free-list. Also +// updates g_om_free_count. Returns NULL if none are available. +static ObjectMonitor* take_from_start_of_g_free_list() { + return take_from_start_of_common(&g_free_list, &g_om_free_count); +} + +// Take an ObjectMonitor from the start of a per-thread free-list. +// Also updates om_free_count. Returns NULL if none are available. 
+static ObjectMonitor* take_from_start_of_om_free_list(Thread* self) { + return take_from_start_of_common(&self->om_free_list, &self->om_free_count); +} + + // =====================> Quick functions // The quick_* forms are special fast-path variants used to improve @@ -211,39 +535,59 @@ assert(((JavaThread *) self)->thread_state() == _thread_in_Java, "invariant"); NoSafepointVerifier nsv; if (obj == NULL) return false; // Need to throw NPE - const markWord mark = obj->mark(); - if (mark.has_monitor()) { - ObjectMonitor* const m = mark.monitor(); - assert(oopDesc::equals((oop) m->object(), obj), "invariant"); - Thread* const owner = (Thread *) m->_owner; - - // Lock contention and Transactional Lock Elision (TLE) diagnostics - // and observability - // Case: light contention possibly amenable to TLE - // Case: TLE inimical operations such as nested/recursive synchronization + while (true) { + const markWord mark = obj->mark(); - if (owner == self) { - m->_recursions++; - return true; - } + if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* const m = omh.om_ptr(); + assert(oopDesc::equals((oop) m->object(), obj), "invariant"); + Thread* const owner = (Thread *) m->_owner; + + // Lock contention and Transactional Lock Elision (TLE) diagnostics + // and observability + // Case: light contention possibly amenable to TLE + // Case: TLE inimical operations such as nested/recursive synchronization + + if (owner == self) { + m->_recursions++; + return true; + } - // This Java Monitor is inflated so obj's header will never be - // displaced to this thread's BasicLock. Make the displaced header - // non-NULL so this BasicLock is not seen as recursive nor as - // being locked. We do this unconditionally so that this thread's - // BasicLock cannot be mis-interpreted by any stack walkers. 
For - // performance reasons, stack walkers generally first check for - // Biased Locking in the object's header, the second check is for - // stack-locking in the object's header, the third check is for - // recursive stack-locking in the displaced header in the BasicLock, - // and last are the inflated Java Monitor (ObjectMonitor) checks. - lock->set_displaced_header(markWord::unused_mark()); + // This Java Monitor is inflated so obj's header will never be + // displaced to this thread's BasicLock. Make the displaced header + // non-NULL so this BasicLock is not seen as recursive nor as + // being locked. We do this unconditionally so that this thread's + // BasicLock cannot be mis-interpreted by any stack walkers. For + // performance reasons, stack walkers generally first check for + // Biased Locking in the object's header, the second check is for + // stack-locking in the object's header, the third check is for + // recursive stack-locking in the displaced header in the BasicLock, + // and last are the inflated Java Monitor (ObjectMonitor) checks. + lock->set_displaced_header(markWord::unused_mark()); + + if (owner == NULL && Atomic::replace_if_null(self, &(m->_owner))) { + assert(m->_recursions == 0, "invariant"); + return true; + } - if (owner == NULL && Atomic::replace_if_null(self, &(m->_owner))) { - assert(m->_recursions == 0, "invariant"); - return true; + if (AsyncDeflateIdleMonitors && + Atomic::cmpxchg(self, &m->_owner, DEFLATER_MARKER) == DEFLATER_MARKER) { + // The deflation protocol finished the first part (setting owner), + // but it failed the second part (making ref_count negative) and + // bailed. Or the ObjectMonitor was async deflated and reused. + // Acquired the monitor. + assert(m->_recursions == 0, "invariant"); + return true; + } } + break; } // Note that we could inflate in quick_enter. @@ -327,7 +671,9 @@ } // We have to take the slow-path of possible inflation and then exit. 
- inflate(THREAD, object, inflate_cause_vm_internal)->exit(true, THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, object, inflate_cause_vm_internal); + omh.om_ptr()->exit(true, THREAD); } // ----------------------------------------------------------------------------- @@ -360,7 +706,9 @@ // must be non-zero to avoid looking like a re-entrant lock, // and must not look locked either. lock->set_displaced_header(markWord::unused_mark()); - inflate(THREAD, obj(), inflate_cause_monitor_enter)->enter(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_monitor_enter); + omh.om_ptr()->enter(THREAD); } // This routine is used to handle interpreter/compiler slow case @@ -389,9 +737,10 @@ assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } - ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_vm_internal); - - return monitor->complete_exit(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_vm_internal); + intptr_t ret_code = omh.om_ptr()->complete_exit(THREAD); + return ret_code; } // NOTE: must use heavy weight monitor to handle complete_exit/reenter() @@ -401,9 +750,9 @@ assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } - ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_vm_internal); - - monitor->reenter(recursion, THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_vm_internal); + omh.om_ptr()->reenter(recursion, THREAD); } // ----------------------------------------------------------------------------- // JNI locks on java objects @@ -415,7 +764,9 @@ assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } THREAD->set_current_pending_monitor_is_from_java(false); - inflate(THREAD, obj(), inflate_cause_jni_enter)->enter(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_jni_enter); + omh.om_ptr()->enter(THREAD); 
THREAD->set_current_pending_monitor_is_from_java(true); } @@ -428,7 +779,9 @@ } assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); - ObjectMonitor* monitor = inflate(THREAD, obj, inflate_cause_jni_exit); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj, inflate_cause_jni_exit); + ObjectMonitor* monitor = omh.om_ptr(); // If this thread has locked the object, exit the monitor. We // intentionally do not use CHECK here because we must exit the // monitor even if an exception is pending. @@ -469,7 +822,9 @@ if (millis < 0) { THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(), "timeout value is negative"); } - ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_wait); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_wait); + ObjectMonitor* monitor = omh.om_ptr(); DTRACE_MONITOR_WAIT_PROBE(monitor, obj(), THREAD, millis); monitor->wait(millis, true, THREAD); @@ -478,7 +833,8 @@ // that's fixed we can uncomment the following line, remove the call // and change this function back into a "void" func. 
// DTRACE_MONITOR_PROBE(waited, monitor, obj(), THREAD); - return dtrace_waited_probe(monitor, obj, THREAD); + int ret_code = dtrace_waited_probe(monitor, obj, THREAD); + return ret_code; } void ObjectSynchronizer::wait_uninterruptibly(Handle obj, jlong millis, TRAPS) { @@ -489,7 +845,9 @@ if (millis < 0) { THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), "timeout value is negative"); } - inflate(THREAD, obj(), inflate_cause_wait)->wait(millis, false, THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_wait); + omh.om_ptr()->wait(millis, false, THREAD); } void ObjectSynchronizer::notify(Handle obj, TRAPS) { @@ -502,7 +860,9 @@ if (mark.has_locker() && THREAD->is_lock_owned((address)mark.locker())) { return; } - inflate(THREAD, obj(), inflate_cause_notify)->notify(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_notify); + omh.om_ptr()->notify(THREAD); } // NOTE: see comment of notify() @@ -516,7 +876,9 @@ if (mark.has_locker() && THREAD->is_lock_owned((address)mark.locker())) { return; } - inflate(THREAD, obj(), inflate_cause_notify)->notifyAll(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_notify); + omh.om_ptr()->notifyAll(THREAD); } // ----------------------------------------------------------------------------- @@ -541,15 +903,15 @@ // performed by the CPU(s) or platform. struct SharedGlobals { - char _pad_prefix[DEFAULT_CACHE_LINE_SIZE]; + char _pad_prefix[OM_CACHE_LINE_SIZE]; // These are highly shared mostly-read variables. // To avoid false-sharing they need to be the sole occupants of a cache line. 
volatile int stw_random; volatile int stw_cycle; - DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int) * 2); + DEFINE_PAD_MINUS_SIZE(1, OM_CACHE_LINE_SIZE, sizeof(volatile int) * 2); // Hot RW variable -- Sequester to avoid false-sharing volatile int hc_sequence; - DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int)); + DEFINE_PAD_MINUS_SIZE(2, OM_CACHE_LINE_SIZE, sizeof(volatile int)); }; static SharedGlobals GVars; @@ -710,79 +1072,93 @@ assert(Universe::verify_in_progress() || DumpSharedSpaces || ((JavaThread *)self)->thread_state() != _thread_blocked, "invariant"); - ObjectMonitor* monitor = NULL; - markWord temp, test; - intptr_t hash; - markWord mark = read_stable_mark(obj); + while (true) { + ObjectMonitor* monitor = NULL; + markWord temp, test; + intptr_t hash; + markWord mark = read_stable_mark(obj); - // object should remain ineligible for biased locking - assert(!mark.has_bias_pattern(), "invariant"); + // object should remain ineligible for biased locking + assert(!mark.has_bias_pattern(), "invariant"); - if (mark.is_neutral()) { - hash = mark.hash(); // this is a normal header - if (hash != 0) { // if it has hash, just return it - return hash; - } - hash = get_next_hash(self, obj); // allocate a new hash code - temp = mark.copy_set_hash(hash); // merge the hash code into header - // use (machine word version) atomic operation to install the hash - test = obj->cas_set_mark(temp, mark); - if (test == mark) { - return hash; - } - // If atomic operation failed, we must inflate the header - // into heavy weight monitor. We could add more code here - // for fast path, but it does not worth the complexity. 
- } else if (mark.has_monitor()) { - monitor = mark.monitor(); - temp = monitor->header(); - assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); - hash = temp.hash(); - if (hash != 0) { - return hash; - } - // Skip to the following code to reduce code size - } else if (self->is_lock_owned((address)mark.locker())) { - temp = mark.displaced_mark_helper(); // this is a lightweight monitor owned - assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); - hash = temp.hash(); // by current thread, check if the displaced - if (hash != 0) { // header contains hash code - return hash; - } - // WARNING: - // The displaced header in the BasicLock on a thread's stack - // is strictly immutable. It CANNOT be changed in ANY cases. - // So we have to inflate the stack lock into an ObjectMonitor - // even if the current thread owns the lock. The BasicLock on - // a thread's stack can be asynchronously read by other threads - // during an inflate() call so any change to that stack memory - // may not propagate to other threads correctly. - } - - // Inflate the monitor to set hash code - monitor = inflate(self, obj, inflate_cause_hash_code); - // Load displaced header and check it has hash code - mark = monitor->header(); - assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value()); - hash = mark.hash(); - if (hash == 0) { - hash = get_next_hash(self, obj); - temp = mark.copy_set_hash(hash); // merge hash code into header - assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); - uintptr_t v = Atomic::cmpxchg(temp.value(), (volatile uintptr_t*)monitor->header_addr(), mark.value()); - test = markWord(v); - if (test != mark) { - // The only non-deflation update to the ObjectMonitor's - // header/dmw field is to merge in the hash code. If someone - // adds a new usage of the header/dmw field, please update - // this code. 
- hash = test.hash(); - assert(test.is_neutral(), "invariant: header=" INTPTR_FORMAT, test.value()); - assert(hash != 0, "Trivial unexpected object/monitor header usage."); + if (mark.is_neutral()) { + hash = mark.hash(); // this is a normal header + if (hash != 0) { // if it has hash, just return it + return hash; + } + hash = get_next_hash(self, obj); // allocate a new hash code + temp = mark.copy_set_hash(hash); // merge the hash code into header + // use (machine word version) atomic operation to install the hash + test = obj->cas_set_mark(temp, mark); + if (test == mark) { + return hash; + } + // If atomic operation failed, we must inflate the header + // into heavy weight monitor. We could add more code here + // for fast path, but it does not worth the complexity. + } else if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + monitor = omh.om_ptr(); + temp = monitor->header(); + assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); + hash = temp.hash(); + if (hash != 0) { + return hash; + } + // Skip to the following code to reduce code size + } else if (self->is_lock_owned((address)mark.locker())) { + temp = mark.displaced_mark_helper(); // this is a lightweight monitor owned + assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); + hash = temp.hash(); // by current thread, check if the displaced + if (hash != 0) { // header contains hash code + return hash; + } + // WARNING: + // The displaced header in the BasicLock on a thread's stack + // is strictly immutable. It CANNOT be changed in ANY cases. + // So we have to inflate the stack lock into an ObjectMonitor + // even if the current thread owns the lock. 
The BasicLock on + // a thread's stack can be asynchronously read by other threads + // during an inflate() call so any change to that stack memory + // may not propagate to other threads correctly. + } + + // Inflate the monitor to set hash code + ObjectMonitorHandle omh; + inflate(&omh, self, obj, inflate_cause_hash_code); + monitor = omh.om_ptr(); + // Load displaced header and check it has hash code + mark = monitor->header(); + assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value()); + hash = mark.hash(); + if (hash == 0) { + hash = get_next_hash(self, obj); + temp = mark.copy_set_hash(hash); // merge hash code into header + assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); + uintptr_t v = Atomic::cmpxchg(temp.value(), (volatile uintptr_t*)monitor->header_addr(), mark.value()); + test = markWord(v); + if (test != mark) { + // The only non-deflation update to the ObjectMonitor's + // header/dmw field is to merge in the hash code. If someone + // adds a new usage of the header/dmw field, please update + // this code. + // ObjectMonitor::install_displaced_markword_in_object() + // does mark the header/dmw field as part of async deflation, + // but that protocol cannot happen now due to the + // ObjectMonitorHandle above. + hash = test.hash(); + assert(test.is_neutral(), "invariant: header=" INTPTR_FORMAT, test.value()); + assert(hash != 0, "Trivial unexpected object/monitor header usage."); + } } + // We finally get the hash + return hash; } - // We finally get the hash - return hash; } // Deprecated -- use FastHashCode() instead. 
@@ -802,20 +1178,28 @@ assert(thread == JavaThread::current(), "Can only be called on current thread"); oop obj = h_obj(); - markWord mark = read_stable_mark(obj); + while (true) { + markWord mark = read_stable_mark(obj); - // Uncontended case, header points to stack - if (mark.has_locker()) { - return thread->is_lock_owned((address)mark.locker()); - } - // Contended case, header points to ObjectMonitor (tagged pointer) - if (mark.has_monitor()) { - ObjectMonitor* monitor = mark.monitor(); - return monitor->is_entered(thread) != 0; + // Uncontended case, header points to stack + if (mark.has_locker()) { + return thread->is_lock_owned((address)mark.locker()); + } + // Contended case, header points to ObjectMonitor (tagged pointer) + if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + bool ret_code = omh.om_ptr()->is_entered(thread) != 0; + return ret_code; + } + // Unlocked case, header in place + assert(mark.is_neutral(), "sanity check"); + return false; } - // Unlocked case, header in place - assert(mark.is_neutral(), "sanity check"); - return false; } // Be aware of this method could revoke bias of the lock object. @@ -841,27 +1225,37 @@ assert(self == JavaThread::current(), "Can only be called on current thread"); oop obj = h_obj(); - markWord mark = read_stable_mark(obj); - // CASE: stack-locked. Mark points to a BasicLock on the owner's stack. - if (mark.has_locker()) { - return self->is_lock_owned((address)mark.locker()) ? - owner_self : owner_other; - } + while (true) { + markWord mark = read_stable_mark(obj); - // CASE: inflated. Mark (tagged pointer) points to an ObjectMonitor. - // The Object:ObjectMonitor relationship is stable as long as we're - // not at a safepoint. 
- if (mark.has_monitor()) { - void* owner = mark.monitor()->_owner; - if (owner == NULL) return owner_none; - return (owner == self || - self->is_lock_owned((address)owner)) ? owner_self : owner_other; - } + // CASE: stack-locked. Mark points to a BasicLock on the owner's stack. + if (mark.has_locker()) { + return self->is_lock_owned((address)mark.locker()) ? + owner_self : owner_other; + } - // CASE: neutral - assert(mark.is_neutral(), "sanity check"); - return owner_none; // it's unlocked + // CASE: inflated. Mark (tagged pointer) points to an ObjectMonitor. + // The Object:ObjectMonitor relationship is stable as long as we're + // not at a safepoint and AsyncDeflateIdleMonitors is false. + if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* monitor = omh.om_ptr(); + void* owner = monitor->_owner; + if (owner == NULL) return owner_none; + return (owner == self || + self->is_lock_owned((address)owner)) ? 
owner_self : owner_other; + } + + // CASE: neutral + assert(mark.is_neutral(), "sanity check"); + return owner_none; // it's unlocked + } } // FIXME: jvmti should call this @@ -876,33 +1270,41 @@ } oop obj = h_obj(); - address owner = NULL; - markWord mark = read_stable_mark(obj); + while (true) { + address owner = NULL; + markWord mark = read_stable_mark(obj); - // Uncontended case, header points to stack - if (mark.has_locker()) { - owner = (address) mark.locker(); - } + // Uncontended case, header points to stack + if (mark.has_locker()) { + owner = (address) mark.locker(); + } - // Contended case, header points to ObjectMonitor (tagged pointer) - else if (mark.has_monitor()) { - ObjectMonitor* monitor = mark.monitor(); - assert(monitor != NULL, "monitor should be non-null"); - owner = (address) monitor->owner(); - } + // Contended case, header points to ObjectMonitor (tagged pointer) + else if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* monitor = omh.om_ptr(); + assert(monitor != NULL, "monitor should be non-null"); + owner = (address) monitor->owner(); + } - if (owner != NULL) { - // owning_thread_from_monitor_owner() may also return NULL here - return Threads::owning_thread_from_monitor_owner(t_list, owner); - } + if (owner != NULL) { + // owning_thread_from_monitor_owner() may also return NULL here + return Threads::owning_thread_from_monitor_owner(t_list, owner); + } - // Unlocked case, header in place - // Cannot have assertion since this object may have been - // locked by another thread when reaching here. - // assert(mark.is_neutral(), "sanity check"); + // Unlocked case, header in place + // Cannot have assertion since this object may have been + // locked by another thread when reaching here. 
+ // assert(mark.is_neutral(), "sanity check"); - return NULL; + return NULL; + } } // Visitors ... @@ -913,32 +1315,94 @@ assert(block->object() == CHAINMARKER, "must be a block header"); for (int i = _BLOCKSIZE - 1; i > 0; i--) { ObjectMonitor* mid = (ObjectMonitor *)(block + i); - oop object = (oop)mid->object(); - if (object != NULL) { - // Only process with closure if the object is set. + if (mid->is_active()) { + ObjectMonitorHandle omh(mid); + + if (mid->object() == NULL || + (AsyncDeflateIdleMonitors && mid->ref_count() < 0)) { + // Only process with closure if the object is set. + // For async deflation, race here if monitor is not owned! + // The above ref_count bump (in ObjectMonitorHandle ctor) + // will cause subsequent async deflation to skip it. + // However, previous or concurrent async deflation is a race + // so skip this ObjectMonitor if it is being async deflated. + continue; + } closure->do_monitor(mid); } } - block = (PaddedObjectMonitor*)block->_next_om; + // unmarked_next() is not needed with g_block_list (no next field marking). 
+ block = (PaddedObjectMonitor*)OrderAccess::load_acquire(&block->_next_om); } } static bool monitors_used_above_threshold() { - if (g_om_population == 0) { + if (OrderAccess::load_acquire(&g_om_population) == 0) { return false; } - int monitors_used = g_om_population - g_om_free_count; - int monitor_usage = (monitors_used * 100LL) / g_om_population; - return monitor_usage > MonitorUsedDeflationThreshold; + if (MonitorUsedDeflationThreshold > 0) { + int monitors_used = OrderAccess::load_acquire(&g_om_population) - + OrderAccess::load_acquire(&g_om_free_count); + int monitor_usage = (monitors_used * 100LL) / + OrderAccess::load_acquire(&g_om_population); + return monitor_usage > MonitorUsedDeflationThreshold; + } + return false; } -bool ObjectSynchronizer::is_cleanup_needed() { - if (MonitorUsedDeflationThreshold > 0) { - return monitors_used_above_threshold(); +// Returns true if MonitorBound is set (> 0) and if the specified +// cnt is > MonitorBound. Otherwise returns false. +static bool is_MonitorBound_exceeded(const int cnt) { + const int mx = MonitorBound; + return mx > 0 && cnt > mx; +} + +bool ObjectSynchronizer::is_async_deflation_needed() { + if (!AsyncDeflateIdleMonitors) { + return false; + } + if (is_async_deflation_requested()) { + // Async deflation request. + return true; + } + if (AsyncDeflationInterval > 0 && + time_since_last_async_deflation_ms() > AsyncDeflationInterval && + monitors_used_above_threshold()) { + // It's been longer than our specified deflate interval and there + // are too many monitors in use. We don't deflate more frequently + // than AsyncDeflationInterval (unless is_async_deflation_requested) + // in order to not swamp the ServiceThread. + _last_async_deflation_time_ns = os::javaTimeNanos(); + return true; + } + if (is_MonitorBound_exceeded(OrderAccess::load_acquire(&g_om_population) - + OrderAccess::load_acquire(&g_om_free_count))) { + // Not enough ObjectMonitors on the global free list. 
+ return true; } return false; } +bool ObjectSynchronizer::is_safepoint_deflation_needed() { + if (!AsyncDeflateIdleMonitors) { + if (monitors_used_above_threshold()) { + // Too many monitors in use. + return true; + } + return false; + } + if (is_special_deflation_requested()) { + // For AsyncDeflateIdleMonitors only do a safepoint deflation + // if there is a special deflation request. + return true; + } + return false; +} + +jlong ObjectSynchronizer::time_since_last_async_deflation_ms() { + return (os::javaTimeNanos() - _last_async_deflation_time_ns) / (NANOUNITS / MILLIUNITS); +} + void ObjectSynchronizer::oops_do(OopClosure* f) { // We only scan the global used list here (for moribund threads), and // the thread-local monitors in Thread::oops_do(). @@ -947,18 +1411,21 @@ void ObjectSynchronizer::global_used_oops_do(OopClosure* f) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); - list_oops_do(g_om_in_use_list, f); + list_oops_do(OrderAccess::load_acquire(&g_om_in_use_list), OrderAccess::load_acquire(&g_om_in_use_count), f); } void ObjectSynchronizer::thread_local_used_oops_do(Thread* thread, OopClosure* f) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); - list_oops_do(thread->om_in_use_list, f); + list_oops_do(OrderAccess::load_acquire(&thread->om_in_use_list), OrderAccess::load_acquire(&thread->om_in_use_count), f); } -void ObjectSynchronizer::list_oops_do(ObjectMonitor* list, OopClosure* f) { +void ObjectSynchronizer::list_oops_do(ObjectMonitor* list, int count, OopClosure* f) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); - ObjectMonitor* mid; - for (mid = list; mid != NULL; mid = mid->_next_om) { + chk_for_list_loop(list, count); + // The oops_do() phase does not overlap with monitor deflation + // so no need to update the ObjectMonitor's ref_count for this + // ObjectMonitor* use. 
+ for (ObjectMonitor* mid = list; mid != NULL; mid = unmarked_next(mid)) { if (mid->object() != NULL) { f->do_oop((oop*)mid->object_addr()); } @@ -974,9 +1441,6 @@ // STW-time -- disassociates idle monitors from objects. Such // scavenged monitors are returned to the g_free_list. // -// The global list is protected by gListLock. All the critical sections -// are short and operate in constant-time. -// // ObjectMonitors reside in type-stable memory (TSM) and are immortal. // // Lifecycle: @@ -990,6 +1454,7 @@ // // If MonitorBound is not set (<= 0), MonitorBound checks are disabled. // +// When safepoint deflation is being used (!AsyncDeflateIdleMonitors): // The monitor pool is grow-only. We scavenge at STW safepoint-time, but the // the rate of scavenging is driven primarily by GC. As such, we can find // an inordinate number of monitors in circulation. @@ -1004,13 +1469,26 @@ // // The current implementation uses asynchronous VM operations. // -// If MonitorBound is set, the boundry applies to +// When safepoint deflation is being used and MonitorBound is set, the +// boundary applies to // (g_om_population - g_om_free_count) // i.e., if there are not enough ObjectMonitors on the global free list, // then a safepoint deflation is induced. Picking a good MonitorBound value // is non-trivial. +// +// When async deflation is being used: +// The monitor pool is still grow-only. Async deflation is requested +// by a safepoint's cleanup phase or by the ServiceThread at periodic +// intervals when is_async_deflation_needed() returns true. In +// addition to other policies that are checked, if there are not +// enough ObjectMonitors on the global free list, then +// is_async_deflation_needed() will return true. The ServiceThread +// calls deflate_global_idle_monitors_using_JT() and also calls +// deflate_per_thread_idle_monitors_using_JT() as needed. 
static void InduceScavenge(Thread* self, const char * Whence) { + assert(!AsyncDeflateIdleMonitors, "is not used by async deflation"); + // Induce STW safepoint to trim monitors // Ultimately, this results in a call to deflate_idle_monitors() in the near future. // More precisely, trigger an asynchronous STW safepoint as the number @@ -1026,31 +1504,30 @@ } } -ObjectMonitor* ObjectSynchronizer::om_alloc(Thread* self) { +ObjectMonitor* ObjectSynchronizer::om_alloc(Thread* self, + const InflateCause cause) { // A large MAXPRIVATE value reduces both list lock contention // and list coherency traffic, but also tends to increase the // number of ObjectMonitors in circulation as well as the STW // scavenge costs. As usual, we lean toward time in space-time // tradeoffs. const int MAXPRIVATE = 1024; + stringStream ss; for (;;) { ObjectMonitor* m; // 1: try to allocate from the thread's local om_free_list. // Threads will attempt to allocate first from their local list, then - // from the global list, and only after those attempts fail will the thread - // attempt to instantiate new monitors. Thread-local free lists take - // heat off the gListLock and improve allocation latency, as well as reducing - // coherency traffic on the shared global list. - m = self->om_free_list; + // from the global list, and only after those attempts fail will the + // thread attempt to instantiate new monitors. Thread-local free lists + // improve allocation latency, as well as reducing coherency traffic + // on the shared global list. + m = take_from_start_of_om_free_list(self); if (m != NULL) { - self->om_free_list = m->_next_om; - self->om_free_count--; guarantee(m->object() == NULL, "invariant"); - m->_next_om = self->om_in_use_list; - self->om_in_use_list = m; - self->om_in_use_count++; + m->set_allocation_state(ObjectMonitor::New); + prepend_to_om_in_use_list(self, m); return m; } @@ -1059,25 +1536,42 @@ // If the muxTry() fails then drop immediately into case 3. 
// If we're using thread-local free lists then try // to reprovision the caller's free list. - if (g_free_list != NULL) { + if (OrderAccess::load_acquire(&g_free_list) != NULL) { // Reprovision the thread's om_free_list. // Use bulk transfers to reduce the allocation rate and heat // on various locks. - Thread::muxAcquire(&gListLock, "om_alloc(1)"); - for (int i = self->om_free_provision; --i >= 0 && g_free_list != NULL;) { - g_om_free_count--; - ObjectMonitor* take = g_free_list; - g_free_list = take->_next_om; + for (int i = self->om_free_provision; --i >= 0;) { + ObjectMonitor* take = take_from_start_of_g_free_list(); + if (take == NULL) { + break; // No more are available. + } guarantee(take->object() == NULL, "invariant"); + if (AsyncDeflateIdleMonitors) { + // We allowed 3 field values to linger during async deflation. + // We clear header and restore ref_count here, but we leave + // owner == DEFLATER_MARKER so the simple C2 ObjectMonitor + // enter optimization can no longer race with async deflation + // and reuse. + take->set_header(markWord::zero()); + if (take->ref_count() < 0) { + // Add back max_jint to restore the ref_count field to its + // proper value. + Atomic::add(max_jint, &take->_ref_count); + + assert(take->ref_count() >= 0, "must not be negative: ref_count=%d", + take->ref_count()); + } + } take->Recycle(); + assert(take->is_free(), "invariant"); om_release(self, take, false); } - Thread::muxRelease(&gListLock); self->om_free_provision += 1 + (self->om_free_provision/2); if (self->om_free_provision > MAXPRIVATE) self->om_free_provision = MAXPRIVATE; - const int mx = MonitorBound; - if (mx > 0 && (g_om_population-g_om_free_count) > mx) { + if (!AsyncDeflateIdleMonitors && + is_MonitorBound_exceeded(OrderAccess::load_acquire(&g_om_population) - + OrderAccess::load_acquire(&g_om_free_count))) { // Not enough ObjectMonitors on the global free list. 
// We can't safely induce a STW safepoint from om_alloc() as our thread // state may not be appropriate for such activities and callers may hold @@ -1098,10 +1592,10 @@ assert(_BLOCKSIZE > 1, "invariant"); size_t neededsize = sizeof(PaddedObjectMonitor) * _BLOCKSIZE; PaddedObjectMonitor* temp; - size_t aligned_size = neededsize + (DEFAULT_CACHE_LINE_SIZE - 1); + size_t aligned_size = neededsize + (OM_CACHE_LINE_SIZE - 1); void* real_malloc_addr = (void*)NEW_C_HEAP_ARRAY(char, aligned_size, mtInternal); - temp = (PaddedObjectMonitor*)align_up(real_malloc_addr, DEFAULT_CACHE_LINE_SIZE); + temp = (PaddedObjectMonitor*)align_up(real_malloc_addr, OM_CACHE_LINE_SIZE); // NOTE: (almost) no way to recover if allocation failed. // We might be able to induce a STW safepoint and scavenge enough @@ -1121,11 +1615,12 @@ // look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; } for (int i = 1; i < _BLOCKSIZE; i++) { - temp[i]._next_om = (ObjectMonitor *)&temp[i+1]; + OrderAccess::release_store(&temp[i]._next_om, (ObjectMonitor*)&temp[i+1]); + assert(temp[i].is_free(), "invariant"); } // terminate the last monitor as the end of list - temp[_BLOCKSIZE - 1]._next_om = NULL; + OrderAccess::release_store(&temp[_BLOCKSIZE - 1]._next_om, (ObjectMonitor*)NULL); // Element [0] is reserved for global list linkage temp[0].set_object(CHAINMARKER); @@ -1134,24 +1629,7 @@ // block in hand. This avoids some lock traffic and redundant // list activity. - // Acquire the gListLock to manipulate g_block_list and g_free_list. - // An Oyama-Taura-Yonezawa scheme might be more efficient. - Thread::muxAcquire(&gListLock, "om_alloc(2)"); - g_om_population += _BLOCKSIZE-1; - g_om_free_count += _BLOCKSIZE-1; - - // Add the new block to the list of extant blocks (g_block_list). - // The very first ObjectMonitor in a block is reserved and dedicated. - // It serves as blocklist "next" linkage. 
- temp[0]._next_om = g_block_list; - // There are lock-free uses of g_block_list so make sure that - // the previous stores happen before we update g_block_list. - OrderAccess::release_store(&g_block_list, temp); - - // Add the new string of ObjectMonitors to the global free list - temp[_BLOCKSIZE - 1]._next_om = g_free_list; - g_free_list = temp + 1; - Thread::muxRelease(&gListLock); + prepend_block_to_lists(temp); } } @@ -1164,8 +1642,8 @@ // // Key constraint: all ObjectMonitors on a thread's free list and the global // free list must have their object field set to null. This prevents the -// scavenger -- deflate_monitor_list() -- from reclaiming them while we -// are trying to release them. +// scavenger -- deflate_monitor_list() or deflate_monitor_list_using_JT() +// -- from reclaiming them while we are trying to release them. void ObjectSynchronizer::om_release(Thread* self, ObjectMonitor* m, bool from_per_thread_alloc) { @@ -1175,31 +1653,71 @@ guarantee((m->is_busy() | m->_recursions) == 0, "freeing in-use monitor: " "%s, recursions=" INTPTR_FORMAT, m->is_busy_to_string(&ss), m->_recursions); + m->set_allocation_state(ObjectMonitor::Free); // _next_om is used for both per-thread in-use and free lists so // we have to remove 'm' from the in-use list first (as needed). if (from_per_thread_alloc) { // Need to remove 'm' from om_in_use_list. + // We use the more complicated mark-cur_mid_in_use-and-mid-as-we-go + // protocol because async deflation can do list deletions in parallel. 
ObjectMonitor* cur_mid_in_use = NULL; + ObjectMonitor* mid = NULL; + ObjectMonitor* next = NULL; bool extracted = false; - for (ObjectMonitor* mid = self->om_in_use_list; mid != NULL; cur_mid_in_use = mid, mid = mid->_next_om) { + + if (!mark_list_head(&self->om_in_use_list, &mid, &next)) { + fatal("thread=" INTPTR_FORMAT " in-use list must not be empty.", p2i(self)); + } + while (true) { if (m == mid) { - // extract from per-thread in-use list - if (mid == self->om_in_use_list) { - self->om_in_use_list = mid->_next_om; - } else if (cur_mid_in_use != NULL) { - cur_mid_in_use->_next_om = mid->_next_om; // maintain the current thread in-use list + // We found 'm' on the per-thread in-use list so try to extract it. + // First try the list head: + if (Atomic::cmpxchg(next, &self->om_in_use_list, mid) != mid) { + // We could not switch the list head to next. + ObjectMonitor* marked_mid = mark_om_ptr(mid); + // Switch cur_mid_in_use's next field to next (which also + // unmarks cur_mid_in_use): + ADIM_guarantee(cur_mid_in_use != NULL, "must not be NULL"); + if (Atomic::cmpxchg(next, &cur_mid_in_use->_next_om, marked_mid) + != marked_mid) { + // We could not switch cur_mid_in_use's next field. This + // should not be possible since it was marked so we: + fatal("mid=" INTPTR_FORMAT " must be referred to by the list " + "head: &om_in_use_list=" INTPTR_FORMAT " or by " + "cur_mid_in_use's next field: cur_mid_in_use=" INTPTR_FORMAT + ", next_om=" INTPTR_FORMAT, p2i(mid), + p2i((ObjectMonitor**)&self->om_in_use_list), + p2i(cur_mid_in_use), p2i(cur_mid_in_use->_next_om)); + } } extracted = true; - self->om_in_use_count--; + Atomic::dec(&self->om_in_use_count); + // Unmark mid, but leave the next value for any lagging list + // walkers. 
It will get cleaned up when mid is prepended to + // the thread's free list: + set_next(mid, next); break; } + if (cur_mid_in_use != NULL) { + set_next(cur_mid_in_use, mid); // unmark cur_mid_in_use + } + // The next cur_mid_in_use keeps mid's marked next field so + // that it is stable for a possible next field change. It + // cannot be deflated while it is marked. + cur_mid_in_use = mid; + mid = next; + if (mid == NULL) { + // Reached end of the list and didn't find m so: + fatal("must find m=" INTPTR_FORMAT "on om_in_use_list=" INTPTR_FORMAT, + p2i(m), p2i(self->om_in_use_list)); + } + // Mark mid's next field so we can possibly extract it: + next = mark_next_loop(mid); } - assert(extracted, "Should have extracted from in-use list"); } - m->_next_om = self->om_free_list; - self->om_free_list = m; - self->om_free_count++; + prepend_to_om_free_list(self, m); + guarantee(m->is_free(), "invariant"); } // Return ObjectMonitors on a moribund thread's free and in-use @@ -1214,62 +1732,110 @@ // scanned by a GC safepoint, either via Thread::oops_do() (before // om_flush() is called) or via ObjectSynchronizer::oops_do() (after // om_flush() is called). +// +// With AsyncDeflateIdleMonitors, deflate_global_idle_monitors_using_JT() +// and deflate_per_thread_idle_monitors_using_JT() (in another thread) can +// run at the same time as om_flush() so we have to follow a careful +// protocol to prevent list corruption. void ObjectSynchronizer::om_flush(Thread* self) { - ObjectMonitor* free_list = self->om_free_list; - ObjectMonitor* free_tail = NULL; + // This function can race with an async deflater thread. Since + // deflation has to process the per-thread in-use list before + // prepending the deflated ObjectMonitors to the global free list, + // we process the per-thread lists in the same order to prevent + // ordering races. 
+ int in_use_count = 0; + ObjectMonitor* in_use_list = NULL; + ObjectMonitor* in_use_tail = NULL; + ObjectMonitor* next = NULL; + + // An async deflation thread checks to see if the target thread + // is exiting, but if it has made it past that check before we + // started exiting, then it is racing to get to the in-use list. + if (mark_list_head(&self->om_in_use_list, &in_use_list, &next)) { + chk_for_list_loop(in_use_list, OrderAccess::load_acquire(&self->om_in_use_count)); + // At this point, we have marked the in-use list head so an + // async deflation thread cannot come in after us. If an async + // deflation thread is ahead of us, then we'll detect that and + // wait for it to finish its work. + // + // The thread is going away, however the ObjectMonitors on the + // om_in_use_list may still be in-use by other threads. Link + // them to in_use_tail, which will be linked into the global + // in-use list g_om_in_use_list below. + // + // Account for the in-use list head before the loop since it is + // already marked (by this thread): + in_use_tail = in_use_list; + in_use_count++; + for (ObjectMonitor* cur_om = unmarked_next(in_use_list); cur_om != NULL;) { + if (is_next_marked(cur_om)) { + // This next field is marked so there must be an async deflater + // thread ahead of us so we'll give it a chance to finish. + while (is_next_marked(cur_om)) { + os::naked_short_sleep(1); + } + // Refetch the possibly changed next field and try again. + cur_om = unmarked_next(in_use_tail); + continue; + } + if (!cur_om->is_active()) { + // cur_om was deflated and the allocation state was changed + // to Free while it was marked. We happened to see it just + // after it was unmarked (and added to the free list). + // Refetch the possibly changed next field and try again. 
+ cur_om = unmarked_next(in_use_tail); + continue; + } + in_use_tail = cur_om; + in_use_count++; + cur_om = unmarked_next(cur_om); + } + guarantee(in_use_tail != NULL, "invariant"); + int l_om_in_use_count = OrderAccess::load_acquire(&self->om_in_use_count); + ADIM_guarantee(l_om_in_use_count == in_use_count, "in-use counts don't " + "match: l_om_in_use_count=%d, in_use_count=%d", + l_om_in_use_count, in_use_count); + // Clear the in-use count before unmarking the in-use list head + // to avoid races: + OrderAccess::release_store(&self->om_in_use_count, 0); + // Clear the in-use list head (which also unmarks it): + OrderAccess::release_store(&self->om_in_use_list, (ObjectMonitor*)NULL); + // Unmark the disconnected list head: + set_next(in_use_list, next); + } + int free_count = 0; + ObjectMonitor* free_list = OrderAccess::load_acquire(&self->om_free_list); + ObjectMonitor* free_tail = NULL; if (free_list != NULL) { - ObjectMonitor* s; + chk_for_list_loop(free_list, OrderAccess::load_acquire(&self->om_free_count)); // The thread is going away. Set 'free_tail' to the last per-thread free - // monitor which will be linked to g_free_list below under the gListLock. + // monitor which will be linked to g_free_list below. 
stringStream ss; - for (s = free_list; s != NULL; s = s->_next_om) { + for (ObjectMonitor* s = free_list; s != NULL; s = unmarked_next(s)) { free_count++; free_tail = s; guarantee(s->object() == NULL, "invariant"); guarantee(!s->is_busy(), "must be !is_busy: %s", s->is_busy_to_string(&ss)); } guarantee(free_tail != NULL, "invariant"); - assert(self->om_free_count == free_count, "free-count off"); - self->om_free_list = NULL; - self->om_free_count = 0; + int l_om_free_count = OrderAccess::load_acquire(&self->om_free_count); + ADIM_guarantee(l_om_free_count == free_count, "free counts don't match: " + "l_om_free_count=%d, free_count=%d", l_om_free_count, + free_count); + OrderAccess::release_store(&self->om_free_list, (ObjectMonitor*)NULL); + OrderAccess::release_store(&self->om_free_count, 0); } - ObjectMonitor* in_use_list = self->om_in_use_list; - ObjectMonitor* in_use_tail = NULL; - int in_use_count = 0; - if (in_use_list != NULL) { - // The thread is going away, however the ObjectMonitors on the - // om_in_use_list may still be in-use by other threads. Link - // them to in_use_tail, which will be linked into the global - // in-use list g_om_in_use_list below, under the gListLock. 
- ObjectMonitor *cur_om; - for (cur_om = in_use_list; cur_om != NULL; cur_om = cur_om->_next_om) { - in_use_tail = cur_om; - in_use_count++; - } - guarantee(in_use_tail != NULL, "invariant"); - assert(self->om_in_use_count == in_use_count, "in-use count off"); - self->om_in_use_list = NULL; - self->om_in_use_count = 0; - } - - Thread::muxAcquire(&gListLock, "om_flush"); if (free_tail != NULL) { - free_tail->_next_om = g_free_list; - g_free_list = free_list; - g_om_free_count += free_count; + prepend_list_to_g_free_list(free_list, free_tail, free_count); } if (in_use_tail != NULL) { - in_use_tail->_next_om = g_om_in_use_list; - g_om_in_use_list = in_use_list; - g_om_in_use_count += in_use_count; + prepend_list_to_g_om_in_use_list(in_use_list, in_use_tail, in_use_count); } - Thread::muxRelease(&gListLock); - LogStreamHandle(Debug, monitorinflation) lsh_debug; LogStreamHandle(Info, monitorinflation) lsh_info; LogStream* ls = NULL; @@ -1298,19 +1864,28 @@ } // Fast path code shared by multiple functions -void ObjectSynchronizer::inflate_helper(oop obj) { - markWord mark = obj->mark(); - if (mark.has_monitor()) { - assert(ObjectSynchronizer::verify_objmon_isinpool(mark.monitor()), "monitor is invalid"); - assert(mark.monitor()->header().is_neutral(), "monitor must record a good object header"); +void ObjectSynchronizer::inflate_helper(ObjectMonitorHandle* omh_p, oop obj) { + while (true) { + markWord mark = obj->mark(); + if (mark.has_monitor()) { + if (!omh_p->save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. 
+ assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* monitor = omh_p->om_ptr(); + assert(ObjectSynchronizer::verify_objmon_isinpool(monitor), "monitor is invalid"); + markWord dmw = monitor->header(); + assert(dmw.is_neutral(), "sanity check: header=" INTPTR_FORMAT, dmw.value()); + return; + } + inflate(omh_p, Thread::current(), obj, inflate_cause_vm_internal); return; } - inflate(Thread::current(), obj, inflate_cause_vm_internal); } -ObjectMonitor* ObjectSynchronizer::inflate(Thread* self, - oop object, - const InflateCause cause) { +void ObjectSynchronizer::inflate(ObjectMonitorHandle* omh_p, Thread* self, + oop object, const InflateCause cause) { // Inflate mutates the heap ... // Relaxing assertion for bug 6320749. assert(Universe::verify_in_progress() || @@ -1331,12 +1906,17 @@ // CASE: inflated if (mark.has_monitor()) { - ObjectMonitor* inf = mark.monitor(); + if (!omh_p->save_om_ptr(object, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* inf = omh_p->om_ptr(); markWord dmw = inf->header(); assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); assert(oopDesc::equals((oop) inf->object(), object), "invariant"); assert(ObjectSynchronizer::verify_objmon_isinpool(inf), "monitor is invalid"); - return inf; + return; } // CASE: inflation in progress - inflating over a stack-lock. @@ -1372,7 +1952,7 @@ LogStreamHandle(Trace, monitorinflation) lsh; if (mark.has_locker()) { - ObjectMonitor* m = om_alloc(self); + ObjectMonitor* m = om_alloc(self, cause); // Optimistically prepare the objectmonitor - anticipate successful CAS // We do this before the CAS in order to minimize the length of time // in which INFLATING appears in the mark. @@ -1419,7 +1999,7 @@ markWord dmw = mark.displaced_mark_helper(); // Catch if the object's header is not neutral (not locked and // not marked is what we care about here). 
- assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); + ADIM_guarantee(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); // Setup monitor fields to proper values -- prepare the monitor m->set_header(dmw); @@ -1433,6 +2013,10 @@ m->set_object(object); // TODO-FIXME: assert BasicLock->dhw != 0. + omh_p->set_om_ptr(m); + assert(m->is_new(), "freshly allocated monitor must be new"); + m->set_allocation_state(ObjectMonitor::Old); + // Must preserve store ordering. The monitor state must // be stable at the time of publishing the monitor address. guarantee(object->mark() == markWord::INFLATING(), "invariant"); @@ -1450,7 +2034,8 @@ if (event.should_commit()) { post_monitor_inflate_event(&event, object, cause); } - return m; + ADIM_guarantee(!m->is_free(), "inflated monitor to be returned cannot be free"); + return; } // CASE: neutral @@ -1465,19 +2050,32 @@ // Catch if the object's header is not neutral (not locked and // not marked is what we care about here). - assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value()); - ObjectMonitor* m = om_alloc(self); + ADIM_guarantee(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT,mark.value()); + ObjectMonitor* m = om_alloc(self, cause); // prepare m for installation - set monitor to initial state m->Recycle(); m->set_header(mark); + // If we leave _owner == DEFLATER_MARKER here, then the simple C2 + // ObjectMonitor enter optimization can no longer race with async + // deflation and reuse. 
m->set_object(object); m->_Responsible = NULL; m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // consider: keep metastats by type/class + omh_p->set_om_ptr(m); + assert(m->is_new(), "freshly allocated monitor must be new"); + m->set_allocation_state(ObjectMonitor::Old); + if (object->cas_set_mark(markWord::encode(m), mark) != mark) { + guarantee(!m->owner_is_DEFLATER_MARKER() || m->ref_count() >= 0, + "race between deflation and om_release() with m=" INTPTR_FORMAT + ", _owner=" INTPTR_FORMAT ", ref_count=%d", p2i(m), + p2i(m->_owner), m->ref_count()); m->set_header(markWord::zero()); m->set_object(NULL); m->Recycle(); + omh_p->set_om_ptr(NULL); + // om_release() will reset the allocation state om_release(self, m, true); m = NULL; continue; @@ -1498,13 +2096,15 @@ if (event.should_commit()) { post_monitor_inflate_event(&event, object, cause); } - return m; + ADIM_guarantee(!m->is_free(), "inflated monitor to be returned cannot be free"); + return; } } // We maintain a list of in-use monitors for each thread. // +// For safepoint based deflation: // deflate_thread_local_monitors() scans a single thread's in-use list, while // deflate_idle_monitors() scans only a global list of in-use monitors which // is populated only as a thread dies (see om_flush()). @@ -1523,6 +2123,31 @@ // typically drives the scavenge rate. Large heaps can mean infrequent GC, // which in turn can mean large(r) numbers of ObjectMonitors in circulation. // This is an unfortunate aspect of this design. +// +// For async deflation: +// If a special deflation request is made, then the safepoint based +// deflation mechanism is used. Otherwise, an async deflation request +// is registered with the ServiceThread and it is notified. + +void ObjectSynchronizer::do_safepoint_work(DeflateMonitorCounters* counters) { + assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + + // The per-thread in-use lists are handled in + // ParallelSPCleanupThreadClosure::do_thread(). 
+ + if (!AsyncDeflateIdleMonitors || is_special_deflation_requested()) { + // Use the older mechanism for the global in-use list or if a + // special deflation has been requested before the safepoint. + ObjectSynchronizer::deflate_idle_monitors(counters); + return; + } + + log_debug(monitorinflation)("requesting async deflation of idle monitors."); + // Request deflation of idle monitors by the ServiceThread: + set_is_async_deflation_requested(true); + MonitorLocker ml(Service_lock, Mutex::_no_safepoint_check_flag); + ml.notify_all(); +} // Deflate a single monitor if not in-use // Return true if deflated, false if in-use @@ -1541,7 +2166,9 @@ const markWord dmw = mid->header(); guarantee(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); - if (mid->is_busy()) { + if (mid->is_busy() || mid->ref_count() != 0) { + // Easy checks are first - the ObjectMonitor is busy or ObjectMonitor* + // is in use so no deflation. deflated = false; } else { // Deflate the monitor if it is no longer being used @@ -1557,21 +2184,33 @@ // Restore the header back to obj obj->release_set_mark(dmw); + if (AsyncDeflateIdleMonitors) { + // clear() expects the owner field to be NULL and we won't race + // with the simple C2 ObjectMonitor enter optimization since + // we're at a safepoint. + mid->set_owner(NULL); + } mid->clear(); assert(mid->object() == NULL, "invariant: object=" INTPTR_FORMAT, p2i(mid->object())); + assert(mid->is_free(), "invariant"); // Move the deflated ObjectMonitor to the working free list - // defined by free_head_p and free_tail_p. + // defined by free_head_p and free_tail_p. No races on this list + // so no need for load_acquire() or store_release(). if (*free_head_p == NULL) *free_head_p = mid; if (*free_tail_p != NULL) { // We append to the list so the caller can use mid->_next_om // to fix the linkages in its context. 
ObjectMonitor* prevtail = *free_tail_p; // Should have been cleaned up by the caller: - assert(prevtail->_next_om == NULL, "cleaned up deflated?"); - prevtail->_next_om = mid; + // Note: Should not have to mark prevtail here since we're at a + // safepoint and ObjectMonitors on the local free list should + // not be accessed in parallel. + assert(prevtail->_next_om == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(prevtail->_next_om)); + set_next(prevtail, mid); } *free_tail_p = mid; // At this point, mid->_next_om still refers to its current @@ -1583,9 +2222,150 @@ return deflated; } -// Walk a given monitor list, and deflate idle monitors -// The given list could be a per-thread list or a global list -// Caller acquires gListLock as needed. +// Deflate the specified ObjectMonitor if not in-use using a JavaThread. +// Returns true if it was deflated and false otherwise. +// +// The async deflation protocol sets owner to DEFLATER_MARKER and +// makes ref_count negative as signals to contending threads that +// an async deflation is in progress. There are a number of checks +// as part of the protocol to make sure that the calling thread has +// not lost the race to a contending thread or to a thread that just +// wants to use the ObjectMonitor*. +// +// The ObjectMonitor has been successfully async deflated when: +// (owner == DEFLATER_MARKER && ref_count < 0) +// Contending threads or ObjectMonitor* using threads that see those +// values know to retry their operation. +// +bool ObjectSynchronizer::deflate_monitor_using_JT(ObjectMonitor* mid, + ObjectMonitor** free_head_p, + ObjectMonitor** free_tail_p) { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + // A newly allocated ObjectMonitor should not be seen here so we + // avoid an endless inflate/deflate cycle. 
+ assert(mid->is_old(), "must be old: allocation_state=%d", + (int) mid->allocation_state()); + + if (mid->is_busy() || mid->ref_count() != 0) { + // Easy checks are first - the ObjectMonitor is busy or ObjectMonitor* + // is in use so no deflation. + return false; + } + + if (Atomic::replace_if_null(DEFLATER_MARKER, &(mid->_owner))) { + // ObjectMonitor is not owned by another thread. Our setting + // owner to DEFLATER_MARKER forces any contending thread through + // the slow path. This is just the first part of the async + // deflation dance. + + if (mid->_contentions != 0 || mid->_waiters != 0) { + // Another thread has raced to enter the ObjectMonitor after + // mid->is_busy() above or has already entered and waited on + // it which makes it busy so no deflation. Restore owner to + // NULL if it is still DEFLATER_MARKER. + Atomic::cmpxchg((void*)NULL, &mid->_owner, DEFLATER_MARKER); + return false; + } + + if (Atomic::cmpxchg(-max_jint, &mid->_ref_count, (jint)0) == 0) { + // Make ref_count negative to force any contending threads or + // ObjectMonitor* using threads to retry. This is the second + // part of the async deflation dance. + + if (mid->owner_is_DEFLATER_MARKER()) { + // If owner is still DEFLATER_MARKER, then we have successfully + // signaled any contending threads to retry. If it is not, then we + // have lost the race to an entering thread and the ObjectMonitor + // is now busy. This is the third and final part of the async + // deflation dance. + // Note: This owner check solves the ABA problem with ref_count + // where another thread acquired the ObjectMonitor, finished + // using it and restored the ref_count to zero. 
+ + // Sanity checks for the races: + guarantee(mid->_contentions == 0, "must be 0: contentions=%d", + mid->_contentions); + guarantee(mid->_waiters == 0, "must be 0: waiters=%d", mid->_waiters); + guarantee(mid->_cxq == NULL, "must be no contending threads: cxq=" + INTPTR_FORMAT, p2i(mid->_cxq)); + guarantee(mid->_EntryList == NULL, + "must be no entering threads: EntryList=" INTPTR_FORMAT, + p2i(mid->_EntryList)); + + const oop obj = (oop) mid->object(); + if (log_is_enabled(Trace, monitorinflation)) { + ResourceMark rm; + log_trace(monitorinflation)("deflate_monitor_using_JT: " + "object=" INTPTR_FORMAT ", mark=" + INTPTR_FORMAT ", type='%s'", + p2i(obj), obj->mark().value(), + obj->klass()->external_name()); + } + + // Install the old mark word if nobody else has already done it. + mid->install_displaced_markword_in_object(obj); + mid->clear_using_JT(); + + assert(mid->object() == NULL, "must be NULL: object=" INTPTR_FORMAT, + p2i(mid->object())); + assert(mid->is_free(), "must be free: allocation_state=%d", + (int) mid->allocation_state()); + + // Move the deflated ObjectMonitor to the working free list + // defined by free_head_p and free_tail_p. No races on this list + // so no need for load_acquire() or store_release(). + if (*free_head_p == NULL) { + // First one on the list. + *free_head_p = mid; + } + if (*free_tail_p != NULL) { + // We append to the list so the caller can use mid->_next_om + // to fix the linkages in its context. + ObjectMonitor* prevtail = *free_tail_p; + // Should have been cleaned up by the caller: + ObjectMonitor* next = mark_next_loop(prevtail); + assert(unmarked_next(prevtail) == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(unmarked_next(prevtail))); + set_next(prevtail, mid); // prevtail now points to mid (and is unmarked) + } + *free_tail_p = mid; + + // At this point, mid->_next_om still refers to its current + // value and another ObjectMonitor's _next_om field still + // refers to this ObjectMonitor. 
Those linkages have to be + // cleaned up by the caller who has the complete context. + + // We leave owner == DEFLATER_MARKER and ref_count < 0 + // to force any racing threads to retry. + return true; // Success, ObjectMonitor has been deflated. + } + + // The owner was changed from DEFLATER_MARKER so we lost the + // race since the ObjectMonitor is now busy. + + // Add back max_jint to restore the ref_count field to its + // proper value (which may not be what we saw above): + Atomic::add(max_jint, &mid->_ref_count); + + assert(mid->ref_count() >= 0, "must not be negative: ref_count=%d", + mid->ref_count()); + return false; + } + + // The ref_count was no longer 0 so we lost the race since the + // ObjectMonitor is now busy or the ObjectMonitor* is now in use. + // Restore owner to NULL if it is still DEFLATER_MARKER: + Atomic::cmpxchg((void*)NULL, &mid->_owner, DEFLATER_MARKER); + } + + // The owner field is no longer NULL so we lost the race since the + // ObjectMonitor is now busy. + return false; +} + +// Walk a given monitor list, and deflate idle monitors. +// The given list could be a per-thread list or a global list. // // In the case of parallel processing of thread local monitor lists, // work is done by Threads::parallel_threads_do() which ensures that @@ -1596,47 +2376,237 @@ // See also ParallelSPCleanupTask and // SafepointSynchronize::do_cleanup_tasks() in safepoint.cpp and // Threads::parallel_java_threads_do() in thread.cpp. 
-int ObjectSynchronizer::deflate_monitor_list(ObjectMonitor** list_p, +int ObjectSynchronizer::deflate_monitor_list(ObjectMonitor* volatile * list_p, + int volatile * count_p, ObjectMonitor** free_head_p, ObjectMonitor** free_tail_p) { - ObjectMonitor* mid; - ObjectMonitor* next; ObjectMonitor* cur_mid_in_use = NULL; + ObjectMonitor* mid = NULL; + ObjectMonitor* next = NULL; int deflated_count = 0; - for (mid = *list_p; mid != NULL;) { + // We use the simpler mark-mid-as-we-go protocol since there are no + // parallel list deletions since we are at a safepoint. + if (!mark_list_head(list_p, &mid, &next)) { + return 0; // The list is empty so nothing to deflate. + } + + while (true) { oop obj = (oop) mid->object(); if (obj != NULL && deflate_monitor(mid, obj, free_head_p, free_tail_p)) { // Deflation succeeded and already updated free_head_p and // free_tail_p as needed. Finish the move to the local free list // by unlinking mid from the global or per-thread in-use list. - if (mid == *list_p) { - *list_p = mid->_next_om; - } else if (cur_mid_in_use != NULL) { - cur_mid_in_use->_next_om = mid->_next_om; // maintain the current thread in-use list + if (Atomic::cmpxchg(next, list_p, mid) != mid) { + // We could not switch the list head to next. 
+ ADIM_guarantee(cur_mid_in_use != NULL, "must not be NULL"); + if (Atomic::cmpxchg(next, &cur_mid_in_use->_next_om, mid) != mid) { + // deflate_monitor_list() is called at a safepoint so the + // global or per-thread in-use list should not be modified + // in parallel so we: + fatal("mid=" INTPTR_FORMAT " must be referred to by the list head: " + "list_p=" INTPTR_FORMAT " or by cur_mid_in_use's next field: " + "cur_mid_in_use=" INTPTR_FORMAT ", next_om=" INTPTR_FORMAT, + p2i(mid), p2i((ObjectMonitor**)list_p), p2i(cur_mid_in_use), + p2i(cur_mid_in_use->_next_om)); + } } - next = mid->_next_om; - mid->_next_om = NULL; // This mid is current tail in the free_head_p list + // At this point mid is disconnected from the in-use list so + // its marked next field no longer has any effects. + deflated_count++; + Atomic::dec(count_p); + chk_for_list_loop(OrderAccess::load_acquire(list_p), + OrderAccess::load_acquire(count_p)); + chk_om_not_on_list(mid, OrderAccess::load_acquire(list_p), + OrderAccess::load_acquire(count_p)); + // mid is current tail in the free_head_p list so NULL terminate it + // (which also unmarks it): + set_next(mid, NULL); + + // All the list management is done so move on to the next one: + mid = next; + } else { + set_next(mid, next); // unmark next field + + // All the list management is done so move on to the next one: + cur_mid_in_use = mid; mid = next; + } + if (mid == NULL) { + break; // Reached end of the list so nothing more to deflate. + } + // Mark mid's next field so we can possibly deflate it: + next = mark_next_loop(mid); + } + return deflated_count; +} + +// Walk a given ObjectMonitor list and deflate idle ObjectMonitors using +// a JavaThread. Returns the number of deflated ObjectMonitors. The given +// list could be a per-thread in-use list or the global in-use list. +// If a safepoint has started, then we save state via saved_mid_in_use_p +// and return to the caller to honor the safepoint. 
+// +int ObjectSynchronizer::deflate_monitor_list_using_JT(ObjectMonitor* volatile * list_p, + int volatile * count_p, + ObjectMonitor** free_head_p, + ObjectMonitor** free_tail_p, + ObjectMonitor** saved_mid_in_use_p) { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + + ObjectMonitor* cur_mid_in_use = NULL; + ObjectMonitor* mid = NULL; + ObjectMonitor* next = NULL; + ObjectMonitor* next_next = NULL; + int deflated_count = 0; + + // We use the more complicated mark-cur_mid_in_use-and-mid-as-we-go + // protocol because om_release() can do list deletions in parallel. + // We also mark-next-next-as-we-go to prevent an om_flush() that is + // behind this thread from passing us. + if (*saved_mid_in_use_p == NULL) { + // No saved state so start at the beginning. + // Mark the list head's next field so we can possibly deflate it: + if (!mark_list_head(list_p, &mid, &next)) { + return 0; // The list is empty so nothing to deflate. + } + } else { + // We're restarting after a safepoint so restore the necessary state + // before we resume. + cur_mid_in_use = *saved_mid_in_use_p; + // Mark cur_mid_in_use's next field so we can possibly update its + // next field to extract a deflated ObjectMonitor. + mid = mark_next_loop(cur_mid_in_use); + if (mid == NULL) { + set_next(cur_mid_in_use, NULL); // unmark next field + *saved_mid_in_use_p = NULL; + return 0; // The remainder is empty so nothing more to deflate. + } + // Mark mid's next field so we can possibly deflate it: + next = mark_next_loop(mid); + } + + while (true) { + // The current mid's next field is marked at this point. If we have + // a cur_mid_in_use, then its next field is also marked at this point. + + if (next != NULL) { + // We mark the next -> next field so that an om_flush() + // thread that is behind us cannot pass us when we + // unmark the current mid's next field. 
+ next_next = mark_next_loop(next); + } + + // Only try to deflate if there is an associated Java object and if + // mid is old (is not newly allocated and is not newly freed). + if (mid->object() != NULL && mid->is_old() && + deflate_monitor_using_JT(mid, free_head_p, free_tail_p)) { + // Deflation succeeded and already updated free_head_p and + // free_tail_p as needed. Finish the move to the local free list + // by unlinking mid from the global or per-thread in-use list. + if (Atomic::cmpxchg(next, list_p, mid) != mid) { + // We could not switch the list head to next. + ObjectMonitor* marked_mid = mark_om_ptr(mid); + ObjectMonitor* marked_next = mark_om_ptr(next); + // Switch cur_mid_in_use's next field to marked next: + ADIM_guarantee(cur_mid_in_use != NULL, "must not be NULL"); + if (Atomic::cmpxchg(marked_next, &cur_mid_in_use->_next_om, + marked_mid) != marked_mid) { + // We could not switch cur_mid_in_use's next field. This + // should not be possible since it was marked so we: + fatal("mid=" INTPTR_FORMAT " must be referred to by the list head: " + "&list_p=" INTPTR_FORMAT " or by cur_mid_in_use's next field: " + "cur_mid_in_use=" INTPTR_FORMAT ", next_om=" INTPTR_FORMAT, + p2i(mid), p2i((ObjectMonitor**)list_p), p2i(cur_mid_in_use), + p2i(cur_mid_in_use->_next_om)); + } + } + // At this point mid is disconnected from the in-use list so + // its marked next field no longer has any effects. 
deflated_count++; + Atomic::dec(count_p); + chk_for_list_loop(OrderAccess::load_acquire(list_p), + OrderAccess::load_acquire(count_p)); + chk_om_not_on_list(mid, OrderAccess::load_acquire(list_p), + OrderAccess::load_acquire(count_p)); + // mid is current tail in the free_head_p list so NULL terminate it + // (which also unmarks it): + set_next(mid, NULL); + + // All the list management is done so move on to the next one: + mid = next; // mid keeps non-NULL next's marked next field + next = next_next; } else { + // mid is considered in-use if it does not have an associated + // Java object or mid is not old or deflation did not succeed. + // A mid->is_new() node can be seen here when it is freshly + // returned by om_alloc() (and skips the deflation code path). + // A mid->is_old() node can be seen here when deflation failed. + // A mid->is_free() node can be seen here when a fresh node from + // om_alloc() is released by om_release() due to losing the race + // in inflate(). + + // All the list management is done so move on to the next one: + if (cur_mid_in_use != NULL) { + set_next(cur_mid_in_use, mid); // unmark cur_mid_in_use + } + // The next cur_mid_in_use keeps mid's marked next field so + // that it is stable for a possible next field change. It + // cannot be modified by om_release() while it is marked. cur_mid_in_use = mid; - mid = mid->_next_om; + mid = next; // mid keeps non-NULL next's marked next field + next = next_next; + + if (SafepointSynchronize::is_synchronizing() && + cur_mid_in_use != OrderAccess::load_acquire(list_p) && + cur_mid_in_use->is_old()) { + // If a safepoint has started and cur_mid_in_use is not the list + // head and is old, then it is safe to use as saved state. Return + // to the caller before blocking. 
+ *saved_mid_in_use_p = cur_mid_in_use; + set_next(cur_mid_in_use, mid); // unmark cur_mid_in_use + if (mid != NULL) { + set_next(mid, next); // unmark mid + } + return deflated_count; + } + } + if (mid == NULL) { + if (cur_mid_in_use != NULL) { + set_next(cur_mid_in_use, mid); // unmark cur_mid_in_use + } + break; // Reached end of the list so nothing more to deflate. } + + // The current mid's next field is marked at this point. If we have + // a cur_mid_in_use, then its next field is also marked at this point. } + // We finished the list without a safepoint starting so there's + // no need to save state. + *saved_mid_in_use_p = NULL; return deflated_count; } void ObjectSynchronizer::prepare_deflate_idle_monitors(DeflateMonitorCounters* counters) { - counters->n_in_use = 0; // currently associated with objects - counters->n_in_circulation = 0; // extant - counters->n_scavenged = 0; // reclaimed (global and per-thread) - counters->per_thread_scavenged = 0; // per-thread scavenge total - counters->per_thread_times = 0.0; // per-thread scavenge times + OrderAccess::release_store(&counters->n_in_use, 0); // currently associated with objects + OrderAccess::release_store(&counters->n_in_circulation, 0); // extant + OrderAccess::release_store(&counters->n_scavenged, 0); // reclaimed (global and per-thread) + OrderAccess::release_store(&counters->per_thread_scavenged, 0); // per-thread scavenge total + counters->per_thread_times = 0.0; // per-thread scavenge times } void ObjectSynchronizer::deflate_idle_monitors(DeflateMonitorCounters* counters) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + + if (AsyncDeflateIdleMonitors) { + // Nothing to do when global idle ObjectMonitors are deflated using + // a JavaThread unless a special deflation has been requested. 
+ if (!is_special_deflation_requested()) { + return; + } + } + bool deflated = false; ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors @@ -1647,33 +2617,28 @@ timer.start(); } - // Prevent om_flush from changing mids in Thread dtor's during deflation - // And in case the vm thread is acquiring a lock during a safepoint - // See e.g. 6320749 - Thread::muxAcquire(&gListLock, "deflate_idle_monitors"); - // Note: the thread-local monitors lists get deflated in // a separate pass. See deflate_thread_local_monitors(). // For moribund threads, scan g_om_in_use_list int deflated_count = 0; - if (g_om_in_use_list) { - counters->n_in_circulation += g_om_in_use_count; - deflated_count = deflate_monitor_list((ObjectMonitor **)&g_om_in_use_list, &free_head_p, &free_tail_p); - g_om_in_use_count -= deflated_count; - counters->n_scavenged += deflated_count; - counters->n_in_use += g_om_in_use_count; + if (OrderAccess::load_acquire(&g_om_in_use_list) != NULL) { + // Update n_in_circulation before g_om_in_use_count is updated by deflation. + Atomic::add(OrderAccess::load_acquire(&g_om_in_use_count), &counters->n_in_circulation); + + deflated_count = deflate_monitor_list(&g_om_in_use_list, &g_om_in_use_count, &free_head_p, &free_tail_p); + Atomic::add(OrderAccess::load_acquire(&g_om_in_use_count), &counters->n_in_use); } if (free_head_p != NULL) { // Move the deflated ObjectMonitors back to the global free list. - guarantee(free_tail_p != NULL && counters->n_scavenged > 0, "invariant"); - assert(free_tail_p->_next_om == NULL, "invariant"); - // constant-time list splice - prepend scavenged segment to g_free_list - free_tail_p->_next_om = g_free_list; - g_free_list = free_head_p; + // No races on the working free list so no need for load_acquire(). 
+ guarantee(free_tail_p != NULL && deflated_count > 0, "invariant"); + assert(free_tail_p->_next_om == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(free_tail_p->_next_om)); + prepend_list_to_g_free_list(free_head_p, free_tail_p, deflated_count); + Atomic::add(deflated_count, &counters->n_scavenged); } - Thread::muxRelease(&gListLock); timer.stop(); LogStreamHandle(Debug, monitorinflation) lsh_debug; @@ -1689,39 +2654,162 @@ } } +// Deflate global idle ObjectMonitors using a JavaThread. +// +void ObjectSynchronizer::deflate_global_idle_monitors_using_JT() { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + JavaThread* self = JavaThread::current(); + + deflate_common_idle_monitors_using_JT(true /* is_global */, self); +} + +// Deflate the specified JavaThread's idle ObjectMonitors using a JavaThread. +// +void ObjectSynchronizer::deflate_per_thread_idle_monitors_using_JT(JavaThread* target) { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + + deflate_common_idle_monitors_using_JT(false /* !is_global */, target); +} + +// Deflate global or per-thread idle ObjectMonitors using a JavaThread. 
+// +void ObjectSynchronizer::deflate_common_idle_monitors_using_JT(bool is_global, JavaThread* target) { + JavaThread* self = JavaThread::current(); + + int deflated_count = 0; + ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged ObjectMonitors + ObjectMonitor* free_tail_p = NULL; + ObjectMonitor* saved_mid_in_use_p = NULL; + elapsedTimer timer; + + if (log_is_enabled(Info, monitorinflation)) { + timer.start(); + } + + if (is_global) { + OM_PERFDATA_OP(MonExtant, set_value(OrderAccess::load_acquire(&g_om_in_use_count))); + } else { + OM_PERFDATA_OP(MonExtant, inc(OrderAccess::load_acquire(&target->om_in_use_count))); + } + + do { + int local_deflated_count; + if (is_global) { + local_deflated_count = deflate_monitor_list_using_JT(&g_om_in_use_list, &g_om_in_use_count, &free_head_p, &free_tail_p, &saved_mid_in_use_p); + } else { + local_deflated_count = deflate_monitor_list_using_JT(&target->om_in_use_list, &target->om_in_use_count, &free_head_p, &free_tail_p, &saved_mid_in_use_p); + } + deflated_count += local_deflated_count; + + if (free_head_p != NULL) { + // Move the deflated ObjectMonitors to the global free list. + // No races on the working list so no need for load_acquire(). + guarantee(free_tail_p != NULL && local_deflated_count > 0, "free_tail_p=" INTPTR_FORMAT ", local_deflated_count=%d", p2i(free_tail_p), local_deflated_count); + // Note: The target thread can be doing an om_alloc() that + // is trying to prepend an ObjectMonitor on its in-use list + // at the same time that we have deflated the current in-use + // list head and put it on the local free list. prepend_to_common() + // will detect the race and retry which avoids list corruption, + // but the next field in free_tail_p can flicker to marked + // and then unmarked while prepend_to_common() is sorting it + // all out. 
+ assert(unmarked_next(free_tail_p) == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(unmarked_next(free_tail_p))); + + prepend_list_to_g_free_list(free_head_p, free_tail_p, local_deflated_count); + + OM_PERFDATA_OP(Deflations, inc(local_deflated_count)); + } + + if (saved_mid_in_use_p != NULL) { + // deflate_monitor_list_using_JT() detected a safepoint starting. + timer.stop(); + { + if (is_global) { + log_debug(monitorinflation)("pausing deflation of global idle monitors for a safepoint."); + } else { + log_debug(monitorinflation)("jt=" INTPTR_FORMAT ": pausing deflation of per-thread idle monitors for a safepoint.", p2i(target)); + } + assert(SafepointSynchronize::is_synchronizing(), "sanity check"); + ThreadBlockInVM blocker(self); + } + // Prepare for another loop after the safepoint. + free_head_p = NULL; + free_tail_p = NULL; + if (log_is_enabled(Info, monitorinflation)) { + timer.start(); + } + } + } while (saved_mid_in_use_p != NULL); + timer.stop(); + + LogStreamHandle(Debug, monitorinflation) lsh_debug; + LogStreamHandle(Info, monitorinflation) lsh_info; + LogStream* ls = NULL; + if (log_is_enabled(Debug, monitorinflation)) { + ls = &lsh_debug; + } else if (deflated_count != 0 && log_is_enabled(Info, monitorinflation)) { + ls = &lsh_info; + } + if (ls != NULL) { + if (is_global) { + ls->print_cr("async-deflating global idle monitors, %3.7f secs, %d monitors", timer.seconds(), deflated_count); + } else { + ls->print_cr("jt=" INTPTR_FORMAT ": async-deflating per-thread idle monitors, %3.7f secs, %d monitors", p2i(target), timer.seconds(), deflated_count); + } + } +} + void ObjectSynchronizer::finish_deflate_idle_monitors(DeflateMonitorCounters* counters) { // Report the cumulative time for deflating each thread's idle // monitors. Note: if the work is split among more than one // worker thread, then the reported time will likely be more // than a beginning to end measurement of the phase. 
- log_info(safepoint, cleanup)("deflating per-thread idle monitors, %3.7f secs, monitors=%d", counters->per_thread_times, counters->per_thread_scavenged); + // Note: AsyncDeflateIdleMonitors only deflates per-thread idle + // monitors at a safepoint when a special deflation has been requested. + log_info(safepoint, cleanup)("deflating per-thread idle monitors, %3.7f secs, monitors=%d", + counters->per_thread_times, + OrderAccess::load_acquire(&counters->per_thread_scavenged)); + + bool needs_special_deflation = is_special_deflation_requested(); + if (!AsyncDeflateIdleMonitors || needs_special_deflation) { + // AsyncDeflateIdleMonitors does not use these counters unless + // there is a special deflation request. - g_om_free_count += counters->n_scavenged; + OM_PERFDATA_OP(Deflations, inc(counters->n_scavenged)); + OM_PERFDATA_OP(MonExtant, set_value(counters->n_in_circulation)); + } if (log_is_enabled(Debug, monitorinflation)) { // exit_globals()'s call to audit_and_print_stats() is done // at the Info level. 
ObjectSynchronizer::audit_and_print_stats(false /* on_exit */); } else if (log_is_enabled(Info, monitorinflation)) { - Thread::muxAcquire(&gListLock, "finish_deflate_idle_monitors"); log_info(monitorinflation)("g_om_population=%d, g_om_in_use_count=%d, " - "g_om_free_count=%d", g_om_population, - g_om_in_use_count, g_om_free_count); - Thread::muxRelease(&gListLock); + "g_om_free_count=%d", + OrderAccess::load_acquire(&g_om_population), + OrderAccess::load_acquire(&g_om_in_use_count), + OrderAccess::load_acquire(&g_om_free_count)); } ForceMonitorScavenge = 0; // Reset - - OM_PERFDATA_OP(Deflations, inc(counters->n_scavenged)); - OM_PERFDATA_OP(MonExtant, set_value(counters->n_in_circulation)); - GVars.stw_random = os::random(); GVars.stw_cycle++; + if (needs_special_deflation) { + set_is_special_deflation_requested(false); // special deflation is done + } } void ObjectSynchronizer::deflate_thread_local_monitors(Thread* thread, DeflateMonitorCounters* counters) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + if (AsyncDeflateIdleMonitors && !is_special_deflation_requested()) { + // Nothing to do if a special deflation has NOT been requested. + return; + } + ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors ObjectMonitor* free_tail_p = NULL; elapsedTimer timer; @@ -1731,25 +2819,21 @@ timer.start(); } - int deflated_count = deflate_monitor_list(thread->om_in_use_list_addr(), &free_head_p, &free_tail_p); + // Update n_in_circulation before om_in_use_count is updated by deflation. 
+ Atomic::add(OrderAccess::load_acquire(&thread->om_in_use_count), &counters->n_in_circulation); - Thread::muxAcquire(&gListLock, "deflate_thread_local_monitors"); - - // Adjust counters - counters->n_in_circulation += thread->om_in_use_count; - thread->om_in_use_count -= deflated_count; - counters->n_scavenged += deflated_count; - counters->n_in_use += thread->om_in_use_count; - counters->per_thread_scavenged += deflated_count; + int deflated_count = deflate_monitor_list(&thread->om_in_use_list, &thread->om_in_use_count, &free_head_p, &free_tail_p); + Atomic::add(OrderAccess::load_acquire(&thread->om_in_use_count), &counters->n_in_use); if (free_head_p != NULL) { // Move the deflated ObjectMonitors back to the global free list. + // No races on the working list so no need for load_acquire(). guarantee(free_tail_p != NULL && deflated_count > 0, "invariant"); - assert(free_tail_p->_next_om == NULL, "invariant"); - - // constant-time list splice - prepend scavenged segment to g_free_list - free_tail_p->_next_om = g_free_list; - g_free_list = free_head_p; + assert(free_tail_p->_next_om == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(free_tail_p->_next_om)); + prepend_list_to_g_free_list(free_head_p, free_tail_p, deflated_count); + Atomic::add(deflated_count, &counters->n_scavenged); + Atomic::add(deflated_count, &counters->per_thread_scavenged); } timer.stop(); @@ -1758,8 +2842,6 @@ // should be cheap. 
counters->per_thread_times += timer.seconds(); - Thread::muxRelease(&gListLock); - LogStreamHandle(Debug, monitorinflation) lsh_debug; LogStreamHandle(Info, monitorinflation) lsh_info; LogStream* ls = NULL; @@ -1810,9 +2892,7 @@ assert(THREAD == JavaThread::current(), "must be current Java thread"); NoSafepointVerifier nsv; ReleaseJavaMonitorsClosure rjmc(THREAD); - Thread::muxAcquire(&gListLock, "release_monitors_owned_by_thread"); ObjectSynchronizer::monitors_iterate(&rjmc); - Thread::muxRelease(&gListLock); THREAD->clear_pending_exception(); } @@ -1866,11 +2946,6 @@ } assert(ls != NULL, "sanity check"); - if (!on_exit) { - // Not at VM exit so grab the global list lock. - Thread::muxAcquire(&gListLock, "audit_and_print_stats"); - } - // Log counts for the global and per-thread monitor lists: int chk_om_population = log_monitor_list_counts(ls); int error_cnt = 0; @@ -1878,12 +2953,14 @@ ls->print_cr("Checking global lists:"); // Check g_om_population: - if (g_om_population == chk_om_population) { + if (OrderAccess::load_acquire(&g_om_population) == chk_om_population) { ls->print_cr("g_om_population=%d equals chk_om_population=%d", - g_om_population, chk_om_population); + OrderAccess::load_acquire(&g_om_population), + chk_om_population); } else { ls->print_cr("ERROR: g_om_population=%d is not equal to " - "chk_om_population=%d", g_om_population, + "chk_om_population=%d", + OrderAccess::load_acquire(&g_om_population), chk_om_population); error_cnt++; } @@ -1894,10 +2971,6 @@ // Check g_free_list and g_om_free_count: chk_global_free_list_and_count(ls, &error_cnt); - if (!on_exit) { - Thread::muxRelease(&gListLock); - } - ls->print_cr("Checking per-thread lists:"); for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { @@ -1919,7 +2992,7 @@ // When exiting this log output is at the Info level. When called // at a safepoint, this log output is at the Trace level since // there can be a lot of it. 
- log_in_use_monitor_details(ls, on_exit); + log_in_use_monitor_details(ls); } ls->flush(); @@ -1948,12 +3021,13 @@ ": free per-thread monitor must have NULL _header " "field: _header=" INTPTR_FORMAT, p2i(jt), p2i(n), n->header().value()); - } else { + *error_cnt_p = *error_cnt_p + 1; + } else if (!AsyncDeflateIdleMonitors) { out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": free global monitor " "must have NULL _header field: _header=" INTPTR_FORMAT, p2i(n), n->header().value()); + *error_cnt_p = *error_cnt_p + 1; } - *error_cnt_p = *error_cnt_p + 1; } if (n->object() != NULL) { if (jt != NULL) { @@ -1974,18 +3048,23 @@ void ObjectSynchronizer::chk_global_free_list_and_count(outputStream * out, int *error_cnt_p) { int chk_om_free_count = 0; - for (ObjectMonitor* n = g_free_list; n != NULL; n = n->_next_om) { + for (ObjectMonitor* n = OrderAccess::load_acquire(&g_free_list); n != NULL; n = unmarked_next(n)) { chk_free_entry(NULL /* jt */, n, out, error_cnt_p); chk_om_free_count++; } - if (g_om_free_count == chk_om_free_count) { + if (OrderAccess::load_acquire(&g_om_free_count) == chk_om_free_count) { out->print_cr("g_om_free_count=%d equals chk_om_free_count=%d", - g_om_free_count, chk_om_free_count); + OrderAccess::load_acquire(&g_om_free_count), + chk_om_free_count); } else { - out->print_cr("ERROR: g_om_free_count=%d is not equal to " - "chk_om_free_count=%d", g_om_free_count, + // With lock free access to g_free_list, it is possible for an + // ObjectMonitor to be prepended to g_free_list after we started + // calculating chk_om_free_count so g_om_free_count may not + // match anymore. 
+ out->print_cr("WARNING: g_om_free_count=%d is not equal to " + "chk_om_free_count=%d", + OrderAccess::load_acquire(&g_om_free_count), chk_om_free_count); - *error_cnt_p = *error_cnt_p + 1; } } @@ -1993,16 +3072,18 @@ void ObjectSynchronizer::chk_global_in_use_list_and_count(outputStream * out, int *error_cnt_p) { int chk_om_in_use_count = 0; - for (ObjectMonitor* n = g_om_in_use_list; n != NULL; n = n->_next_om) { + for (ObjectMonitor* n = OrderAccess::load_acquire(&g_om_in_use_list); n != NULL; n = unmarked_next(n)) { chk_in_use_entry(NULL /* jt */, n, out, error_cnt_p); chk_om_in_use_count++; } - if (g_om_in_use_count == chk_om_in_use_count) { - out->print_cr("g_om_in_use_count=%d equals chk_om_in_use_count=%d", g_om_in_use_count, + if (OrderAccess::load_acquire(&g_om_in_use_count) == chk_om_in_use_count) { + out->print_cr("g_om_in_use_count=%d equals chk_om_in_use_count=%d", + OrderAccess::load_acquire(&g_om_in_use_count), chk_om_in_use_count); } else { out->print_cr("ERROR: g_om_in_use_count=%d is not equal to chk_om_in_use_count=%d", - g_om_in_use_count, chk_om_in_use_count); + OrderAccess::load_acquire(&g_om_in_use_count), + chk_om_in_use_count); *error_cnt_p = *error_cnt_p + 1; } } @@ -2071,16 +3152,19 @@ outputStream * out, int *error_cnt_p) { int chk_om_free_count = 0; - for (ObjectMonitor* n = jt->om_free_list; n != NULL; n = n->_next_om) { + for (ObjectMonitor* n = OrderAccess::load_acquire(&jt->om_free_list); n != NULL; n = unmarked_next(n)) { chk_free_entry(jt, n, out, error_cnt_p); chk_om_free_count++; } - if (jt->om_free_count == chk_om_free_count) { + if (OrderAccess::load_acquire(&jt->om_free_count) == chk_om_free_count) { out->print_cr("jt=" INTPTR_FORMAT ": om_free_count=%d equals " - "chk_om_free_count=%d", p2i(jt), jt->om_free_count, chk_om_free_count); + "chk_om_free_count=%d", p2i(jt), + OrderAccess::load_acquire(&jt->om_free_count), + chk_om_free_count); } else { out->print_cr("ERROR: jt=" INTPTR_FORMAT ": om_free_count=%d is not " - 
"equal to chk_om_free_count=%d", p2i(jt), jt->om_free_count, + "equal to chk_om_free_count=%d", p2i(jt), + OrderAccess::load_acquire(&jt->om_free_count), chk_om_free_count); *error_cnt_p = *error_cnt_p + 1; } @@ -2091,17 +3175,19 @@ outputStream * out, int *error_cnt_p) { int chk_om_in_use_count = 0; - for (ObjectMonitor* n = jt->om_in_use_list; n != NULL; n = n->_next_om) { + for (ObjectMonitor* n = OrderAccess::load_acquire(&jt->om_in_use_list); n != NULL; n = unmarked_next(n)) { chk_in_use_entry(jt, n, out, error_cnt_p); chk_om_in_use_count++; } - if (jt->om_in_use_count == chk_om_in_use_count) { + if (OrderAccess::load_acquire(&jt->om_in_use_count) == chk_om_in_use_count) { out->print_cr("jt=" INTPTR_FORMAT ": om_in_use_count=%d equals " - "chk_om_in_use_count=%d", p2i(jt), jt->om_in_use_count, + "chk_om_in_use_count=%d", p2i(jt), + OrderAccess::load_acquire(&jt->om_in_use_count), chk_om_in_use_count); } else { out->print_cr("ERROR: jt=" INTPTR_FORMAT ": om_in_use_count=%d is not " - "equal to chk_om_in_use_count=%d", p2i(jt), jt->om_in_use_count, + "equal to chk_om_in_use_count=%d", p2i(jt), + OrderAccess::load_acquire(&jt->om_in_use_count), chk_om_in_use_count); *error_cnt_p = *error_cnt_p + 1; } @@ -2110,27 +3196,22 @@ // Log details about ObjectMonitors on the in-use lists. The 'BHL' // flags indicate why the entry is in-use, 'object' and 'object type' // indicate the associated object and its type. -void ObjectSynchronizer::log_in_use_monitor_details(outputStream * out, - bool on_exit) { - if (!on_exit) { - // Not at VM exit so grab the global list lock. 
- Thread::muxAcquire(&gListLock, "log_in_use_monitor_details"); - } - +void ObjectSynchronizer::log_in_use_monitor_details(outputStream * out) { stringStream ss; - if (g_om_in_use_count > 0) { + if (OrderAccess::load_acquire(&g_om_in_use_count) > 0) { out->print_cr("In-use global monitor info:"); out->print_cr("(B -> is_busy, H -> has hash code, L -> lock status)"); - out->print_cr("%18s %s %18s %18s", - "monitor", "BHL", "object", "object type"); - out->print_cr("================== === ================== =================="); - for (ObjectMonitor* n = g_om_in_use_list; n != NULL; n = n->_next_om) { + out->print_cr("%18s %s %7s %18s %18s", + "monitor", "BHL", "ref_cnt", "object", "object type"); + out->print_cr("================== === ======= ================== =================="); + for (ObjectMonitor* n = OrderAccess::load_acquire(&g_om_in_use_list); n != NULL; n = unmarked_next(n)) { const oop obj = (oop) n->object(); const markWord mark = n->header(); ResourceMark rm; - out->print(INTPTR_FORMAT " %d%d%d " INTPTR_FORMAT " %s", p2i(n), - n->is_busy() != 0, mark.hash() != 0, n->owner() != NULL, - p2i(obj), obj->klass()->external_name()); + out->print(INTPTR_FORMAT " %d%d%d %7d " INTPTR_FORMAT " %s", + p2i(n), n->is_busy() != 0, mark.hash() != 0, + n->owner() != NULL, (int)n->ref_count(), p2i(obj), + obj->klass()->external_name()); if (n->is_busy() != 0) { out->print(" (%s)", n->is_busy_to_string(&ss)); ss.reset(); @@ -2139,24 +3220,20 @@ } } - if (!on_exit) { - Thread::muxRelease(&gListLock); - } - out->print_cr("In-use per-thread monitor info:"); out->print_cr("(B -> is_busy, H -> has hash code, L -> lock status)"); - out->print_cr("%18s %18s %s %18s %18s", - "jt", "monitor", "BHL", "object", "object type"); - out->print_cr("================== ================== === ================== =================="); + out->print_cr("%18s %18s %s %7s %18s %18s", + "jt", "monitor", "BHL", "ref_cnt", "object", "object type"); + out->print_cr("================== 
================== === ======= ================== =================="); for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { - for (ObjectMonitor* n = jt->om_in_use_list; n != NULL; n = n->_next_om) { + for (ObjectMonitor* n = OrderAccess::load_acquire(&jt->om_in_use_list); n != NULL; n = unmarked_next(n)) { const oop obj = (oop) n->object(); const markWord mark = n->header(); ResourceMark rm; - out->print(INTPTR_FORMAT " " INTPTR_FORMAT " %d%d%d " INTPTR_FORMAT - " %s", p2i(jt), p2i(n), n->is_busy() != 0, - mark.hash() != 0, n->owner() != NULL, p2i(obj), - obj->klass()->external_name()); + out->print(INTPTR_FORMAT " " INTPTR_FORMAT " %d%d%d %7d " + INTPTR_FORMAT " %s", p2i(jt), p2i(n), n->is_busy() != 0, + mark.hash() != 0, n->owner() != NULL, (int)n->ref_count(), + p2i(obj), obj->klass()->external_name()); if (n->is_busy() != 0) { out->print(" (%s)", n->is_busy_to_string(&ss)); ss.reset(); @@ -2176,8 +3253,11 @@ "Global Lists:", "InUse", "Free", "Total"); out->print_cr("================== ========== ========== =========="); out->print_cr("%18s %10d %10d %10d", "", - g_om_in_use_count, g_om_free_count, g_om_population); - pop_count += g_om_in_use_count + g_om_free_count; + OrderAccess::load_acquire(&g_om_in_use_count), + OrderAccess::load_acquire(&g_om_free_count), + OrderAccess::load_acquire(&g_om_population)); + pop_count += OrderAccess::load_acquire(&g_om_in_use_count) + + OrderAccess::load_acquire(&g_om_free_count); out->print_cr("%18s %10s %10s %10s", "Per-Thread Lists:", "InUse", "Free", "Provision"); @@ -2185,8 +3265,11 @@ for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { out->print_cr(INTPTR_FORMAT " %10d %10d %10d", p2i(jt), - jt->om_in_use_count, jt->om_free_count, jt->om_free_provision); - pop_count += jt->om_in_use_count + jt->om_free_count; + OrderAccess::load_acquire(&jt->om_in_use_count), + OrderAccess::load_acquire(&jt->om_free_count), + jt->om_free_provision); + pop_count += 
OrderAccess::load_acquire(&jt->om_in_use_count) + + OrderAccess::load_acquire(&jt->om_free_count); } return pop_count; } @@ -2208,7 +3291,8 @@ assert((diff % sizeof(PaddedObjectMonitor)) == 0, "must be aligned"); return 1; } - block = (PaddedObjectMonitor*)block->_next_om; + // unmarked_next() is not needed with g_block_list (no next field marking). + block = (PaddedObjectMonitor*)OrderAccess::load_acquire(&block->_next_om); } return 0; } --- old/src/hotspot/share/runtime/synchronizer.hpp 2019-08-28 15:05:56.502953327 -0400 +++ new/src/hotspot/share/runtime/synchronizer.hpp 2019-08-28 15:05:56.318953333 -0400 @@ -32,16 +32,23 @@ #include "runtime/perfData.hpp" class ObjectMonitor; +class ObjectMonitorHandle; class ThreadsList; -typedef PaddedEnd PaddedObjectMonitor; +#ifndef OM_CACHE_LINE_SIZE +// Use DEFAULT_CACHE_LINE_SIZE if not already specified for +// the current build platform. +#define OM_CACHE_LINE_SIZE DEFAULT_CACHE_LINE_SIZE +#endif + +typedef PaddedEnd PaddedObjectMonitor; struct DeflateMonitorCounters { - int n_in_use; // currently associated with objects - int n_in_circulation; // extant - int n_scavenged; // reclaimed (global and per-thread) - int per_thread_scavenged; // per-thread scavenge total - double per_thread_times; // per-thread scavenge times + volatile int n_in_use; // currently associated with objects + volatile int n_in_circulation; // extant + volatile int n_scavenged; // reclaimed (global and per-thread) + volatile int per_thread_scavenged; // per-thread scavenge total + double per_thread_times; // per-thread scavenge times }; class ObjectSynchronizer : AllStatic { @@ -109,15 +116,16 @@ static void reenter (Handle obj, intptr_t recursion, TRAPS); // thread-specific and global ObjectMonitor free list accessors - static ObjectMonitor* om_alloc(Thread* self); + static ObjectMonitor* om_alloc(Thread* self, const InflateCause cause); static void om_release(Thread* self, ObjectMonitor* m, bool FromPerThreadAlloc); static void 
om_flush(Thread* self); // Inflate light weight monitor to heavy weight monitor - static ObjectMonitor* inflate(Thread* self, oop obj, const InflateCause cause); + static void inflate(ObjectMonitorHandle* omh_p, Thread* self, oop obj, + const InflateCause cause); // This version is only for internal use - static void inflate_helper(oop obj); + static void inflate_helper(ObjectMonitorHandle* omh_p, oop obj); static const char* inflate_cause_name(const InflateCause cause); // Returns the identity hash value for an oop @@ -139,18 +147,38 @@ // Basically we deflate all monitors that are not busy. // An adaptive profile-based deflation policy could be used if needed static void deflate_idle_monitors(DeflateMonitorCounters* counters); + static void deflate_global_idle_monitors_using_JT(); + static void deflate_per_thread_idle_monitors_using_JT(JavaThread* target); + static void deflate_common_idle_monitors_using_JT(bool is_global, JavaThread* target); static void deflate_thread_local_monitors(Thread* thread, DeflateMonitorCounters* counters); static void prepare_deflate_idle_monitors(DeflateMonitorCounters* counters); static void finish_deflate_idle_monitors(DeflateMonitorCounters* counters); // For a given monitor list: global or per-thread, deflate idle monitors - static int deflate_monitor_list(ObjectMonitor** list_p, + static int deflate_monitor_list(ObjectMonitor* volatile * list_p, + int volatile * count_p, ObjectMonitor** free_head_p, ObjectMonitor** free_tail_p); + // For a given in-use monitor list: global or per-thread, deflate idle + // monitors using a JavaThread. 
+ static int deflate_monitor_list_using_JT(ObjectMonitor* volatile * list_p, + int volatile * count_p, + ObjectMonitor** free_head_p, + ObjectMonitor** free_tail_p, + ObjectMonitor** saved_mid_in_use_p); static bool deflate_monitor(ObjectMonitor* mid, oop obj, ObjectMonitor** free_head_p, ObjectMonitor** free_tail_p); - static bool is_cleanup_needed(); + static bool deflate_monitor_using_JT(ObjectMonitor* mid, + ObjectMonitor** free_head_p, + ObjectMonitor** free_tail_p); + static bool is_async_deflation_needed(); + static bool is_safepoint_deflation_needed(); + static bool is_async_deflation_requested() { return _is_async_deflation_requested; } + static bool is_special_deflation_requested() { return _is_special_deflation_requested; } + static void set_is_async_deflation_requested(bool new_value) { _is_async_deflation_requested = new_value; } + static void set_is_special_deflation_requested(bool new_value) { _is_special_deflation_requested = new_value; } + static jlong time_since_last_async_deflation_ms(); static void oops_do(OopClosure* f); // Process oops in thread local used monitors static void thread_local_used_oops_do(Thread* thread, OopClosure* f); @@ -171,28 +199,29 @@ static void chk_per_thread_free_list_and_count(JavaThread *jt, outputStream * out, int *error_cnt_p); - static void log_in_use_monitor_details(outputStream * out, bool on_exit); + static void log_in_use_monitor_details(outputStream * out); static int log_monitor_list_counts(outputStream * out); static int verify_objmon_isinpool(ObjectMonitor *addr) PRODUCT_RETURN0; + static void do_safepoint_work(DeflateMonitorCounters* counters); + private: friend class SynchronizerTest; enum { _BLOCKSIZE = 128 }; // global list of blocks of monitors static PaddedObjectMonitor* volatile g_block_list; - // global monitor free list - static ObjectMonitor* volatile g_free_list; - // global monitor in-use list, for moribund threads, - // monitors they inflated need to be scanned for deflation - static 
ObjectMonitor* volatile g_om_in_use_list; - // count of entries in g_om_in_use_list - static int g_om_in_use_count; + static volatile bool _is_async_deflation_requested; + static volatile bool _is_special_deflation_requested; + static jlong _last_async_deflation_time_ns; + + // Function to prepend new blocks to the appropriate lists: + static void prepend_block_to_lists(PaddedObjectMonitor* new_blk); // Process oops in all global used monitors (i.e. moribund thread's monitors) static void global_used_oops_do(OopClosure* f); // Process oops in monitors on the given list - static void list_oops_do(ObjectMonitor* list, OopClosure* f); + static void list_oops_do(ObjectMonitor* list, int count, OopClosure* f); // Support for SynchronizerTest access to GVars fields: static u_char* get_gvars_addr(); --- old/src/hotspot/share/runtime/thread.hpp 2019-08-28 15:05:57.922953277 -0400 +++ new/src/hotspot/share/runtime/thread.hpp 2019-08-28 15:05:57.754953283 -0400 @@ -410,11 +410,11 @@ // Per-thread ObjectMonitor lists: public: - ObjectMonitor* om_free_list; // SLL of free ObjectMonitors - int om_free_count; // # on om_free_list + ObjectMonitor* volatile om_free_list; // SLL of free ObjectMonitors + volatile int om_free_count; // # on om_free_list int om_free_provision; // # to try to allocate next - ObjectMonitor* om_in_use_list; // SLL of in-use ObjectMonitors - int om_in_use_count; // # on om_in_use_list + ObjectMonitor* volatile om_in_use_list; // SLL of in-use ObjectMonitors + volatile int om_in_use_count; // # on om_in_use_list #ifdef ASSERT private: @@ -522,7 +522,6 @@ os::set_native_thread_name(name); } - ObjectMonitor** om_in_use_list_addr() { return (ObjectMonitor **)&om_in_use_list; } Monitor* SR_lock() const { return _SR_lock; } bool has_async_exception() const { return (_suspend_flags & _has_async_exception) != 0; } --- old/src/hotspot/share/runtime/vmOperations.cpp 2019-08-28 15:05:58.554953255 -0400 +++ new/src/hotspot/share/runtime/vmOperations.cpp 2019-08-28 
15:05:58.390953261 -0400 @@ -41,6 +41,7 @@ #include "runtime/frame.inline.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/sweeper.hpp" +#include "runtime/synchronizer.hpp" #include "runtime/thread.inline.hpp" #include "runtime/threadSMR.inline.hpp" #include "runtime/vmOperations.hpp" @@ -468,6 +469,17 @@ } } +bool VM_Exit::doit_prologue() { + if (AsyncDeflateIdleMonitors && log_is_enabled(Info, monitorinflation)) { + // AsyncDeflateIdleMonitors does a special deflation at the VM_Exit + // safepoint in order to reduce the in-use monitor population that + // is reported by ObjectSynchronizer::log_in_use_monitor_details() + // at VM exit. + ObjectSynchronizer::set_is_special_deflation_requested(true); + } + return true; +} + void VM_Exit::doit() { if (VerifyBeforeExit) { --- old/src/hotspot/share/runtime/vmOperations.hpp 2019-08-28 15:05:59.210953232 -0400 +++ new/src/hotspot/share/runtime/vmOperations.hpp 2019-08-28 15:05:59.030953239 -0400 @@ -498,6 +498,7 @@ } } VMOp_Type type() const { return VMOp_Exit; } + bool doit_prologue(); void doit(); }; --- old/src/hotspot/share/runtime/vmStructs.cpp 2019-08-28 15:05:59.902953208 -0400 +++ new/src/hotspot/share/runtime/vmStructs.cpp 2019-08-28 15:05:59.690953216 -0400 @@ -93,6 +93,7 @@ #include "runtime/serviceThread.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" #include "runtime/thread.inline.hpp" #include "runtime/threadSMR.hpp" #include "runtime/vframeArray.hpp" @@ -909,11 +910,11 @@ volatile_nonstatic_field(ObjectMonitor, _header, markWord) \ unchecked_nonstatic_field(ObjectMonitor, _object, sizeof(void *)) /* NOTE: no type */ \ unchecked_nonstatic_field(ObjectMonitor, _owner, sizeof(void *)) /* NOTE: no type */ \ + volatile_nonstatic_field(ObjectMonitor, _next_om, ObjectMonitor*) \ + volatile_nonstatic_field(BasicLock, _displaced_header, markWord) \ volatile_nonstatic_field(ObjectMonitor, _contentions, jint) \ 
volatile_nonstatic_field(ObjectMonitor, _waiters, jint) \ volatile_nonstatic_field(ObjectMonitor, _recursions, intptr_t) \ - nonstatic_field(ObjectMonitor, _next_om, ObjectMonitor*) \ - volatile_nonstatic_field(BasicLock, _displaced_header, markWord) \ nonstatic_field(BasicObjectLock, _lock, BasicLock) \ nonstatic_field(BasicObjectLock, _obj, oop) \ static_ptr_volatile_field(ObjectSynchronizer, g_block_list, PaddedObjectMonitor*) \ --- old/src/hotspot/share/runtime/vmThread.cpp 2019-08-28 15:06:00.574953185 -0400 +++ new/src/hotspot/share/runtime/vmThread.cpp 2019-08-28 15:06:00.386953191 -0400 @@ -40,6 +40,7 @@ #include "runtime/mutexLocker.hpp" #include "runtime/os.hpp" #include "runtime/safepoint.hpp" +#include "runtime/synchronizer.hpp" #include "runtime/thread.inline.hpp" #include "runtime/vmThread.hpp" #include "runtime/vmOperations.hpp" @@ -310,6 +311,14 @@ assert(should_terminate(), "termination flag must be set"); } + if (AsyncDeflateIdleMonitors && log_is_enabled(Info, monitorinflation)) { + // AsyncDeflateIdleMonitors does a special deflation at the final + // safepoint in order to reduce the in-use monitor population that + // is reported by ObjectSynchronizer::log_in_use_monitor_details() + // at VM exit. + ObjectSynchronizer::set_is_special_deflation_requested(true); + } + // 4526887 let VM thread exit at Safepoint _cur_vm_operation = &halt_op; SafepointSynchronize::begin(); --- old/test/hotspot/gtest/oops/test_markOop.cpp 2019-08-28 15:06:01.234953162 -0400 +++ new/test/hotspot/gtest/oops/test_markOop.cpp 2019-08-28 15:06:01.034953169 -0400 @@ -117,6 +117,10 @@ // This is no longer biased, because ObjectLocker revokes the bias. assert_test_pattern(h_obj, "is_neutral no_hash"); + // Hash the object then print it. + intx hash = h_obj->identity_hash(); + assert_test_pattern(h_obj, "is_neutral hash=0x"); + // Wait gets the lock inflated. 
{ ObjectLocker ol(h_obj, THREAD); @@ -131,14 +135,18 @@ done.wait_with_safepoint_check(THREAD); // wait till the thread is done. } - // Make the object older. Not all GCs use this field. - Universe::heap()->collect(GCCause::_java_lang_system_gc); - if (UseParallelGC) { - assert_test_pattern(h_obj, "is_neutral no_hash age 1"); - } + if (!AsyncDeflateIdleMonitors) { + // With AsyncDeflateIdleMonitors, the collect() call below + // does not guarantee monitor deflation. + // Make the object older. Not all GCs use this field. + Universe::heap()->collect(GCCause::_java_lang_system_gc); + if (UseParallelGC) { + assert_test_pattern(h_obj, "is_neutral no_hash age 1"); + } - // Hash the object then print it. - intx hash = h_obj->identity_hash(); - assert_test_pattern(h_obj, "is_neutral hash=0x"); + // Hash the object then print it. + intx hash = h_obj->identity_hash(); + assert_test_pattern(h_obj, "is_neutral hash=0x"); + } } #endif // PRODUCT --- old/test/hotspot/jtreg/runtime/logging/SafepointCleanupTest.java 2019-08-28 15:06:01.918953138 -0400 +++ new/test/hotspot/jtreg/runtime/logging/SafepointCleanupTest.java 2019-08-28 15:06:01.738953144 -0400 @@ -29,12 +29,17 @@ * @modules java.base/jdk.internal.misc * java.management * @run driver SafepointCleanupTest + * @run driver SafepointCleanupTest -XX:+AsyncDeflateIdleMonitors */ import jdk.test.lib.process.OutputAnalyzer; import jdk.test.lib.process.ProcessTools; public class SafepointCleanupTest { + static final String ASYNC_DISABLE_OPTION = "-XX:-AsyncDeflateIdleMonitors"; + static final String ASYNC_ENABLE_OPTION = "-XX:+AsyncDeflateIdleMonitors"; + static final String UNLOCK_DIAG_OPTION = "-XX:+UnlockDiagnosticVMOptions"; + static void analyzeOutputOn(ProcessBuilder pb) throws Exception { OutputAnalyzer output = new OutputAnalyzer(pb.start()); output.shouldContain("[safepoint,cleanup]"); @@ -53,19 +58,40 @@ } public static void main(String[] args) throws Exception { + String async_option; + if (args.length == 0) { + // By 
default test deflating idle monitors at a safepoint. + async_option = ASYNC_DISABLE_OPTION; + } else { + async_option = args[0]; + } + if (!async_option.equals(ASYNC_DISABLE_OPTION) && + !async_option.equals(ASYNC_ENABLE_OPTION)) { + throw new RuntimeException("Unknown async_option value: '" + + async_option + "'"); + } + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-Xlog:safepoint+cleanup=info", + UNLOCK_DIAG_OPTION, + async_option, InnerClass.class.getName()); analyzeOutputOn(pb); pb = ProcessTools.createJavaProcessBuilder("-XX:+TraceSafepointCleanupTime", + UNLOCK_DIAG_OPTION, + async_option, InnerClass.class.getName()); analyzeOutputOn(pb); pb = ProcessTools.createJavaProcessBuilder("-Xlog:safepoint+cleanup=off", + UNLOCK_DIAG_OPTION, + async_option, InnerClass.class.getName()); analyzeOutputOff(pb); pb = ProcessTools.createJavaProcessBuilder("-XX:-TraceSafepointCleanupTime", + UNLOCK_DIAG_OPTION, + async_option, InnerClass.class.getName()); analyzeOutputOff(pb); } --- old/test/jdk/java/rmi/server/UnicastRemoteObject/unexportObject/UnexportLeak.java 2019-08-28 15:06:02.586953115 -0400 +++ new/test/jdk/java/rmi/server/UnicastRemoteObject/unexportObject/UnexportLeak.java 2019-08-28 15:06:02.402953121 -0400 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -74,7 +74,7 @@ } /** - * Force desparate garbage collection so that all WeakReference instances + * Force desperate garbage collection so that all WeakReference instances * will be cleared. */ private static void flushRefs() { @@ -85,6 +85,9 @@ chain.addElement(hungry); } } catch (OutOfMemoryError e) { + // An inflated Java monitor can keep 'obj' alive so request + // an explicit GC to make sure things are cleaned up. 
+ System.gc(); } } }