--- old/src/hotspot/cpu/aarch64/globals_aarch64.hpp 2019-12-11 14:51:51.000000000 -0500
+++ new/src/hotspot/cpu/aarch64/globals_aarch64.hpp 2019-12-11 14:51:50.000000000 -0500
@@ -72,6 +72,9 @@
 define_pd_global(intx, InitArrayShortSize, BytesPerLong);
 define_pd_global(bool, ThreadLocalHandshakes, true);
+// ObjectMonitor ref_count not implemented in C2 fast_lock() or
+// fast_unlock() so use a handshake for safety.
+define_pd_global(bool, HandshakeAfterDeflateIdleMonitors, true);
 #if defined(COMPILER1) || defined(COMPILER2)
 define_pd_global(intx, InlineSmallCode, 1000);
--- old/src/hotspot/cpu/arm/globals_arm.hpp 2019-12-11 14:51:52.000000000 -0500
+++ new/src/hotspot/cpu/arm/globals_arm.hpp 2019-12-11 14:51:52.000000000 -0500
@@ -71,6 +71,10 @@
 define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
 define_pd_global(bool, ThreadLocalHandshakes, false);
+// ObjectMonitor ref_count not implemented in C2 fast_lock() or
+// fast_unlock() so use a handshake for safety.
+// Will use a safepoint instead of a handshake on this platform.
+define_pd_global(bool, HandshakeAfterDeflateIdleMonitors, true);
 #define ARCH_FLAGS(develop, \
                    product, \
--- old/src/hotspot/cpu/ppc/globals_ppc.hpp 2019-12-11 14:51:53.000000000 -0500
+++ new/src/hotspot/cpu/ppc/globals_ppc.hpp 2019-12-11 14:51:53.000000000 -0500
@@ -75,6 +75,9 @@
 define_pd_global(intx, InitArrayShortSize, 9*BytesPerLong);
 define_pd_global(bool, ThreadLocalHandshakes, true);
+// ObjectMonitor ref_count not implemented in C2 fast_lock() or
+// fast_unlock() so use a handshake for safety.
+define_pd_global(bool, HandshakeAfterDeflateIdleMonitors, true);
 // Platform dependent flag handling: flags only defined on this platform.
 #define ARCH_FLAGS(develop, \
--- old/src/hotspot/cpu/s390/globals_s390.hpp 2019-12-11 14:51:55.000000000 -0500
+++ new/src/hotspot/cpu/s390/globals_s390.hpp 2019-12-11 14:51:54.000000000 -0500
@@ -77,6 +77,9 @@
 define_pd_global(intx, InitArrayShortSize, 1*BytesPerLong);
 define_pd_global(bool, ThreadLocalHandshakes, true);
+// ObjectMonitor ref_count not implemented in C2 fast_lock() or
+// fast_unlock() so use a handshake for safety.
+define_pd_global(bool, HandshakeAfterDeflateIdleMonitors, true);
 #define ARCH_FLAGS(develop, \
                    product, \
--- old/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp 2019-12-11 14:51:56.000000000 -0500
+++ new/src/hotspot/cpu/sparc/globalDefinitions_sparc.hpp 2019-12-11 14:51:56.000000000 -0500
@@ -42,12 +42,14 @@
 #if defined(TIERED)
   // tiered, 64-bit, large machine
   #define DEFAULT_CACHE_LINE_SIZE 128
+  #define OM_CACHE_LINE_SIZE 64
 #elif defined(COMPILER1)
   // pure C1, 32-bit, small machine
   #define DEFAULT_CACHE_LINE_SIZE 16
 #elif defined(COMPILER2)
   // pure C2, 64-bit, large machine
   #define DEFAULT_CACHE_LINE_SIZE 128
+  #define OM_CACHE_LINE_SIZE 64
 #endif
 
 #if defined(SOLARIS)
--- old/src/hotspot/cpu/sparc/globals_sparc.hpp 2019-12-11 14:51:57.000000000 -0500
+++ new/src/hotspot/cpu/sparc/globals_sparc.hpp 2019-12-11 14:51:57.000000000 -0500
@@ -81,6 +81,9 @@
 define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
 define_pd_global(bool, ThreadLocalHandshakes, true);
+// ObjectMonitor ref_count not implemented in C2 fast_lock() or
+// fast_unlock() so use a handshake for safety.
+define_pd_global(bool, HandshakeAfterDeflateIdleMonitors, true);
 
 #define ARCH_FLAGS(develop, \
                    product, \
--- old/src/hotspot/cpu/x86/globalDefinitions_x86.hpp 2019-12-11 14:51:58.000000000 -0500
+++ new/src/hotspot/cpu/x86/globalDefinitions_x86.hpp 2019-12-11 14:51:58.000000000 -0500
@@ -40,6 +40,7 @@
 #ifdef _LP64
   // tiered, 64-bit, large machine
   #define DEFAULT_CACHE_LINE_SIZE 128
+  #define OM_CACHE_LINE_SIZE 64
 #else
   // tiered, 32-bit, medium machine
   #define DEFAULT_CACHE_LINE_SIZE 64
@@ -52,6 +53,7 @@
 #ifdef _LP64
   // pure C2, 64-bit, large machine
   #define DEFAULT_CACHE_LINE_SIZE 128
+  #define OM_CACHE_LINE_SIZE 64
 #else
   // pure C2, 32-bit, medium machine
   #define DEFAULT_CACHE_LINE_SIZE 64
--- old/src/hotspot/cpu/x86/globals_x86.hpp 2019-12-11 14:51:59.000000000 -0500
+++ new/src/hotspot/cpu/x86/globals_x86.hpp 2019-12-11 14:51:59.000000000 -0500
@@ -91,9 +91,20 @@
 #if defined(_LP64) || defined(_WINDOWS)
 define_pd_global(bool, ThreadLocalHandshakes, true);
+// ObjectMonitor ref_count is implemented in LP64 C2 fast_lock()
+// and fast_unlock() so we don't need the handshake by default.
+#ifdef _LP64
+define_pd_global(bool, HandshakeAfterDeflateIdleMonitors, false);
+#else
+define_pd_global(bool, HandshakeAfterDeflateIdleMonitors, true);
+#endif
 #else
 // get_thread() is slow on linux 32 bit, therefore off by default
 define_pd_global(bool, ThreadLocalHandshakes, false);
+// ObjectMonitor ref_count not implemented in C2 fast_lock() or
+// fast_unlock() so use a handshake for safety.
+// Will use a safepoint instead of a handshake on this platform.
+define_pd_global(bool, HandshakeAfterDeflateIdleMonitors, true);
 #endif
 
 #define ARCH_FLAGS(develop, \
--- old/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2019-12-11 14:52:00.000000000 -0500
+++ new/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2019-12-11 14:52:00.000000000 -0500
@@ -1296,6 +1296,58 @@
 
 #ifdef COMPILER2
 
+// Increment the ObjectMonitor's ref_count for safety or force a branch
+// to 'done' with ICC.ZF=0 to indicate failure/take the slow path.
+void MacroAssembler::inc_om_ref_count(Register obj_reg, Register om_reg, Register tmp_reg, Label& done) {
+  atomic_incl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+
+  Label LGoSlowPath;
+  if (AsyncDeflateIdleMonitors) {
+    // Race here if monitor is not owned! The above ref_count bump
+    // will cause subsequent async deflation to skip it. However,
+    // previous or concurrent async deflation is a race.
+
+    // First check: if the owner field == DEFLATER_MARKER:
+    movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+    // DEFLATER_MARKER == reinterpret_cast<void*>(-1) so the compiler
+    // doesn't like to use the define here:
+    cmpptr(tmp_reg, -1);
+    // If marked for async deflation, then take the slow path. This is a
+    // simpler check than what ObjectMonitorHandle::save_om_ptr() does
+    // so ObjectMonitor::install_displaced_markword_in_object() doesn't
+    // have to be implemented in macro assembler.
+    jccb(Assembler::equal, LGoSlowPath);
+
+    // Second check: if ref_count field <= 0:
+    movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+    cmpptr(tmp_reg, 0);
+    // If async deflation is in the process of bailing out, but has not
+    // yet restored the ref_count field, then we take the slow path. We
+    // want a stable ref_count value for the fast path.
+    jccb(Assembler::lessEqual, LGoSlowPath);
+
+    // Final check: if object field == obj_reg:
+    cmpptr(obj_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(object)));
+    // If the ObjectMonitor has been deflated and recycled, then take
+    // the slow path.
+    jccb(Assembler::notEqual, LGoSlowPath);
+  }
+
+  Label LRetToCaller;
+  // We leave the ref_count incremented to protect the caller's code
+  // paths against async deflation.
+  jmpb(LRetToCaller);
+
+  bind(LGoSlowPath);
+  lock();
+  decrementl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+  // Jump to 'done' with ICC.ZF=0 to indicate failure/take the slow path.
+  orl(tmp_reg, 1);
+  jmp(done);
+
+  bind(LRetToCaller);
+}
+
 #if INCLUDE_RTM_OPT
 
 // Update rtm_counters based on abort status
@@ -1529,11 +1581,21 @@
   assert(UseRTMLocking, "why call this otherwise?");
   assert(tmpReg == rax, "");
   assert(scrReg == rdx, "");
-  Label L_rtm_retry, L_decrement_retry, L_on_abort;
+  Label L_rtm_retry, L_decrement_retry, L_on_abort, L_local_done;
   int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
 
   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
+
+  if (!HandshakeAfterDeflateIdleMonitors) {
+    // Increment the ObjectMonitor's ref_count for safety or force the
+    // enter slow path via DONE_LABEL.
+    // In rtm_inflated_locking(), initially tmpReg contains the object's
+    // mark word which, in this case, is the (ObjectMonitor* | monitor_value).
+    // Also this code uses scrReg as its temporary register.
+    inc_om_ref_count(objReg, tmpReg /* om_reg */, scrReg /* tmp_reg */, DONE_LABEL);
+  }
+
   movptr(boxReg, tmpReg); // Save ObjectMonitor address
 
   if (RTMRetryCount > 0) {
@@ -1555,7 +1617,7 @@
     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
     movptr(tmpReg, Address(tmpReg, owner_offset));
     testptr(tmpReg, tmpReg);
-    jcc(Assembler::zero, DONE_LABEL);
+    jcc(Assembler::zero, L_local_done);
     if (UseRTMXendForLockBusy) {
       xend();
       jmp(L_decrement_retry);
@@ -1590,7 +1652,7 @@
   if (RTMRetryCount > 0) {
     // success done else retry
-    jccb(Assembler::equal, DONE_LABEL) ;
+    jccb(Assembler::equal, L_local_done);
     bind(L_decrement_retry);
     // Spin and retry if lock is busy.
     rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
@@ -1598,6 +1660,19 @@
   else {
     bind(L_decrement_retry);
   }
+
+  // rtm_inflated_locking() exit paths come here except for a failed
+  // inc_om_ref_count() which goes directly to DONE_LABEL.
+  bind(L_local_done);
+  if (!HandshakeAfterDeflateIdleMonitors) {
+    pushf(); // Preserve flags.
+    // Decrement the ObjectMonitor's ref_count.
+    lock();
+    decrementl(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+    popf(); // Restore flags so we have the proper ICC.ZF value.
+  }
+
+  jmp(DONE_LABEL);
 }
 #endif // INCLUDE_RTM_OPT
@@ -1823,14 +1898,33 @@
 #else // _LP64
     // It's inflated and we use scrReg for ObjectMonitor* in this section.
     movq(scrReg, tmpReg);
-    xorq(tmpReg, tmpReg);
-    lock();
-    cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+
     // Unconditionally set box->_displaced_header = markWord::unused_mark().
     // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
     movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
+
+    if (!HandshakeAfterDeflateIdleMonitors) {
+      // Increment the ObjectMonitor's ref_count for safety or force the
+      // enter slow path via DONE_LABEL.
+      // In fast_lock(), scrReg contains the object's mark word which,
+      // in this case, is the (ObjectMonitor* | monitor_value). Also this
+      // code uses tmpReg as its temporary register.
+      inc_om_ref_count(objReg, scrReg /* om_reg */, tmpReg /* tmp_reg */, DONE_LABEL);
+    }
+
+    xorq(tmpReg, tmpReg);
+    lock();
+    cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
     // Intentional fall-through into DONE_LABEL ...
     // Propagate ICC.ZF from CAS above into DONE_LABEL.
+
+    if (!HandshakeAfterDeflateIdleMonitors) {
+      pushf(); // Preserve flags.
+      // Decrement the ObjectMonitor's ref_count.
+      lock();
+      decrementl(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+      popf(); // Restore flags so we have the proper ICC.ZF value.
+    }
 #endif // _LP64
 #if INCLUDE_RTM_OPT
   } // use_rtm()
@@ -1910,7 +2004,7 @@
   jcc (Assembler::zero, DONE_LABEL);    // 0 indicates recursive stack-lock
   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));   // Examine the object's markword
   testptr(tmpReg, markWord::monitor_value);    // Inflated?
-  jccb (Assembler::zero, Stacked);
+  jcc (Assembler::zero, Stacked);
 
   // It's inflated.
 #if INCLUDE_RTM_OPT
@@ -1921,7 +2015,7 @@
     testptr(boxReg, boxReg);
     jccb(Assembler::notZero, L_regular_inflated_unlock);
     xend();
-    jmpb(DONE_LABEL);
+    jmp(DONE_LABEL);
     bind(L_regular_inflated_unlock);
   }
 #endif
@@ -1983,18 +2077,28 @@
   bind (CheckSucc);
 #else // _LP64
   // It's inflated
+
+  if (!HandshakeAfterDeflateIdleMonitors) {
+    // Increment the ObjectMonitor's ref_count for safety or force the
+    // exit slow path via DONE_LABEL.
+    // In fast_unlock(), tmpReg contains the object's mark word which,
+    // in this case, is the (ObjectMonitor* | monitor_value). Also this
+    // code uses boxReg as its temporary register.
+    inc_om_ref_count(objReg, tmpReg /* om_reg */, boxReg /* tmp_reg */, DONE_LABEL);
+  }
+
+  // Try to avoid passing control into the slow path ...
+  Label LSuccess, LGoSlowPath;
   xorptr(boxReg, boxReg);
   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
-  jccb (Assembler::notZero, DONE_LABEL);
+  jccb(Assembler::notZero, LGoSlowPath);
   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
   jccb (Assembler::notZero, CheckSucc);
   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
-  jmpb (DONE_LABEL);
+  jmpb(LSuccess);
 
-  // Try to avoid passing control into the slow_path ...
-  Label LSuccess, LGoSlowPath ;
 
   bind (CheckSucc);
 
   // The following optional optimization can be elided if necessary
@@ -2043,10 +2147,18 @@
 
   // Intentional fall-through into slow path
   bind (LGoSlowPath);
+  if (!HandshakeAfterDeflateIdleMonitors) {
+    lock();
+    decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+  }
   orl (boxReg, 1);   // set ICC.ZF=0 to indicate failure
   jmpb (DONE_LABEL);
 
   bind (LSuccess);
+  if (!HandshakeAfterDeflateIdleMonitors) {
+    lock();
+    decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+  }
   testl (boxReg, 0); // set ICC.ZF=1 to indicate success
   jmpb (DONE_LABEL);
--- old/src/hotspot/cpu/x86/macroAssembler_x86.hpp 2019-12-11 14:52:02.000000000 -0500
+++ new/src/hotspot/cpu/x86/macroAssembler_x86.hpp 2019-12-11 14:52:01.000000000 -0500
@@ -682,6 +682,7 @@
                             BiasedLockingCounters* counters = NULL);
   void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done);
 #ifdef COMPILER2
+  void inc_om_ref_count(Register obj_reg, Register om_reg, Register temp_reg, Label& done);
   // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
   // See full description in macroAssembler_x86.cpp.
   void fast_lock(Register obj, Register box, Register tmp,
--- old/src/hotspot/cpu/zero/globals_zero.hpp 2019-12-11 14:52:03.000000000 -0500
+++ new/src/hotspot/cpu/zero/globals_zero.hpp 2019-12-11 14:52:02.000000000 -0500
@@ -74,6 +74,9 @@
 define_pd_global(bool, CompactStrings, false);
 define_pd_global(bool, ThreadLocalHandshakes, true);
+// No ObjectMonitor C2 fast_lock() or fast_unlock() so no need to
+// use a handshake for safety.
+define_pd_global(bool, HandshakeAfterDeflateIdleMonitors, false);
 #define ARCH_FLAGS(develop, \
                    product, \
--- old/src/hotspot/share/logging/logTag.hpp 2019-12-11 14:52:04.000000000 -0500
+++ new/src/hotspot/share/logging/logTag.hpp 2019-12-11 14:52:04.000000000 -0500
@@ -117,6 +117,7 @@
   LOG_TAG(oops) \
   LOG_TAG(oopstorage) \
   LOG_TAG(os) \
+  LOG_TAG(owner) \
   LOG_TAG(pagesize) \
   LOG_TAG(patch) \
   LOG_TAG(path) \
--- old/src/hotspot/share/oops/markWord.hpp 2019-12-11 14:52:05.000000000 -0500
+++ new/src/hotspot/share/oops/markWord.hpp 2019-12-11 14:52:05.000000000 -0500
@@ -217,6 +217,10 @@
   bool is_unlocked() const {
     return (mask_bits(value(), biased_lock_mask_in_place) == unlocked_value);
   }
+  // ObjectMonitor::install_displaced_markword_in_object() uses
+  // is_marked() on ObjectMonitor::_header as part of the restoration
+  // protocol for an object's header. In this usage, the mark bits are
+  // only ever set (and cleared) on the ObjectMonitor::_header field.
   bool is_marked()   const {
     return (mask_bits(value(), lock_mask_in_place) == marked_value);
   }
--- old/src/hotspot/share/prims/jvm.cpp 2019-12-11 14:52:06.000000000 -0500
+++ new/src/hotspot/share/prims/jvm.cpp 2019-12-11 14:52:06.000000000 -0500
@@ -73,6 +73,7 @@
 #include "runtime/os.inline.hpp"
 #include "runtime/perfData.hpp"
 #include "runtime/reflection.hpp"
+#include "runtime/synchronizer.hpp"
 #include "runtime/thread.inline.hpp"
 #include "runtime/threadSMR.hpp"
 #include "runtime/vframe.inline.hpp"
@@ -484,6 +485,11 @@
 JVM_ENTRY_NO_ENV(void, JVM_GC(void))
   JVMWrapper("JVM_GC");
   if (!DisableExplicitGC) {
+    if (AsyncDeflateIdleMonitors) {
+      // AsyncDeflateIdleMonitors needs to know when System.gc() is
+      // called so any special deflation can be done at a safepoint.
+      ObjectSynchronizer::set_is_special_deflation_requested(true);
+    }
     Universe::heap()->collect(GCCause::_java_lang_system_gc);
   }
 JVM_END
--- old/src/hotspot/share/prims/jvmtiEnvBase.cpp 2019-12-11 14:52:08.000000000 -0500
+++ new/src/hotspot/share/prims/jvmtiEnvBase.cpp 2019-12-11 14:52:07.000000000 -0500
@@ -651,14 +651,22 @@
 #ifdef ASSERT
   uint32_t debug_bits = 0;
 #endif
+  // Note: The is_thread_fully_suspended() part of the assert() is
+  // from an older implementation that recognized suspension as
+  // being safe. However, that was racy in the face of rogue resumes.
+  // Should be replaced with "calling_thread == java_thread".
   assert((SafepointSynchronize::is_at_safepoint() ||
           java_thread->is_thread_fully_suspended(false, &debug_bits)),
          "at safepoint or target thread is suspended");
   oop obj = NULL;
-  ObjectMonitor *mon = java_thread->current_waiting_monitor();
+  // For all of the get_current_contended_monitor() call sites, we
+  // are either at a safepoint or the calling thread is operating
+  // on itself so this ObjectMonitorHandle is not strictly necessary.
+  ObjectMonitorHandle omh;
+  ObjectMonitor *mon = java_thread->current_waiting_monitor(&omh);
   if (mon == NULL) {
     // thread is not doing an Object.wait() call
-    mon = java_thread->current_pending_monitor();
+    mon = java_thread->current_pending_monitor(&omh);
     if (mon != NULL) {
       // The thread is trying to enter() an ObjectMonitor.
       obj = (oop)mon->object();
@@ -734,16 +742,24 @@
   HandleMark hm;
   oop wait_obj = NULL;
   {
+    // For all of the get_locked_objects_in_frame() call sites, we
+    // are either at a safepoint or the calling thread is operating
+    // on itself so this ObjectMonitorHandle is not strictly necessary.
+    ObjectMonitorHandle omh;
     // save object of current wait() call (if any) for later comparison
-    ObjectMonitor *mon = java_thread->current_waiting_monitor();
+    ObjectMonitor *mon = java_thread->current_waiting_monitor(&omh);
     if (mon != NULL) {
       wait_obj = (oop)mon->object();
     }
   }
   oop pending_obj = NULL;
   {
+    // For all of the get_locked_objects_in_frame() call sites, we
+    // are either at a safepoint or the calling thread is operating
+    // on itself so this ObjectMonitorHandle is not strictly necessary.
+    ObjectMonitorHandle omh;
     // save object of current enter() call (if any) for later comparison
-    ObjectMonitor *mon = java_thread->current_pending_monitor();
+    ObjectMonitor *mon = java_thread->current_pending_monitor(&omh);
     if (mon != NULL) {
       pending_obj = (oop)mon->object();
     }
--- old/src/hotspot/share/prims/whitebox.cpp 2019-12-11 14:52:09.000000000 -0500
+++ new/src/hotspot/share/prims/whitebox.cpp 2019-12-11 14:52:09.000000000 -0500
@@ -71,6 +71,7 @@
 #include "runtime/jniHandles.inline.hpp"
 #include "runtime/os.hpp"
 #include "runtime/sweeper.hpp"
+#include "runtime/synchronizer.hpp"
 #include "runtime/thread.hpp"
 #include "runtime/threadSMR.hpp"
 #include "runtime/vm_version.hpp"
@@ -463,6 +464,12 @@
 
 WB_ENTRY(jboolean, WB_G1StartMarkCycle(JNIEnv* env, jobject o))
   if (UseG1GC) {
+    if (AsyncDeflateIdleMonitors) {
+      // AsyncDeflateIdleMonitors needs to know when System.gc() or
+      // the equivalent is called so any special clean up can be done
+      // at a safepoint, e.g., TestHumongousClassLoader.java.
+      ObjectSynchronizer::set_is_special_deflation_requested(true);
+    }
     G1CollectedHeap* g1h = G1CollectedHeap::heap();
     if (!g1h->concurrent_mark()->cm_thread()->during_cycle()) {
       g1h->collect(GCCause::_wb_conc_mark);
@@ -1419,6 +1426,12 @@
 WB_END
 
 WB_ENTRY(void, WB_FullGC(JNIEnv* env, jobject o))
+  if (AsyncDeflateIdleMonitors) {
+    // AsyncDeflateIdleMonitors needs to know when System.gc() or
+    // the equivalent is called so any special clean up can be done
+    // at a safepoint, e.g., TestHumongousClassLoader.java.
+    ObjectSynchronizer::set_is_special_deflation_requested(true);
+  }
   Universe::heap()->soft_ref_policy()->set_should_clear_all_soft_refs(true);
   Universe::heap()->collect(GCCause::_wb_full_gc);
 #if INCLUDE_G1GC
--- old/src/hotspot/share/runtime/basicLock.cpp 2019-12-11 14:52:10.000000000 -0500
+++ new/src/hotspot/share/runtime/basicLock.cpp 2019-12-11 14:52:10.000000000 -0500
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "runtime/basicLock.hpp"
+#include "runtime/objectMonitor.hpp"
 #include "runtime/synchronizer.hpp"
 
 void BasicLock::print_on(outputStream* st) const {
@@ -62,8 +63,11 @@
   // is small (given the support for inflated fast-path locking in the fast_lock, etc)
   // we'll leave that optimization for another time.
 
+  // Disallow async deflation of the inflated monitor so the
+  // displaced header stays stable until we've copied it.
+  ObjectMonitorHandle omh;
   if (displaced_header().is_neutral()) {
-    ObjectSynchronizer::inflate_helper(obj);
+    ObjectSynchronizer::inflate_helper(&omh, obj);
     // WARNING: We can not put check here, because the inflation
     // will not update the displaced header. Once BasicLock is inflated,
     // no one should ever look at its content.
--- old/src/hotspot/share/runtime/globals.hpp 2019-12-11 14:52:12.000000000 -0500
+++ new/src/hotspot/share/runtime/globals.hpp 2019-12-11 14:52:11.000000000 -0500
@@ -714,11 +714,25 @@
   product(intx, MonitorBound, 0, "(Deprecated) Bound Monitor population") \
           range(0, max_jint)                                              \
                                                                           \
+  diagnostic(bool, AsyncDeflateIdleMonitors, true,                        \
+          "Deflate idle monitors using the ServiceThread.")               \
+                                                                          \
+  /* notice: the max range value here is max_jint, not max_intx */        \
+  /* because of an overflow issue */                                      \
+  diagnostic(intx, AsyncDeflationInterval, 250,                           \
+          "Async deflate idle monitors every so many milliseconds when " \
+          "MonitorUsedDeflationThreshold is exceeded (0 is off).")        \
+          range(0, max_jint)                                              \
+                                                                          \
+  diagnostic_pd(bool, HandshakeAfterDeflateIdleMonitors,                  \
+          "Handshake with all JavaThreads after async deflating idle "   \
+          "monitors to force threads to leave C2 monitor code.")          \
+                                                                          \
   experimental(intx, MonitorUsedDeflationThreshold, 90,                   \
-          "Percentage of used monitors before triggering cleanup "       \
-          "safepoint which deflates monitors (0 is off). "               \
-          "The check is performed on GuaranteedSafepointInterval.")       \
-          range(0, 100)                                                   \
+          "Percentage of used monitors before triggering deflation (0 is "\
+          "off). The check is performed on GuaranteedSafepointInterval "  \
+          "or AsyncDeflationInterval.")                                   \
+          range(0, 100)                                                   \
                                                                           \
   experimental(intx, hashCode, 5,                                         \
           "(Unstable) select hashCode generation algorithm")              \
--- old/src/hotspot/share/runtime/init.cpp 2019-12-11 14:52:13.000000000 -0500
+++ new/src/hotspot/share/runtime/init.cpp 2019-12-11 14:52:13.000000000 -0500
@@ -178,8 +178,12 @@
   if (log_is_enabled(Info, monitorinflation)) {
     // The ObjectMonitor subsystem uses perf counters so
     // do this before perfMemory_exit().
-    // ObjectSynchronizer::finish_deflate_idle_monitors()'s call
-    // to audit_and_print_stats() is done at the Debug level.
+    // These other two audit_and_print_stats() calls are done at the
+    // Debug level at a safepoint:
+    // - for safepoint based deflation auditing:
+    //   ObjectSynchronizer::finish_deflate_idle_monitors()
+    // - for async deflation auditing:
+    //   ObjectSynchronizer::do_safepoint_work()
     ObjectSynchronizer::audit_and_print_stats(true /* on_exit */);
   }
   perfMemory_exit();
--- old/src/hotspot/share/runtime/objectMonitor.cpp 2019-12-11 14:52:14.000000000 -0500
+++ new/src/hotspot/share/runtime/objectMonitor.cpp 2019-12-11 14:52:14.000000000 -0500
@@ -241,11 +241,14 @@
 // Enter support
 
 void ObjectMonitor::enter(TRAPS) {
+  jint l_ref_count = ref_count();
+  ADIM_guarantee(l_ref_count > 0, "must be positive: l_ref_count=%d, ref_count=%d", l_ref_count, ref_count());
+
   // The following code is ordered to check the most common cases first
   // and to reduce RTS->RTO cache line upgrades on SPARC and IA32 processors.
   Thread * const Self = THREAD;
 
-  void * cur = Atomic::cmpxchg(&_owner, (void*)NULL, Self);
+  void* cur = try_set_owner_from(Self, NULL);
   if (cur == NULL) {
     assert(_recursions == 0, "invariant");
     return;
   }
@@ -260,9 +263,17 @@
   if (Self->is_lock_owned((address)cur)) {
     assert(_recursions == 0, "internal state error");
     _recursions = 1;
-    // Commute owner from a thread-specific on-stack BasicLockObject address to
-    // a full-fledged "Thread *".
-    _owner = Self;
+    simply_set_owner_from_BasicLock(Self, cur);  // Convert from BasicLock* to Thread*.
+    return;
+  }
+
+  if (AsyncDeflateIdleMonitors &&
+      try_set_owner_from(Self, DEFLATER_MARKER) == DEFLATER_MARKER) {
+    // The deflation protocol finished the first part (setting owner),
+    // but it failed the second part (making ref_count negative) and
+    // bailed. Or the ObjectMonitor was async deflated and reused.
+    // Acquired the monitor.
+    assert(_recursions == 0, "invariant");
     return;
   }
 
@@ -293,10 +304,9 @@
   assert(!SafepointSynchronize::is_at_safepoint(), "invariant");
   assert(jt->thread_state() != _thread_blocked, "invariant");
   assert(this->object() != NULL, "invariant");
-  assert(_contentions >= 0, "invariant");
+  assert(_contentions >= 0, "must not be negative: contentions=%d", _contentions);
 
-  // Prevent deflation at STW-time. See deflate_idle_monitors() and is_busy().
-  // Ensure the object-monitor relationship remains stable while there's contention.
+  // Keep track of contention for JVM/TI and M&M queries.
   Atomic::inc(&_contentions);
 
   JFR_ONLY(JfrConditionalFlushWithStacktrace<EventJavaMonitorEnter> flush(jt);)
@@ -359,7 +369,7 @@
   }
 
   Atomic::dec(&_contentions);
-  assert(_contentions >= 0, "invariant");
+  assert(_contentions >= 0, "must not be negative: contentions=%d", _contentions);
   Self->_Stalled = 0;
 
   // Must either set _recursions = 0 or ASSERT _recursions == 0.
@@ -403,7 +413,7 @@
 int ObjectMonitor::TryLock(Thread * Self) {
   void * own = _owner;
   if (own != NULL) return 0;
-  if (Atomic::replace_if_null(&_owner, Self)) {
+  if (try_set_owner_from(Self, NULL) == NULL) {
     assert(_recursions == 0, "invariant");
     return 1;
   }
@@ -414,18 +424,103 @@
   return -1;
 }
 
+// Install the displaced mark word (dmw) of a deflating ObjectMonitor
+// into the header of the object associated with the monitor. This
+// idempotent method is called by a thread that is deflating a
+// monitor and by other threads that have detected a race with the
+// deflation process.
+void ObjectMonitor::install_displaced_markword_in_object(const oop obj) {
+  // This function must only be called when (owner == DEFLATER_MARKER
+  // && ref_count <= 0), but we can't guarantee that here because
+  // those values could change when the ObjectMonitor gets moved from
+  // the global free list to a per-thread free list.
+
+  guarantee(obj != NULL, "must be non-NULL");
+  if (object() != obj) {
+    // ObjectMonitor's object ref no longer refers to the target object
+    // so the object's header has already been restored.
+    return;
+  }
+
+  markWord dmw = header();
+  if (dmw.value() == 0) {
+    // ObjectMonitor's header/dmw has been cleared so the ObjectMonitor
+    // has been deflated and taken off the global free list.
+    return;
+  }
+
+  // A non-NULL dmw has to be either neutral (not locked and not marked)
+  // or is already participating in this restoration protocol.
+  assert(dmw.is_neutral() || (dmw.is_marked() && dmw.hash() == 0),
+         "failed precondition: dmw=" INTPTR_FORMAT, dmw.value());
+
+  markWord marked_dmw = markWord::zero();
+  if (!dmw.is_marked() && dmw.hash() == 0) {
+    // This dmw has not yet started the restoration protocol so we
+    // mark a copy of the dmw to begin the protocol.
+    // Note: A dmw with a hashcode does not take this code path.
+    marked_dmw = dmw.set_marked();
+
+    // All of the callers to this function can be racing with each
+    // other trying to update the _header field.
+    dmw = (markWord) Atomic::cmpxchg(&_header, dmw, marked_dmw);
+    if (dmw.value() == 0) {
+      // ObjectMonitor's header/dmw has been cleared so the object's
+      // header has already been restored.
+      return;
+    }
+    // The _header field is now marked. The winner's 'dmw' variable
+    // contains the original, unmarked header/dmw value and any
+    // losers have a marked header/dmw value that will be cleaned
+    // up below.
+  }
+
+  if (dmw.is_marked()) {
+    // Clear the mark from the header/dmw copy in preparation for
+    // possible restoration from this thread.
+    assert(dmw.hash() == 0, "hashcode must be 0: dmw=" INTPTR_FORMAT,
+           dmw.value());
+    dmw = dmw.set_unmarked();
+  }
+  assert(dmw.is_neutral(), "must be neutral: dmw=" INTPTR_FORMAT, dmw.value());
+
+  // Install displaced mark word if the object's header still points
+  // to this ObjectMonitor. All racing callers to this function will
+  // reach this point, but only one can win.
+  obj->cas_set_mark(dmw, markWord::encode(this));
+
+  // Note: It does not matter which thread restored the header/dmw
+  // into the object's header. The thread deflating the monitor just
+  // wanted the object's header restored and it is. The threads that
+  // detected a race with the deflation process also wanted the
+  // object's header restored before they retry their operation and
+  // because it is restored they will only retry once.
+}
+
 // Convert the fields used by is_busy() to a string that can be
 // used for diagnostic output.
 const char* ObjectMonitor::is_busy_to_string(stringStream* ss) {
-  ss->print("is_busy: contentions=%d, waiters=%d, owner=" INTPTR_FORMAT
-            ", cxq=" INTPTR_FORMAT ", EntryList=" INTPTR_FORMAT, _contentions,
-            _waiters, p2i(_owner), p2i(_cxq), p2i(_EntryList));
+  ss->print("is_busy: contentions=%d, waiters=%d, ", _contentions, _waiters);
+  if (!AsyncDeflateIdleMonitors) {
+    ss->print("owner=" INTPTR_FORMAT, p2i(_owner));
+  } else if (_owner != DEFLATER_MARKER) {
+    ss->print("owner=" INTPTR_FORMAT, p2i(_owner));
+  } else {
+    // We report NULL instead of DEFLATER_MARKER here because is_busy()
+    // ignores DEFLATER_MARKER values.
+ ss->print("owner=" INTPTR_FORMAT, NULL); + } + ss->print(", cxq=" INTPTR_FORMAT ", EntryList=" INTPTR_FORMAT, p2i(_cxq), + p2i(_EntryList)); return ss->base(); } #define MAX_RECHECK_INTERVAL 1000 void ObjectMonitor::EnterI(TRAPS) { + jint l_ref_count = ref_count(); + ADIM_guarantee(l_ref_count > 0, "must be positive: l_ref_count=%d, ref_count=%d", l_ref_count, ref_count()); + Thread * const Self = THREAD; assert(Self->is_Java_thread(), "invariant"); assert(((JavaThread *) Self)->thread_state() == _thread_blocked, "invariant"); @@ -438,6 +533,17 @@ return; } + if (AsyncDeflateIdleMonitors && + try_set_owner_from(Self, DEFLATER_MARKER) == DEFLATER_MARKER) { + // The deflation protocol finished the first part (setting owner), + // but it failed the second part (making ref_count negative) and + // bailed. Or the ObjectMonitor was async deflated and reused. + // Acquired the monitor. + assert(_succ != Self, "invariant"); + assert(_Responsible != Self, "invariant"); + return; + } + assert(InitDone, "Unexpectedly not initialized"); // We try one round of spinning *before* enqueueing Self. @@ -554,6 +660,15 @@ if (TryLock(Self) > 0) break; + if (AsyncDeflateIdleMonitors && + try_set_owner_from(Self, DEFLATER_MARKER) == DEFLATER_MARKER) { + // The deflation protocol finished the first part (setting owner), + // but it failed the second part (making ref_count negative) and + // bailed. Or the ObjectMonitor was async deflated and reused. + // Acquired the monitor. + break; + } + // The lock is still contested. // Keep a tally of the # of futile wakeups. // Note that the counter is not protected by a lock or updated by atomics. @@ -658,6 +773,9 @@ // In the future we should reconcile EnterI() and ReenterI(). void ObjectMonitor::ReenterI(Thread * Self, ObjectWaiter * SelfNode) { + jint l_ref_count = ref_count(); + ADIM_guarantee(l_ref_count > 0, "must be positive: l_ref_count=%d, ref_count=%d", l_ref_count, ref_count()); + assert(Self != NULL, "invariant"); assert(SelfNode != NULL, "invariant"); assert(SelfNode->_thread == Self, "invariant"); @@ -675,6 +793,15 @@ if (TryLock(Self) > 0) break; if (TrySpin(Self) > 0) break; + if (AsyncDeflateIdleMonitors && + try_set_owner_from(Self, DEFLATER_MARKER) == DEFLATER_MARKER) { + // The deflation protocol finished the first part (setting owner), + // but it failed the second part (making ref_count negative) and + // bailed. Or the ObjectMonitor was async deflated and reused. + // Acquired the monitor. + break; + } + // State transition wrappers around park() ... // ReenterI() wisely defers state transitions until // it's clear we must park the thread. @@ -818,8 +945,8 @@ // We'd like to assert that: (THREAD->thread_state() != _thread_blocked) ; // There's one exception to the claim above, however. EnterI() can call // exit() to drop a lock if the acquirer has been externally suspended. -// In that case exit() is called with _thread_state as _thread_blocked, -// but the monitor's _contentions field is > 0, which inhibits reclamation. +// In that case exit() is called with _thread_state == _thread_blocked, +// but the monitor's ref_count is > 0, which inhibits reclamation. // // 1-0 exit // ~~~~~~~~ @@ -864,13 +991,10 @@ void ObjectMonitor::exit(bool not_suspended, TRAPS) { Thread * const Self = THREAD; if (THREAD != _owner) { - if (THREAD->is_lock_owned((address) _owner)) { - // Transmute _owner from a BasicLock pointer to a Thread address. - // We don't need to hold _mutex for this transition. 
-      // Non-null to Non-null is safe as long as all readers can
-      // tolerate either flavor.
+    void* cur = _owner;
+    if (THREAD->is_lock_owned((address)cur)) {
       assert(_recursions == 0, "invariant");
-      _owner = THREAD;
+      simply_set_owner_from_BasicLock(Self, cur);  // Convert from BasicLock* to Thread*.
       _recursions = 0;
     } else {
       // Apparent unbalanced locking ...
@@ -914,10 +1038,14 @@
   for (;;) {
     assert(THREAD == _owner, "invariant");
 
+    // Drop the lock.
     // release semantics: prior loads and stores from within the critical section
     // must not float (reorder) past the following store that drops the lock.
-    Atomic::release_store(&_owner, (void*)NULL);   // drop the lock
-    OrderAccess::storeload();                      // See if we need to wake a successor
+    // Uses a storeload to separate release_store(owner) from the
+    // successor check. The try_set_owner() below uses cmpxchg() so
+    // we get the fence down there.
+    release_clear_owner_with_barrier(Self, /* needs_fence */ false);
+
     if ((intptr_t(_EntryList)|intptr_t(_cxq)) == 0 || _succ != NULL) {
       return;
     }
@@ -959,7 +1087,7 @@
     // to reacquire the lock the responsibility for ensuring succession
     // falls to the new owner.
     //
-    if (!Atomic::replace_if_null(&_owner, THREAD)) {
+    if (try_set_owner_from(Self, NULL) != NULL) {
       return;
     }
@@ -1091,9 +1219,9 @@
   // out-of-scope (non-extant).
   Wakee = NULL;
 
-  // Drop the lock
-  Atomic::release_store(&_owner, (void*)NULL);
-  OrderAccess::fence();                               // ST _owner vs LD in unpark()
+  // Drop the lock.
+  // Uses a fence to separate release_store(owner) from the LD in unpark().
+  release_clear_owner_with_barrier(Self, /* needs_fence */ true);
 
   DTRACE_MONITOR_PROBE(contended__exit, this, object(), Self);
   Trigger->unpark();
@@ -1120,9 +1248,10 @@
   assert(InitDone, "Unexpectedly not initialized");
 
   if (THREAD != _owner) {
-    if (THREAD->is_lock_owned ((address)_owner)) {
+    void* cur = _owner;
+    if (THREAD->is_lock_owned((address)cur)) {
       assert(_recursions == 0, "internal state error");
-      _owner = THREAD;  // Convert from basiclock addr to Thread addr
+      simply_set_owner_from_BasicLock(Self, cur);  // Convert from BasicLock* to Thread*.
       _recursions = 0;
     }
   }
@@ -1143,10 +1272,10 @@
   JavaThread *jt = (JavaThread *)THREAD;
 
   guarantee(_owner != Self, "reenter already owner");
-  enter(THREAD);       // enter the monitor
+  enter(THREAD);
+  // Entered the monitor.
   guarantee(_recursions == 0, "reenter recursion");
   _recursions = recursions;
-  return;
 }
 
 // Checks that the current THREAD owns this monitor and causes an
@@ -1170,8 +1299,9 @@
   if (_owner == THREAD) {
     return true;
   }
-  if (THREAD->is_lock_owned((address)_owner)) {
-    _owner = THREAD;  // convert from BasicLock addr to Thread addr
+  void* cur = _owner;
+  if (THREAD->is_lock_owned((address)cur)) {
+    simply_set_owner_from_BasicLock(THREAD, cur);  // Convert from BasicLock* to Thread*.
     _recursions = 0;
     return true;
   }
@@ -1680,7 +1810,7 @@
 
   Thread * ox = (Thread *) _owner;
   if (ox == NULL) {
-    ox = (Thread*)Atomic::cmpxchg(&_owner, (void*)NULL, Self);
+    ox = (Thread*)try_set_owner_from(Self, NULL);
    if (ox == NULL) {
       // The CAS succeeded -- this thread acquired ownership
       // Take care of some bookkeeping to exit spin state.
@@ -1944,6 +2074,131 @@
   DEBUG_ONLY(InitDone = true;)
 }
 
+ObjectMonitorHandle::~ObjectMonitorHandle() {
+  if (_om_ptr != NULL) {
+    _om_ptr->dec_ref_count();
+    _om_ptr = NULL;
+  }
+}
+
+// Save the ObjectMonitor* associated with the specified markWord and
+// increment the ref_count. This function should only be called if
+// the caller has verified mark.has_monitor() == true.
+// The object parameter is needed to verify that the ObjectMonitor*
+// has not been deflated and reused for another object.
+//
+// This function returns true if the ObjectMonitor* has been safely
+// saved. This function returns false if we have lost a race with
+// async deflation; the caller should retry as appropriate.
+//
+bool ObjectMonitorHandle::save_om_ptr(oop object, markWord mark) {
+  // is_marked() is a superset of has_monitor() so make sure we
+  // are called with the proper markWord value.
+  guarantee(mark.has_monitor() && !mark.is_marked(), "sanity check: mark="
+            INTPTR_FORMAT, mark.value());
+
+  ObjectMonitor* om_ptr = mark.monitor();
+  om_ptr->inc_ref_count();
+
+  if (AsyncDeflateIdleMonitors) {
+    // Race here if monitor is not owned! The above ref_count bump
+    // will cause subsequent async deflation to skip it. However,
+    // previous or concurrent async deflation is a race.
+    if (om_ptr->owner_is_DEFLATER_MARKER() && om_ptr->ref_count() <= 0) {
+      // Async deflation is in progress and our ref_count increment
+      // above lost the race to async deflation. Attempt to restore
+      // the header/dmw to the object's header so that we only retry
+      // once if the deflater thread happens to be slow.
+      om_ptr->install_displaced_markword_in_object(object);
+      om_ptr->dec_ref_count();
+      return false;
+    }
+    if (om_ptr->ref_count() <= 0) {
+      // Async deflation is in the process of bailing out, but has not
+      // yet restored the ref_count field so we return false to force
+      // a retry. We want a positive ref_count value for a true return.
+      om_ptr->dec_ref_count();
+      return false;
+    }
+    // The ObjectMonitor could have been deflated and reused for
+    // another object before we bumped the ref_count so make sure
+    // our object still refers to this ObjectMonitor.
+    const markWord tmp = object->mark();
+    if (!tmp.has_monitor() || tmp.monitor() != om_ptr) {
+      // Async deflation and reuse won the race so we have to retry.
+      // Skip object header restoration since that's already done.
+      om_ptr->dec_ref_count();
+      return false;
+    }
+  }
+
+  ADIM_guarantee(_om_ptr == NULL, "sanity check: _om_ptr=" INTPTR_FORMAT,
+                 p2i(_om_ptr));
+  _om_ptr = om_ptr;
+  return true;
+}
+
+// For internal use by ObjectSynchronizer::inflate().
+// This function is only used when we don't have to worry about async
+// deflation of the specified ObjectMonitor*.
+void ObjectMonitorHandle::set_om_ptr(ObjectMonitor* om_ptr) {
+  if (_om_ptr == NULL) {
+    ADIM_guarantee(om_ptr != NULL, "cannot clear an unset om_ptr");
+    om_ptr->inc_ref_count();
+    _om_ptr = om_ptr;
+  } else {
+    ADIM_guarantee(om_ptr == NULL, "can only clear a set om_ptr");
+    _om_ptr->dec_ref_count();
+    _om_ptr = NULL;
+  }
+}
+
+// Save the specified ObjectMonitor* if it is safe, i.e., not being
+// async deflated.
+//
+// This function returns true if the ObjectMonitor* has been safely
+// saved. This function returns false if the specified ObjectMonitor*
+// is NULL or if we have lost a race with async deflation; the caller
+// can retry as appropriate.
+bool ObjectMonitorHandle::set_om_ptr_if_safe(ObjectMonitor* om_ptr) {
+  if (om_ptr == NULL) {
+    return false;  // Nothing to save if input is NULL.
+  }
+
+  om_ptr->inc_ref_count();
+
+  if (AsyncDeflateIdleMonitors) {
+    if (om_ptr->owner_is_DEFLATER_MARKER() && om_ptr->ref_count() <= 0) {
+      // Async deflation is in progress and our ref_count increment
+      // above lost the race to async deflation.
+      om_ptr->dec_ref_count();
+      return false;
+    }
+    if (om_ptr->ref_count() <= 0) {
+      // Async deflation is in the process of bailing out, but has not
+      // yet restored the ref_count field so we return false to force
+      // a retry. We want a positive ref_count value for a true return.
+      om_ptr->dec_ref_count();
+      return false;
+    }
+    // Unlike save_om_ptr(), we don't have context to determine if
+    // the ObjectMonitor has been deflated and reused for another
+    // object.
+  }
+
+  ADIM_guarantee(_om_ptr == NULL, "sanity check: _om_ptr=" INTPTR_FORMAT,
+                 p2i(_om_ptr));
+  _om_ptr = om_ptr;
+  return true;
+}
+
+// Unset the _om_ptr field and decrement the ref_count field.
+void ObjectMonitorHandle::unset_om_ptr() {
+  ADIM_guarantee(_om_ptr != NULL, "_om_ptr must not be NULL");
+  _om_ptr->dec_ref_count();
+  _om_ptr = NULL;
+}
+
 void ObjectMonitor::print_on(outputStream* st) const {
   // The minimal things to print for markWord printing, more can be added for debugging and logging.
   st->print("{contentions=0x%08x,waiters=0x%08x"
@@ -1959,14 +2214,26 @@
 // (ObjectMonitor) 0x00007fdfb6012e40 = {
 //   _header = 0x0000000000000001
 //   _object = 0x000000070ff45fd0
-//   _next_om = 0x0000000000000000
+//   _allocation_state = Old
 //   _pad_buf0 = {
 //     [0] = '\0'
 //     ...
-//     [103] = '\0'
+//     [43] = '\0'
 //   }
 //   _owner = 0x0000000000000000
 //   _previous_owner_tid = 0
+//   _pad_buf1 = {
+//     [0] = '\0'
+//     ...
+//     [47] = '\0'
+//   }
+//   _ref_count = 1
+//   _pad_buf2 = {
+//     [0] = '\0'
+//     ...
+//     [47] = '\0'
+//   }
+//   _next_om = 0x0000000000000000
 //   _recursions = 0
 //   _EntryList = 0x0000000000000000
 //   _cxq = 0x0000000000000000
@@ -1984,7 +2251,17 @@
   st->print_cr("(ObjectMonitor*) " INTPTR_FORMAT " = {", p2i(this));
   st->print_cr("  _header = " INTPTR_FORMAT, header().value());
   st->print_cr("  _object = " INTPTR_FORMAT, p2i(_object));
-  st->print_cr("  _next_om = " INTPTR_FORMAT, p2i(_next_om));
+  st->print("  _allocation_state = ");
+  if (is_free()) {
+    st->print("Free");
+  } else if (is_old()) {
+    st->print("Old");
+  } else if (is_new()) {
+    st->print("New");
+  } else {
+    st->print("unknown=%d", _allocation_state);
+  }
+  st->cr();
   st->print_cr("  _pad_buf0 = {");
   st->print_cr("    [0] = '\\0'");
   st->print_cr("    ...");
   st->print_cr("    [%d] = '\\0'", (int)sizeof(_pad_buf0) - 1);
   st->print_cr("  }");
   st->print_cr("  _owner = " INTPTR_FORMAT, p2i(_owner));
   st->print_cr("  _previous_owner_tid = " JLONG_FORMAT, _previous_owner_tid);
+  st->print_cr("  _pad_buf1 = {");
+  st->print_cr("    [0] = '\\0'");
+  st->print_cr("    ...");
+  st->print_cr("    [%d] = '\\0'", (int)sizeof(_pad_buf1) - 1);
+  st->print_cr("  }");
+  st->print_cr("  _ref_count = %d", ref_count());
+  st->print_cr("  _pad_buf2 = {");
+  st->print_cr("    [0] = '\\0'");
+  st->print_cr("    ...");
+  st->print_cr("    [%d] = '\\0'", (int)sizeof(_pad_buf2) - 1);
+  st->print_cr("  }");
+  st->print_cr("  _next_om = " INTPTR_FORMAT, p2i(Atomic::load(&_next_om)));
   st->print_cr("  _recursions = " INTX_FORMAT, _recursions);
   st->print_cr("  _EntryList = " INTPTR_FORMAT, p2i(_EntryList));
   st->print_cr("  _cxq = " INTPTR_FORMAT, p2i(_cxq));
--- old/src/hotspot/share/runtime/objectMonitor.hpp 2019-12-11 14:52:16.000000000 -0500
+++ new/src/hotspot/share/runtime/objectMonitor.hpp 2019-12-11 14:52:16.000000000 -0500
@@ -120,7 +120,14 @@
 //   intptr_t. There's no reason to use a 64-bit type for this field
 //   in a 64-bit JVM.
 
+#ifndef OM_CACHE_LINE_SIZE
+// Use DEFAULT_CACHE_LINE_SIZE if not already specified for
+// the current build platform.
+#define OM_CACHE_LINE_SIZE DEFAULT_CACHE_LINE_SIZE
+#endif
+
 class ObjectMonitor {
+  friend class ObjectMonitorHandle;
   friend class ObjectSynchronizer;
   friend class ObjectWaiter;
   friend class VMStructs;
@@ -130,20 +137,37 @@
   // Enforced by the assert() in header_addr().
   volatile markWord _header;        // displaced object header word - mark
   void* volatile _object;           // backward object pointer - strong root
- public:
-  ObjectMonitor* _next_om;          // Next ObjectMonitor* linkage
- private:
+  typedef enum {
+    Free = 0,  // Free must be 0 for monitor to be free after memset(..,0,..).
+    New,
+    Old
+  } AllocationState;
+  AllocationState _allocation_state;
   // Separate _header and _owner on different cache lines since both can
-  // have busy multi-threaded access. _header and _object are set at
-  // initial inflation and _object doesn't change until deflation so
-  // _object is a good choice to share the cache line with _header.
-  // _next_om shares _header's cache line for pre-monitor list historical
-  // reasons. _next_om only changes if the next ObjectMonitor is deflated.
-  DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE,
-                        sizeof(volatile markWord) + sizeof(void* volatile) +
-                        sizeof(ObjectMonitor *));
+  // have busy multi-threaded access. _header, _object and _allocation_state
+  // are set at initial inflation. _object and _allocation_state don't
+  // change until deflation so _object and _allocation_state are good
+  // choices to share the cache line with _header.
+  DEFINE_PAD_MINUS_SIZE(0, OM_CACHE_LINE_SIZE, sizeof(volatile markWord) +
+                        sizeof(void* volatile) + sizeof(AllocationState));
+  // Used by async deflation as a marker in the _owner field:
+  #define DEFLATER_MARKER reinterpret_cast<void*>(-1)
   void* volatile _owner;            // pointer to owning thread OR BasicLock
   volatile jlong _previous_owner_tid;  // thread id of the previous owner of the monitor
+  // Separate _owner and _ref_count on different cache lines since both
+  // can have busy multi-threaded access. _previous_owner_tid is only
+  // changed by ObjectMonitor::exit() so it is a good choice to share the
+  // cache line with _owner.
+  DEFINE_PAD_MINUS_SIZE(1, OM_CACHE_LINE_SIZE, sizeof(void* volatile) +
+                        sizeof(volatile jlong));
+  jint _ref_count;  // ref count for ObjectMonitor* and used by the async deflation
+                    // protocol. See ObjectSynchronizer::deflate_monitor_using_JT().
+  // Separate _ref_count and _next_om on different cache lines since
+  // both can have busy multi-threaded access.
+  DEFINE_PAD_MINUS_SIZE(2, OM_CACHE_LINE_SIZE, sizeof(volatile jint));
+ public:  // for static synchronizer.cpp access:
+  ObjectMonitor* _next_om;          // Next ObjectMonitor* linkage
+ private:
   volatile intx _recursions;        // recursion count, 0 for first entry
   ObjectWaiter* volatile _EntryList;  // Threads blocked on entry or reentry.
                                       // The list is actually composed of WaitNodes,
@@ -158,7 +182,8 @@
   volatile jint _contentions;       // Number of active contentions in enter(). It is used by is_busy()
                                     // along with other fields to determine if an ObjectMonitor can be
-                                    // deflated. See ObjectSynchronizer::deflate_monitor().
+                                    // deflated. See ObjectSynchronizer::deflate_monitor() and
+                                    // ObjectSynchronizer::deflate_monitor_using_JT().
 protected:
   ObjectWaiter* volatile _WaitSet;  // LL of threads wait()ing on the monitor
   volatile jint _waiters;           // number of waiting threads
@@ -200,6 +225,7 @@
   static int header_offset_in_bytes()     { return offset_of(ObjectMonitor, _header); }
   static int object_offset_in_bytes()     { return offset_of(ObjectMonitor, _object); }
   static int owner_offset_in_bytes()      { return offset_of(ObjectMonitor, _owner); }
+  static int ref_count_offset_in_bytes()  { return offset_of(ObjectMonitor, _ref_count); }
   static int recursions_offset_in_bytes() { return offset_of(ObjectMonitor, _recursions); }
   static int cxq_offset_in_bytes()        { return offset_of(ObjectMonitor, _cxq); }
   static int succ_offset_in_bytes()       { return offset_of(ObjectMonitor, _succ); }
@@ -225,14 +251,38 @@
   intptr_t is_busy() const {
     // TODO-FIXME: assert _owner == null implies _recursions = 0
-    return _contentions|_waiters|intptr_t(_owner)|intptr_t(_cxq)|intptr_t(_EntryList);
+    // We do not include _ref_count in the is_busy() check because
+    // _ref_count is for indicating that the ObjectMonitor* is in
+    // use which is orthogonal to whether the ObjectMonitor itself
+    // is in use for a locking operation.
+    intptr_t ret_code = _contentions | _waiters | intptr_t(_cxq) | intptr_t(_EntryList);
+    if (!AsyncDeflateIdleMonitors) {
+      ret_code |= intptr_t(_owner);
+    } else {
+      if (_owner != DEFLATER_MARKER) {
+        ret_code |= intptr_t(_owner);
+      }
+    }
+    return ret_code;
   }
   const char* is_busy_to_string(stringStream* ss);
 
   intptr_t  is_entered(Thread* current) const;
 
-  void*     owner() const;
-  void      set_owner(void* owner);
+  void*     owner() const;  // Returns NULL if DEFLATER_MARKER is observed.
+  // Returns true if owner field == DEFLATER_MARKER and false otherwise.
+  bool      owner_is_DEFLATER_MARKER();
+  // Clear _owner field; current value must match old_value.
+  void      release_clear_owner_with_barrier(void* old_value, bool needs_fence);
+  // Simply set _owner field to new_value; current value must match old_value.
+  void      simply_set_owner_from(void* new_value, void* old_value);
+  // Simply set _owner field to new_value; current value must match old_value1 or old_value2.
+  void      simply_set_owner_from(void* new_value, void* old_value1, void* old_value2);
+  // Simply set _owner field to self; current value must match basic_lock_p.
+  void      simply_set_owner_from_BasicLock(Thread* self, void* basic_lock_p);
+  // Try to set _owner field to new_value if the current value matches
+  // old_value. Otherwise, does not change the _owner field.
+  void*     try_set_owner_from(void* new_value, void* old_value);
 
   jint      waiters() const;
@@ -261,7 +311,9 @@
   //   _cxq == 0 _succ == NULL _owner == NULL _waiters == 0
   //   _contentions == 0 EntryList == NULL
   //   _recursions == 0 _WaitSet == NULL
-  DEBUG_ONLY(stringStream ss;)
+#ifdef ASSERT
+  stringStream ss;
+#endif
   assert((is_busy() | _recursions) == 0, "freeing in-use monitor: %s, "
          "recursions=" INTX_FORMAT, is_busy_to_string(&ss), _recursions);
   _succ = NULL;
@@ -276,11 +328,20 @@
   void*     object() const;
   void*     object_addr();
   void      set_object(void* obj);
+  void      set_allocation_state(AllocationState s);
+  AllocationState allocation_state() const;
+  bool      is_free() const;
+  bool      is_old() const;
+  bool      is_new() const;
+  void      dec_ref_count();
+  void      inc_ref_count();
+  jint      ref_count() const;
 
   // Returns true if the specified thread owns the ObjectMonitor. Otherwise
   // returns false and throws IllegalMonitorStateException (IMSE).
   bool      check_owner(Thread* THREAD);
   void      clear();
+  void      clear_using_JT();
 
   void      enter(TRAPS);
   void      exit(bool not_suspended, TRAPS);
@@ -307,10 +368,44 @@
   void      ReenterI(Thread* self, ObjectWaiter* self_node);
   void      UnlinkAfterAcquire(Thread* self, ObjectWaiter* self_node);
   int       TryLock(Thread* self);
-  int       NotRunnable(Thread* self, Thread * Owner);
+  int       NotRunnable(Thread* self, Thread* Owner);
   int       TrySpin(Thread* self);
   void      ExitEpilog(Thread* self, ObjectWaiter* Wakee);
   bool      ExitSuspendEquivalent(JavaThread* self);
+  void      install_displaced_markword_in_object(const oop obj);
+};
+
+// A helper object for managing an ObjectMonitor*'s ref_count. There
+// are special safety considerations when async deflation is used.
+class ObjectMonitorHandle : public StackObj {
+ private:
+  ObjectMonitor* _om_ptr;
+ public:
+  ObjectMonitorHandle() { _om_ptr = NULL; }
+  ~ObjectMonitorHandle();
+
+  ObjectMonitor* om_ptr() const { return _om_ptr; }
+  // Save the ObjectMonitor* associated with the specified markWord and
+  // increment the ref_count.
+  bool save_om_ptr(oop object, markWord mark);
+  // Save the specified ObjectMonitor* if safe and increment the ref_count.
+  bool set_om_ptr_if_safe(ObjectMonitor* om_ptr);
+  // Unset the _om_ptr field and decrement the ref_count.
+  void unset_om_ptr();
+
+  // For internal use by ObjectSynchronizer::inflate().
+  void set_om_ptr(ObjectMonitor* om_ptr);
 };
 
+// Macro to use guarantee() for more strict AsyncDeflateIdleMonitors
+// checks and assert() otherwise.
+#define ADIM_guarantee(p, ...)       \
+  do {                               \
+    if (AsyncDeflateIdleMonitors) {  \
+      guarantee(p, __VA_ARGS__);     \
+    } else {                         \
+      assert(p, __VA_ARGS__);        \
+    }                                \
+  } while (0)
+
 #endif // SHARE_RUNTIME_OBJECTMONITOR_HPP
--- old/src/hotspot/share/runtime/objectMonitor.inline.hpp 2019-12-11 14:52:17.000000000 -0500
+++ new/src/hotspot/share/runtime/objectMonitor.inline.hpp 2019-12-11 14:52:17.000000000 -0500
@@ -25,6 +25,7 @@
 #ifndef SHARE_RUNTIME_OBJECTMONITOR_INLINE_HPP
 #define SHARE_RUNTIME_OBJECTMONITOR_INLINE_HPP
 
+#include "logging/log.hpp"
 #include "runtime/atomic.hpp"
 
 inline intptr_t ObjectMonitor::is_entered(TRAPS) const {
@@ -51,19 +52,57 @@
   return _waiters;
 }
 
+// Returns NULL if DEFLATER_MARKER is observed.
 inline void* ObjectMonitor::owner() const {
-  return _owner;
+  void* owner = _owner;
+  return owner != DEFLATER_MARKER ? owner : NULL;
+}
+
+// Returns true if owner field == DEFLATER_MARKER and false otherwise.
+// This accessor is called when we really need to know if the owner
+// field == DEFLATER_MARKER and any non-NULL value won't do the trick.
+inline bool ObjectMonitor::owner_is_DEFLATER_MARKER() {
+  return _owner == DEFLATER_MARKER;
 }
 
 inline void ObjectMonitor::clear() {
   assert(Atomic::load(&_header).value() != 0, "must be non-zero");
+  assert(_owner == NULL, "must be NULL: owner=" INTPTR_FORMAT, p2i(_owner));
+#ifdef ASSERT
+  jint l_ref_count = ref_count();
+#endif
+  assert(l_ref_count == 0, "must be 0: l_ref_count=%d, ref_count=%d", l_ref_count, ref_count());
+
+  Atomic::store(&_header, markWord::zero());
+
+  clear_using_JT();
+}
+
+inline void ObjectMonitor::clear_using_JT() {
+  // Unlike other *_using_JT() functions, we cannot assert
+  // AsyncDeflateIdleMonitors or Thread::current()->is_Java_thread()
+  // because clear() calls this function for the rest of its checks.
+
+  if (AsyncDeflateIdleMonitors) {
+    // Async deflation protocol uses the header, owner and ref_count
+    // fields.
+    // While the ObjectMonitor being deflated is on the global free
+    // list, we leave those three fields alone; owner == DEFLATER_MARKER
+    // and ref_count < 0 will force any racing threads to retry. The
+    // header field is used by install_displaced_markword_in_object()
+    // in the last part of the deflation protocol so we cannot check
+    // its value here.
+    guarantee(_owner == NULL || _owner == DEFLATER_MARKER,
+              "must be NULL or DEFLATER_MARKER: owner=" INTPTR_FORMAT,
+              p2i(_owner));
+    jint l_ref_count = ref_count();
+    guarantee(l_ref_count <= 0, "must be <= 0: l_ref_count=%d, ref_count=%d", l_ref_count, ref_count());
+  }
   assert(_contentions == 0, "must be 0: contentions=%d", _contentions);
   assert(_waiters == 0, "must be 0: waiters=%d", _waiters);
   assert(_recursions == 0, "must be 0: recursions=" INTX_FORMAT, _recursions);
   assert(_object != NULL, "must be non-NULL");
-  assert(_owner == NULL, "must be NULL: owner=" INTPTR_FORMAT, p2i(_owner));
-
-  Atomic::store(&_header, markWord::zero());
 
+  set_allocation_state(Free);
   _object = NULL;
 }
 
@@ -84,8 +123,122 @@
   return _contentions;
 }
 
-inline void ObjectMonitor::set_owner(void* owner) {
-  _owner = owner;
+// Clear _owner field; current value must match old_value.
+// If needs_fence is true, we issue a fence() after the release_store().
+// Otherwise, a storeload() is good enough. See the callers for more info.
+inline void ObjectMonitor::release_clear_owner_with_barrier(void* old_value,
+                                                            bool needs_fence) {
+  void* prev = _owner;
+  ADIM_guarantee(prev == old_value, "unexpected prev owner=" INTPTR_FORMAT
+                 ", expected=" INTPTR_FORMAT, p2i(prev), p2i(old_value));
+  Atomic::release_store(&_owner, (void*)NULL);
+  if (needs_fence) {
+    OrderAccess::fence();
+  } else {
+    OrderAccess::storeload();
+  }
+  log_trace(monitorinflation, owner)("release_clear_owner_with_barrier(): mid="
+                                     INTPTR_FORMAT ", prev=" INTPTR_FORMAT
+                                     ", needs_fence=%d", p2i(this), p2i(prev),
+                                     needs_fence);
+}
+
+// Simply set _owner field to new_value; current value must match old_value.
+// (Simple means no memory sync needed.)
+inline void ObjectMonitor::simply_set_owner_from(void* new_value, void* old_value) {
+  void* prev = _owner;
+  ADIM_guarantee(prev == old_value, "unexpected prev owner=" INTPTR_FORMAT
+                 ", expected=" INTPTR_FORMAT, p2i(prev), p2i(old_value));
+  _owner = new_value;
+  log_trace(monitorinflation, owner)("simply_set_owner_from(): mid="
+                                     INTPTR_FORMAT ", prev=" INTPTR_FORMAT
+                                     ", new=" INTPTR_FORMAT, p2i(this),
+                                     p2i(prev), p2i(new_value));
+}
+
+// Simply set _owner field to new_value; current value must match old_value1 or old_value2.
+// (Simple means no memory sync needed.)
+inline void ObjectMonitor::simply_set_owner_from(void* new_value, void* old_value1, void* old_value2) {
+  void* prev = _owner;
+  ADIM_guarantee(prev == old_value1 || prev == old_value2,
+                 "unexpected prev owner=" INTPTR_FORMAT ", expected1="
+                 INTPTR_FORMAT " or expected2=" INTPTR_FORMAT, p2i(prev),
+                 p2i(old_value1), p2i(old_value2));
+  _owner = new_value;
+  log_trace(monitorinflation, owner)("simply_set_owner_from(old1=" INTPTR_FORMAT
+                                     ", old2=" INTPTR_FORMAT "): mid="
+                                     INTPTR_FORMAT ", prev=" INTPTR_FORMAT
+                                     ", new=" INTPTR_FORMAT, p2i(old_value1),
+                                     p2i(old_value2), p2i(this), p2i(prev),
+                                     p2i(new_value));
+}
+
+// Simply set _owner field to self; current value must match basic_lock_p.
+inline void ObjectMonitor::simply_set_owner_from_BasicLock(Thread* self, void* basic_lock_p) { + void* prev = _owner; + ADIM_guarantee(prev == basic_lock_p, "unexpected prev owner=" INTPTR_FORMAT + ", expected=" INTPTR_FORMAT, p2i(prev), p2i(basic_lock_p)); + // Non-null owner field to non-null owner field is safe without + // cmpxchg() as long as all readers can tolerate either flavor. + _owner = self; + log_trace(monitorinflation, owner)("simply_set_owner_from_BasicLock(): mid=" + INTPTR_FORMAT ", prev=" INTPTR_FORMAT + ", new=" INTPTR_FORMAT ", basic_lock_p=" + INTPTR_FORMAT, p2i(this), p2i(prev), + p2i(self), p2i(basic_lock_p)); +} + +// Try to set _owner field to new_value if the current value matches +// old_value. Otherwise, does not change the _owner field. +inline void* ObjectMonitor::try_set_owner_from(void* new_value, void* old_value) { + void* prev = Atomic::cmpxchg(&_owner, old_value, new_value); + if (prev == old_value) { + log_trace(monitorinflation, owner)("try_set_owner_from(): mid=" + INTPTR_FORMAT ", prev=" INTPTR_FORMAT + ", new=" INTPTR_FORMAT, p2i(this), + p2i(prev), p2i(new_value)); + } + return prev; +} + +inline void ObjectMonitor::set_allocation_state(ObjectMonitor::AllocationState s) { + _allocation_state = s; +} + +inline ObjectMonitor::AllocationState ObjectMonitor::allocation_state() const { + return _allocation_state; +} + +inline bool ObjectMonitor::is_free() const { + return _allocation_state == Free; +} + +inline bool ObjectMonitor::is_old() const { + return _allocation_state == Old; +} + +inline bool ObjectMonitor::is_new() const { + return _allocation_state == New; +} + +inline void ObjectMonitor::dec_ref_count() { + Atomic::dec(&_ref_count); + // Can be negative as part of async deflation protocol. + jint l_ref_count = ref_count(); + ADIM_guarantee(AsyncDeflateIdleMonitors || l_ref_count >= 0, + "sanity check: l_ref_count=%d, ref_count=%d", l_ref_count, ref_count()); +} + +inline void ObjectMonitor::inc_ref_count() { + Atomic::inc(&_ref_count); + // Can be negative as part of async deflation protocol. + jint l_ref_count = ref_count(); + ADIM_guarantee(AsyncDeflateIdleMonitors || l_ref_count > 0, + "sanity check: l_ref_count=%d, ref_count=%d", l_ref_count, ref_count()); +} + +inline jint ObjectMonitor::ref_count() const { + return Atomic::load(&_ref_count); } #endif // SHARE_RUNTIME_OBJECTMONITOR_INLINE_HPP --- old/src/hotspot/share/runtime/safepoint.cpp 2019-12-11 14:52:19.000000000 -0500 +++ new/src/hotspot/share/runtime/safepoint.cpp 2019-12-11 14:52:18.000000000 -0500 @@ -523,8 +523,9 @@ } bool SafepointSynchronize::is_cleanup_needed() { - // Need a safepoint if there are many monitors to deflate. - if (ObjectSynchronizer::is_cleanup_needed()) return true; + // Need a cleanup safepoint if there are too many monitors in use + // and the monitor deflation needs to be done at a safepoint. + if (ObjectSynchronizer::is_safepoint_deflation_needed()) return true; // Need a safepoint if some inline cache buffers is non-empty if (!InlineCacheBuffer::is_empty()) return true; if (StringTable::needs_rehashing()) return true; @@ -547,6 +548,10 @@ _counters(counters) {} void do_thread(Thread* thread) { + // deflate_thread_local_monitors() handles or requests deflation of + // this thread's idle monitors. If !AsyncDeflateIdleMonitors or if + // there is a special cleanup request, deflation is handled now. + // Otherwise, async deflation is requested via a flag. 
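The try_set_owner_from() contract above is a plain compare-and-swap that reports the observed value; a minimal sketch using std::atomic as a stand-in for HotSpot's Atomic::cmpxchg():

#include <atomic>

static void* try_set_owner_from(std::atomic<void*>& owner,
                                void* new_value, void* old_value) {
  void* expected = old_value;
  owner.compare_exchange_strong(expected, new_value);  // no-op on mismatch
  return expected;  // equals old_value exactly when the transition happened
}

Callers compare the return value with old_value, so try_set_owner_from(owner, self, nullptr) == nullptr means the lock was acquired.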
ObjectSynchronizer::deflate_thread_local_monitors(thread, _counters); if (_nmethod_cl != NULL && thread->is_Java_thread() && ! thread->is_Code_cache_sweeper_thread()) { @@ -579,7 +584,11 @@ const char* name = "deflating global idle monitors"; EventSafepointCleanupTask event; TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup)); - ObjectSynchronizer::deflate_idle_monitors(_counters); + // AsyncDeflateIdleMonitors only uses DeflateMonitorCounters + // when a special cleanup has been requested. + // Note: This logging output will include global idle monitor + // elapsed times, but not global idle monitor deflation count. + ObjectSynchronizer::do_safepoint_work(_counters); post_safepoint_cleanup_task_event(event, safepoint_id, name); } --- old/src/hotspot/share/runtime/serviceThread.cpp 2019-12-11 14:52:20.000000000 -0500 +++ new/src/hotspot/share/runtime/serviceThread.cpp 2019-12-11 14:52:20.000000000 -0500 @@ -106,6 +106,7 @@ bool thread_id_table_work = false; bool protection_domain_table_work = false; bool oopstorage_work = false; + bool deflate_idle_monitors = false; JvmtiDeferredEvent jvmti_event; { // Need state transition ThreadBlockInVM so that this thread @@ -132,10 +133,14 @@ (resolved_method_table_work = ResolvedMethodTable::has_work()) | (thread_id_table_work = ThreadIdTable::has_work()) | (protection_domain_table_work = SystemDictionary::pd_cache_table()->has_work()) | - (oopstorage_work = OopStorage::has_cleanup_work_and_reset()) + (oopstorage_work = OopStorage::has_cleanup_work_and_reset()) | + (deflate_idle_monitors = ObjectSynchronizer::is_async_deflation_needed()) ) == 0) { // Wait until notified that there is some work to do. - ml.wait(); + // If AsyncDeflateIdleMonitors, then we wait for + // GuaranteedSafepointInterval so that is_async_deflation_needed() + // is checked at the same interval. + ml.wait(AsyncDeflateIdleMonitors ? GuaranteedSafepointInterval : 0); } if (has_jvmti_events) { @@ -187,6 +192,10 @@ if (oopstorage_work) { cleanup_oopstorages(); } + + if (deflate_idle_monitors) { + ObjectSynchronizer::deflate_idle_monitors_using_JT(); + } } } --- old/src/hotspot/share/runtime/sharedRuntime.cpp 2019-12-11 14:52:21.000000000 -0500 +++ new/src/hotspot/share/runtime/sharedRuntime.cpp 2019-12-11 14:52:21.000000000 -0500 @@ -63,8 +63,10 @@ #include "runtime/interfaceSupport.inline.hpp" #include "runtime/java.hpp" #include "runtime/javaCalls.hpp" +#include "runtime/objectMonitor.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" #include "runtime/vframe.inline.hpp" #include "runtime/vframeArray.hpp" #include "utilities/copy.hpp" @@ -3111,9 +3113,13 @@ kptr2 = fr.next_monitor_in_interpreter_frame(kptr2) ) { if (kptr2->obj() != NULL) { // Avoid 'holes' in the monitor array BasicLock *lock = kptr2->lock(); + // Disallow async deflation of the inflated monitor so the + // displaced header stays stable until we've copied it. 
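The bounded wait added to the ServiceThread loop above is what turns is_async_deflation_needed() into a periodically polled predicate; a sketch with standard-library types standing in for HotSpot's Monitor, where 'interval' plays the role of GuaranteedSafepointInterval:

#include <chrono>
#include <condition_variable>
#include <mutex>

static void service_wait(std::mutex& m, std::condition_variable& cv,
                         bool async_deflation,
                         std::chrono::milliseconds interval) {
  std::unique_lock<std::mutex> ml(m);
  if (async_deflation) {
    cv.wait_for(ml, interval);  // timed wait: re-check the predicate periodically
  } else {
    cv.wait(ml);                // untimed wait: purely notification driven
  }
}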
+ ObjectMonitorHandle omh; // Inflate so the displaced header becomes position-independent - if (lock->displaced_header().is_unlocked()) - ObjectSynchronizer::inflate_helper(kptr2->obj()); + if (lock->displaced_header().is_unlocked()) { + ObjectSynchronizer::inflate_helper(&omh, kptr2->obj()); + } // Now the displaced header is free to move buf[i++] = (intptr_t)lock->displaced_header().value(); buf[i++] = cast_from_oop(kptr2->obj()); --- old/src/hotspot/share/runtime/synchronizer.cpp 2019-12-11 14:52:23.000000000 -0500 +++ new/src/hotspot/share/runtime/synchronizer.cpp 2019-12-11 14:52:22.000000000 -0500 @@ -37,11 +37,13 @@ #include "runtime/atomic.hpp" #include "runtime/biasedLocking.hpp" #include "runtime/handles.inline.hpp" +#include "runtime/handshake.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/objectMonitor.inline.hpp" #include "runtime/osThread.hpp" +#include "runtime/safepointMechanism.inline.hpp" #include "runtime/safepointVerifiers.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -117,22 +119,295 @@ static volatile intptr_t gInflationLocks[NINFLATIONLOCKS]; // global list of blocks of monitors -PaddedObjectMonitor* volatile ObjectSynchronizer::g_block_list = NULL; -// Global ObjectMonitor free list. Newly allocated and deflated -// ObjectMonitors are prepended here. -ObjectMonitor* volatile ObjectSynchronizer::g_free_list = NULL; -// Global ObjectMonitor in-use list. When a JavaThread is exiting, -// ObjectMonitors on its per-thread in-use list are prepended here. -ObjectMonitor* volatile ObjectSynchronizer::g_om_in_use_list = NULL; -int ObjectSynchronizer::g_om_in_use_count = 0; // # on g_om_in_use_list - -static volatile intptr_t gListLock = 0; // protects global monitor lists -static volatile int g_om_free_count = 0; // # on g_free_list -static volatile int g_om_population = 0; // # Extant -- in circulation +PaddedObjectMonitor* ObjectSynchronizer::g_block_list = NULL; +bool volatile ObjectSynchronizer::_is_async_deflation_requested = false; +bool volatile ObjectSynchronizer::_is_special_deflation_requested = false; +jlong ObjectSynchronizer::_last_async_deflation_time_ns = 0; + +struct ListGlobals { + char _pad_prefix[OM_CACHE_LINE_SIZE]; + // These are highly shared list related variables. + // To avoid false-sharing they need to be the sole occupants of a cache line. + + // Global ObjectMonitor free list. Newly allocated and deflated + // ObjectMonitors are prepended here. + ObjectMonitor* free_list; + DEFINE_PAD_MINUS_SIZE(1, OM_CACHE_LINE_SIZE, sizeof(ObjectMonitor*)); + + // Global ObjectMonitor in-use list. When a JavaThread is exiting, + // ObjectMonitors on its per-thread in-use list are prepended here. + ObjectMonitor* in_use_list; + DEFINE_PAD_MINUS_SIZE(2, OM_CACHE_LINE_SIZE, sizeof(ObjectMonitor*)); + + // Global ObjectMonitor wait list. If HandshakeAfterDeflateIdleMonitors + // is true, deflated ObjectMonitors wait on this list until after a + // handshake or a safepoint for platforms that don't support handshakes. + // After the handshake or safepoint, the deflated ObjectMonitors are + // prepended to free_list. 
+ ObjectMonitor* wait_list; + DEFINE_PAD_MINUS_SIZE(3, OM_CACHE_LINE_SIZE, sizeof(ObjectMonitor*)); + + int free_count; // # on free_list + DEFINE_PAD_MINUS_SIZE(4, OM_CACHE_LINE_SIZE, sizeof(int)); + + int in_use_count; // # on in_use_list + DEFINE_PAD_MINUS_SIZE(5, OM_CACHE_LINE_SIZE, sizeof(int)); + + int population; // # Extant -- in circulation + DEFINE_PAD_MINUS_SIZE(6, OM_CACHE_LINE_SIZE, sizeof(int)); + + int wait_count; // # on wait_list + DEFINE_PAD_MINUS_SIZE(7, OM_CACHE_LINE_SIZE, sizeof(int)); +}; +static ListGlobals LVars; #define CHAINMARKER (cast_to_oop(-1)) +// =====================> Spinlock functions + +// ObjectMonitors are not lockable outside of this file. We use spinlocks +// implemented using a bit in the _next_om field instead of the heavier +// weight locking mechanisms for faster list management. + +#define OM_LOCK_BIT 0x1 + +// Return true if the ObjectMonitor is locked. +// Otherwise returns false. +static bool is_locked(ObjectMonitor* om) { + return ((intptr_t)Atomic::load(&om->_next_om) & OM_LOCK_BIT) == OM_LOCK_BIT; +} + +// Mark an ObjectMonitor* with OM_LOCK_BIT and return it. +// Note: the om parameter may or may not have been marked originally. +static ObjectMonitor* mark_om_ptr(ObjectMonitor* om) { + return (ObjectMonitor*)((intptr_t)om | OM_LOCK_BIT); +} + +// Try to lock an ObjectMonitor. Returns true if locking was successful. +// Otherwise returns false. +static bool try_om_lock(ObjectMonitor* om) { + // Get current next field without any OM_LOCK_BIT value. + ObjectMonitor* next = (ObjectMonitor*)((intptr_t)Atomic::load(&om->_next_om) & ~OM_LOCK_BIT); + if (Atomic::cmpxchg(&om->_next_om, next, mark_om_ptr(next)) != next) { + return false; // Cannot lock the ObjectMonitor. + } + return true; +} + +// Lock an ObjectMonitor. +static void om_lock(ObjectMonitor* om) { + while (true) { + if (try_om_lock(om)) { + return; + } + } +} + +// Unlock an ObjectMonitor. +static void om_unlock(ObjectMonitor* om) { + ObjectMonitor* next = Atomic::load(&om->_next_om); + guarantee(((intptr_t)next & OM_LOCK_BIT) == OM_LOCK_BIT, "next=" INTPTR_FORMAT + " must have OM_LOCK_BIT=%x set.", p2i(next), OM_LOCK_BIT); + + next = (ObjectMonitor*)((intptr_t)next & ~OM_LOCK_BIT); // Clear OM_LOCK_BIT. + Atomic::store(&om->_next_om, next); +} + +// Get the list head after locking it. Returns the list head or NULL +// if the list is empty. +static ObjectMonitor* get_list_head_locked(ObjectMonitor** list_p) { + while (true) { + ObjectMonitor* mid = Atomic::load(list_p); + if (mid == NULL) { + return NULL; // The list is empty. + } + if (try_om_lock(mid)) { + if (Atomic::load(list_p) != mid) { + // The list head changed so we have to retry. + om_unlock(mid); + continue; + } + return mid; + } + } +} + +// Return the unmarked next field in an ObjectMonitor. Note: the next +// field may or may not have been marked with OM_LOCK_BIT originally. +static ObjectMonitor* unmarked_next(ObjectMonitor* om) { + return (ObjectMonitor*)((intptr_t)Atomic::load(&om->_next_om) & ~OM_LOCK_BIT); +} + +#undef OM_LOCK_BIT + + +// =====================> List Management functions + +// Set the next field in an ObjectMonitor to the specified value. +static void set_next(ObjectMonitor* om, ObjectMonitor* value) { + Atomic::store(&om->_next_om, value); +} + +// Prepend a list of ObjectMonitors to the specified *list_p. 'tail' is +// the last ObjectMonitor in the list and there are 'count' on the list. +// Also updates the specified *count_p. 
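Two sketches for the machinery above. First, the false-sharing layout that ListGlobals builds with DEFINE_PAD_MINUS_SIZE, expressed here with alignas; 64 is an assumed OM_CACHE_LINE_SIZE:

#include <atomic>

struct PaddedCounters {
  alignas(64) std::atomic<int> free_count{0};   // each counter gets its
  alignas(64) std::atomic<int> in_use_count{0}; // own cache line
  alignas(64) std::atomic<int> wait_count{0};
};
static_assert(sizeof(PaddedCounters) >= 3 * 64, "one cache line per counter");

Second, the _next_om low-bit spinlock: the low bit of the link field doubles as the lock bit, so one CAS both observes "unlocked" and takes the lock. Node and kLockBit are illustrative stand-ins:

#include <atomic>
#include <cstdint>

struct Node {
  std::atomic<intptr_t> next{0};  // successor pointer, low bit = lock
};
static const intptr_t kLockBit = 0x1;

static bool try_lock_node(Node* n) {
  intptr_t unlocked = n->next.load() & ~kLockBit;  // expected unlocked value
  return n->next.compare_exchange_strong(unlocked, unlocked | kLockBit);
}

static void unlock_node(Node* n) {
  // Only the lock holder clears the bit, so a load/store pair suffices.
  n->next.store(n->next.load() & ~kLockBit);
}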
+static void prepend_list_to_common(ObjectMonitor* list, ObjectMonitor* tail, + int count, ObjectMonitor** list_p, + int* count_p) { + while (true) { + ObjectMonitor* cur = Atomic::load(list_p); + // Prepend list to *list_p. + if (!try_om_lock(tail)) { + continue; // failed to lock tail so try it all again + } + set_next(tail, cur); // tail now points to cur (and unlocks tail) + if (cur == NULL) { + // No potential race with takers or other prependers since + // *list_p is empty. + if (Atomic::cmpxchg(list_p, cur, list) == cur) { + // Successfully switched *list_p to the list value. + Atomic::add(count_p, count); + break; + } + // Implied else: try it all again + } else { + if (!try_om_lock(cur)) { + continue; // failed to lock cur so try it all again + } + // We locked cur so try to switch *list_p to the list value. + if (Atomic::cmpxchg(list_p, cur, list) != cur) { + // The list head has changed so unlock cur and try again: + om_unlock(cur); + continue; + } + Atomic::add(count_p, count); + om_unlock(cur); + break; + } + } +} + +// Prepend a newly allocated block of ObjectMonitors to g_block_list and +// LVars.free_list. Also updates LVars.population and LVars.free_count. +void ObjectSynchronizer::prepend_block_to_lists(PaddedObjectMonitor* new_blk) { + // First we handle g_block_list: + while (true) { + PaddedObjectMonitor* cur = Atomic::load(&g_block_list); + // Prepend new_blk to g_block_list. The first ObjectMonitor in + // a block is reserved for use as linkage to the next block. + new_blk[0]._next_om = cur; + if (Atomic::cmpxchg(&g_block_list, cur, new_blk) == cur) { + // Successfully switched g_block_list to the new_blk value. + Atomic::add(&LVars.population, _BLOCKSIZE - 1); + break; + } + // Implied else: try it all again + } + + // Second we handle LVars.free_list: + prepend_list_to_common(new_blk + 1, &new_blk[_BLOCKSIZE - 1], _BLOCKSIZE - 1, + &LVars.free_list, &LVars.free_count); +} + +// Prepend a list of ObjectMonitors to LVars.free_list. 'tail' is the last +// ObjectMonitor in the list and there are 'count' on the list. Also +// updates LVars.free_count. +static void prepend_list_to_global_free_list(ObjectMonitor* list, + ObjectMonitor* tail, int count) { + prepend_list_to_common(list, tail, count, &LVars.free_list, &LVars.free_count); +} + +// Prepend a list of ObjectMonitors to LVars.wait_list. 'tail' is the last +// ObjectMonitor in the list and there are 'count' on the list. Also +// updates LVars.wait_count. +static void prepend_list_to_global_wait_list(ObjectMonitor* list, + ObjectMonitor* tail, int count) { + assert(HandshakeAfterDeflateIdleMonitors, "sanity check"); + prepend_list_to_common(list, tail, count, &LVars.wait_list, &LVars.wait_count); +} + +// Prepend a list of ObjectMonitors to LVars.in_use_list. 'tail' is the last +// ObjectMonitor in the list and there are 'count' on the list. Also +// updates LVars.in_use_list. +static void prepend_list_to_global_in_use_list(ObjectMonitor* list, + ObjectMonitor* tail, int count) { + prepend_list_to_common(list, tail, count, &LVars.in_use_list, &LVars.in_use_count); +} + +// Prepend an ObjectMonitor to the specified list. Also updates +// the specified counter. +static void prepend_to_common(ObjectMonitor* m, ObjectMonitor** list_p, + int* count_p) { + while (true) { + om_lock(m); // Lock m so we can safely update its next field. 
+ ObjectMonitor* cur = NULL; + // Lock the list head to guard against A-B-A race: + if ((cur = get_list_head_locked(list_p)) != NULL) { + // List head is now locked so we can safely switch it. + set_next(m, cur); // m now points to cur (and unlocks m) + Atomic::store(list_p, m); // Switch list head to unlocked m. + om_unlock(cur); + break; + } + // The list is empty so try to set the list head. + assert(cur == NULL, "cur must be NULL: cur=" INTPTR_FORMAT, p2i(cur)); + set_next(m, cur); // m now points to NULL (and unlocks m) + if (Atomic::cmpxchg(list_p, cur, m) == cur) { + // List head is now unlocked m. + break; + } + // Implied else: try it all again + } + Atomic::inc(count_p); +} + +// Prepend an ObjectMonitor to a per-thread om_free_list. +// Also updates the per-thread om_free_count. +static void prepend_to_om_free_list(Thread* self, ObjectMonitor* m) { + prepend_to_common(m, &self->om_free_list, &self->om_free_count); +} + +// Prepend an ObjectMonitor to a per-thread om_in_use_list. +// Also updates the per-thread om_in_use_count. +static void prepend_to_om_in_use_list(Thread* self, ObjectMonitor* m) { + prepend_to_common(m, &self->om_in_use_list, &self->om_in_use_count); +} + +// Take an ObjectMonitor from the start of the specified list. Also +// decrements the specified counter. Returns NULL if none are available. +static ObjectMonitor* take_from_start_of_common(ObjectMonitor** list_p, + int* count_p) { + ObjectMonitor* take = NULL; + // Lock the list head to guard against A-B-A race: + if ((take = get_list_head_locked(list_p)) == NULL) { + return NULL; // None are available. + } + ObjectMonitor* next = unmarked_next(take); + // Switch locked list head to next (which unlocks the list head, but + // leaves take locked): + Atomic::store(list_p, next); + Atomic::dec(count_p); + // Unlock take, but leave the next value for any lagging list + // walkers. It will get cleaned up when take is prepended to + // the in-use list: + om_unlock(take); + return take; +} + +// Take an ObjectMonitor from the start of the LVars.free_list. Also +// updates LVars.free_count. Returns NULL if none are available. +static ObjectMonitor* take_from_start_of_global_free_list() { + return take_from_start_of_common(&LVars.free_list, &LVars.free_count); +} + +// Take an ObjectMonitor from the start of a per-thread free-list. +// Also updates om_free_count. Returns NULL if none are available. +static ObjectMonitor* take_from_start_of_om_free_list(Thread* self) { + return take_from_start_of_common(&self->om_free_list, &self->om_free_count); +} + + // =====================> Quick functions // The quick_* forms are special fast-path variants used to improve @@ -211,39 +486,59 @@ assert(((JavaThread *) self)->thread_state() == _thread_in_Java, "invariant"); NoSafepointVerifier nsv; if (obj == NULL) return false; // Need to throw NPE - const markWord mark = obj->mark(); - if (mark.has_monitor()) { - ObjectMonitor* const m = mark.monitor(); - assert(m->object() == obj, "invariant"); - Thread* const owner = (Thread *) m->_owner; - - // Lock contention and Transactional Lock Elision (TLE) diagnostics - // and observability - // Case: light contention possibly amenable to TLE - // Case: TLE inimical operations such as nested/recursive synchronization + while (true) { + const markWord mark = obj->mark(); - if (owner == self) { - m->_recursions++; - return true; - } + if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. 
+ assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* const m = omh.om_ptr(); + assert(m->object() == obj, "invariant"); + Thread* const owner = (Thread *) m->_owner; + + // Lock contention and Transactional Lock Elision (TLE) diagnostics + // and observability + // Case: light contention possibly amenable to TLE + // Case: TLE inimical operations such as nested/recursive synchronization + + if (owner == self) { + m->_recursions++; + return true; + } - // This Java Monitor is inflated so obj's header will never be - // displaced to this thread's BasicLock. Make the displaced header - // non-NULL so this BasicLock is not seen as recursive nor as - // being locked. We do this unconditionally so that this thread's - // BasicLock cannot be mis-interpreted by any stack walkers. For - // performance reasons, stack walkers generally first check for - // Biased Locking in the object's header, the second check is for - // stack-locking in the object's header, the third check is for - // recursive stack-locking in the displaced header in the BasicLock, - // and last are the inflated Java Monitor (ObjectMonitor) checks. - lock->set_displaced_header(markWord::unused_mark()); + // This Java Monitor is inflated so obj's header will never be + // displaced to this thread's BasicLock. Make the displaced header + // non-NULL so this BasicLock is not seen as recursive nor as + // being locked. We do this unconditionally so that this thread's + // BasicLock cannot be mis-interpreted by any stack walkers. For + // performance reasons, stack walkers generally first check for + // Biased Locking in the object's header, the second check is for + // stack-locking in the object's header, the third check is for + // recursive stack-locking in the displaced header in the BasicLock, + // and last are the inflated Java Monitor (ObjectMonitor) checks. + lock->set_displaced_header(markWord::unused_mark()); + + if (owner == NULL && m->try_set_owner_from(self, NULL) == NULL) { + assert(m->_recursions == 0, "invariant"); + return true; + } - if (owner == NULL && Atomic::replace_if_null(&(m->_owner), self)) { - assert(m->_recursions == 0, "invariant"); - return true; + if (AsyncDeflateIdleMonitors && + m->try_set_owner_from(self, DEFLATER_MARKER) == DEFLATER_MARKER) { + // The deflation protocol finished the first part (setting owner), + // but it failed the second part (making ref_count negative) and + // bailed. Or the ObjectMonitor was async deflated and reused. + // Acquired the monitor. + assert(m->_recursions == 0, "invariant"); + return true; + } } + break; } // Note that we could inflate in quick_enter. @@ -295,7 +590,9 @@ // must be non-zero to avoid looking like a re-entrant lock, // and must not look locked either. lock->set_displaced_header(markWord::unused_mark()); - inflate(THREAD, obj(), inflate_cause_monitor_enter)->enter(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_monitor_enter); + omh.om_ptr()->enter(THREAD); } void ObjectSynchronizer::exit(oop object, BasicLock* lock, TRAPS) { @@ -344,7 +641,9 @@ } // We have to take the slow-path of possible inflation and then exit. 
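The quick_enter() change above acquires the monitor on either of two owner-field transitions; a sketch of that decision, where kDeflaterMarker is an illustrative stand-in for DEFLATER_MARKER:

#include <atomic>

static void* const kDeflaterMarker = reinterpret_cast<void*>(-1);

static bool fast_acquire(std::atomic<void*>& owner, void* self) {
  void* expected = nullptr;
  if (owner.compare_exchange_strong(expected, self)) {
    return true;  // the monitor was unowned
  }
  expected = kDeflaterMarker;
  // Stealing the owner field from the deflater cancels an async
  // deflation that set owner but has not yet made ref_count negative.
  return owner.compare_exchange_strong(expected, self);
}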
- inflate(THREAD, object, inflate_cause_vm_internal)->exit(true, THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, object, inflate_cause_vm_internal); + omh.om_ptr()->exit(true, THREAD); } // ----------------------------------------------------------------------------- @@ -365,9 +664,10 @@ assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } - ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_vm_internal); - - return monitor->complete_exit(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_vm_internal); + intptr_t ret_code = omh.om_ptr()->complete_exit(THREAD); + return ret_code; } // NOTE: must use heavy weight monitor to handle complete_exit/reenter() @@ -377,9 +677,9 @@ assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } - ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_vm_internal); - - monitor->reenter(recursions, THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_vm_internal); + omh.om_ptr()->reenter(recursions, THREAD); } // ----------------------------------------------------------------------------- // JNI locks on java objects @@ -391,7 +691,9 @@ assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } THREAD->set_current_pending_monitor_is_from_java(false); - inflate(THREAD, obj(), inflate_cause_jni_enter)->enter(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_jni_enter); + omh.om_ptr()->enter(THREAD); THREAD->set_current_pending_monitor_is_from_java(true); } @@ -404,7 +706,9 @@ } assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); - ObjectMonitor* monitor = inflate(THREAD, obj, inflate_cause_jni_exit); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj, inflate_cause_jni_exit); + ObjectMonitor* monitor = omh.om_ptr(); // If this thread has locked the object, exit the monitor. We // intentionally do not use CHECK here because we must exit the // monitor even if an exception is pending. @@ -445,7 +749,9 @@ if (millis < 0) { THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(), "timeout value is negative"); } - ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_wait); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_wait); + ObjectMonitor* monitor = omh.om_ptr(); DTRACE_MONITOR_WAIT_PROBE(monitor, obj(), THREAD, millis); monitor->wait(millis, true, THREAD); @@ -454,7 +760,8 @@ // that's fixed we can uncomment the following line, remove the call // and change this function back into a "void" func. 
// DTRACE_MONITOR_PROBE(waited, monitor, obj(), THREAD); - return dtrace_waited_probe(monitor, obj, THREAD); + int ret_code = dtrace_waited_probe(monitor, obj, THREAD); + return ret_code; } void ObjectSynchronizer::wait_uninterruptibly(Handle obj, jlong millis, TRAPS) { @@ -465,7 +772,9 @@ if (millis < 0) { THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), "timeout value is negative"); } - inflate(THREAD, obj(), inflate_cause_wait)->wait(millis, false, THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_wait); + omh.om_ptr()->wait(millis, false, THREAD); } void ObjectSynchronizer::notify(Handle obj, TRAPS) { @@ -478,7 +787,9 @@ if (mark.has_locker() && THREAD->is_lock_owned((address)mark.locker())) { return; } - inflate(THREAD, obj(), inflate_cause_notify)->notify(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_notify); + omh.om_ptr()->notify(THREAD); } // NOTE: see comment of notify() @@ -492,7 +803,9 @@ if (mark.has_locker() && THREAD->is_lock_owned((address)mark.locker())) { return; } - inflate(THREAD, obj(), inflate_cause_notify)->notifyAll(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_notify); + omh.om_ptr()->notifyAll(THREAD); } // ----------------------------------------------------------------------------- @@ -517,15 +830,15 @@ // performed by the CPU(s) or platform. struct SharedGlobals { - char _pad_prefix[DEFAULT_CACHE_LINE_SIZE]; + char _pad_prefix[OM_CACHE_LINE_SIZE]; // These are highly shared mostly-read variables. // To avoid false-sharing they need to be the sole occupants of a cache line. volatile int stw_random; volatile int stw_cycle; - DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int) * 2); + DEFINE_PAD_MINUS_SIZE(1, OM_CACHE_LINE_SIZE, sizeof(volatile int) * 2); // Hot RW variable -- Sequester to avoid false-sharing volatile int hc_sequence; - DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int)); + DEFINE_PAD_MINUS_SIZE(2, OM_CACHE_LINE_SIZE, sizeof(volatile int)); }; static SharedGlobals GVars; @@ -685,84 +998,113 @@ assert(Universe::verify_in_progress() || DumpSharedSpaces || ((JavaThread *)self)->thread_state() != _thread_blocked, "invariant"); - ObjectMonitor* monitor = NULL; - markWord temp, test; - intptr_t hash; - markWord mark = read_stable_mark(obj); + while (true) { + ObjectMonitor* monitor = NULL; + markWord temp, test; + intptr_t hash; + markWord mark = read_stable_mark(obj); - // object should remain ineligible for biased locking - assert(!mark.has_bias_pattern(), "invariant"); + // object should remain ineligible for biased locking + assert(!mark.has_bias_pattern(), "invariant"); - if (mark.is_neutral()) { // if this is a normal header + if (mark.is_neutral()) { // if this is a normal header + hash = mark.hash(); + if (hash != 0) { // if it has a hash, just return it + return hash; + } + hash = get_next_hash(self, obj); // get a new hash + temp = mark.copy_set_hash(hash); // merge the hash into header + // try to install the hash + test = obj->cas_set_mark(temp, mark); + if (test == mark) { // if the hash was installed, return it + return hash; + } + // Failed to install the hash. It could be that another thread + // installed the hash just before our attempt or inflation has + // occurred or... so we fall thru to inflate the monitor for + // stability and then install the hash. 
+ } else if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + monitor = omh.om_ptr(); + temp = monitor->header(); + // Allow for a lagging install_displaced_markword_in_object() to + // have marked the ObjectMonitor's header/dmw field. + assert(temp.is_neutral() || (AsyncDeflateIdleMonitors && temp.is_marked()), + "invariant: header=" INTPTR_FORMAT, temp.value()); + hash = temp.hash(); + if (hash != 0) { // if it has a hash, just return it + return hash; + } + // Fall thru so we only have one place that installs the hash in + // the ObjectMonitor. + } else if (self->is_lock_owned((address)mark.locker())) { + // This is a stack lock owned by the calling thread so fetch the + // displaced markWord from the BasicLock on the stack. + temp = mark.displaced_mark_helper(); + assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); + hash = temp.hash(); + if (hash != 0) { // if it has a hash, just return it + return hash; + } + // WARNING: + // The displaced header in the BasicLock on a thread's stack + // is strictly immutable. It CANNOT be changed in ANY cases. + // So we have to inflate the stack lock into an ObjectMonitor + // even if the current thread owns the lock. The BasicLock on + // a thread's stack can be asynchronously read by other threads + // during an inflate() call so any change to that stack memory + // may not propagate to other threads correctly. + } + + // Inflate the monitor to set the hash. + ObjectMonitorHandle omh; + inflate(&omh, self, obj, inflate_cause_hash_code); + monitor = omh.om_ptr(); + // Load ObjectMonitor's header/dmw field and see if it has a hash. + mark = monitor->header(); + // Allow for a lagging install_displaced_markword_in_object() to + // have marked the ObjectMonitor's header/dmw field. + assert(mark.is_neutral() || (AsyncDeflateIdleMonitors && mark.is_marked()), + "invariant: header=" INTPTR_FORMAT, mark.value()); hash = mark.hash(); - if (hash != 0) { // if it has a hash, just return it - return hash; - } - hash = get_next_hash(self, obj); // get a new hash - temp = mark.copy_set_hash(hash); // merge the hash into header - // try to install the hash - test = obj->cas_set_mark(temp, mark); - if (test == mark) { // if the hash was installed, return it - return hash; - } - // Failed to install the hash. It could be that another thread - // installed the hash just before our attempt or inflation has - // occurred or... so we fall thru to inflate the monitor for - // stability and then install the hash. - } else if (mark.has_monitor()) { - monitor = mark.monitor(); - temp = monitor->header(); - assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); - hash = temp.hash(); - if (hash != 0) { // if it has a hash, just return it - return hash; - } - // Fall thru so we only have one place that installs the hash in - // the ObjectMonitor. - } else if (self->is_lock_owned((address)mark.locker())) { - // This is a stack lock owned by the calling thread so fetch the - // displaced markWord from the BasicLock on the stack. - temp = mark.displaced_mark_helper(); - assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); - hash = temp.hash(); - if (hash != 0) { // if it has a hash, just return it - return hash; - } - // WARNING: - // The displaced header in the BasicLock on a thread's stack - // is strictly immutable. 
It CANNOT be changed in ANY cases.
- // So we have to inflate the stack lock into an ObjectMonitor
- // even if the current thread owns the lock. The BasicLock on
- // a thread's stack can be asynchronously read by other threads
- // during an inflate() call so any change to that stack memory
- // may not propagate to other threads correctly.
- }
-
- // Inflate the monitor to set the hash.
- monitor = inflate(self, obj, inflate_cause_hash_code);
- // Load ObjectMonitor's header/dmw field and see if it has a hash.
- mark = monitor->header();
- assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value());
- hash = mark.hash();
- if (hash == 0) { // if it does not have a hash
- hash = get_next_hash(self, obj); // get a new hash
- temp = mark.copy_set_hash(hash); // merge the hash into header
- assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value());
- uintptr_t v = Atomic::cmpxchg((volatile uintptr_t*)monitor->header_addr(), mark.value(), temp.value());
- test = markWord(v);
- if (test != mark) {
- // The attempt to update the ObjectMonitor's header/dmw field
- // did not work. This can happen if another thread managed to
- // merge in the hash just before our cmpxchg().
- // If we add any new usages of the header/dmw field, this code
- // will need to be updated.
- hash = test.hash();
- assert(test.is_neutral(), "invariant: header=" INTPTR_FORMAT, test.value());
- assert(hash != 0, "should only have lost the race to a thread that set a non-zero hash");
+ if (hash == 0) { // if it does not have a hash
+ hash = get_next_hash(self, obj); // get a new hash
+ temp = mark.copy_set_hash(hash); // merge the hash into header
+ if (AsyncDeflateIdleMonitors && temp.is_marked()) {
+ // A lagging install_displaced_markword_in_object() has marked
+ // the ObjectMonitor's header/dmw field. We clear it to avoid
+ // any confusion if we are able to set the hash.
+ temp.set_unmarked();
+ }
+ assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value());
+ uintptr_t v = Atomic::cmpxchg((volatile uintptr_t*)monitor->header_addr(), mark.value(), temp.value());
+ test = markWord(v);
+ if (test != mark) {
+ // The attempt to update the ObjectMonitor's header/dmw field
+ // did not work. This can happen if another thread managed to
+ // merge in the hash just before our cmpxchg(). With async
+ // deflation, a lagging install_displaced_markword_in_object()
+ // could have just marked or just unmarked the header/dmw field.
+ // If we add any new usages of the header/dmw field, this code
+ // will need to be updated.
+ if (AsyncDeflateIdleMonitors) {
+ // Since async deflation gives us two possible reasons for
+ // the cmpxchg() to fail, it is easier to simply retry.
+ continue;
+ }
+ hash = test.hash();
+ assert(test.is_neutral(), "invariant: header=" INTPTR_FORMAT, test.value());
+ assert(hash != 0, "should only have lost the race to a thread that set a non-zero hash");
+ }
}
+ // We finally get the hash.
+ return hash;
}
- // We finally get the hash.
- return hash;
}

// Deprecated -- use FastHashCode() instead.
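The hash-installation retry above boils down to merging a hash into a copy of the header and publishing it with one CAS; a sketch with an illustrative bit layout, not the real markWord encoding:

#include <atomic>
#include <cstdint>

static const unsigned kHashShift = 8;  // assumed hash position

static uintptr_t try_install_hash(std::atomic<uintptr_t>& header,
                                  uintptr_t new_hash) {
  uintptr_t mark = header.load();
  uintptr_t temp = mark | (new_hash << kHashShift);  // merge hash into a copy
  if (header.compare_exchange_strong(mark, temp)) {
    return new_hash;            // our hash is now published
  }
  return mark >> kHashShift;    // lost the race; adopt the observed value
}

With async deflation there is a second reason the CAS can fail (a lagging mark/unmark of the header), which is why the real code retries the whole loop instead of always adopting the observed value.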
@@ -782,20 +1124,28 @@ assert(thread == JavaThread::current(), "Can only be called on current thread"); oop obj = h_obj(); - markWord mark = read_stable_mark(obj); + while (true) { + markWord mark = read_stable_mark(obj); - // Uncontended case, header points to stack - if (mark.has_locker()) { - return thread->is_lock_owned((address)mark.locker()); - } - // Contended case, header points to ObjectMonitor (tagged pointer) - if (mark.has_monitor()) { - ObjectMonitor* monitor = mark.monitor(); - return monitor->is_entered(thread) != 0; + // Uncontended case, header points to stack + if (mark.has_locker()) { + return thread->is_lock_owned((address)mark.locker()); + } + // Contended case, header points to ObjectMonitor (tagged pointer) + if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + bool ret_code = omh.om_ptr()->is_entered(thread) != 0; + return ret_code; + } + // Unlocked case, header in place + assert(mark.is_neutral(), "sanity check"); + return false; } - // Unlocked case, header in place - assert(mark.is_neutral(), "sanity check"); - return false; } // Be aware of this method could revoke bias of the lock object. @@ -821,27 +1171,37 @@ assert(self == JavaThread::current(), "Can only be called on current thread"); oop obj = h_obj(); - markWord mark = read_stable_mark(obj); - // CASE: stack-locked. Mark points to a BasicLock on the owner's stack. - if (mark.has_locker()) { - return self->is_lock_owned((address)mark.locker()) ? - owner_self : owner_other; - } + while (true) { + markWord mark = read_stable_mark(obj); - // CASE: inflated. Mark (tagged pointer) points to an ObjectMonitor. - // The Object:ObjectMonitor relationship is stable as long as we're - // not at a safepoint. - if (mark.has_monitor()) { - void* owner = mark.monitor()->_owner; - if (owner == NULL) return owner_none; - return (owner == self || - self->is_lock_owned((address)owner)) ? owner_self : owner_other; - } + // CASE: stack-locked. Mark points to a BasicLock on the owner's stack. + if (mark.has_locker()) { + return self->is_lock_owned((address)mark.locker()) ? + owner_self : owner_other; + } - // CASE: neutral - assert(mark.is_neutral(), "sanity check"); - return owner_none; // it's unlocked + // CASE: inflated. Mark (tagged pointer) points to an ObjectMonitor. + // The Object:ObjectMonitor relationship is stable as long as we're + // not at a safepoint and AsyncDeflateIdleMonitors is false. + if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* monitor = omh.om_ptr(); + void* owner = monitor->_owner; + if (owner == NULL) return owner_none; + return (owner == self || + self->is_lock_owned((address)owner)) ? 
owner_self : owner_other;
+ }
+
+ // CASE: neutral
+ assert(mark.is_neutral(), "sanity check");
+ return owner_none; // it's unlocked
+ }
}

// FIXME: jvmti should call this
@@ -856,69 +1216,116 @@
}
oop obj = h_obj();
- address owner = NULL;
- markWord mark = read_stable_mark(obj);
+ while (true) {
+ address owner = NULL;
+ markWord mark = read_stable_mark(obj);

- // Uncontended case, header points to stack
- if (mark.has_locker()) {
- owner = (address) mark.locker();
- }
+ // Uncontended case, header points to stack
+ if (mark.has_locker()) {
+ owner = (address) mark.locker();
+ }

- // Contended case, header points to ObjectMonitor (tagged pointer)
- else if (mark.has_monitor()) {
- ObjectMonitor* monitor = mark.monitor();
- assert(monitor != NULL, "monitor should be non-null");
- owner = (address) monitor->owner();
- }
+ // Contended case, header points to ObjectMonitor (tagged pointer)
+ else if (mark.has_monitor()) {
+ ObjectMonitorHandle omh;
+ if (!omh.save_om_ptr(obj, mark)) {
+ // Lost a race with async deflation so try again.
+ assert(AsyncDeflateIdleMonitors, "sanity check");
+ continue;
+ }
+ ObjectMonitor* monitor = omh.om_ptr();
+ assert(monitor != NULL, "monitor should be non-null");
+ owner = (address) monitor->owner();
+ }

- if (owner != NULL) {
- // owning_thread_from_monitor_owner() may also return NULL here
- return Threads::owning_thread_from_monitor_owner(t_list, owner);
- }
+ if (owner != NULL) {
+ // owning_thread_from_monitor_owner() may also return NULL here
+ return Threads::owning_thread_from_monitor_owner(t_list, owner);
+ }

- // Unlocked case, header in place
- // Cannot have assertion since this object may have been
- // locked by another thread when reaching here.
- // assert(mark.is_neutral(), "sanity check");
+ // Unlocked case, header in place
+ // Cannot have assertion since this object may have been
+ // locked by another thread when reaching here.
+ // assert(mark.is_neutral(), "sanity check");

- return NULL;
+ return NULL;
+ }
}

// Visitors ...

void ObjectSynchronizer::monitors_iterate(MonitorClosure* closure) {
- PaddedObjectMonitor* block = Atomic::load_acquire(&g_block_list);
+ PaddedObjectMonitor* block = Atomic::load(&g_block_list);
while (block != NULL) {
assert(block->object() == CHAINMARKER, "must be a block header");
for (int i = _BLOCKSIZE - 1; i > 0; i--) {
ObjectMonitor* mid = (ObjectMonitor *)(block + i);
- oop object = (oop)mid->object();
- if (object != NULL) {
- // Only process with closure if the object is set.
+ ObjectMonitorHandle omh;
+ if (!mid->is_free() && omh.set_om_ptr_if_safe(mid)) {
+ // The ObjectMonitor* is not free and it has been made safe.
+ if (mid->object() == NULL) {
+ // Only process with closure if the object is set.
+ continue;
+ }
closure->do_monitor(mid);
}
}
- block = (PaddedObjectMonitor*)block->_next_om;
+ // unmarked_next() is not needed with g_block_list (no locking
+ // used with block linkage _next_om fields).
+ block = (PaddedObjectMonitor*)Atomic::load(&block->_next_om); } } static bool monitors_used_above_threshold() { - if (g_om_population == 0) { + if (Atomic::load(&LVars.population) == 0) { return false; } - int monitors_used = g_om_population - g_om_free_count; - int monitor_usage = (monitors_used * 100LL) / g_om_population; - return monitor_usage > MonitorUsedDeflationThreshold; -} - -bool ObjectSynchronizer::is_cleanup_needed() { if (MonitorUsedDeflationThreshold > 0) { - if (monitors_used_above_threshold()) { - return true; + int monitors_used = Atomic::load(&LVars.population) - Atomic::load(&LVars.free_count); + if (HandshakeAfterDeflateIdleMonitors) { + monitors_used -= Atomic::load(&LVars.wait_count); } + int monitor_usage = (monitors_used * 100LL) / Atomic::load(&LVars.population); + return monitor_usage > MonitorUsedDeflationThreshold; } - return needs_monitor_scavenge(); + return false; +} + +// Returns true if MonitorBound is set (> 0) and if the specified +// cnt is > MonitorBound. Otherwise returns false. +static bool is_MonitorBound_exceeded(const int cnt) { + const int mx = MonitorBound; + return mx > 0 && cnt > mx; +} + +bool ObjectSynchronizer::is_async_deflation_needed() { + if (!AsyncDeflateIdleMonitors) { + return false; + } + if (is_async_deflation_requested()) { + // Async deflation request. + return true; + } + if (AsyncDeflationInterval > 0 && + time_since_last_async_deflation_ms() > AsyncDeflationInterval && + monitors_used_above_threshold()) { + // It's been longer than our specified deflate interval and there + // are too many monitors in use. We don't deflate more frequently + // than AsyncDeflationInterval (unless is_async_deflation_requested) + // in order to not swamp the ServiceThread. + _last_async_deflation_time_ns = os::javaTimeNanos(); + return true; + } + int monitors_used = Atomic::load(&LVars.population) - Atomic::load(&LVars.free_count); + if (HandshakeAfterDeflateIdleMonitors) { + monitors_used -= Atomic::load(&LVars.wait_count); + } + if (is_MonitorBound_exceeded(monitors_used)) { + // Not enough ObjectMonitors on the global free list. + return true; + } + return false; } bool ObjectSynchronizer::needs_monitor_scavenge() { @@ -929,6 +1336,26 @@ return false; } +bool ObjectSynchronizer::is_safepoint_deflation_needed() { + if (!AsyncDeflateIdleMonitors) { + if (monitors_used_above_threshold()) { + // Too many monitors in use. + return true; + } + return needs_monitor_scavenge(); + } + if (is_special_deflation_requested()) { + // For AsyncDeflateIdleMonitors only do a safepoint deflation + // if there is a special deflation request. + return true; + } + return false; +} + +jlong ObjectSynchronizer::time_since_last_async_deflation_ms() { + return (os::javaTimeNanos() - _last_async_deflation_time_ns) / (NANOUNITS / MILLIUNITS); +} + void ObjectSynchronizer::oops_do(OopClosure* f) { // We only scan the global used list here (for moribund threads), and // the thread-local monitors in Thread::oops_do(). 
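A worked example of the usage test in monitors_used_above_threshold() above, with illustrative numbers; wait_count is only subtracted when HandshakeAfterDeflateIdleMonitors is true:

// population p = 10000, free_count f = 1500, wait_count w = 500 gives
// usage = ((10000 - 1500 - 500) * 100) / 10000 = 80, so a
// MonitorUsedDeflationThreshold of 75 triggers and 90 does not.
static bool used_above_threshold(int population, int free_count,
                                 int wait_count, int threshold_pct) {
  if (population == 0) return false;
  long long used = population - free_count - wait_count;
  return (used * 100LL) / population > threshold_pct;  // 100LL avoids overflow
}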
@@ -937,18 +1364,21 @@
void ObjectSynchronizer::global_used_oops_do(OopClosure* f) {
assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
- list_oops_do(g_om_in_use_list, f);
+ list_oops_do(Atomic::load(&LVars.in_use_list), Atomic::load(&LVars.in_use_count), f);
}

void ObjectSynchronizer::thread_local_used_oops_do(Thread* thread, OopClosure* f) {
assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
- list_oops_do(thread->om_in_use_list, f);
+ list_oops_do(thread->om_in_use_list, thread->om_in_use_count, f);
}

-void ObjectSynchronizer::list_oops_do(ObjectMonitor* list, OopClosure* f) {
+void ObjectSynchronizer::list_oops_do(ObjectMonitor* list, int count, OopClosure* f) {
assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
- ObjectMonitor* mid;
- for (mid = list; mid != NULL; mid = mid->_next_om) {
+ // The oops_do() phase does not overlap with monitor deflation
+ // so no need to update the ObjectMonitor's ref_count for this
+ // ObjectMonitor* use and no need to mark ObjectMonitors for the
+ // list traversal.
+ for (ObjectMonitor* mid = list; mid != NULL; mid = unmarked_next(mid)) {
if (mid->object() != NULL) {
f->do_oop((oop*)mid->object_addr());
}
@@ -959,27 +1389,25 @@
// -----------------------------------------------------------------------------
// ObjectMonitor Lifecycle
// -----------------------
-// Inflation unlinks monitors from the global g_free_list and
-// associates them with objects. Deflation -- which occurs at
-// STW-time -- disassociates idle monitors from objects. Such
-// scavenged monitors are returned to the g_free_list.
-//
-// The global list is protected by gListLock. All the critical sections
-// are short and operate in constant-time.
+// Inflation unlinks monitors from LVars.free_list or a per-thread free
+// list and associates them with objects. Deflation -- which occurs at
+// STW-time or asynchronously -- disassociates idle monitors from objects.
+// Such scavenged monitors are returned to the LVars.free_list.
//
// ObjectMonitors reside in type-stable memory (TSM) and are immortal.
//
// Lifecycle:
-// -- unassigned and on the global free list
-// -- unassigned and on a thread's private om_free_list
+// -- unassigned and on the LVars.free_list
+// -- unassigned and on a per-thread free list
// -- assigned to an object. The object is inflated and the mark refers
-// to the objectmonitor.
+// to the ObjectMonitor.

// Constraining monitor pool growth via MonitorBound ...
//
// If MonitorBound is not set (<= 0), MonitorBound checks are disabled.
//
+// When safepoint deflation is being used (!AsyncDeflateIdleMonitors):
// The monitor pool is grow-only. We scavenge at STW safepoint-time, but the
// the rate of scavenging is driven primarily by GC. As such, we can find
// an inordinate number of monitors in circulation.
@@ -992,76 +1420,110 @@
// we'll incur more safepoints, which are harmful to performance.
// See also: GuaranteedSafepointInterval
//
-// If MonitorBound is set, the boundry applies to
-// (g_om_population - g_om_free_count)
+// When safepoint deflation is being used and MonitorBound is set, the
+// boundary applies to
+// (LVars.population - LVars.free_count)
// i.e., if there are not enough ObjectMonitors on the global free list,
// then a safepoint deflation is induced. Picking a good MonitorBound value
// is non-trivial.
+//
+// When async deflation is being used:
+// The monitor pool is still grow-only.
Async deflation is requested +// by a safepoint's cleanup phase or by the ServiceThread at periodic +// intervals when is_async_deflation_needed() returns true. In +// addition to other policies that are checked, if there are not +// enough ObjectMonitors on the global free list, then +// is_async_deflation_needed() will return true. The ServiceThread +// calls deflate_global_idle_monitors_using_JT() and also calls +// deflate_per_thread_idle_monitors_using_JT() as needed. static void InduceScavenge(Thread* self, const char * Whence) { + assert(!AsyncDeflateIdleMonitors, "is not used by async deflation"); + // Induce STW safepoint to trim monitors // Ultimately, this results in a call to deflate_idle_monitors() in the near future. // More precisely, trigger a cleanup safepoint as the number // of active monitors passes the specified threshold. // TODO: assert thread state is reasonable - if (Atomic::xchg (&_forceMonitorScavenge, 1) == 0) { + if (Atomic::xchg(&_forceMonitorScavenge, 1) == 0) { VMThread::check_for_forced_cleanup(); } } -ObjectMonitor* ObjectSynchronizer::om_alloc(Thread* self) { +ObjectMonitor* ObjectSynchronizer::om_alloc(Thread* self, + const InflateCause cause) { // A large MAXPRIVATE value reduces both list lock contention // and list coherency traffic, but also tends to increase the // number of ObjectMonitors in circulation as well as the STW // scavenge costs. As usual, we lean toward time in space-time // tradeoffs. const int MAXPRIVATE = 1024; + stringStream ss; for (;;) { ObjectMonitor* m; // 1: try to allocate from the thread's local om_free_list. // Threads will attempt to allocate first from their local list, then - // from the global list, and only after those attempts fail will the thread - // attempt to instantiate new monitors. Thread-local free lists take - // heat off the gListLock and improve allocation latency, as well as reducing - // coherency traffic on the shared global list. - m = self->om_free_list; + // from the global list, and only after those attempts fail will the + // thread attempt to instantiate new monitors. Thread-local free lists + // improve allocation latency, as well as reducing coherency traffic + // on the shared global list. + m = take_from_start_of_om_free_list(self); if (m != NULL) { - self->om_free_list = m->_next_om; - self->om_free_count--; guarantee(m->object() == NULL, "invariant"); - m->_next_om = self->om_in_use_list; - self->om_in_use_list = m; - self->om_in_use_count++; + m->set_allocation_state(ObjectMonitor::New); + prepend_to_om_in_use_list(self, m); return m; } - // 2: try to allocate from the global g_free_list + // 2: try to allocate from the global LVars.free_list // CONSIDER: use muxTry() instead of muxAcquire(). // If the muxTry() fails then drop immediately into case 3. // If we're using thread-local free lists then try // to reprovision the caller's free list. - if (g_free_list != NULL) { + if (Atomic::load(&LVars.free_list) != NULL) { // Reprovision the thread's om_free_list. // Use bulk transfers to reduce the allocation rate and heat // on various locks. - Thread::muxAcquire(&gListLock, "om_alloc(1)"); - for (int i = self->om_free_provision; --i >= 0 && g_free_list != NULL;) { - g_om_free_count--; - ObjectMonitor* take = g_free_list; - g_free_list = take->_next_om; + for (int i = self->om_free_provision; --i >= 0;) { + ObjectMonitor* take = take_from_start_of_global_free_list(); + if (take == NULL) { + break; // No more are available. 
+ } guarantee(take->object() == NULL, "invariant"); + if (AsyncDeflateIdleMonitors) { + // We allowed 3 field values to linger during async deflation. + // We clear header and restore ref_count here, but we leave + // owner == DEFLATER_MARKER so the simple C2 ObjectMonitor + // enter optimization can no longer race with async deflation + // and reuse. + take->set_header(markWord::zero()); + if (take->ref_count() < 0) { + // Add back max_jint to restore the ref_count field to its + // proper value. + Atomic::add(&take->_ref_count, max_jint); + +#ifdef ASSERT + jint l_ref_count = take->ref_count(); +#endif + assert(l_ref_count >= 0, "must not be negative: l_ref_count=%d, ref_count=%d", + l_ref_count, take->ref_count()); + } + } take->Recycle(); + // Since we're taking from the global free-list, take must be Free. + // om_release() also sets the allocation state to Free because it + // is called from other code paths. + assert(take->is_free(), "invariant"); om_release(self, take, false); } - Thread::muxRelease(&gListLock); - self->om_free_provision += 1 + (self->om_free_provision/2); + self->om_free_provision += 1 + (self->om_free_provision / 2); if (self->om_free_provision > MAXPRIVATE) self->om_free_provision = MAXPRIVATE; - const int mx = MonitorBound; - if (mx > 0 && (g_om_population-g_om_free_count) > mx) { + if (!AsyncDeflateIdleMonitors && + is_MonitorBound_exceeded(Atomic::load(&LVars.population) - Atomic::load(&LVars.free_count))) { // Not enough ObjectMonitors on the global free list. // We can't safely induce a STW safepoint from om_alloc() as our thread // state may not be appropriate for such activities and callers may hold @@ -1082,9 +1544,9 @@ assert(_BLOCKSIZE > 1, "invariant"); size_t neededsize = sizeof(PaddedObjectMonitor) * _BLOCKSIZE; PaddedObjectMonitor* temp; - size_t aligned_size = neededsize + (DEFAULT_CACHE_LINE_SIZE - 1); + size_t aligned_size = neededsize + (OM_CACHE_LINE_SIZE - 1); void* real_malloc_addr = NEW_C_HEAP_ARRAY(char, aligned_size, mtInternal); - temp = (PaddedObjectMonitor*)align_up(real_malloc_addr, DEFAULT_CACHE_LINE_SIZE); + temp = (PaddedObjectMonitor*)align_up(real_malloc_addr, OM_CACHE_LINE_SIZE); (void)memset((void *) temp, 0, neededsize); // Format the block. @@ -1096,11 +1558,12 @@ // look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; } for (int i = 1; i < _BLOCKSIZE; i++) { - temp[i]._next_om = (ObjectMonitor *)&temp[i+1]; + temp[i]._next_om = (ObjectMonitor*)&temp[i + 1]; + assert(temp[i].is_free(), "invariant"); } // terminate the last monitor as the end of list - temp[_BLOCKSIZE - 1]._next_om = NULL; + temp[_BLOCKSIZE - 1]._next_om = (ObjectMonitor*)NULL; // Element [0] is reserved for global list linkage temp[0].set_object(CHAINMARKER); @@ -1109,24 +1572,7 @@ // block in hand. This avoids some lock traffic and redundant // list activity. - // Acquire the gListLock to manipulate g_block_list and g_free_list. - // An Oyama-Taura-Yonezawa scheme might be more efficient. - Thread::muxAcquire(&gListLock, "om_alloc(2)"); - g_om_population += _BLOCKSIZE-1; - g_om_free_count += _BLOCKSIZE-1; - - // Add the new block to the list of extant blocks (g_block_list). - // The very first ObjectMonitor in a block is reserved and dedicated. - // It serves as blocklist "next" linkage. - temp[0]._next_om = g_block_list; - // There are lock-free uses of g_block_list so make sure that - // the previous stores happen before we update g_block_list. 
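The max_jint arithmetic in om_alloc() above restores a ref_count that async deflation drove negative; a sketch of both halves of that trick (illustrative, not HotSpot code):

#include <atomic>
#include <climits>

// The deflater swings a zero ref_count to -max_jint so fast paths see
// a negative count and refuse the monitor; it only succeeds from 0.
static bool park_ref_count(std::atomic<int>& rc) {
  int expected = 0;
  return rc.compare_exchange_strong(expected, -INT_MAX);
}

// om_alloc() adds max_jint back when recycling a monitor whose
// deflation bailed partway: -INT_MAX + INT_MAX == 0 again.
static void restore_ref_count(std::atomic<int>& rc) {
  rc.fetch_add(INT_MAX);
}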
- Atomic::release_store(&g_block_list, temp); - - // Add the new string of ObjectMonitors to the global free list - temp[_BLOCKSIZE - 1]._next_om = g_free_list; - g_free_list = temp + 1; - Thread::muxRelease(&gListLock); + prepend_block_to_lists(temp); } } @@ -1139,8 +1585,8 @@ // // Key constraint: all ObjectMonitors on a thread's free list and the global // free list must have their object field set to null. This prevents the -// scavenger -- deflate_monitor_list() -- from reclaiming them while we -// are trying to release them. +// scavenger -- deflate_monitor_list() or deflate_monitor_list_using_JT() +// -- from reclaiming them while we are trying to release them. void ObjectSynchronizer::om_release(Thread* self, ObjectMonitor* m, bool from_per_thread_alloc) { @@ -1150,31 +1596,64 @@ guarantee((m->is_busy() | m->_recursions) == 0, "freeing in-use monitor: " "%s, recursions=" INTX_FORMAT, m->is_busy_to_string(&ss), m->_recursions); + m->set_allocation_state(ObjectMonitor::Free); // _next_om is used for both per-thread in-use and free lists so // we have to remove 'm' from the in-use list first (as needed). if (from_per_thread_alloc) { // Need to remove 'm' from om_in_use_list. + // We use the more complicated lock-cur_mid_in_use-and-mid-as-we-go + // protocol because async deflation can do list deletions in parallel. ObjectMonitor* cur_mid_in_use = NULL; + ObjectMonitor* mid = NULL; + ObjectMonitor* next = NULL; bool extracted = false; - for (ObjectMonitor* mid = self->om_in_use_list; mid != NULL; cur_mid_in_use = mid, mid = mid->_next_om) { + + if ((mid = get_list_head_locked(&self->om_in_use_list)) == NULL) { + fatal("thread=" INTPTR_FORMAT " in-use list must not be empty.", p2i(self)); + } + next = unmarked_next(mid); + while (true) { if (m == mid) { - // extract from per-thread in-use list - if (mid == self->om_in_use_list) { - self->om_in_use_list = mid->_next_om; - } else if (cur_mid_in_use != NULL) { - cur_mid_in_use->_next_om = mid->_next_om; // maintain the current thread in-use list + // We found 'm' on the per-thread in-use list so try to extract it. + if (cur_mid_in_use == NULL) { + // mid is the list head and it is locked. Switch the list head + // to next which unlocks the list head, but leaves mid locked: + Atomic::store(&self->om_in_use_list, next); + } else { + // mid and cur_mid_in_use are locked. Switch cur_mid_in_use's + // next field to next which unlocks cur_mid_in_use, but leaves + // mid locked: + set_next(cur_mid_in_use, next); } extracted = true; - self->om_in_use_count--; + Atomic::dec(&self->om_in_use_count); + // Unlock mid, but leave the next value for any lagging list + // walkers. It will get cleaned up when mid is prepended to + // the thread's free list: + om_unlock(mid); break; } + if (cur_mid_in_use != NULL) { + om_unlock(cur_mid_in_use); + } + // The next cur_mid_in_use keeps mid's locked state so + // that it is stable for a possible next field change. It + // cannot be deflated while it is locked. 
+      cur_mid_in_use = mid;
+      mid = next;
+      if (mid == NULL) {
+        // Reached end of the list and didn't find m so:
+        fatal("must find m=" INTPTR_FORMAT " on om_in_use_list=" INTPTR_FORMAT,
+              p2i(m), p2i(self->om_in_use_list));
+      }
+      // Lock mid so we can possibly extract it:
+      om_lock(mid);
+      next = unmarked_next(mid);
    }
-    assert(extracted, "Should have extracted from in-use list");
  }
-  m->_next_om = self->om_free_list;
-  self->om_free_list = m;
-  self->om_free_count++;
+  prepend_to_om_free_list(self, m);
+  guarantee(m->is_free(), "invariant");
 }

 // Return ObjectMonitors on a moribund thread's free and in-use
@@ -1189,62 +1668,104 @@
 // scanned by a GC safepoint, either via Thread::oops_do() (before
 // om_flush() is called) or via ObjectSynchronizer::oops_do() (after
 // om_flush() is called).
+//
+// With AsyncDeflateIdleMonitors, deflate_global_idle_monitors_using_JT()
+// and deflate_per_thread_idle_monitors_using_JT() (in another thread) can
+// run at the same time as om_flush() so we have to follow a careful
+// protocol to prevent list corruption.
 void ObjectSynchronizer::om_flush(Thread* self) {
+  // This function can race with an async deflater thread. Since
+  // deflation has to process the per-thread in-use list before
+  // prepending the deflated ObjectMonitors to the global free list,
+  // we process the per-thread lists in the same order to prevent
+  // ordering races.
+  int in_use_count = 0;
+  ObjectMonitor* in_use_list = NULL;
+  ObjectMonitor* in_use_tail = NULL;
+
+  // An async deflation thread checks to see if the target thread
+  // is exiting, but if it has made it past that check before we
+  // started exiting, then it is racing to get to the in-use list.
+  if ((in_use_list = get_list_head_locked(&self->om_in_use_list)) != NULL) {
+    // At this point, we have marked the in-use list head so an
+    // async deflation thread cannot come in after us. If an async
+    // deflation thread is ahead of us, then we'll detect that and
+    // wait for it to finish its work.
+    //
+    // The thread is going away, however the ObjectMonitors on the
+    // om_in_use_list may still be in-use by other threads. Link
+    // them to in_use_tail, which will be linked into the global
+    // in-use list (LVars.in_use_list) below.
+    //
+    // Account for the in-use list head before the loop since it is
+    // already marked (by this thread):
+    in_use_tail = in_use_list;
+    in_use_count++;
+    for (ObjectMonitor* cur_om = unmarked_next(in_use_list); cur_om != NULL;) {
+      if (is_locked(cur_om)) {
+        // cur_om is locked so there must be an async deflater
+        // thread ahead of us so we'll give it a chance to finish.
+        while (is_locked(cur_om)) {
+          os::naked_short_sleep(1);
+        }
+        // Refetch the possibly changed next field and try again.
+        cur_om = unmarked_next(in_use_tail);
+        continue;
+      }
+      if (cur_om->is_free()) {
+        // cur_om was deflated and the allocation state was changed
+        // to Free while it was marked. We happened to see it just
+        // after it was unmarked (and added to the free list).
+        // Refetch the possibly changed next field and try again.
+ cur_om = unmarked_next(in_use_tail); + continue; + } + in_use_tail = cur_om; + in_use_count++; + cur_om = unmarked_next(cur_om); + } + guarantee(in_use_tail != NULL, "invariant"); + int l_om_in_use_count = self->om_in_use_count; + ADIM_guarantee(l_om_in_use_count == in_use_count, "in-use counts don't " + "match: l_om_in_use_count=%d, in_use_count=%d", + l_om_in_use_count, in_use_count); + self->om_in_use_count = 0; + // Clear the in-use list head (which also unlocks it): + Atomic::store(&self->om_in_use_list, (ObjectMonitor*)NULL); + om_unlock(in_use_list); + } + + int free_count = 0; ObjectMonitor* free_list = self->om_free_list; ObjectMonitor* free_tail = NULL; - int free_count = 0; if (free_list != NULL) { - ObjectMonitor* s; // The thread is going away. Set 'free_tail' to the last per-thread free - // monitor which will be linked to g_free_list below under the gListLock. + // monitor which will be linked to LVars.free_list below. stringStream ss; - for (s = free_list; s != NULL; s = s->_next_om) { + for (ObjectMonitor* s = free_list; s != NULL; s = unmarked_next(s)) { free_count++; free_tail = s; guarantee(s->object() == NULL, "invariant"); guarantee(!s->is_busy(), "must be !is_busy: %s", s->is_busy_to_string(&ss)); } guarantee(free_tail != NULL, "invariant"); - assert(self->om_free_count == free_count, "free-count off"); - self->om_free_list = NULL; + int l_om_free_count = self->om_free_count; + ADIM_guarantee(l_om_free_count == free_count, "free counts don't match: " + "l_om_free_count=%d, free_count=%d", l_om_free_count, + free_count); self->om_free_count = 0; + Atomic::store(&self->om_free_list, (ObjectMonitor*)NULL); } - ObjectMonitor* in_use_list = self->om_in_use_list; - ObjectMonitor* in_use_tail = NULL; - int in_use_count = 0; - if (in_use_list != NULL) { - // The thread is going away, however the ObjectMonitors on the - // om_in_use_list may still be in-use by other threads. Link - // them to in_use_tail, which will be linked into the global - // in-use list g_om_in_use_list below, under the gListLock. 
- ObjectMonitor *cur_om; - for (cur_om = in_use_list; cur_om != NULL; cur_om = cur_om->_next_om) { - in_use_tail = cur_om; - in_use_count++; - } - guarantee(in_use_tail != NULL, "invariant"); - assert(self->om_in_use_count == in_use_count, "in-use count off"); - self->om_in_use_list = NULL; - self->om_in_use_count = 0; - } - - Thread::muxAcquire(&gListLock, "om_flush"); if (free_tail != NULL) { - free_tail->_next_om = g_free_list; - g_free_list = free_list; - g_om_free_count += free_count; + prepend_list_to_global_free_list(free_list, free_tail, free_count); } if (in_use_tail != NULL) { - in_use_tail->_next_om = g_om_in_use_list; - g_om_in_use_list = in_use_list; - g_om_in_use_count += in_use_count; + prepend_list_to_global_in_use_list(in_use_list, in_use_tail, in_use_count); } - Thread::muxRelease(&gListLock); - LogStreamHandle(Debug, monitorinflation) lsh_debug; LogStreamHandle(Info, monitorinflation) lsh_info; LogStream* ls = NULL; @@ -1273,19 +1794,28 @@ } // Fast path code shared by multiple functions -void ObjectSynchronizer::inflate_helper(oop obj) { - markWord mark = obj->mark(); - if (mark.has_monitor()) { - assert(ObjectSynchronizer::verify_objmon_isinpool(mark.monitor()), "monitor is invalid"); - assert(mark.monitor()->header().is_neutral(), "monitor must record a good object header"); +void ObjectSynchronizer::inflate_helper(ObjectMonitorHandle* omh_p, oop obj) { + while (true) { + markWord mark = obj->mark(); + if (mark.has_monitor()) { + if (!omh_p->save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* monitor = omh_p->om_ptr(); + assert(ObjectSynchronizer::verify_objmon_isinpool(monitor), "monitor is invalid"); + markWord dmw = monitor->header(); + assert(dmw.is_neutral(), "sanity check: header=" INTPTR_FORMAT, dmw.value()); + return; + } + inflate(omh_p, Thread::current(), obj, inflate_cause_vm_internal); return; } - inflate(Thread::current(), obj, inflate_cause_vm_internal); } -ObjectMonitor* ObjectSynchronizer::inflate(Thread* self, - oop object, - const InflateCause cause) { +void ObjectSynchronizer::inflate(ObjectMonitorHandle* omh_p, Thread* self, + oop object, const InflateCause cause) { // Inflate mutates the heap ... // Relaxing assertion for bug 6320749. assert(Universe::verify_in_progress() || @@ -1306,12 +1836,17 @@ // CASE: inflated if (mark.has_monitor()) { - ObjectMonitor* inf = mark.monitor(); + if (!omh_p->save_om_ptr(object, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* inf = omh_p->om_ptr(); markWord dmw = inf->header(); assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); assert(inf->object() == object, "invariant"); assert(ObjectSynchronizer::verify_objmon_isinpool(inf), "monitor is invalid"); - return inf; + return; } // CASE: inflation in progress - inflating over a stack-lock. @@ -1347,7 +1882,7 @@ LogStreamHandle(Trace, monitorinflation) lsh; if (mark.has_locker()) { - ObjectMonitor* m = om_alloc(self); + ObjectMonitor* m = om_alloc(self, cause); // Optimistically prepare the objectmonitor - anticipate successful CAS // We do this before the CAS in order to minimize the length of time // in which INFLATING appears in the mark. @@ -1357,6 +1892,7 @@ markWord cmp = object->cas_set_mark(markWord::INFLATING(), mark); if (cmp != mark) { + // om_release() will reset the allocation state from New to Free. 
om_release(self, m, true); continue; // Interference -- just retry } @@ -1394,7 +1930,7 @@ markWord dmw = mark.displaced_mark_helper(); // Catch if the object's header is not neutral (not locked and // not marked is what we care about here). - assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); + ADIM_guarantee(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); // Setup monitor fields to proper values -- prepare the monitor m->set_header(dmw); @@ -1404,15 +1940,26 @@ // Note that a thread can inflate an object // that it has stack-locked -- as might happen in wait() -- directly // with CAS. That is, we can avoid the xchg-NULL .... ST idiom. - m->set_owner(mark.locker()); + if (AsyncDeflateIdleMonitors) { + m->simply_set_owner_from(mark.locker(), NULL, DEFLATER_MARKER); + } else { + m->simply_set_owner_from(mark.locker(), NULL); + } m->set_object(object); // TODO-FIXME: assert BasicLock->dhw != 0. + omh_p->set_om_ptr(m); + // Must preserve store ordering. The monitor state must // be stable at the time of publishing the monitor address. guarantee(object->mark() == markWord::INFLATING(), "invariant"); object->release_set_mark(markWord::encode(m)); + // Once ObjectMonitor is configured and the object is associated + // with the ObjectMonitor, it is safe to allow async deflation: + assert(m->is_new(), "freshly allocated monitor must be new"); + m->set_allocation_state(ObjectMonitor::Old); + // Hopefully the performance counters are allocated on distinct cache lines // to avoid false sharing on MP systems ... OM_PERFDATA_OP(Inflations, inc()); @@ -1425,7 +1972,8 @@ if (event.should_commit()) { post_monitor_inflate_event(&event, object, cause); } - return m; + ADIM_guarantee(!m->is_free(), "inflated monitor to be returned cannot be free"); + return; } // CASE: neutral @@ -1439,19 +1987,26 @@ // Catch if the object's header is not neutral (not locked and // not marked is what we care about here). - assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value()); - ObjectMonitor* m = om_alloc(self); + ADIM_guarantee(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT,mark.value()); + ObjectMonitor* m = om_alloc(self, cause); // prepare m for installation - set monitor to initial state m->Recycle(); m->set_header(mark); + // If we leave _owner == DEFLATER_MARKER here, then the simple C2 + // ObjectMonitor enter optimization can no longer race with async + // deflation and reuse. m->set_object(object); m->_Responsible = NULL; m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // consider: keep metastats by type/class + omh_p->set_om_ptr(m); + if (object->cas_set_mark(markWord::encode(m), mark) != mark) { m->set_header(markWord::zero()); m->set_object(NULL); m->Recycle(); + omh_p->set_om_ptr(NULL); + // om_release() will reset the allocation state from New to Free. om_release(self, m, true); m = NULL; continue; @@ -1460,6 +2015,11 @@ // live-lock -- "Inflated" is an absorbing state. } + // Once the ObjectMonitor is configured and object is associated + // with the ObjectMonitor, it is safe to allow async deflation: + assert(m->is_new(), "freshly allocated monitor must be new"); + m->set_allocation_state(ObjectMonitor::Old); + // Hopefully the performance counters are allocated on distinct // cache lines to avoid false sharing on MP systems ... 
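+    // (ObjectMonitors themselves get the same false-sharing treatment:
+    // synchronizer.hpp below pads each monitor in a block with the new
+    // OM_CACHE_LINE_SIZE via
+    //
+    //   typedef PaddedEnd<ObjectMonitor, OM_CACHE_LINE_SIZE> PaddedObjectMonitor;
+    //
+    // so neighboring monitors land on distinct cache lines.)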
OM_PERFDATA_OP(Inflations, inc()); @@ -1472,13 +2032,15 @@ if (event.should_commit()) { post_monitor_inflate_event(&event, object, cause); } - return m; + ADIM_guarantee(!m->is_free(), "inflated monitor to be returned cannot be free"); + return; } } // We maintain a list of in-use monitors for each thread. // +// For safepoint based deflation: // deflate_thread_local_monitors() scans a single thread's in-use list, while // deflate_idle_monitors() scans only a global list of in-use monitors which // is populated only as a thread dies (see om_flush()). @@ -1497,6 +2059,40 @@ // typically drives the scavenge rate. Large heaps can mean infrequent GC, // which in turn can mean large(r) numbers of ObjectMonitors in circulation. // This is an unfortunate aspect of this design. +// +// For async deflation: +// If a special deflation request is made, then the safepoint based +// deflation mechanism is used. Otherwise, an async deflation request +// is registered with the ServiceThread and it is notified. + +void ObjectSynchronizer::do_safepoint_work(DeflateMonitorCounters* counters) { + assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + + // The per-thread in-use lists are handled in + // ParallelSPCleanupThreadClosure::do_thread(). + + if (!AsyncDeflateIdleMonitors || is_special_deflation_requested()) { + // Use the older mechanism for the global in-use list or if a + // special deflation has been requested before the safepoint. + ObjectSynchronizer::deflate_idle_monitors(counters); + return; + } + + log_debug(monitorinflation)("requesting async deflation of idle monitors."); + // Request deflation of idle monitors by the ServiceThread: + set_is_async_deflation_requested(true); + MonitorLocker ml(Service_lock, Mutex::_no_safepoint_check_flag); + ml.notify_all(); + + if (log_is_enabled(Debug, monitorinflation)) { + // exit_globals()'s call to audit_and_print_stats() is done + // at the Info level and not at a safepoint. + // For safepoint based deflation, audit_and_print_stats() is called + // in ObjectSynchronizer::finish_deflate_idle_monitors() at the + // Debug level at a safepoint. + ObjectSynchronizer::audit_and_print_stats(false /* on_exit */); + } +} // Deflate a single monitor if not in-use // Return true if deflated, false if in-use @@ -1515,7 +2111,9 @@ const markWord dmw = mid->header(); guarantee(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); - if (mid->is_busy()) { + if (mid->is_busy() || mid->ref_count() != 0) { + // Easy checks are first - the ObjectMonitor is busy or ObjectMonitor* + // is in use so no deflation. deflated = false; } else { // Deflate the monitor if it is no longer being used @@ -1531,21 +2129,34 @@ // Restore the header back to obj obj->release_set_mark(dmw); + if (AsyncDeflateIdleMonitors) { + // clear() expects the owner field to be NULL and we won't race + // with the simple C2 ObjectMonitor enter optimization since + // we're at a safepoint. DEFLATER_MARKER is the only non-NULL + // value we should see here. + mid->try_set_owner_from(NULL, DEFLATER_MARKER); + } mid->clear(); assert(mid->object() == NULL, "invariant: object=" INTPTR_FORMAT, p2i(mid->object())); + assert(mid->is_free(), "invariant"); // Move the deflated ObjectMonitor to the working free list - // defined by free_head_p and free_tail_p. + // defined by free_head_p and free_tail_p. No races on this list + // so no need for load_acquire() or store_release(). 
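+    // Shape of the caller-local working list being built here
+    // (sketch only):
+    //
+    //   *free_head_p -> m1 -> m2 -> ... -> *free_tail_p -> (stale next)
+    //
+    // The tail's _next_om is only corrected when the next deflated
+    // monitor is appended, and finally by the caller when the chain
+    // is prepended to a free list.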
if (*free_head_p == NULL) *free_head_p = mid; if (*free_tail_p != NULL) { // We append to the list so the caller can use mid->_next_om // to fix the linkages in its context. ObjectMonitor* prevtail = *free_tail_p; // Should have been cleaned up by the caller: - assert(prevtail->_next_om == NULL, "cleaned up deflated?"); - prevtail->_next_om = mid; + // Note: Should not have to lock prevtail here since we're at a + // safepoint and ObjectMonitors on the local free list should + // not be accessed in parallel. + assert(prevtail->_next_om == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(prevtail->_next_om)); + set_next(prevtail, mid); } *free_tail_p = mid; // At this point, mid->_next_om still refers to its current @@ -1557,9 +2168,153 @@ return deflated; } -// Walk a given monitor list, and deflate idle monitors -// The given list could be a per-thread list or a global list -// Caller acquires gListLock as needed. +// Deflate the specified ObjectMonitor if not in-use using a JavaThread. +// Returns true if it was deflated and false otherwise. +// +// The async deflation protocol sets owner to DEFLATER_MARKER and +// makes ref_count negative as signals to contending threads that +// an async deflation is in progress. There are a number of checks +// as part of the protocol to make sure that the calling thread has +// not lost the race to a contending thread or to a thread that just +// wants to use the ObjectMonitor*. +// +// The ObjectMonitor has been successfully async deflated when: +// (owner == DEFLATER_MARKER && ref_count < 0) +// Contending threads or ObjectMonitor* using threads that see those +// values know to retry their operation. +// +bool ObjectSynchronizer::deflate_monitor_using_JT(ObjectMonitor* mid, + ObjectMonitor** free_head_p, + ObjectMonitor** free_tail_p) { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + // A newly allocated ObjectMonitor should not be seen here so we + // avoid an endless inflate/deflate cycle. + assert(mid->is_old(), "must be old: allocation_state=%d", + (int) mid->allocation_state()); + + if (mid->is_busy() || mid->ref_count() != 0) { + // Easy checks are first - the ObjectMonitor is busy or ObjectMonitor* + // is in use so no deflation. + return false; + } + + if (mid->try_set_owner_from(DEFLATER_MARKER, NULL) == NULL) { + // ObjectMonitor is not owned by another thread. Our setting + // owner to DEFLATER_MARKER forces any contending thread through + // the slow path. This is just the first part of the async + // deflation dance. + + if (mid->_contentions != 0 || mid->_waiters != 0) { + // Another thread has raced to enter the ObjectMonitor after + // mid->is_busy() above or has already entered and waited on + // it which makes it busy so no deflation. Restore owner to + // NULL if it is still DEFLATER_MARKER. + mid->try_set_owner_from(NULL, DEFLATER_MARKER); + return false; + } + + if (Atomic::cmpxchg(&mid->_ref_count, (jint)0, -max_jint) == 0) { + // Make ref_count negative to force any contending threads or + // ObjectMonitor* using threads to retry. This is the second + // part of the async deflation dance. + + if (mid->owner_is_DEFLATER_MARKER()) { + // If owner is still DEFLATER_MARKER, then we have successfully + // signaled any contending threads to retry. If it is not, then we + // have lost the race to an entering thread and the ObjectMonitor + // is now busy. This is the third and final part of the async + // deflation dance. 
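+        // The three-part dance in one place (a condensed restatement
+        // of the protocol above, not additional logic):
+        //
+        //   1) owner:     NULL -> DEFLATER_MARKER  (contending threads
+        //                                           take the slow path)
+        //   2) ref_count: 0 -> -max_jint           (saved ObjectMonitor*
+        //                                           users must retry)
+        //   3) recheck owner == DEFLATER_MARKER    (no enter snuck in
+        //                                           between 1) and 2))
+        //
+        // A failure at any step unwinds the earlier steps and returns
+        // false.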
+        // Note: This owner check solves the ABA problem with ref_count
+        // where another thread acquired the ObjectMonitor, finished
+        // using it and restored the ref_count to zero.
+
+        // Sanity checks for the races:
+        guarantee(mid->_contentions == 0, "must be 0: contentions=%d",
+                  mid->_contentions);
+        guarantee(mid->_waiters == 0, "must be 0: waiters=%d", mid->_waiters);
+        guarantee(mid->_cxq == NULL, "must be no contending threads: cxq="
+                  INTPTR_FORMAT, p2i(mid->_cxq));
+        guarantee(mid->_EntryList == NULL,
+                  "must be no entering threads: EntryList=" INTPTR_FORMAT,
+                  p2i(mid->_EntryList));
+
+        const oop obj = (oop) mid->object();
+        if (log_is_enabled(Trace, monitorinflation)) {
+          ResourceMark rm;
+          log_trace(monitorinflation)("deflate_monitor_using_JT: "
+                                      "object=" INTPTR_FORMAT ", mark="
+                                      INTPTR_FORMAT ", type='%s'",
+                                      p2i(obj), obj->mark().value(),
+                                      obj->klass()->external_name());
+        }
+
+        // Install the old mark word if nobody else has already done it.
+        mid->install_displaced_markword_in_object(obj);
+        mid->clear_using_JT();
+
+        assert(mid->object() == NULL, "must be NULL: object=" INTPTR_FORMAT,
+               p2i(mid->object()));
+        assert(mid->is_free(), "must be free: allocation_state=%d",
+               (int) mid->allocation_state());
+
+        // Move the deflated ObjectMonitor to the working free list
+        // defined by free_head_p and free_tail_p. No races on this list
+        // so no need for load_acquire() or store_release().
+        if (*free_head_p == NULL) {
+          // First one on the list.
+          *free_head_p = mid;
+        }
+        if (*free_tail_p != NULL) {
+          // We append to the list so the caller can use mid->_next_om
+          // to fix the linkages in its context.
+          ObjectMonitor* prevtail = *free_tail_p;
+          // Should have been cleaned up by the caller:
+          om_lock(prevtail);
+          assert(unmarked_next(prevtail) == NULL, "must be NULL: _next_om="
+                 INTPTR_FORMAT, p2i(unmarked_next(prevtail)));
+          set_next(prevtail, mid);  // prevtail now points to mid (and is unlocked)
+        }
+        *free_tail_p = mid;
+
+        // At this point, mid->_next_om still refers to its current
+        // value and another ObjectMonitor's _next_om field still
+        // refers to this ObjectMonitor. Those linkages have to be
+        // cleaned up by the caller who has the complete context.
+
+        // We leave owner == DEFLATER_MARKER and ref_count < 0
+        // to force any racing threads to retry.
+        return true;  // Success, ObjectMonitor has been deflated.
+      }
+
+      // The owner was changed from DEFLATER_MARKER so we lost the
+      // race since the ObjectMonitor is now busy.
+
+      // Add back max_jint to restore the ref_count field to its
+      // proper value (which may not be what we saw above):
+      Atomic::add(&mid->_ref_count, max_jint);
+
+#ifdef ASSERT
+      jint l_ref_count = mid->ref_count();
+#endif
+      assert(l_ref_count >= 0, "must not be negative: l_ref_count=%d, ref_count=%d",
+             l_ref_count, mid->ref_count());
+      return false;
+    }
+
+    // The ref_count was no longer 0 so we lost the race since the
+    // ObjectMonitor is now busy or the ObjectMonitor* is now in use.
+    // Restore owner to NULL if it is still DEFLATER_MARKER:
+    mid->try_set_owner_from(NULL, DEFLATER_MARKER);
+  }
+
+  // The owner field is no longer NULL so we lost the race since the
+  // ObjectMonitor is now busy.
+  return false;
+}
+
+// Walk a given monitor list, and deflate idle monitors.
+// The given list could be a per-thread list or a global list.
// // In the case of parallel processing of thread local monitor lists, // work is done by Threads::parallel_threads_do() which ensures that @@ -1571,46 +2326,219 @@ // SafepointSynchronize::do_cleanup_tasks() in safepoint.cpp and // Threads::parallel_java_threads_do() in thread.cpp. int ObjectSynchronizer::deflate_monitor_list(ObjectMonitor** list_p, + int* count_p, ObjectMonitor** free_head_p, ObjectMonitor** free_tail_p) { - ObjectMonitor* mid; - ObjectMonitor* next; ObjectMonitor* cur_mid_in_use = NULL; + ObjectMonitor* mid = NULL; + ObjectMonitor* next = NULL; int deflated_count = 0; - for (mid = *list_p; mid != NULL;) { + // We use the simpler lock-mid-as-we-go protocol since there are no + // parallel list deletions since we are at a safepoint. + if ((mid = get_list_head_locked(list_p)) == NULL) { + return 0; // The list is empty so nothing to deflate. + } + next = unmarked_next(mid); + + while (true) { oop obj = (oop) mid->object(); if (obj != NULL && deflate_monitor(mid, obj, free_head_p, free_tail_p)) { // Deflation succeeded and already updated free_head_p and // free_tail_p as needed. Finish the move to the local free list // by unlinking mid from the global or per-thread in-use list. - if (mid == *list_p) { - *list_p = mid->_next_om; - } else if (cur_mid_in_use != NULL) { - cur_mid_in_use->_next_om = mid->_next_om; // maintain the current thread in-use list + if (cur_mid_in_use == NULL) { + // mid is the list head and it is locked. Switch the list head + // to next which unlocks the list head, but leaves mid locked: + Atomic::store(list_p, next); + } else { + // mid is locked. Switch cur_mid_in_use's next field to next + // which is safe because we have no parallel list deletions, + // but we leave mid locked: + set_next(cur_mid_in_use, next); } - next = mid->_next_om; - mid->_next_om = NULL; // This mid is current tail in the free_head_p list - mid = next; + // At this point mid is disconnected from the in-use list so + // its lock no longer has any effects on the in-use list. deflated_count++; + Atomic::dec(count_p); + // mid is current tail in the free_head_p list so NULL terminate it + // (which also unlocks it): + set_next(mid, NULL); } else { + om_unlock(mid); cur_mid_in_use = mid; - mid = mid->_next_om; } + // All the list management is done so move on to the next one: + mid = next; + if (mid == NULL) { + break; // Reached end of the list so nothing more to deflate. + } + // Lock mid so we can possibly deflate it: + om_lock(mid); + next = unmarked_next(mid); } return deflated_count; } +// Walk a given ObjectMonitor list and deflate idle ObjectMonitors using +// a JavaThread. Returns the number of deflated ObjectMonitors. The given +// list could be a per-thread in-use list or the global in-use list. +// If a safepoint has started, then we save state via saved_mid_in_use_p +// and return to the caller to honor the safepoint. +// +int ObjectSynchronizer::deflate_monitor_list_using_JT(ObjectMonitor** list_p, + int* count_p, + ObjectMonitor** free_head_p, + ObjectMonitor** free_tail_p, + ObjectMonitor** saved_mid_in_use_p) { + assert(AsyncDeflateIdleMonitors, "sanity check"); + JavaThread* self = JavaThread::current(); + + ObjectMonitor* cur_mid_in_use = NULL; + ObjectMonitor* mid = NULL; + ObjectMonitor* next = NULL; + ObjectMonitor* next_next = NULL; + int deflated_count = 0; + + // We use the more complicated lock-cur_mid_in_use-and-mid-as-we-go + // protocol because om_release() can do list deletions in parallel. 
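+  // In steady state the loop below keeps a window of up to three
+  // nodes locked (sketch; "L" marks a locked _next_om field):
+  //
+  //   ... -> cur_mid_in_use -> mid -> next -> next_next -> ...
+  //               L             L      L
+  //
+  // so an om_release() ahead of us cannot unlink inside the window
+  // and an om_flush() coming up from behind cannot overtake it.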
+  // We also lock-next-next-as-we-go to prevent an om_flush() that is
+  // behind this thread from passing us.
+  if (*saved_mid_in_use_p == NULL) {
+    // No saved state so start at the beginning.
+    // Lock the list head so we can possibly deflate it:
+    if ((mid = get_list_head_locked(list_p)) == NULL) {
+      return 0;  // The list is empty so nothing to deflate.
+    }
+    next = unmarked_next(mid);
+  } else {
+    // We're restarting after a safepoint so restore the necessary state
+    // before we resume.
+    cur_mid_in_use = *saved_mid_in_use_p;
+    // Lock cur_mid_in_use so we can possibly update its
+    // next field to extract a deflated ObjectMonitor.
+    om_lock(cur_mid_in_use);
+    mid = unmarked_next(cur_mid_in_use);
+    if (mid == NULL) {
+      om_unlock(cur_mid_in_use);
+      *saved_mid_in_use_p = NULL;
+      return 0;  // The remainder is empty so nothing more to deflate.
+    }
+    // Lock mid so we can possibly deflate it:
+    om_lock(mid);
+    next = unmarked_next(mid);
+  }
+
+  while (true) {
+    // The current mid's next field is marked at this point. If we have
+    // a cur_mid_in_use, then its next field is also marked at this point.
+
+    if (next != NULL) {
+      // We lock next so that an om_flush() thread that is behind us
+      // cannot pass us when we unlock the current mid.
+      om_lock(next);
+      next_next = unmarked_next(next);
+    }
+
+    // Only try to deflate if there is an associated Java object and if
+    // mid is old (is not newly allocated and is not newly freed).
+    if (mid->object() != NULL && mid->is_old() &&
+        deflate_monitor_using_JT(mid, free_head_p, free_tail_p)) {
+      // Deflation succeeded and already updated free_head_p and
+      // free_tail_p as needed. Finish the move to the local free list
+      // by unlinking mid from the global or per-thread in-use list.
+      if (cur_mid_in_use == NULL) {
+        // mid is the list head and it is locked. Switch the list head
+        // to next which is also locked (if not NULL) and also leave
+        // mid locked:
+        Atomic::store(list_p, next);
+      } else {
+        ObjectMonitor* locked_next = mark_om_ptr(next);
+        // mid and cur_mid_in_use are locked. Switch cur_mid_in_use's
+        // next field to locked_next and also leave mid locked:
+        set_next(cur_mid_in_use, locked_next);
+      }
+      // At this point mid is disconnected from the in-use list so
+      // its lock no longer has any effects on the in-use list.
+      deflated_count++;
+      Atomic::dec(count_p);
+      // mid is current tail in the free_head_p list so NULL terminate it
+      // (which also unlocks it):
+      set_next(mid, NULL);
+
+      // All the list management is done so move on to the next one:
+      mid = next;  // mid keeps non-NULL next's locked next field
+      next = next_next;
+    } else {
+      // mid is considered in-use if it does not have an associated
+      // Java object or mid is not old or deflation did not succeed.
+      // A mid->is_new() node can be seen here when it is freshly
+      // returned by om_alloc() (and skips the deflation code path).
+      // A mid->is_old() node can be seen here when deflation failed.
+      // A mid->is_free() node can be seen here when a fresh node from
+      // om_alloc() is released by om_release() due to losing the race
+      // in inflate().
+
+      // All the list management is done so move on to the next one:
+      if (cur_mid_in_use != NULL) {
+        om_unlock(cur_mid_in_use);
+      }
+      // The next cur_mid_in_use keeps mid's lock state so
+      // that it is stable for a possible next field change. It
+      // cannot be modified by om_release() while it is locked.
+ cur_mid_in_use = mid; + mid = next; // mid keeps non-NULL next's locked state + next = next_next; + + if (SafepointMechanism::should_block(self) && + cur_mid_in_use != Atomic::load(list_p) && cur_mid_in_use->is_old()) { + // If a safepoint has started and cur_mid_in_use is not the list + // head and is old, then it is safe to use as saved state. Return + // to the caller before blocking. + *saved_mid_in_use_p = cur_mid_in_use; + om_unlock(cur_mid_in_use); + if (mid != NULL) { + om_unlock(mid); + } + return deflated_count; + } + } + if (mid == NULL) { + if (cur_mid_in_use != NULL) { + om_unlock(cur_mid_in_use); + } + break; // Reached end of the list so nothing more to deflate. + } + + // The current mid's next field is locked at this point. If we have + // a cur_mid_in_use, then it is also locked at this point. + } + // We finished the list without a safepoint starting so there's + // no need to save state. + *saved_mid_in_use_p = NULL; + return deflated_count; +} + void ObjectSynchronizer::prepare_deflate_idle_monitors(DeflateMonitorCounters* counters) { counters->n_in_use = 0; // currently associated with objects counters->n_in_circulation = 0; // extant counters->n_scavenged = 0; // reclaimed (global and per-thread) counters->per_thread_scavenged = 0; // per-thread scavenge total counters->per_thread_times = 0.0; // per-thread scavenge times + OrderAccess::storestore(); // flush inits for worker threads } void ObjectSynchronizer::deflate_idle_monitors(DeflateMonitorCounters* counters) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + + if (AsyncDeflateIdleMonitors) { + // Nothing to do when global idle ObjectMonitors are deflated using + // a JavaThread unless a special deflation has been requested. + if (!is_special_deflation_requested()) { + return; + } + } + bool deflated = false; ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors @@ -1621,33 +2549,28 @@ timer.start(); } - // Prevent om_flush from changing mids in Thread dtor's during deflation - // And in case the vm thread is acquiring a lock during a safepoint - // See e.g. 6320749 - Thread::muxAcquire(&gListLock, "deflate_idle_monitors"); - // Note: the thread-local monitors lists get deflated in // a separate pass. See deflate_thread_local_monitors(). - // For moribund threads, scan g_om_in_use_list + // For moribund threads, scan LVars.in_use_list int deflated_count = 0; - if (g_om_in_use_list) { - counters->n_in_circulation += g_om_in_use_count; - deflated_count = deflate_monitor_list((ObjectMonitor **)&g_om_in_use_list, &free_head_p, &free_tail_p); - g_om_in_use_count -= deflated_count; - counters->n_scavenged += deflated_count; - counters->n_in_use += g_om_in_use_count; + if (Atomic::load(&LVars.in_use_list) != NULL) { + // Update n_in_circulation before LVars.in_use_count is updated by deflation. + Atomic::add(&counters->n_in_circulation, Atomic::load(&LVars.in_use_count)); + + deflated_count = deflate_monitor_list(&LVars.in_use_list, &LVars.in_use_count, &free_head_p, &free_tail_p); + Atomic::add(&counters->n_in_use, Atomic::load(&LVars.in_use_count)); } if (free_head_p != NULL) { // Move the deflated ObjectMonitors back to the global free list. 
- guarantee(free_tail_p != NULL && counters->n_scavenged > 0, "invariant"); - assert(free_tail_p->_next_om == NULL, "invariant"); - // constant-time list splice - prepend scavenged segment to g_free_list - free_tail_p->_next_om = g_free_list; - g_free_list = free_head_p; + // No races on the working free list so no need for load_acquire(). + guarantee(free_tail_p != NULL && deflated_count > 0, "invariant"); + assert(free_tail_p->_next_om == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(free_tail_p->_next_om)); + prepend_list_to_global_free_list(free_head_p, free_tail_p, deflated_count); + Atomic::add(&counters->n_scavenged, deflated_count); } - Thread::muxRelease(&gListLock); timer.stop(); LogStreamHandle(Debug, monitorinflation) lsh_debug; @@ -1663,6 +2586,187 @@ } } +class HandshakeForDeflation : public HandshakeClosure { + public: + HandshakeForDeflation() : HandshakeClosure("HandshakeForDeflation") {} + + void do_thread(Thread* thread) { + log_trace(monitorinflation)("HandshakeForDeflation::do_thread: thread=" + INTPTR_FORMAT, p2i(thread)); + } +}; + +void ObjectSynchronizer::deflate_idle_monitors_using_JT() { + assert(AsyncDeflateIdleMonitors, "sanity check"); + + // Deflate any global idle monitors. + deflate_global_idle_monitors_using_JT(); + + int count = 0; + for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { + if (jt->om_in_use_count > 0 && !jt->is_exiting()) { + // This JavaThread is using ObjectMonitors so deflate any that + // are idle unless this JavaThread is exiting; do not race with + // ObjectSynchronizer::om_flush(). + deflate_per_thread_idle_monitors_using_JT(jt); + count++; + } + } + if (count > 0) { + log_debug(monitorinflation)("did async deflation of idle monitors for %d thread(s).", count); + } + + log_info(monitorinflation)("async global_population=%d, global_in_use_count=%d, " + "global_free_count=%d, global_wait_count=%d", + Atomic::load(&LVars.population), Atomic::load(&LVars.in_use_count), + Atomic::load(&LVars.free_count), Atomic::load(&LVars.wait_count)); + + // The ServiceThread's async deflation request has been processed. + set_is_async_deflation_requested(false); + + if (HandshakeAfterDeflateIdleMonitors && Atomic::load(&LVars.wait_count) > 0) { + // There are deflated ObjectMonitors waiting for a handshake + // (or a safepoint) for safety. + + ObjectMonitor* list = Atomic::load(&LVars.wait_list); + ADIM_guarantee(list != NULL, "LVars.wait_list must not be NULL"); + int count = Atomic::load(&LVars.wait_count); + Atomic::store(&LVars.wait_count, 0); + Atomic::store(&LVars.wait_list, (ObjectMonitor*)NULL); + + // Find the tail for prepend_list_to_common(). No need to mark + // ObjectMonitors for this list walk since only the deflater + // thread manages the wait list. + int l_count = 0; + ObjectMonitor* tail = NULL; + for (ObjectMonitor* n = list; n != NULL; n = unmarked_next(n)) { + tail = n; + l_count++; + } + ADIM_guarantee(count == l_count, "count=%d != l_count=%d", count, l_count); + + // Will execute a safepoint if !ThreadLocalHandshakes: + HandshakeForDeflation hfd_hc; + Handshake::execute(&hfd_hc); + + prepend_list_to_common(list, tail, count, &LVars.free_list, &LVars.free_count); + + log_info(monitorinflation)("moved %d idle monitors from global waiting list to global free list", count); + } +} + +// Deflate global idle ObjectMonitors using a JavaThread. 
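+// Both this function and the per-thread variant below are reached via
+// deflate_idle_monitors_using_JT() above, which the ServiceThread is
+// expected to call once it observes the request posted by
+// do_safepoint_work(). A sketch of that caller (assumption: an
+// is_async_deflation_requested() getter pairs with the
+// set_is_async_deflation_requested() setter used above):
+//
+//   // In the ServiceThread loop, after being woken via Service_lock:
+//   if (ObjectSynchronizer::is_async_deflation_requested()) {
+//     ObjectSynchronizer::deflate_idle_monitors_using_JT();
+//   }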
+// +void ObjectSynchronizer::deflate_global_idle_monitors_using_JT() { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + JavaThread* self = JavaThread::current(); + + deflate_common_idle_monitors_using_JT(true /* is_global */, self); +} + +// Deflate the specified JavaThread's idle ObjectMonitors using a JavaThread. +// +void ObjectSynchronizer::deflate_per_thread_idle_monitors_using_JT(JavaThread* target) { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + + deflate_common_idle_monitors_using_JT(false /* !is_global */, target); +} + +// Deflate global or per-thread idle ObjectMonitors using a JavaThread. +// +void ObjectSynchronizer::deflate_common_idle_monitors_using_JT(bool is_global, JavaThread* target) { + JavaThread* self = JavaThread::current(); + + int deflated_count = 0; + ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged ObjectMonitors + ObjectMonitor* free_tail_p = NULL; + ObjectMonitor* saved_mid_in_use_p = NULL; + elapsedTimer timer; + + if (log_is_enabled(Info, monitorinflation)) { + timer.start(); + } + + if (is_global) { + OM_PERFDATA_OP(MonExtant, set_value(Atomic::load(&LVars.in_use_count))); + } else { + OM_PERFDATA_OP(MonExtant, inc(target->om_in_use_count)); + } + + do { + int local_deflated_count; + if (is_global) { + local_deflated_count = deflate_monitor_list_using_JT(&LVars.in_use_list, &LVars.in_use_count, &free_head_p, &free_tail_p, &saved_mid_in_use_p); + } else { + local_deflated_count = deflate_monitor_list_using_JT(&target->om_in_use_list, &target->om_in_use_count, &free_head_p, &free_tail_p, &saved_mid_in_use_p); + } + deflated_count += local_deflated_count; + + if (free_head_p != NULL) { + // Move the deflated ObjectMonitors to the global free list. + // No races on the working list so no need for load_acquire(). + guarantee(free_tail_p != NULL && local_deflated_count > 0, "free_tail_p=" INTPTR_FORMAT ", local_deflated_count=%d", p2i(free_tail_p), local_deflated_count); + // Note: The target thread can be doing an om_alloc() that + // is trying to prepend an ObjectMonitor on its in-use list + // at the same time that we have deflated the current in-use + // list head and put it on the local free list. prepend_to_common() + // will detect the race and retry which avoids list corruption, + // but the next field in free_tail_p can flicker to marked + // and then unmarked while prepend_to_common() is sorting it + // all out. + assert(unmarked_next(free_tail_p) == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(unmarked_next(free_tail_p))); + + if (HandshakeAfterDeflateIdleMonitors) { + prepend_list_to_global_wait_list(free_head_p, free_tail_p, local_deflated_count); + } else { + prepend_list_to_global_free_list(free_head_p, free_tail_p, local_deflated_count); + } + + OM_PERFDATA_OP(Deflations, inc(local_deflated_count)); + } + + if (saved_mid_in_use_p != NULL) { + // deflate_monitor_list_using_JT() detected a safepoint starting. + timer.stop(); + { + if (is_global) { + log_debug(monitorinflation)("pausing deflation of global idle monitors for a safepoint."); + } else { + log_debug(monitorinflation)("jt=" INTPTR_FORMAT ": pausing deflation of per-thread idle monitors for a safepoint.", p2i(target)); + } + assert(SafepointMechanism::should_block(self), "sanity check"); + ThreadBlockInVM blocker(self); + } + // Prepare for another loop after the safepoint. 
+ free_head_p = NULL; + free_tail_p = NULL; + if (log_is_enabled(Info, monitorinflation)) { + timer.start(); + } + } + } while (saved_mid_in_use_p != NULL); + timer.stop(); + + LogStreamHandle(Debug, monitorinflation) lsh_debug; + LogStreamHandle(Info, monitorinflation) lsh_info; + LogStream* ls = NULL; + if (log_is_enabled(Debug, monitorinflation)) { + ls = &lsh_debug; + } else if (deflated_count != 0 && log_is_enabled(Info, monitorinflation)) { + ls = &lsh_info; + } + if (ls != NULL) { + if (is_global) { + ls->print_cr("async-deflating global idle monitors, %3.7f secs, %d monitors", timer.seconds(), deflated_count); + } else { + ls->print_cr("jt=" INTPTR_FORMAT ": async-deflating per-thread idle monitors, %3.7f secs, %d monitors", p2i(target), timer.seconds(), deflated_count); + } + } +} + void ObjectSynchronizer::finish_deflate_idle_monitors(DeflateMonitorCounters* counters) { // Report the cumulative time for deflating each thread's idle // monitors. Note: if the work is split among more than one @@ -1670,18 +2774,25 @@ // than a beginning to end measurement of the phase. log_info(safepoint, cleanup)("deflating per-thread idle monitors, %3.7f secs, monitors=%d", counters->per_thread_times, counters->per_thread_scavenged); - g_om_free_count += counters->n_scavenged; + bool needs_special_deflation = is_special_deflation_requested(); + if (AsyncDeflateIdleMonitors && !needs_special_deflation) { + // Nothing to do when idle ObjectMonitors are deflated using + // a JavaThread unless a special deflation has been requested. + return; + } if (log_is_enabled(Debug, monitorinflation)) { // exit_globals()'s call to audit_and_print_stats() is done - // at the Info level. + // at the Info level and not at a safepoint. + // For async deflation, audit_and_print_stats() is called in + // ObjectSynchronizer::do_safepoint_work() at the Debug level + // at a safepoint. ObjectSynchronizer::audit_and_print_stats(false /* on_exit */); } else if (log_is_enabled(Info, monitorinflation)) { - Thread::muxAcquire(&gListLock, "finish_deflate_idle_monitors"); - log_info(monitorinflation)("g_om_population=%d, g_om_in_use_count=%d, " - "g_om_free_count=%d", g_om_population, - g_om_in_use_count, g_om_free_count); - Thread::muxRelease(&gListLock); + log_info(monitorinflation)("global_population=%d, global_in_use_count=%d, " + "global_free_count=%d, global_wait_count=%d", + Atomic::load(&LVars.population), Atomic::load(&LVars.in_use_count), + Atomic::load(&LVars.free_count), Atomic::load(&LVars.wait_count)); } Atomic::store(&_forceMonitorScavenge, 0); // Reset @@ -1691,11 +2802,20 @@ GVars.stw_random = os::random(); GVars.stw_cycle++; + + if (needs_special_deflation) { + set_is_special_deflation_requested(false); // special deflation is done + } } void ObjectSynchronizer::deflate_thread_local_monitors(Thread* thread, DeflateMonitorCounters* counters) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + if (AsyncDeflateIdleMonitors && !is_special_deflation_requested()) { + // Nothing to do if a special deflation has NOT been requested. + return; + } + ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors ObjectMonitor* free_tail_p = NULL; elapsedTimer timer; @@ -1705,25 +2825,21 @@ timer.start(); } - int deflated_count = deflate_monitor_list(thread->om_in_use_list_addr(), &free_head_p, &free_tail_p); - - Thread::muxAcquire(&gListLock, "deflate_thread_local_monitors"); + // Update n_in_circulation before om_in_use_count is updated by deflation. 
+ Atomic::add(&counters->n_in_circulation, thread->om_in_use_count); - // Adjust counters - counters->n_in_circulation += thread->om_in_use_count; - thread->om_in_use_count -= deflated_count; - counters->n_scavenged += deflated_count; - counters->n_in_use += thread->om_in_use_count; - counters->per_thread_scavenged += deflated_count; + int deflated_count = deflate_monitor_list(&thread->om_in_use_list, &thread->om_in_use_count, &free_head_p, &free_tail_p); + Atomic::add(&counters->n_in_use, thread->om_in_use_count); if (free_head_p != NULL) { // Move the deflated ObjectMonitors back to the global free list. + // No races on the working list so no need for load_acquire(). guarantee(free_tail_p != NULL && deflated_count > 0, "invariant"); - assert(free_tail_p->_next_om == NULL, "invariant"); - - // constant-time list splice - prepend scavenged segment to g_free_list - free_tail_p->_next_om = g_free_list; - g_free_list = free_head_p; + assert(free_tail_p->_next_om == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(free_tail_p->_next_om)); + prepend_list_to_global_free_list(free_head_p, free_tail_p, deflated_count); + Atomic::add(&counters->n_scavenged, deflated_count); + Atomic::add(&counters->per_thread_scavenged, deflated_count); } timer.stop(); @@ -1732,8 +2848,6 @@ // should be cheap. counters->per_thread_times += timer.seconds(); - Thread::muxRelease(&gListLock); - LogStreamHandle(Debug, monitorinflation) lsh_debug; LogStreamHandle(Info, monitorinflation) lsh_info; LogStream* ls = NULL; @@ -1784,9 +2898,7 @@ assert(THREAD == JavaThread::current(), "must be current Java thread"); NoSafepointVerifier nsv; ReleaseJavaMonitorsClosure rjmc(THREAD); - Thread::muxAcquire(&gListLock, "release_monitors_owned_by_thread"); ObjectSynchronizer::monitors_iterate(&rjmc); - Thread::muxRelease(&gListLock); THREAD->clear_pending_exception(); } @@ -1840,36 +2952,34 @@ } assert(ls != NULL, "sanity check"); - if (!on_exit) { - // Not at VM exit so grab the global list lock. - Thread::muxAcquire(&gListLock, "audit_and_print_stats"); - } - // Log counts for the global and per-thread monitor lists: int chk_om_population = log_monitor_list_counts(ls); int error_cnt = 0; ls->print_cr("Checking global lists:"); - // Check g_om_population: - if (g_om_population == chk_om_population) { - ls->print_cr("g_om_population=%d equals chk_om_population=%d", - g_om_population, chk_om_population); + // Check LVars.population: + if (Atomic::load(&LVars.population) == chk_om_population) { + ls->print_cr("global_population=%d equals chk_om_population=%d", + Atomic::load(&LVars.population), chk_om_population); } else { - ls->print_cr("ERROR: g_om_population=%d is not equal to " - "chk_om_population=%d", g_om_population, - chk_om_population); - error_cnt++; + // With lock free access to the monitor lists, it is possible for + // log_monitor_list_counts() to return a value that doesn't match + // LVars.population. So far a higher value has been seen in testing + // so something is being double counted by log_monitor_list_counts(). 
+ ls->print_cr("WARNING: global_population=%d is not equal to " + "chk_om_population=%d", Atomic::load(&LVars.population), chk_om_population); } - // Check g_om_in_use_list and g_om_in_use_count: + // Check LVars.in_use_list and LVars.in_use_count: chk_global_in_use_list_and_count(ls, &error_cnt); - // Check g_free_list and g_om_free_count: + // Check LVars.free_list and LVars.free_count: chk_global_free_list_and_count(ls, &error_cnt); - if (!on_exit) { - Thread::muxRelease(&gListLock); + if (HandshakeAfterDeflateIdleMonitors) { + // Check LVars.wait_list and LVars.wait_count: + chk_global_wait_list_and_count(ls, &error_cnt); } ls->print_cr("Checking per-thread lists:"); @@ -1893,7 +3003,7 @@ // When exiting this log output is at the Info level. When called // at a safepoint, this log output is at the Trace level since // there can be a lot of it. - log_in_use_monitor_details(ls, on_exit); + log_in_use_monitor_details(ls); } ls->flush(); @@ -1922,12 +3032,13 @@ ": free per-thread monitor must have NULL _header " "field: _header=" INTPTR_FORMAT, p2i(jt), p2i(n), n->header().value()); - } else { + *error_cnt_p = *error_cnt_p + 1; + } else if (!AsyncDeflateIdleMonitors) { out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": free global monitor " "must have NULL _header field: _header=" INTPTR_FORMAT, p2i(n), n->header().value()); + *error_cnt_p = *error_cnt_p + 1; } - *error_cnt_p = *error_cnt_p + 1; } if (n->object() != NULL) { if (jt != NULL) { @@ -1944,21 +3055,81 @@ } } +// Lock the next ObjectMonitor for traversal. The current ObjectMonitor +// is unlocked after the next ObjectMonitor is locked. *cur_p and *next_p +// are updated to their next values in the list traversal. *cur_p is set +// to NULL when the end of the list is reached. +static void lock_next_for_traversal(ObjectMonitor** cur_p, ObjectMonitor** next_p) { + ObjectMonitor* prev = *cur_p; // Save current for unlocking. + if (*next_p == NULL) { // Reached the end of the list. + om_unlock(prev); // Unlock previous. + *cur_p = NULL; // Tell the caller we are done. + return; + } + om_lock(*next_p); // Lock next. + om_unlock(prev); // Unlock previous. + *cur_p = *next_p; // Update current. + *next_p = unmarked_next(*cur_p); // Update next. +} + // Check the global free list and count; log the results of the checks. void ObjectSynchronizer::chk_global_free_list_and_count(outputStream * out, int *error_cnt_p) { int chk_om_free_count = 0; - for (ObjectMonitor* n = g_free_list; n != NULL; n = n->_next_om) { - chk_free_entry(NULL /* jt */, n, out, error_cnt_p); - chk_om_free_count++; - } - if (g_om_free_count == chk_om_free_count) { - out->print_cr("g_om_free_count=%d equals chk_om_free_count=%d", - g_om_free_count, chk_om_free_count); + ObjectMonitor* cur = NULL; + ObjectMonitor* next = NULL; + if ((cur = get_list_head_locked(&LVars.free_list)) != NULL) { + next = unmarked_next(cur); + // Marked the global free list head so process the list. 
+ while (true) { + chk_free_entry(NULL /* jt */, cur, out, error_cnt_p); + chk_om_free_count++; + + lock_next_for_traversal(&cur, &next); + if (cur == NULL) { + break; + } + } + } + if (Atomic::load(&LVars.free_count) == chk_om_free_count) { + out->print_cr("global_free_count=%d equals chk_om_free_count=%d", + Atomic::load(&LVars.free_count), chk_om_free_count); } else { - out->print_cr("ERROR: g_om_free_count=%d is not equal to " - "chk_om_free_count=%d", g_om_free_count, - chk_om_free_count); + // With lock free access to LVars.free_list, it is possible for an + // ObjectMonitor to be prepended to LVars.free_list after we started + // calculating chk_om_free_count so LVars.free_count may not + // match anymore. + out->print_cr("WARNING: global_free_count=%d is not equal to " + "chk_om_free_count=%d", Atomic::load(&LVars.free_count), chk_om_free_count); + } +} + +// Check the global wait list and count; log the results of the checks. +void ObjectSynchronizer::chk_global_wait_list_and_count(outputStream * out, + int *error_cnt_p) { + int chk_om_wait_count = 0; + ObjectMonitor* cur = NULL; + ObjectMonitor* next = NULL; + if ((cur = get_list_head_locked(&LVars.wait_list)) != NULL) { + next = unmarked_next(cur); + // Marked the global wait list head so process the list. + while (true) { + // Rules for LVars.wait_list are the same as of LVars.free_list: + chk_free_entry(NULL /* jt */, cur, out, error_cnt_p); + chk_om_wait_count++; + + lock_next_for_traversal(&cur, &next); + if (cur == NULL) { + break; + } + } + } + if (Atomic::load(&LVars.wait_count) == chk_om_wait_count) { + out->print_cr("global_wait_count=%d equals chk_om_wait_count=%d", + Atomic::load(&LVars.wait_count), chk_om_wait_count); + } else { + out->print_cr("ERROR: global_wait_count=%d is not equal to " + "chk_om_wait_count=%d", Atomic::load(&LVars.wait_count), chk_om_wait_count); *error_cnt_p = *error_cnt_p + 1; } } @@ -1967,17 +3138,30 @@ void ObjectSynchronizer::chk_global_in_use_list_and_count(outputStream * out, int *error_cnt_p) { int chk_om_in_use_count = 0; - for (ObjectMonitor* n = g_om_in_use_list; n != NULL; n = n->_next_om) { - chk_in_use_entry(NULL /* jt */, n, out, error_cnt_p); - chk_om_in_use_count++; - } - if (g_om_in_use_count == chk_om_in_use_count) { - out->print_cr("g_om_in_use_count=%d equals chk_om_in_use_count=%d", g_om_in_use_count, - chk_om_in_use_count); + ObjectMonitor* cur = NULL; + ObjectMonitor* next = NULL; + if ((cur = get_list_head_locked(&LVars.in_use_list)) != NULL) { + next = unmarked_next(cur); + // Marked the global in-use list head so process the list. + while (true) { + chk_in_use_entry(NULL /* jt */, cur, out, error_cnt_p); + chk_om_in_use_count++; + + lock_next_for_traversal(&cur, &next); + if (cur == NULL) { + break; + } + } + } + if (Atomic::load(&LVars.in_use_count) == chk_om_in_use_count) { + out->print_cr("global_in_use_count=%d equals chk_om_in_use_count=%d", + Atomic::load(&LVars.in_use_count), chk_om_in_use_count); } else { - out->print_cr("ERROR: g_om_in_use_count=%d is not equal to chk_om_in_use_count=%d", - g_om_in_use_count, chk_om_in_use_count); - *error_cnt_p = *error_cnt_p + 1; + // With lock free access to the monitor lists, it is possible for + // an exiting JavaThread to put its in-use ObjectMonitors on the + // global in-use list after chk_om_in_use_count is calculated above. 
+ out->print_cr("WARNING: global_in_use_count=%d is not equal to chk_om_in_use_count=%d", + Atomic::load(&LVars.in_use_count), chk_om_in_use_count); } } @@ -2045,13 +3229,25 @@ outputStream * out, int *error_cnt_p) { int chk_om_free_count = 0; - for (ObjectMonitor* n = jt->om_free_list; n != NULL; n = n->_next_om) { - chk_free_entry(jt, n, out, error_cnt_p); - chk_om_free_count++; + ObjectMonitor* cur = NULL; + ObjectMonitor* next = NULL; + if ((cur = get_list_head_locked(&jt->om_free_list)) != NULL) { + next = unmarked_next(cur); + // Marked the per-thread free list head so process the list. + while (true) { + chk_free_entry(jt, cur, out, error_cnt_p); + chk_om_free_count++; + + lock_next_for_traversal(&cur, &next); + if (cur == NULL) { + break; + } + } } if (jt->om_free_count == chk_om_free_count) { out->print_cr("jt=" INTPTR_FORMAT ": om_free_count=%d equals " - "chk_om_free_count=%d", p2i(jt), jt->om_free_count, chk_om_free_count); + "chk_om_free_count=%d", p2i(jt), jt->om_free_count, + chk_om_free_count); } else { out->print_cr("ERROR: jt=" INTPTR_FORMAT ": om_free_count=%d is not " "equal to chk_om_free_count=%d", p2i(jt), jt->om_free_count, @@ -2065,18 +3261,29 @@ outputStream * out, int *error_cnt_p) { int chk_om_in_use_count = 0; - for (ObjectMonitor* n = jt->om_in_use_list; n != NULL; n = n->_next_om) { - chk_in_use_entry(jt, n, out, error_cnt_p); - chk_om_in_use_count++; + ObjectMonitor* cur = NULL; + ObjectMonitor* next = NULL; + if ((cur = get_list_head_locked(&jt->om_in_use_list)) != NULL) { + next = unmarked_next(cur); + // Marked the per-thread in-use list head so process the list. + while (true) { + chk_in_use_entry(jt, cur, out, error_cnt_p); + chk_om_in_use_count++; + + lock_next_for_traversal(&cur, &next); + if (cur == NULL) { + break; + } + } } if (jt->om_in_use_count == chk_om_in_use_count) { out->print_cr("jt=" INTPTR_FORMAT ": om_in_use_count=%d equals " - "chk_om_in_use_count=%d", p2i(jt), jt->om_in_use_count, - chk_om_in_use_count); + "chk_om_in_use_count=%d", p2i(jt), + jt->om_in_use_count, chk_om_in_use_count); } else { out->print_cr("ERROR: jt=" INTPTR_FORMAT ": om_in_use_count=%d is not " - "equal to chk_om_in_use_count=%d", p2i(jt), jt->om_in_use_count, - chk_om_in_use_count); + "equal to chk_om_in_use_count=%d", p2i(jt), + jt->om_in_use_count, chk_om_in_use_count); *error_cnt_p = *error_cnt_p + 1; } } @@ -2084,58 +3291,71 @@ // Log details about ObjectMonitors on the in-use lists. The 'BHL' // flags indicate why the entry is in-use, 'object' and 'object type' // indicate the associated object and its type. -void ObjectSynchronizer::log_in_use_monitor_details(outputStream * out, - bool on_exit) { - if (!on_exit) { - // Not at VM exit so grab the global list lock. 
-  Thread::muxAcquire(&gListLock, "log_in_use_monitor_details");
-  }
-
+void ObjectSynchronizer::log_in_use_monitor_details(outputStream * out) {
   stringStream ss;
-  if (g_om_in_use_count > 0) {
+  if (Atomic::load(&LVars.in_use_count) > 0) {
     out->print_cr("In-use global monitor info:");
     out->print_cr("(B -> is_busy, H -> has hash code, L -> lock status)");
-    out->print_cr("%18s  %s  %18s  %18s",
-                  "monitor", "BHL", "object", "object type");
-    out->print_cr("==================  ===  ==================  ==================");
-    for (ObjectMonitor* n = g_om_in_use_list; n != NULL; n = n->_next_om) {
-      const oop obj = (oop) n->object();
-      const markWord mark = n->header();
-      ResourceMark rm;
-      out->print(INTPTR_FORMAT "  %d%d%d  " INTPTR_FORMAT "  %s", p2i(n),
-                 n->is_busy() != 0, mark.hash() != 0, n->owner() != NULL,
-                 p2i(obj), obj->klass()->external_name());
-      if (n->is_busy() != 0) {
-        out->print(" (%s)", n->is_busy_to_string(&ss));
-        ss.reset();
+    out->print_cr("%18s  %s  %7s  %18s  %18s",
+                  "monitor", "BHL", "ref_cnt", "object", "object type");
+    out->print_cr("==================  ===  =======  ==================  ==================");
+    ObjectMonitor* cur = NULL;
+    ObjectMonitor* next = NULL;
+    if ((cur = get_list_head_locked(&LVars.in_use_list)) != NULL) {
+      next = unmarked_next(cur);
+      // Marked the global in-use list head so process the list.
+      while (true) {
+        const oop obj = (oop) cur->object();
+        const markWord mark = cur->header();
+        ResourceMark rm;
+        out->print(INTPTR_FORMAT "  %d%d%d  %7d  " INTPTR_FORMAT "  %s",
+                   p2i(cur), cur->is_busy() != 0, mark.hash() != 0,
+                   cur->owner() != NULL, (int)cur->ref_count(), p2i(obj),
+                   obj->klass()->external_name());
+        if (cur->is_busy() != 0) {
+          out->print(" (%s)", cur->is_busy_to_string(&ss));
+          ss.reset();
+        }
+        out->cr();
+
+        lock_next_for_traversal(&cur, &next);
+        if (cur == NULL) {
+          break;
+        }
       }
-      out->cr();
     }
   }

-  if (!on_exit) {
-    Thread::muxRelease(&gListLock);
-  }
-
   out->print_cr("In-use per-thread monitor info:");
   out->print_cr("(B -> is_busy, H -> has hash code, L -> lock status)");
-  out->print_cr("%18s  %18s  %s  %18s  %18s",
-                "jt", "monitor", "BHL", "object", "object type");
-  out->print_cr("==================  ==================  ===  ==================  ==================");
+  out->print_cr("%18s  %18s  %s  %7s  %18s  %18s",
+                "jt", "monitor", "BHL", "ref_cnt", "object", "object type");
+  out->print_cr("==================  ==================  ===  =======  ==================  ==================");
   for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) {
-    for (ObjectMonitor* n = jt->om_in_use_list; n != NULL; n = n->_next_om) {
-      const oop obj = (oop) n->object();
-      const markWord mark = n->header();
-      ResourceMark rm;
-      out->print(INTPTR_FORMAT "  " INTPTR_FORMAT "  %d%d%d  " INTPTR_FORMAT
-                 "  %s", p2i(jt), p2i(n), n->is_busy() != 0,
-                 mark.hash() != 0, n->owner() != NULL, p2i(obj),
-                 obj->klass()->external_name());
-      if (n->is_busy() != 0) {
-        out->print(" (%s)", n->is_busy_to_string(&ss));
-        ss.reset();
+    ObjectMonitor* cur = NULL;
+    ObjectMonitor* next = NULL;
+    if ((cur = get_list_head_locked(&jt->om_in_use_list)) != NULL) {
+      next = unmarked_next(cur);
+      // Marked the per-thread in-use list head so process the list.
+ while (true) { + const oop obj = (oop) cur->object(); + const markWord mark = cur->header(); + ResourceMark rm; + out->print(INTPTR_FORMAT " " INTPTR_FORMAT " %d%d%d %7d " + INTPTR_FORMAT " %s", p2i(jt), p2i(cur), cur->is_busy() != 0, + mark.hash() != 0, cur->owner() != NULL, (int)cur->ref_count(), + p2i(obj), obj->klass()->external_name()); + if (cur->is_busy() != 0) { + out->print(" (%s)", cur->is_busy_to_string(&ss)); + ss.reset(); + } + out->cr(); + + lock_next_for_traversal(&cur, &next); + if (cur == NULL) { + break; + } } - out->cr(); } } @@ -2146,12 +3366,15 @@ // the population count. int ObjectSynchronizer::log_monitor_list_counts(outputStream * out) { int pop_count = 0; - out->print_cr("%18s %10s %10s %10s", - "Global Lists:", "InUse", "Free", "Total"); - out->print_cr("================== ========== ========== =========="); - out->print_cr("%18s %10d %10d %10d", "", - g_om_in_use_count, g_om_free_count, g_om_population); - pop_count += g_om_in_use_count + g_om_free_count; + out->print_cr("%18s %10s %10s %10s %10s", + "Global Lists:", "InUse", "Free", "Wait", "Total"); + out->print_cr("================== ========== ========== ========== =========="); + out->print_cr("%18s %10d %10d %10d %10d", "", Atomic::load(&LVars.in_use_count), + Atomic::load(&LVars.free_count), Atomic::load(&LVars.wait_count), Atomic::load(&LVars.population)); + pop_count += Atomic::load(&LVars.in_use_count) + Atomic::load(&LVars.free_count); + if (HandshakeAfterDeflateIdleMonitors) { + pop_count += Atomic::load(&LVars.wait_count); + } out->print_cr("%18s %10s %10s %10s", "Per-Thread Lists:", "InUse", "Free", "Provision"); @@ -2172,7 +3395,7 @@ // the list of extant blocks without taking a lock. int ObjectSynchronizer::verify_objmon_isinpool(ObjectMonitor *monitor) { - PaddedObjectMonitor* block = Atomic::load_acquire(&g_block_list); + PaddedObjectMonitor* block = Atomic::load(&g_block_list); while (block != NULL) { assert(block->object() == CHAINMARKER, "must be a block header"); if (monitor > &block[0] && monitor < &block[_BLOCKSIZE]) { @@ -2182,7 +3405,9 @@ assert((diff % sizeof(PaddedObjectMonitor)) == 0, "must be aligned"); return 1; } - block = (PaddedObjectMonitor*)block->_next_om; + // unmarked_next() is not needed with g_block_list (no locking + // used with block linkage _next_om fields). + block = (PaddedObjectMonitor*)Atomic::load(&block->_next_om); } return 0; } --- old/src/hotspot/share/runtime/synchronizer.hpp 2019-12-11 14:52:24.000000000 -0500 +++ new/src/hotspot/share/runtime/synchronizer.hpp 2019-12-11 14:52:24.000000000 -0500 @@ -32,16 +32,23 @@ #include "runtime/perfData.hpp" class ObjectMonitor; +class ObjectMonitorHandle; class ThreadsList; -typedef PaddedEnd<ObjectMonitor> PaddedObjectMonitor; +#ifndef OM_CACHE_LINE_SIZE +// Use DEFAULT_CACHE_LINE_SIZE if not already specified for +// the current build platform.
+#define OM_CACHE_LINE_SIZE DEFAULT_CACHE_LINE_SIZE +#endif + +typedef PaddedEnd<ObjectMonitor, OM_CACHE_LINE_SIZE> PaddedObjectMonitor; struct DeflateMonitorCounters { - int n_in_use; // currently associated with objects - int n_in_circulation; // extant - int n_scavenged; // reclaimed (global and per-thread) - int per_thread_scavenged; // per-thread scavenge total - double per_thread_times; // per-thread scavenge times + volatile int n_in_use; // currently associated with objects + volatile int n_in_circulation; // extant + volatile int n_scavenged; // reclaimed (global and per-thread) + volatile int per_thread_scavenged; // per-thread scavenge total + double per_thread_times; // per-thread scavenge times }; class ObjectSynchronizer : AllStatic { @@ -96,15 +103,16 @@ static void reenter (Handle obj, intx recursions, TRAPS); // thread-specific and global ObjectMonitor free list accessors - static ObjectMonitor* om_alloc(Thread* self); + static ObjectMonitor* om_alloc(Thread* self, const InflateCause cause); static void om_release(Thread* self, ObjectMonitor* m, bool FromPerThreadAlloc); static void om_flush(Thread* self); // Inflate light weight monitor to heavy weight monitor - static ObjectMonitor* inflate(Thread* self, oop obj, const InflateCause cause); + static void inflate(ObjectMonitorHandle* omh_p, Thread* self, oop obj, + const InflateCause cause); // This version is only for internal use - static void inflate_helper(oop obj); + static void inflate_helper(ObjectMonitorHandle* omh_p, oop obj); static const char* inflate_cause_name(const InflateCause cause); // Returns the identity hash value for an oop @@ -126,19 +134,40 @@ // Basically we deflate all monitors that are not busy. // An adaptive profile-based deflation policy could be used if needed static void deflate_idle_monitors(DeflateMonitorCounters* counters); + static void deflate_idle_monitors_using_JT(); + static void deflate_global_idle_monitors_using_JT(); + static void deflate_per_thread_idle_monitors_using_JT(JavaThread* target); + static void deflate_common_idle_monitors_using_JT(bool is_global, JavaThread* target); static void deflate_thread_local_monitors(Thread* thread, DeflateMonitorCounters* counters); static void prepare_deflate_idle_monitors(DeflateMonitorCounters* counters); static void finish_deflate_idle_monitors(DeflateMonitorCounters* counters); // For a given monitor list: global or per-thread, deflate idle monitors static int deflate_monitor_list(ObjectMonitor** list_p, + int* count_p, ObjectMonitor** free_head_p, ObjectMonitor** free_tail_p); + // For a given in-use monitor list: global or per-thread, deflate idle + // monitors using a JavaThread.
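[Note on the OM_CACHE_LINE_SIZE fallback and padded typedef above: they are a
false-sharing defense, rounding every ObjectMonitor up to a whole number of
cache lines so two monitors that are hot on different threads never share a
line. A minimal stand-alone sketch of the same idea, using alignas instead of
HotSpot's PaddedEnd (names here are illustrative); the ObjectSynchronizer
declarations continue below.

#include <cstddef>

#ifndef OM_CACHE_LINE_SIZE
#define OM_CACHE_LINE_SIZE 64
#endif

struct Monitor {                  // stand-in for ObjectMonitor's hot fields
  void* volatile owner;
  volatile int ref_count;
};

// alignas rounds sizeof(PaddedMonitor) up to a multiple of the alignment,
// so array elements never straddle or share a cache line.
struct alignas(OM_CACHE_LINE_SIZE) PaddedMonitor : Monitor {};

static_assert(sizeof(PaddedMonitor) % OM_CACHE_LINE_SIZE == 0,
              "each array element must occupy whole cache lines");

// Blocks of monitors (cf. _BLOCKSIZE) can then be carved out of one
// allocation without two threads' hot monitors colliding on a line.
static PaddedMonitor g_block[128];]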
+ static int deflate_monitor_list_using_JT(ObjectMonitor** list_p, + int* count_p, + ObjectMonitor** free_head_p, + ObjectMonitor** free_tail_p, + ObjectMonitor** saved_mid_in_use_p); static bool deflate_monitor(ObjectMonitor* mid, oop obj, ObjectMonitor** free_head_p, ObjectMonitor** free_tail_p); - static bool is_cleanup_needed(); + static bool deflate_monitor_using_JT(ObjectMonitor* mid, + ObjectMonitor** free_head_p, + ObjectMonitor** free_tail_p); + static bool is_async_deflation_needed(); static bool needs_monitor_scavenge(); + static bool is_safepoint_deflation_needed(); + static bool is_async_deflation_requested() { return _is_async_deflation_requested; } + static bool is_special_deflation_requested() { return _is_special_deflation_requested; } + static void set_is_async_deflation_requested(bool new_value) { _is_async_deflation_requested = new_value; } + static void set_is_special_deflation_requested(bool new_value) { _is_special_deflation_requested = new_value; } + static jlong time_since_last_async_deflation_ms(); static void oops_do(OopClosure* f); // Process oops in thread local used monitors static void thread_local_used_oops_do(Thread* thread, OopClosure* f); @@ -149,6 +178,8 @@ outputStream * out, int *error_cnt_p); static void chk_global_free_list_and_count(outputStream * out, int *error_cnt_p); + static void chk_global_wait_list_and_count(outputStream * out, + int *error_cnt_p); static void chk_global_in_use_list_and_count(outputStream * out, int *error_cnt_p); static void chk_in_use_entry(JavaThread* jt, ObjectMonitor* n, @@ -159,28 +190,29 @@ static void chk_per_thread_free_list_and_count(JavaThread *jt, outputStream * out, int *error_cnt_p); - static void log_in_use_monitor_details(outputStream * out, bool on_exit); + static void log_in_use_monitor_details(outputStream * out); static int log_monitor_list_counts(outputStream * out); static int verify_objmon_isinpool(ObjectMonitor *addr) PRODUCT_RETURN0; + static void do_safepoint_work(DeflateMonitorCounters* counters); + private: friend class SynchronizerTest; enum { _BLOCKSIZE = 128 }; // global list of blocks of monitors - static PaddedObjectMonitor* volatile g_block_list; - // global monitor free list - static ObjectMonitor* volatile g_free_list; - // global monitor in-use list, for moribund threads, - // monitors they inflated need to be scanned for deflation - static ObjectMonitor* volatile g_om_in_use_list; - // count of entries in g_om_in_use_list - static int g_om_in_use_count; + static PaddedObjectMonitor* g_block_list; + static volatile bool _is_async_deflation_requested; + static volatile bool _is_special_deflation_requested; + static jlong _last_async_deflation_time_ns; + + // Function to prepend new blocks to the appropriate lists: + static void prepend_block_to_lists(PaddedObjectMonitor* new_blk); // Process oops in all global used monitors (i.e. 
moribund thread's monitors) static void global_used_oops_do(OopClosure* f); // Process oops in monitors on the given list - static void list_oops_do(ObjectMonitor* list, OopClosure* f); + static void list_oops_do(ObjectMonitor* list, int count, OopClosure* f); // Support for SynchronizerTest access to GVars fields: static u_char* get_gvars_addr(); --- old/src/hotspot/share/runtime/thread.cpp 2019-12-11 14:52:25.000000000 -0500 +++ new/src/hotspot/share/runtime/thread.cpp 2019-12-11 14:52:25.000000000 -0500 @@ -4669,7 +4669,12 @@ DO_JAVA_THREADS(t_list, p) { if (!p->can_call_java()) continue; - address pending = (address)p->current_pending_monitor(); + // This current_pending_monitor() call site only uses the returned + // ObjectMonitor* for matching purposes and does not try to fetch + // the object reference so this ObjectMonitorHandle is not strictly + // necessary. + ObjectMonitorHandle omh; + address pending = (address)p->current_pending_monitor(&omh); if (pending == monitor) { // found a match if (i < count) result->append(p); // save the first count matches i++; --- old/src/hotspot/share/runtime/thread.hpp 2019-12-11 14:52:27.000000000 -0500 +++ new/src/hotspot/share/runtime/thread.hpp 2019-12-11 14:52:26.000000000 -0500 @@ -526,7 +526,6 @@ os::set_native_thread_name(name); } - ObjectMonitor** om_in_use_list_addr() { return (ObjectMonitor **)&om_in_use_list; } Monitor* SR_lock() const { return _SR_lock; } bool has_async_exception() const { return (_suspend_flags & _has_async_exception) != 0; } @@ -625,11 +624,23 @@ void increment_vm_operation_completed_count() { _vm_operation_completed_count++; } // For tracking the heavyweight monitor the thread is pending on. - ObjectMonitor* current_pending_monitor() { - return _current_pending_monitor; + ObjectMonitor* current_pending_monitor(ObjectMonitorHandle* omh_p) { + if (omh_p->set_om_ptr_if_safe(_current_pending_monitor)) { + return omh_p->om_ptr(); // Return the safe ObjectMonitor*. + } + return NULL; } void set_current_pending_monitor(ObjectMonitor* monitor) { + ObjectMonitor* saved_cur = NULL; + if (monitor != NULL) { + monitor->inc_ref_count(); // Protect the ObjectMonitor* we're about to cache. + } else { + saved_cur = _current_pending_monitor; + } _current_pending_monitor = monitor; + if (saved_cur != NULL) { + saved_cur->dec_ref_count(); // Cleared the cached ObjectMonitor*. + } } void set_current_pending_monitor_is_from_java(bool from_java) { _current_pending_monitor_is_from_java = from_java; @@ -639,11 +650,23 @@ } // For tracking the ObjectMonitor on which this thread called Object.wait() - ObjectMonitor* current_waiting_monitor() { - return _current_waiting_monitor; + ObjectMonitor* current_waiting_monitor(ObjectMonitorHandle* omh_p) { + if (omh_p->set_om_ptr_if_safe(_current_waiting_monitor)) { + return omh_p->om_ptr(); // Return the safe ObjectMonitor*. + } + return NULL; } void set_current_waiting_monitor(ObjectMonitor* monitor) { + ObjectMonitor* saved_cur = NULL; + if (monitor != NULL) { + monitor->inc_ref_count(); // Protect the ObjectMonitor* we're about to cache. + } else { + saved_cur = _current_waiting_monitor; + } _current_waiting_monitor = monitor; + if (saved_cur != NULL) { + saved_cur->dec_ref_count(); // Cleared the cached ObjectMonitor*. + } } // For tracking the Jvmti raw monitor the thread is pending on. 
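[Note on the two thread.hpp setters above: they share one publication
discipline. The new monitor's ref_count is bumped before the pointer is stored
into the field, and the old value's count is dropped only after the field has
stopped exposing it. The non-NULL branch never releases a previous value, which
assumes the field only ever transitions NULL -> monitor -> NULL. A stripped-down
sketch of the idiom (illustrative names, not the HotSpot classes):

#include <atomic>

struct Mon {
  std::atomic<int> ref_count{0};   // stand-in for ObjectMonitor's ref_count
};

static std::atomic<Mon*> g_cached{nullptr};  // stand-in for the cached field

// Publish 'm' (nullptr clears the field). Raising the count *before* the
// store means no observer can ever see an unprotected pointer; lowering
// the old count *after* the store means the field no longer exposes it.
void publish(Mon* m) {
  Mon* old = nullptr;
  if (m != nullptr) {
    m->ref_count.fetch_add(1);     // protect before publication
  } else {
    old = g_cached.load();         // remember what we are about to unpublish
  }
  g_cached.store(m);
  if (old != nullptr) {
    old->ref_count.fetch_sub(1);   // release only after un-publication
  }
}]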
--- old/src/hotspot/share/runtime/vframe.cpp 2019-12-11 14:52:28.000000000 -0500 +++ new/src/hotspot/share/runtime/vframe.cpp 2019-12-11 14:52:28.000000000 -0500 @@ -122,8 +122,15 @@ if (mons->is_empty()) return result; bool found_first_monitor = false; - ObjectMonitor *pending_monitor = thread()->current_pending_monitor(); - ObjectMonitor *waiting_monitor = thread()->current_waiting_monitor(); + // For all of the javaVFrame::locked_monitors() call sites, we + // are either at a safepoint or the calling thread is operating + // on itself so this ObjectMonitorHandle is not strictly necessary. + ObjectMonitorHandle omh; + ObjectMonitor *waiting_monitor = thread()->current_waiting_monitor(&omh); + ObjectMonitor *pending_monitor = NULL; + if (waiting_monitor == NULL) { + pending_monitor = thread()->current_pending_monitor(&omh); + } oop pending_obj = (pending_monitor != NULL ? (oop) pending_monitor->object() : (oop) NULL); oop waiting_obj = (waiting_monitor != NULL ? (oop) waiting_monitor->object() : (oop) NULL); @@ -231,9 +238,12 @@ // an inflated monitor that is first on the monitor list in // the first frame can block us on a monitor enter. markWord mark = monitor->owner()->mark(); + // The calling thread is operating on itself so this + // ObjectMonitorHandle is not strictly necessary. + ObjectMonitorHandle omh; if (mark.has_monitor() && ( // we have marked ourself as pending on this monitor - mark.monitor() == thread()->current_pending_monitor() || + mark.monitor() == thread()->current_pending_monitor(&omh) || // we are not the owner of this monitor !mark.monitor()->is_entered(thread()) )) { --- old/src/hotspot/share/runtime/vmOperations.cpp 2019-12-11 14:52:29.000000000 -0500 +++ new/src/hotspot/share/runtime/vmOperations.cpp 2019-12-11 14:52:29.000000000 -0500 @@ -41,6 +41,7 @@ #include "runtime/frame.inline.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/sweeper.hpp" +#include "runtime/synchronizer.hpp" #include "runtime/thread.inline.hpp" #include "runtime/threadSMR.inline.hpp" #include "runtime/vmOperations.hpp" @@ -446,6 +447,17 @@ } } +bool VM_Exit::doit_prologue() { + if (AsyncDeflateIdleMonitors && log_is_enabled(Info, monitorinflation)) { + // AsyncDeflateIdleMonitors does a special deflation at the VM_Exit + // safepoint in order to reduce the in-use monitor population that + // is reported by ObjectSynchronizer::log_in_use_monitor_details() + // at VM exit.
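[Note on ObjectMonitorHandle, which the vframe.cpp call sites above funnel
monitor access through: its definition is not part of this chunk. As a
hypothetical miniature only, a handle of this sort pins the monitor's ref_count
for the enclosing scope so async deflation has to skip it; set_om_ptr_if_safe()
presumably fails when it loses the race to a deflater.

#include <atomic>

struct Mon {                         // stand-in for ObjectMonitor
  std::atomic<int> ref_count{0};
  std::atomic<bool> deflating{false};
};

class MonHandle {
  Mon* _ptr = nullptr;
 public:
  MonHandle() = default;
  MonHandle(const MonHandle&) = delete;
  MonHandle& operator=(const MonHandle&) = delete;
  // cf. set_om_ptr_if_safe(): pin first, then re-check for deflation.
  bool set_if_safe(Mon* m) {
    if (m == nullptr) return false;
    m->ref_count.fetch_add(1);       // pin before inspecting
    if (m->deflating.load()) {
      m->ref_count.fetch_sub(1);     // lost the race: back off
      return false;
    }
    _ptr = m;
    return true;
  }
  Mon* ptr() const { return _ptr; }
  void unset() {                     // cf. unset_om_ptr(): early release
    if (_ptr != nullptr) {
      _ptr->ref_count.fetch_sub(1);
      _ptr = nullptr;
    }
  }
  ~MonHandle() { unset(); }          // scope exit drops the pin
};

As the comments in the vframe.cpp hunk note, several of these uses are
belt-and-braces: at a safepoint, or when a thread operates on itself, the
protection is not strictly necessary.]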
+ ObjectSynchronizer::set_is_special_deflation_requested(true); + } + return true; +} + void VM_Exit::doit() { if (VerifyBeforeExit) { --- old/src/hotspot/share/runtime/vmOperations.hpp 2019-12-11 14:52:30.000000000 -0500 +++ new/src/hotspot/share/runtime/vmOperations.hpp 2019-12-11 14:52:30.000000000 -0500 @@ -451,6 +451,7 @@ } } VMOp_Type type() const { return VMOp_Exit; } + bool doit_prologue(); void doit(); }; --- old/src/hotspot/share/runtime/vmStructs.cpp 2019-12-11 14:52:32.000000000 -0500 +++ new/src/hotspot/share/runtime/vmStructs.cpp 2019-12-11 14:52:31.000000000 -0500 @@ -94,6 +94,7 @@ #include "runtime/serviceThread.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" #include "runtime/thread.inline.hpp" #include "runtime/threadSMR.hpp" #include "runtime/vframeArray.hpp" @@ -909,14 +910,14 @@ volatile_nonstatic_field(ObjectMonitor, _header, markWord) \ unchecked_nonstatic_field(ObjectMonitor, _object, sizeof(void *)) /* NOTE: no type */ \ unchecked_nonstatic_field(ObjectMonitor, _owner, sizeof(void *)) /* NOTE: no type */ \ + volatile_nonstatic_field(ObjectMonitor, _next_om, ObjectMonitor*) \ + volatile_nonstatic_field(BasicLock, _displaced_header, markWord) \ volatile_nonstatic_field(ObjectMonitor, _contentions, jint) \ volatile_nonstatic_field(ObjectMonitor, _waiters, jint) \ volatile_nonstatic_field(ObjectMonitor, _recursions, intx) \ - nonstatic_field(ObjectMonitor, _next_om, ObjectMonitor*) \ - volatile_nonstatic_field(BasicLock, _displaced_header, markWord) \ nonstatic_field(BasicObjectLock, _lock, BasicLock) \ nonstatic_field(BasicObjectLock, _obj, oop) \ - static_ptr_volatile_field(ObjectSynchronizer, g_block_list, PaddedObjectMonitor*) \ + static_field(ObjectSynchronizer, g_block_list, PaddedObjectMonitor*) \ \ /*********************/ \ /* Matcher (C2 only) */ \ --- old/src/hotspot/share/runtime/vmThread.cpp 2019-12-11 14:52:33.000000000 -0500 +++ new/src/hotspot/share/runtime/vmThread.cpp 2019-12-11 14:52:33.000000000 -0500 @@ -41,6 +41,7 @@ #include "runtime/mutexLocker.hpp" #include "runtime/os.hpp" #include "runtime/safepoint.hpp" +#include "runtime/synchronizer.hpp" #include "runtime/thread.inline.hpp" #include "runtime/vmThread.hpp" #include "runtime/vmOperations.hpp" @@ -308,6 +309,14 @@ assert(should_terminate(), "termination flag must be set"); } + if (AsyncDeflateIdleMonitors && log_is_enabled(Info, monitorinflation)) { + // AsyncDeflateIdleMonitors does a special deflation at the final + // safepoint in order to reduce the in-use monitor population that + // is reported by ObjectSynchronizer::log_in_use_monitor_details() + // at VM exit. + ObjectSynchronizer::set_is_special_deflation_requested(true); + } + // 4526887 let VM thread exit at Safepoint _cur_vm_operation = &halt_op; SafepointSynchronize::begin(); --- old/src/hotspot/share/services/threadService.cpp 2019-12-11 14:52:34.000000000 -0500 +++ new/src/hotspot/share/services/threadService.cpp 2019-12-11 14:52:34.000000000 -0500 @@ -208,7 +208,12 @@ assert(thread != NULL, "should be non-NULL"); debug_only(Thread::check_for_dangling_thread_pointer(thread);) - ObjectMonitor *wait_obj = thread->current_waiting_monitor(); + // This function can be called on a target JavaThread that is not + // the caller and we are not at a safepoint. This ObjectMonitorHandle + // keeps the ObjectMonitor from being async deflated so the object + // reference we fetch remains non-NULL. 
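[Note on the threadService.cpp deadlock scan that follows: it reuses a single
handle object across loop iterations, which is why the added unset_om_ptr()
calls release the handle manually before each new current_pending_monitor(&omh)
call; without them the previous monitor's ref_count would stay pinned for the
rest of the scan. In terms of the miniature Mon / MonHandle sketched earlier:

// One handle, many iterations: release before re-arming, or the previous
// monitor's ref_count would stay pinned for the rest of the scan.
void scan_chain(Mon* const* chain, int len) {
  MonHandle omh;
  for (int i = 0; i < len; i++) {
    omh.unset();                   // cf. the manual unset_om_ptr() calls
    if (!omh.set_if_safe(chain[i])) {
      continue;                    // monitor is being deflated: skip it
    }
    // ... inspect omh.ptr() knowing async deflation cannot reclaim it ...
  }
}                                  // destructor releases the last pin]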
+ ObjectMonitorHandle omh; + ObjectMonitor *wait_obj = thread->current_waiting_monitor(&omh); oop obj = NULL; if (wait_obj != NULL) { @@ -216,7 +221,7 @@ obj = (oop) wait_obj->object(); assert(obj != NULL, "Object.wait() should have an object"); } else { - ObjectMonitor *enter_obj = thread->current_pending_monitor(); + ObjectMonitor *enter_obj = thread->current_pending_monitor(&omh); if (enter_obj != NULL) { // thread is trying to enter() an ObjectMonitor. obj = (oop) enter_obj->object(); @@ -362,6 +367,9 @@ // This code was modified from the original Threads::find_deadlocks code. int globalDfn = 0, thisDfn; + // This code is called at a safepoint so this ObjectMonitorHandle + // is not strictly necessary. + ObjectMonitorHandle omh; ObjectMonitor* waitingToLockMonitor = NULL; JvmtiRawMonitor* waitingToLockRawMonitor = NULL; oop waitingToLockBlocker = NULL; @@ -391,10 +399,15 @@ cycle->reset(); + if (waitingToLockMonitor != NULL) { + // Done with the current waitingToLockMonitor value so release + // the ObjectMonitorHandle manually before we use it again: + omh.unset_om_ptr(); + } // When there is a deadlock, all the monitors involved in the dependency // cycle must be contended and heavyweight. So we only care about the // heavyweight monitor a thread is waiting to lock. - waitingToLockMonitor = jt->current_pending_monitor(); + waitingToLockMonitor = jt->current_pending_monitor(&omh); // JVM TI raw monitors can also be involved in deadlocks, and we can be // waiting to lock both a raw monitor and ObjectMonitor at the same time. // It isn't clear how to make deadlock detection work correctly if that @@ -485,7 +498,12 @@ break; } previousThread = currentThread; - waitingToLockMonitor = (ObjectMonitor*)currentThread->current_pending_monitor(); + if (waitingToLockMonitor != NULL) { + // Done with the current waitingToLockMonitor value so release + // the ObjectMonitorHandle manually before we use it again: + omh.unset_om_ptr(); + } + waitingToLockMonitor = (ObjectMonitor*)currentThread->current_pending_monitor(&omh); if (concurrent_locks) { waitingToLockBlocker = currentThread->current_park_blocker(); } @@ -967,13 +985,15 @@ st->print("============================="); JavaThread* currentThread; - ObjectMonitor* waitingToLockMonitor; JvmtiRawMonitor* waitingToLockRawMonitor; oop waitingToLockBlocker; int len = _threads->length(); for (int i = 0; i < len; i++) { currentThread = _threads->at(i); - waitingToLockMonitor = currentThread->current_pending_monitor(); + // This code is called at a safepoint so this ObjectMonitorHandle + // is not strictly necessary. + ObjectMonitorHandle omh; + ObjectMonitor* waitingToLockMonitor = currentThread->current_pending_monitor(&omh); waitingToLockRawMonitor = currentThread->current_pending_raw_monitor(); waitingToLockBlocker = currentThread->current_park_blocker(); st->cr(); --- old/test/hotspot/gtest/oops/test_markWord.cpp 2019-12-11 14:52:36.000000000 -0500 +++ new/test/hotspot/gtest/oops/test_markWord.cpp 2019-12-11 14:52:35.000000000 -0500 @@ -123,6 +123,10 @@ // This is no longer biased, because ObjectLocker revokes the bias. assert_test_pattern(h_obj, "is_neutral no_hash"); + // Hash the object then print it. + intx hash = h_obj->identity_hash(); + assert_test_pattern(h_obj, "is_neutral hash=0x"); + // Wait gets the lock inflated. { ObjectLocker ol(h_obj, THREAD); @@ -137,14 +141,18 @@ done.wait_with_safepoint_check(THREAD); // wait till the thread is done. } - // Make the object older. Not all GCs use this field. 
- Universe::heap()->collect(GCCause::_java_lang_system_gc); - if (UseParallelGC) { - assert_test_pattern(h_obj, "is_neutral no_hash age 1"); - } + if (!AsyncDeflateIdleMonitors) { + // With AsyncDeflateIdleMonitors, the collect() call below + // does not guarantee monitor deflation. + // Make the object older. Not all GCs use this field. + Universe::heap()->collect(GCCause::_java_lang_system_gc); + if (UseParallelGC) { + assert_test_pattern(h_obj, "is_neutral no_hash age 1"); + } - // Hash the object then print it. - intx hash = h_obj->identity_hash(); - assert_test_pattern(h_obj, "is_neutral hash=0x"); + // Hash the object then print it. + intx hash = h_obj->identity_hash(); + assert_test_pattern(h_obj, "is_neutral hash=0x"); + } } #endif // PRODUCT --- old/test/hotspot/jtreg/runtime/logging/SafepointCleanupTest.java 2019-12-11 14:52:37.000000000 -0500 +++ new/test/hotspot/jtreg/runtime/logging/SafepointCleanupTest.java 2019-12-11 14:52:37.000000000 -0500 @@ -29,12 +29,17 @@ * @modules java.base/jdk.internal.misc * java.management * @run driver SafepointCleanupTest + * @run driver SafepointCleanupTest -XX:+AsyncDeflateIdleMonitors */ import jdk.test.lib.process.OutputAnalyzer; import jdk.test.lib.process.ProcessTools; public class SafepointCleanupTest { + static final String ASYNC_DISABLE_OPTION = "-XX:-AsyncDeflateIdleMonitors"; + static final String ASYNC_ENABLE_OPTION = "-XX:+AsyncDeflateIdleMonitors"; + static final String UNLOCK_DIAG_OPTION = "-XX:+UnlockDiagnosticVMOptions"; + static void analyzeOutputOn(ProcessBuilder pb) throws Exception { OutputAnalyzer output = new OutputAnalyzer(pb.start()); output.shouldContain("[safepoint,cleanup]"); @@ -53,19 +58,40 @@ } public static void main(String[] args) throws Exception { + String async_option; + if (args.length == 0) { + // By default test deflating idle monitors at a safepoint. + async_option = ASYNC_DISABLE_OPTION; + } else { + async_option = args[0]; + } + if (!async_option.equals(ASYNC_DISABLE_OPTION) && + !async_option.equals(ASYNC_ENABLE_OPTION)) { + throw new RuntimeException("Unknown async_option value: '" + + async_option + "'"); + } + ProcessBuilder pb = ProcessTools.createJavaProcessBuilder("-Xlog:safepoint+cleanup=info", + UNLOCK_DIAG_OPTION, + async_option, InnerClass.class.getName()); analyzeOutputOn(pb); pb = ProcessTools.createJavaProcessBuilder("-XX:+TraceSafepointCleanupTime", + UNLOCK_DIAG_OPTION, + async_option, InnerClass.class.getName()); analyzeOutputOn(pb); pb = ProcessTools.createJavaProcessBuilder("-Xlog:safepoint+cleanup=off", + UNLOCK_DIAG_OPTION, + async_option, InnerClass.class.getName()); analyzeOutputOff(pb); pb = ProcessTools.createJavaProcessBuilder("-XX:-TraceSafepointCleanupTime", + UNLOCK_DIAG_OPTION, + async_option, InnerClass.class.getName()); analyzeOutputOff(pb); } --- old/test/jdk/java/rmi/server/UnicastRemoteObject/unexportObject/UnexportLeak.java 2019-12-11 14:52:38.000000000 -0500 +++ new/test/jdk/java/rmi/server/UnicastRemoteObject/unexportObject/UnexportLeak.java 2019-12-11 14:52:38.000000000 -0500 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -74,7 +74,7 @@ } /** - * Force desparate garbage collection so that all WeakReference instances + * Force desperate garbage collection so that all WeakReference instances * will be cleared. */ private static void flushRefs() { @@ -85,6 +85,9 @@ chain.addElement(hungry); } } catch (OutOfMemoryError e) { + // An inflated Java monitor can keep 'obj' alive so request + // an explicit GC to make sure things are cleaned up. + System.gc(); } } }