--- old/src/hotspot/share/runtime/synchronizer.cpp 2019-10-17 17:29:14.000000000 -0400 +++ new/src/hotspot/share/runtime/synchronizer.cpp 2019-10-17 17:29:13.000000000 -0400 @@ -37,6 +37,7 @@ #include "runtime/atomic.hpp" #include "runtime/biasedLocking.hpp" #include "runtime/handles.inline.hpp" +#include "runtime/handshake.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/objectMonitor.hpp" @@ -118,21 +119,269 @@ // global list of blocks of monitors PaddedObjectMonitor* volatile ObjectSynchronizer::g_block_list = NULL; +bool volatile ObjectSynchronizer::_is_async_deflation_requested = false; +bool volatile ObjectSynchronizer::_is_special_deflation_requested = false; +jlong ObjectSynchronizer::_last_async_deflation_time_ns = 0; + // Global ObjectMonitor free list. Newly allocated and deflated // ObjectMonitors are prepended here. -ObjectMonitor* volatile ObjectSynchronizer::g_free_list = NULL; +static ObjectMonitor* volatile g_free_list = NULL; // Global ObjectMonitor in-use list. When a JavaThread is exiting, // ObjectMonitors on its per-thread in-use list are prepended here. -ObjectMonitor* volatile ObjectSynchronizer::g_om_in_use_list = NULL; -int ObjectSynchronizer::g_om_in_use_count = 0; // # on g_om_in_use_list - -static volatile intptr_t gListLock = 0; // protects global monitor lists -static volatile int g_om_free_count = 0; // # on g_free_list -static volatile int g_om_population = 0; // # Extant -- in circulation +static ObjectMonitor* volatile g_om_in_use_list = NULL; +// Global ObjectMonitor wait list. If HandshakeAfterDeflateIdleMonitors +// is true, deflated ObjectMonitors wait on this list until after a +// handshake or a safepoint for platforms that don't support handshakes. +// After the handshake or safepoint, the deflated ObjectMonitors are +// prepended to g_free_list. +static ObjectMonitor* volatile g_wait_list = NULL; + +static volatile int g_om_free_count = 0; // # on g_free_list +static volatile int g_om_in_use_count = 0; // # on g_om_in_use_list +static volatile int g_om_population = 0; // # Extant -- in circulation +static volatile int g_om_wait_count = 0; // # on g_wait_list #define CHAINMARKER (cast_to_oop(-1)) +// =====================> List Management functions + +// Return true if the ObjectMonitor's next field is marked. +// Otherwise returns false. +static bool is_next_marked(ObjectMonitor* om) { + return ((intptr_t)OrderAccess::load_acquire(&om->_next_om) & 0x1) != 0; +} + +// Mark an ObjectMonitor* and return it. Note: the om parameter +// may or may not have been marked originally. +static ObjectMonitor* mark_om_ptr(ObjectMonitor* om) { + return (ObjectMonitor*)((intptr_t)om | 0x1); +} + +// Mark the next field in an ObjectMonitor. If marking was successful, +// then the unmarked next field is returned via parameter and true is +// returned. Otherwise false is returned. +static bool mark_next(ObjectMonitor* om, ObjectMonitor** next_p) { + // Get current next field without any marking value. + ObjectMonitor* next = (ObjectMonitor*) + ((intptr_t)OrderAccess::load_acquire(&om->_next_om) & ~0x1); + if (Atomic::cmpxchg(mark_om_ptr(next), &om->_next_om, next) != next) { + return false; // Could not mark the next field or it was already marked. + } + *next_p = next; + return true; +} + +// Loop until we mark the next field in an ObjectMonitor. The unmarked +// next field is returned. 
+static ObjectMonitor* mark_next_loop(ObjectMonitor* om) { + ObjectMonitor* next; + while (true) { + if (mark_next(om, &next)) { + // Marked om's next field so return the unmarked value. + return next; + } + } +} + +// Set the next field in an ObjectMonitor to the specified value. +// The caller of set_next() must be the same thread that marked the +// ObjectMonitor. +static void set_next(ObjectMonitor* om, ObjectMonitor* value) { + OrderAccess::release_store(&om->_next_om, value); +} + +// Mark the next field in the list head ObjectMonitor. If marking was +// successful, then the mid and the unmarked next field are returned +// via parameter and true is returned. Otherwise false is returned. +static bool mark_list_head(ObjectMonitor* volatile * list_p, + ObjectMonitor** mid_p, ObjectMonitor** next_p) { + while (true) { + ObjectMonitor* mid = OrderAccess::load_acquire(list_p); + if (mid == NULL) { + return false; // The list is empty so nothing to mark. + } + if (mark_next(mid, next_p)) { + if (OrderAccess::load_acquire(list_p) != mid) { + // The list head changed so we have to retry. + set_next(mid, *next_p); // unmark mid + continue; + } + // We marked next field to guard against races. + *mid_p = mid; + return true; + } + } +} + +// Return the unmarked next field in an ObjectMonitor. Note: the next +// field may or may not have been marked originally. +static ObjectMonitor* unmarked_next(ObjectMonitor* om) { + return (ObjectMonitor*)((intptr_t)OrderAccess::load_acquire(&om->_next_om) & ~0x1); +} + +// Prepend a list of ObjectMonitors to the specified *list_p. 'tail' is +// the last ObjectMonitor in the list and there are 'count' on the list. +// Also updates the specified *count_p. +static void prepend_list_to_common(ObjectMonitor* list, ObjectMonitor* tail, + int count, ObjectMonitor* volatile* list_p, + volatile int* count_p) { + while (true) { + ObjectMonitor* cur = OrderAccess::load_acquire(list_p); + // Prepend list to *list_p. + ObjectMonitor* next = NULL; + if (!mark_next(tail, &next)) { + continue; // failed to mark next field so try it all again + } + set_next(tail, cur); // tail now points to cur (and unmarks tail) + if (cur == NULL) { + // No potential race with takers or other prependers since + // *list_p is empty. + if (Atomic::cmpxchg(list, list_p, cur) == cur) { + // Successfully switched *list_p to the list value. + Atomic::add(count, count_p); + break; + } + // Implied else: try it all again + } else { + // Try to mark next field to guard against races: + if (!mark_next(cur, &next)) { + continue; // failed to mark next field so try it all again + } + // We marked the next field so try to switch *list_p to the list value. + if (Atomic::cmpxchg(list, list_p, cur) != cur) { + // The list head has changed so unmark the next field and try again: + set_next(cur, next); + continue; + } + Atomic::add(count, count_p); + set_next(cur, next); // unmark next field + break; + } + } +} + +// Prepend a newly allocated block of ObjectMonitors to g_block_list and +// g_free_list. Also updates g_om_population and g_om_free_count. +void ObjectSynchronizer::prepend_block_to_lists(PaddedObjectMonitor* new_blk) { + // First we handle g_block_list: + while (true) { + PaddedObjectMonitor* cur = g_block_list; + // Prepend new_blk to g_block_list. The first ObjectMonitor in + // a block is reserved for use as linkage to the next block. + new_blk[0]._next_om = cur; + if (Atomic::cmpxchg(new_blk, &g_block_list, cur) == cur) { + // Successfully switched g_block_list to the new_blk value. 
+ Atomic::add(_BLOCKSIZE - 1, &g_om_population); + break; + } + // Implied else: try it all again + } + + // Second we handle g_free_list: + prepend_list_to_common(new_blk + 1, &new_blk[_BLOCKSIZE - 1], _BLOCKSIZE - 1, + &g_free_list, &g_om_free_count); +} + +// Prepend a list of ObjectMonitors to g_free_list. 'tail' is the last +// ObjectMonitor in the list and there are 'count' on the list. Also +// updates g_om_free_count. +static void prepend_list_to_g_free_list(ObjectMonitor* list, + ObjectMonitor* tail, int count) { + prepend_list_to_common(list, tail, count, &g_free_list, &g_om_free_count); +} + +// Prepend a list of ObjectMonitors to g_wait_list. 'tail' is the last +// ObjectMonitor in the list and there are 'count' on the list. Also +// updates g_om_wait_count. +static void prepend_list_to_g_wait_list(ObjectMonitor* list, + ObjectMonitor* tail, int count) { + assert(HandshakeAfterDeflateIdleMonitors, "sanity check"); + prepend_list_to_common(list, tail, count, &g_wait_list, &g_om_wait_count); +} + +// Prepend a list of ObjectMonitors to g_om_in_use_list. 'tail' is the last +// ObjectMonitor in the list and there are 'count' on the list. Also +// updates g_om_in_use_count. +static void prepend_list_to_g_om_in_use_list(ObjectMonitor* list, + ObjectMonitor* tail, int count) { + prepend_list_to_common(list, tail, count, &g_om_in_use_list, &g_om_in_use_count); +} + +// Prepend an ObjectMonitor to the specified list. Also updates +// the specified counter. +static void prepend_to_common(ObjectMonitor* m, ObjectMonitor* volatile * list_p, + int volatile * count_p) { + while (true) { + (void)mark_next_loop(m); // mark m so we can safely update its next field + ObjectMonitor* cur = NULL; + ObjectMonitor* next = NULL; + // Mark the list head to guard against A-B-A race: + if (mark_list_head(list_p, &cur, &next)) { + // List head is now marked so we can safely switch it. + set_next(m, cur); // m now points to cur (and unmarks m) + OrderAccess::release_store(list_p, m); // Switch list head to unmarked m. + set_next(cur, next); // Unmark the previous list head. + break; + } + // The list is empty so try to set the list head. + assert(cur == NULL, "cur must be NULL: cur=" INTPTR_FORMAT, p2i(cur)); + set_next(m, cur); // m now points to NULL (and unmarks m) + if (Atomic::cmpxchg(m, list_p, cur) == cur) { + // List head is now unmarked m. + break; + } + // Implied else: try it all again + } + Atomic::inc(count_p); +} + +// Prepend an ObjectMonitor to a per-thread om_free_list. +// Also updates the per-thread om_free_count. +static void prepend_to_om_free_list(Thread* self, ObjectMonitor* m) { + prepend_to_common(m, &self->om_free_list, &self->om_free_count); +} + +// Prepend an ObjectMonitor to a per-thread om_in_use_list. +// Also updates the per-thread om_in_use_count. +static void prepend_to_om_in_use_list(Thread* self, ObjectMonitor* m) { + prepend_to_common(m, &self->om_in_use_list, &self->om_in_use_count); +} + +// Take an ObjectMonitor from the start of the specified list. Also +// decrements the specified counter. Returns NULL if none are available. +static ObjectMonitor* take_from_start_of_common(ObjectMonitor* volatile * list_p, + int volatile * count_p) { + ObjectMonitor* next = NULL; + ObjectMonitor* take = NULL; + // Mark the list head to guard against A-B-A race: + if (!mark_list_head(list_p, &take, &next)) { + return NULL; // None are available.
+ } + // Switch marked list head to next (which unmarks the list head, but + // leaves take marked): + OrderAccess::release_store(list_p, next); + Atomic::dec(count_p); + // Unmark take, but leave the next value for any lagging list + // walkers. It will get cleaned up when take is prepended to + // the in-use list: + set_next(take, next); + return take; +} + +// Take an ObjectMonitor from the start of the global free-list. Also +// updates g_om_free_count. Returns NULL if none are available. +static ObjectMonitor* take_from_start_of_g_free_list() { + return take_from_start_of_common(&g_free_list, &g_om_free_count); +} + +// Take an ObjectMonitor from the start of a per-thread free-list. +// Also updates om_free_count. Returns NULL if none are available. +static ObjectMonitor* take_from_start_of_om_free_list(Thread* self) { + return take_from_start_of_common(&self->om_free_list, &self->om_free_count); +} + + // =====================> Quick functions // The quick_* forms are special fast-path variants used to improve @@ -211,39 +460,59 @@ assert(((JavaThread *) self)->thread_state() == _thread_in_Java, "invariant"); NoSafepointVerifier nsv; if (obj == NULL) return false; // Need to throw NPE - const markWord mark = obj->mark(); - if (mark.has_monitor()) { - ObjectMonitor* const m = mark.monitor(); - assert(m->object() == obj, "invariant"); - Thread* const owner = (Thread *) m->_owner; - - // Lock contention and Transactional Lock Elision (TLE) diagnostics - // and observability - // Case: light contention possibly amenable to TLE - // Case: TLE inimical operations such as nested/recursive synchronization + while (true) { + const markWord mark = obj->mark(); - if (owner == self) { - m->_recursions++; - return true; - } + if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* const m = omh.om_ptr(); + assert(m->object() == obj, "invariant"); + Thread* const owner = (Thread *) m->_owner; + + // Lock contention and Transactional Lock Elision (TLE) diagnostics + // and observability + // Case: light contention possibly amenable to TLE + // Case: TLE inimical operations such as nested/recursive synchronization + + if (owner == self) { + m->_recursions++; + return true; + } - // This Java Monitor is inflated so obj's header will never be - // displaced to this thread's BasicLock. Make the displaced header - // non-NULL so this BasicLock is not seen as recursive nor as - // being locked. We do this unconditionally so that this thread's - // BasicLock cannot be mis-interpreted by any stack walkers. For - // performance reasons, stack walkers generally first check for - // Biased Locking in the object's header, the second check is for - // stack-locking in the object's header, the third check is for - // recursive stack-locking in the displaced header in the BasicLock, - // and last are the inflated Java Monitor (ObjectMonitor) checks. - lock->set_displaced_header(markWord::unused_mark()); + // This Java Monitor is inflated so obj's header will never be + // displaced to this thread's BasicLock. Make the displaced header + // non-NULL so this BasicLock is not seen as recursive nor as + // being locked. We do this unconditionally so that this thread's + // BasicLock cannot be mis-interpreted by any stack walkers. 
For + // performance reasons, stack walkers generally first check for + // Biased Locking in the object's header, the second check is for + // stack-locking in the object's header, the third check is for + // recursive stack-locking in the displaced header in the BasicLock, + // and last are the inflated Java Monitor (ObjectMonitor) checks. + lock->set_displaced_header(markWord::unused_mark()); + + if (owner == NULL && m->try_set_owner_from(self, NULL) == NULL) { + assert(m->_recursions == 0, "invariant"); + return true; + } - if (owner == NULL && Atomic::replace_if_null(self, &(m->_owner))) { - assert(m->_recursions == 0, "invariant"); - return true; + if (AsyncDeflateIdleMonitors && + m->try_set_owner_from(self, DEFLATER_MARKER) == DEFLATER_MARKER) { + // The deflation protocol finished the first part (setting owner), + // but it failed the second part (making ref_count negative) and + // bailed. Or the ObjectMonitor was async deflated and reused. + // Acquired the monitor. + assert(m->_recursions == 0, "invariant"); + return true; + } } + break; } // Note that we could inflate in quick_enter. @@ -295,7 +564,9 @@ // must be non-zero to avoid looking like a re-entrant lock, // and must not look locked either. lock->set_displaced_header(markWord::unused_mark()); - inflate(THREAD, obj(), inflate_cause_monitor_enter)->enter(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_monitor_enter); + omh.om_ptr()->enter(THREAD); } void ObjectSynchronizer::exit(oop object, BasicLock* lock, TRAPS) { @@ -344,7 +615,9 @@ } // We have to take the slow-path of possible inflation and then exit. - inflate(THREAD, object, inflate_cause_vm_internal)->exit(true, THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, object, inflate_cause_vm_internal); + omh.om_ptr()->exit(true, THREAD); } // ----------------------------------------------------------------------------- @@ -365,9 +638,10 @@ assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } - ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_vm_internal); - - return monitor->complete_exit(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_vm_internal); + intptr_t ret_code = omh.om_ptr()->complete_exit(THREAD); + return ret_code; } // NOTE: must use heavy weight monitor to handle complete_exit/reenter() @@ -377,9 +651,9 @@ assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } - ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_vm_internal); - - monitor->reenter(recursion, THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_vm_internal); + omh.om_ptr()->reenter(recursion, THREAD); } // ----------------------------------------------------------------------------- // JNI locks on java objects @@ -391,7 +665,9 @@ assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } THREAD->set_current_pending_monitor_is_from_java(false); - inflate(THREAD, obj(), inflate_cause_jni_enter)->enter(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_jni_enter); + omh.om_ptr()->enter(THREAD); THREAD->set_current_pending_monitor_is_from_java(true); } @@ -404,7 +680,9 @@ } assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); - ObjectMonitor* monitor = inflate(THREAD, obj, inflate_cause_jni_exit); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj, inflate_cause_jni_exit); + ObjectMonitor* monitor = omh.om_ptr(); // If this thread has locked the 
object, exit the monitor. We // intentionally do not use CHECK here because we must exit the // monitor even if an exception is pending. @@ -445,7 +723,9 @@ if (millis < 0) { THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(), "timeout value is negative"); } - ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_wait); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_wait); + ObjectMonitor* monitor = omh.om_ptr(); DTRACE_MONITOR_WAIT_PROBE(monitor, obj(), THREAD, millis); monitor->wait(millis, true, THREAD); @@ -454,7 +734,8 @@ // that's fixed we can uncomment the following line, remove the call // and change this function back into a "void" func. // DTRACE_MONITOR_PROBE(waited, monitor, obj(), THREAD); - return dtrace_waited_probe(monitor, obj, THREAD); + int ret_code = dtrace_waited_probe(monitor, obj, THREAD); + return ret_code; } void ObjectSynchronizer::wait_uninterruptibly(Handle obj, jlong millis, TRAPS) { @@ -465,7 +746,9 @@ if (millis < 0) { THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), "timeout value is negative"); } - inflate(THREAD, obj(), inflate_cause_wait)->wait(millis, false, THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_wait); + omh.om_ptr()->wait(millis, false, THREAD); } void ObjectSynchronizer::notify(Handle obj, TRAPS) { @@ -478,7 +761,9 @@ if (mark.has_locker() && THREAD->is_lock_owned((address)mark.locker())) { return; } - inflate(THREAD, obj(), inflate_cause_notify)->notify(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_notify); + omh.om_ptr()->notify(THREAD); } // NOTE: see comment of notify() @@ -492,7 +777,9 @@ if (mark.has_locker() && THREAD->is_lock_owned((address)mark.locker())) { return; } - inflate(THREAD, obj(), inflate_cause_notify)->notifyAll(THREAD); + ObjectMonitorHandle omh; + inflate(&omh, THREAD, obj(), inflate_cause_notify); + omh.om_ptr()->notifyAll(THREAD); } // ----------------------------------------------------------------------------- @@ -517,15 +804,15 @@ // performed by the CPU(s) or platform. struct SharedGlobals { - char _pad_prefix[DEFAULT_CACHE_LINE_SIZE]; + char _pad_prefix[OM_CACHE_LINE_SIZE]; // These are highly shared mostly-read variables. // To avoid false-sharing they need to be the sole occupants of a cache line. 
volatile int stw_random; volatile int stw_cycle; - DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int) * 2); + DEFINE_PAD_MINUS_SIZE(1, OM_CACHE_LINE_SIZE, sizeof(volatile int) * 2); // Hot RW variable -- Sequester to avoid false-sharing volatile int hc_sequence; - DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int)); + DEFINE_PAD_MINUS_SIZE(2, OM_CACHE_LINE_SIZE, sizeof(volatile int)); }; static SharedGlobals GVars; @@ -686,79 +973,93 @@ assert(Universe::verify_in_progress() || DumpSharedSpaces || ((JavaThread *)self)->thread_state() != _thread_blocked, "invariant"); - ObjectMonitor* monitor = NULL; - markWord temp, test; - intptr_t hash; - markWord mark = read_stable_mark(obj); + while (true) { + ObjectMonitor* monitor = NULL; + markWord temp, test; + intptr_t hash; + markWord mark = read_stable_mark(obj); - // object should remain ineligible for biased locking - assert(!mark.has_bias_pattern(), "invariant"); + // object should remain ineligible for biased locking + assert(!mark.has_bias_pattern(), "invariant"); - if (mark.is_neutral()) { - hash = mark.hash(); // this is a normal header - if (hash != 0) { // if it has hash, just return it - return hash; - } - hash = get_next_hash(self, obj); // allocate a new hash code - temp = mark.copy_set_hash(hash); // merge the hash code into header - // use (machine word version) atomic operation to install the hash - test = obj->cas_set_mark(temp, mark); - if (test == mark) { - return hash; - } - // If atomic operation failed, we must inflate the header - // into heavy weight monitor. We could add more code here - // for fast path, but it does not worth the complexity. - } else if (mark.has_monitor()) { - monitor = mark.monitor(); - temp = monitor->header(); - assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); - hash = temp.hash(); - if (hash != 0) { - return hash; - } - // Skip to the following code to reduce code size - } else if (self->is_lock_owned((address)mark.locker())) { - temp = mark.displaced_mark_helper(); // this is a lightweight monitor owned - assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); - hash = temp.hash(); // by current thread, check if the displaced - if (hash != 0) { // header contains hash code - return hash; - } - // WARNING: - // The displaced header in the BasicLock on a thread's stack - // is strictly immutable. It CANNOT be changed in ANY cases. - // So we have to inflate the stack lock into an ObjectMonitor - // even if the current thread owns the lock. The BasicLock on - // a thread's stack can be asynchronously read by other threads - // during an inflate() call so any change to that stack memory - // may not propagate to other threads correctly. - } - - // Inflate the monitor to set hash code - monitor = inflate(self, obj, inflate_cause_hash_code); - // Load displaced header and check it has hash code - mark = monitor->header(); - assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value()); - hash = mark.hash(); - if (hash == 0) { - hash = get_next_hash(self, obj); - temp = mark.copy_set_hash(hash); // merge hash code into header - assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); - uintptr_t v = Atomic::cmpxchg(temp.value(), (volatile uintptr_t*)monitor->header_addr(), mark.value()); - test = markWord(v); - if (test != mark) { - // The only non-deflation update to the ObjectMonitor's - // header/dmw field is to merge in the hash code. 
If someone - // adds a new usage of the header/dmw field, please update - // this code. - hash = test.hash(); - assert(test.is_neutral(), "invariant: header=" INTPTR_FORMAT, test.value()); - assert(hash != 0, "Trivial unexpected object/monitor header usage."); + if (mark.is_neutral()) { + hash = mark.hash(); // this is a normal header + if (hash != 0) { // if it has hash, just return it + return hash; + } + hash = get_next_hash(self, obj); // allocate a new hash code + temp = mark.copy_set_hash(hash); // merge the hash code into header + // use (machine word version) atomic operation to install the hash + test = obj->cas_set_mark(temp, mark); + if (test == mark) { + return hash; + } + // If atomic operation failed, we must inflate the header + // into heavy weight monitor. We could add more code here + // for fast path, but it is not worth the complexity. + } else if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + monitor = omh.om_ptr(); + temp = monitor->header(); + assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); + hash = temp.hash(); + if (hash != 0) { + return hash; + } + // Skip to the following code to reduce code size + } else if (self->is_lock_owned((address)mark.locker())) { + temp = mark.displaced_mark_helper(); // this is a lightweight monitor owned + assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); + hash = temp.hash(); // by current thread, check if the displaced + if (hash != 0) { // header contains hash code + return hash; + } + // WARNING: + // The displaced header in the BasicLock on a thread's stack + // is strictly immutable. It CANNOT be changed in ANY cases. + // So we have to inflate the stack lock into an ObjectMonitor + // even if the current thread owns the lock. The BasicLock on + // a thread's stack can be asynchronously read by other threads + // during an inflate() call so any change to that stack memory + // may not propagate to other threads correctly. + } + + // Inflate the monitor to set hash code + ObjectMonitorHandle omh; + inflate(&omh, self, obj, inflate_cause_hash_code); + monitor = omh.om_ptr(); + // Load displaced header and check it has hash code + mark = monitor->header(); + assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value()); + hash = mark.hash(); + if (hash == 0) { + hash = get_next_hash(self, obj); + temp = mark.copy_set_hash(hash); // merge hash code into header + assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); + uintptr_t v = Atomic::cmpxchg(temp.value(), (volatile uintptr_t*)monitor->header_addr(), mark.value()); + test = markWord(v); + if (test != mark) { + // The only non-deflation update to the ObjectMonitor's + // header/dmw field is to merge in the hash code. If someone + // adds a new usage of the header/dmw field, please update + // this code. + // ObjectMonitor::install_displaced_markword_in_object() + // does mark the header/dmw field as part of async deflation, + // but that protocol cannot happen now due to the + // ObjectMonitorHandle above. + hash = test.hash(); + assert(test.is_neutral(), "invariant: header=" INTPTR_FORMAT, test.value()); + assert(hash != 0, "Trivial unexpected object/monitor header usage."); + } } + // We finally get the hash + return hash; } - // We finally get the hash - return hash; } // Deprecated -- use FastHashCode() instead.
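The save_om_ptr() retry idiom above recurs at every call site in this patch that reads an inflated monitor: re-read the mark word, try to bump the ObjectMonitor's ref_count through an ObjectMonitorHandle, and retry if async deflation won the race. A minimal sketch of the pattern follows; read_monitor_header() is a hypothetical caller standing in for FastHashCode() and the other sites, while oop, markWord and ObjectMonitorHandle are the types the patch itself uses, so the sketch only compiles inside HotSpot.

// Sketch only -- the shape of the save_om_ptr() retry idiom, not patch code.
static uintptr_t read_monitor_header(oop obj) {
  while (true) {
    const markWord mark = obj->mark();
    if (!mark.has_monitor()) {
      return 0;  // Nothing inflated to read from.
    }
    ObjectMonitorHandle omh;
    if (!omh.save_om_ptr(obj, mark)) {
      // save_om_ptr() could not bump ref_count: async deflation got there
      // first and the mark word changed, so re-read the mark and retry.
      assert(AsyncDeflateIdleMonitors, "sanity check");
      continue;
    }
    // ref_count is now positive, so this ObjectMonitor cannot finish
    // async deflation while omh is live.
    return omh.om_ptr()->header().value();
  }
}

The handle is what makes the early returns in the real call sites safe: it is expected to drop the ref_count again when it goes out of scope, so every return path releases the monitor automatically.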
@@ -778,20 +1079,28 @@ assert(thread == JavaThread::current(), "Can only be called on current thread"); oop obj = h_obj(); - markWord mark = read_stable_mark(obj); + while (true) { + markWord mark = read_stable_mark(obj); - // Uncontended case, header points to stack - if (mark.has_locker()) { - return thread->is_lock_owned((address)mark.locker()); - } - // Contended case, header points to ObjectMonitor (tagged pointer) - if (mark.has_monitor()) { - ObjectMonitor* monitor = mark.monitor(); - return monitor->is_entered(thread) != 0; + // Uncontended case, header points to stack + if (mark.has_locker()) { + return thread->is_lock_owned((address)mark.locker()); + } + // Contended case, header points to ObjectMonitor (tagged pointer) + if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + bool ret_code = omh.om_ptr()->is_entered(thread) != 0; + return ret_code; + } + // Unlocked case, header in place + assert(mark.is_neutral(), "sanity check"); + return false; } - // Unlocked case, header in place - assert(mark.is_neutral(), "sanity check"); - return false; } // Be aware of this method could revoke bias of the lock object. @@ -817,27 +1126,37 @@ assert(self == JavaThread::current(), "Can only be called on current thread"); oop obj = h_obj(); - markWord mark = read_stable_mark(obj); - // CASE: stack-locked. Mark points to a BasicLock on the owner's stack. - if (mark.has_locker()) { - return self->is_lock_owned((address)mark.locker()) ? - owner_self : owner_other; - } + while (true) { + markWord mark = read_stable_mark(obj); - // CASE: inflated. Mark (tagged pointer) points to an ObjectMonitor. - // The Object:ObjectMonitor relationship is stable as long as we're - // not at a safepoint. - if (mark.has_monitor()) { - void* owner = mark.monitor()->_owner; - if (owner == NULL) return owner_none; - return (owner == self || - self->is_lock_owned((address)owner)) ? owner_self : owner_other; - } + // CASE: stack-locked. Mark points to a BasicLock on the owner's stack. + if (mark.has_locker()) { + return self->is_lock_owned((address)mark.locker()) ? + owner_self : owner_other; + } + + // CASE: inflated. Mark (tagged pointer) points to an ObjectMonitor. + // The Object:ObjectMonitor relationship is stable as long as we're + // not at a safepoint and AsyncDeflateIdleMonitors is false. + if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* monitor = omh.om_ptr(); + void* owner = monitor->_owner; + if (owner == NULL) return owner_none; + return (owner == self || + self->is_lock_owned((address)owner)) ? 
owner_self : owner_other; + } - // CASE: neutral - assert(mark.is_neutral(), "sanity check"); - return owner_none; // it's unlocked + // CASE: neutral + assert(mark.is_neutral(), "sanity check"); + return owner_none; // it's unlocked + } } // FIXME: jvmti should call this @@ -852,33 +1171,41 @@ } oop obj = h_obj(); - address owner = NULL; - markWord mark = read_stable_mark(obj); + while (true) { + address owner = NULL; + markWord mark = read_stable_mark(obj); - // Uncontended case, header points to stack - if (mark.has_locker()) { - owner = (address) mark.locker(); - } + // Uncontended case, header points to stack + if (mark.has_locker()) { + owner = (address) mark.locker(); + } - // Contended case, header points to ObjectMonitor (tagged pointer) - else if (mark.has_monitor()) { - ObjectMonitor* monitor = mark.monitor(); - assert(monitor != NULL, "monitor should be non-null"); - owner = (address) monitor->owner(); - } + // Contended case, header points to ObjectMonitor (tagged pointer) + else if (mark.has_monitor()) { + ObjectMonitorHandle omh; + if (!omh.save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* monitor = omh.om_ptr(); + assert(monitor != NULL, "monitor should be non-null"); + owner = (address) monitor->owner(); + } - if (owner != NULL) { - // owning_thread_from_monitor_owner() may also return NULL here - return Threads::owning_thread_from_monitor_owner(t_list, owner); - } + if (owner != NULL) { + // owning_thread_from_monitor_owner() may also return NULL here + return Threads::owning_thread_from_monitor_owner(t_list, owner); + } - // Unlocked case, header in place - // Cannot have assertion since this object may have been - // locked by another thread when reaching here. - // assert(mark.is_neutral(), "sanity check"); + // Unlocked case, header in place + // Cannot have assertion since this object may have been + // locked by another thread when reaching here. + // assert(mark.is_neutral(), "sanity check"); - return NULL; + return NULL; + } } // Visitors ... @@ -889,32 +1216,101 @@ assert(block->object() == CHAINMARKER, "must be a block header"); for (int i = _BLOCKSIZE - 1; i > 0; i--) { ObjectMonitor* mid = (ObjectMonitor *)(block + i); - oop object = (oop)mid->object(); - if (object != NULL) { - // Only process with closure if the object is set. + if (mid->is_active()) { + ObjectMonitorHandle omh(mid); + + if (mid->object() == NULL || + (AsyncDeflateIdleMonitors && mid->ref_count() < 0)) { + // Only process with closure if the object is set. + // For async deflation, race here if monitor is not owned! + // The above ref_count bump (in the ObjectMonitorHandle ctor) + // will cause subsequent async deflation to skip it. + // However, previous or concurrent async deflation is a race + // so skip this ObjectMonitor if it is being async deflated. + continue; + } closure->do_monitor(mid); } } - block = (PaddedObjectMonitor*)block->_next_om; + // unmarked_next() is not needed with g_block_list (no next field marking).
+ block = (PaddedObjectMonitor*)OrderAccess::load_acquire(&block->_next_om); } } static bool monitors_used_above_threshold() { - if (g_om_population == 0) { + if (OrderAccess::load_acquire(&g_om_population) == 0) { return false; } - int monitors_used = g_om_population - g_om_free_count; - int monitor_usage = (monitors_used * 100LL) / g_om_population; - return monitor_usage > MonitorUsedDeflationThreshold; + if (MonitorUsedDeflationThreshold > 0) { + int monitors_used = OrderAccess::load_acquire(&g_om_population) - + OrderAccess::load_acquire(&g_om_free_count); + if (HandshakeAfterDeflateIdleMonitors) { + monitors_used -= OrderAccess::load_acquire(&g_om_wait_count); + } + int monitor_usage = (monitors_used * 100LL) / + OrderAccess::load_acquire(&g_om_population); + return monitor_usage > MonitorUsedDeflationThreshold; + } + return false; } -bool ObjectSynchronizer::is_cleanup_needed() { - if (MonitorUsedDeflationThreshold > 0) { - return monitors_used_above_threshold(); +// Returns true if MonitorBound is set (> 0) and if the specified +// cnt is > MonitorBound. Otherwise returns false. +static bool is_MonitorBound_exceeded(const int cnt) { + const int mx = MonitorBound; + return mx > 0 && cnt > mx; +} + +bool ObjectSynchronizer::is_async_deflation_needed() { + if (!AsyncDeflateIdleMonitors) { + return false; + } + if (is_async_deflation_requested()) { + // Async deflation request. + return true; + } + if (AsyncDeflationInterval > 0 && + time_since_last_async_deflation_ms() > AsyncDeflationInterval && + monitors_used_above_threshold()) { + // It's been longer than our specified deflate interval and there + // are too many monitors in use. We don't deflate more frequently + // than AsyncDeflationInterval (unless is_async_deflation_requested) + // in order to not swamp the ServiceThread. + _last_async_deflation_time_ns = os::javaTimeNanos(); + return true; + } + int monitors_used = OrderAccess::load_acquire(&g_om_population) - + OrderAccess::load_acquire(&g_om_free_count); + if (HandshakeAfterDeflateIdleMonitors) { + monitors_used -= OrderAccess::load_acquire(&g_om_wait_count); + } + if (is_MonitorBound_exceeded(monitors_used)) { + // Not enough ObjectMonitors on the global free list. + return true; + } + return false; +} + +bool ObjectSynchronizer::is_safepoint_deflation_needed() { + if (!AsyncDeflateIdleMonitors) { + if (monitors_used_above_threshold()) { + // Too many monitors in use. + return true; + } + return false; + } + if (is_special_deflation_requested()) { + // For AsyncDeflateIdleMonitors only do a safepoint deflation + // if there is a special deflation request. + return true; } return false; } +jlong ObjectSynchronizer::time_since_last_async_deflation_ms() { + return (os::javaTimeNanos() - _last_async_deflation_time_ns) / (NANOUNITS / MILLIUNITS); +} + void ObjectSynchronizer::oops_do(OopClosure* f) { // We only scan the global used list here (for moribund threads), and // the thread-local monitors in Thread::oops_do(). 
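The percentage test in monitors_used_above_threshold() above is easy to misread, so here is a self-contained sketch of the same arithmetic; the counts are assumed sample values, not the live g_om_* counters, and 90 stands in for MonitorUsedDeflationThreshold.

// Sketch: the deflation-threshold arithmetic with assumed sample counts.
#include <stdio.h>

int main() {
  int population = 1024;  // stand-in for g_om_population (all extant monitors)
  int free_count = 128;   // stand-in for g_om_free_count (global free list)
  int wait_count = 64;    // stand-in for g_om_wait_count (awaiting handshake)
  int threshold  = 90;    // stand-in for MonitorUsedDeflationThreshold (%)

  int used = population - free_count;
  // With HandshakeAfterDeflateIdleMonitors, monitors on g_wait_list are
  // already deflated, so they are subtracted from the in-use figure:
  used -= wait_count;

  long long usage = (used * 100LL) / population;  // (1024-128-64)*100/1024 = 81
  printf("usage=%lld%%, deflation %s\n", usage,
         usage > threshold ? "needed" : "not needed");
  return 0;
}

With these sample counts the usage works out to 81%, below the 90% threshold, so the threshold policy alone would trigger neither the safepoint path nor the async path.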
@@ -923,18 +1319,20 @@ void ObjectSynchronizer::global_used_oops_do(OopClosure* f) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); - list_oops_do(g_om_in_use_list, f); + list_oops_do(OrderAccess::load_acquire(&g_om_in_use_list), OrderAccess::load_acquire(&g_om_in_use_count), f); } void ObjectSynchronizer::thread_local_used_oops_do(Thread* thread, OopClosure* f) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); - list_oops_do(thread->om_in_use_list, f); + list_oops_do(OrderAccess::load_acquire(&thread->om_in_use_list), OrderAccess::load_acquire(&thread->om_in_use_count), f); } -void ObjectSynchronizer::list_oops_do(ObjectMonitor* list, OopClosure* f) { +void ObjectSynchronizer::list_oops_do(ObjectMonitor* list, int count, OopClosure* f) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); - ObjectMonitor* mid; - for (mid = list; mid != NULL; mid = mid->_next_om) { + // The oops_do() phase does not overlap with monitor deflation + // so no need to update the ObjectMonitor's ref_count for this + // ObjectMonitor* use. + for (ObjectMonitor* mid = list; mid != NULL; mid = unmarked_next(mid)) { if (mid->object() != NULL) { f->do_oop((oop*)mid->object_addr()); } @@ -950,9 +1348,6 @@ // STW-time -- disassociates idle monitors from objects. Such // scavenged monitors are returned to the g_free_list. // -// The global list is protected by gListLock. All the critical sections -// are short and operate in constant-time. -// // ObjectMonitors reside in type-stable memory (TSM) and are immortal. // // Lifecycle: @@ -966,6 +1361,7 @@ // // If MonitorBound is not set (<= 0), MonitorBound checks are disabled. // +// When safepoint deflation is being used (!AsyncDeflateIdleMonitors): // The monitor pool is grow-only. We scavenge at STW safepoint-time, but the // the rate of scavenging is driven primarily by GC. As such, we can find // an inordinate number of monitors in circulation. @@ -980,13 +1376,26 @@ // // The current implementation uses asynchronous VM operations. // -// If MonitorBound is set, the boundry applies to +// When safepoint deflation is being used and MonitorBound is set, the +// boundary applies to // (g_om_population - g_om_free_count) // i.e., if there are not enough ObjectMonitors on the global free list, // then a safepoint deflation is induced. Picking a good MonitorBound value // is non-trivial. +// +// When async deflation is being used: +// The monitor pool is still grow-only. Async deflation is requested +// by a safepoint's cleanup phase or by the ServiceThread at periodic +// intervals when is_async_deflation_needed() returns true. In +// addition to other policies that are checked, if there are not +// enough ObjectMonitors on the global free list, then +// is_async_deflation_needed() will return true. The ServiceThread +// calls deflate_global_idle_monitors_using_JT() and also calls +// deflate_per_thread_idle_monitors_using_JT() as needed. static void InduceScavenge(Thread* self, const char * Whence) { + assert(!AsyncDeflateIdleMonitors, "is not used by async deflation"); + // Induce STW safepoint to trim monitors // Ultimately, this results in a call to deflate_idle_monitors() in the near future.
// More precisely, trigger an asynchronous STW safepoint as the number @@ -1002,31 +1411,30 @@ } } -ObjectMonitor* ObjectSynchronizer::om_alloc(Thread* self) { +ObjectMonitor* ObjectSynchronizer::om_alloc(Thread* self, + const InflateCause cause) { // A large MAXPRIVATE value reduces both list lock contention // and list coherency traffic, but also tends to increase the // number of ObjectMonitors in circulation as well as the STW // scavenge costs. As usual, we lean toward time in space-time // tradeoffs. const int MAXPRIVATE = 1024; + stringStream ss; for (;;) { ObjectMonitor* m; // 1: try to allocate from the thread's local om_free_list. // Threads will attempt to allocate first from their local list, then - // from the global list, and only after those attempts fail will the thread - // attempt to instantiate new monitors. Thread-local free lists take - // heat off the gListLock and improve allocation latency, as well as reducing - // coherency traffic on the shared global list. - m = self->om_free_list; + // from the global list, and only after those attempts fail will the + // thread attempt to instantiate new monitors. Thread-local free lists + // improve allocation latency, as well as reducing coherency traffic + // on the shared global list. + m = take_from_start_of_om_free_list(self); if (m != NULL) { - self->om_free_list = m->_next_om; - self->om_free_count--; guarantee(m->object() == NULL, "invariant"); - m->_next_om = self->om_in_use_list; - self->om_in_use_list = m; - self->om_in_use_count++; + m->set_allocation_state(ObjectMonitor::New); + prepend_to_om_in_use_list(self, m); return m; } @@ -1035,25 +1443,45 @@ // If the muxTry() fails then drop immediately into case 3. // If we're using thread-local free lists then try // to reprovision the caller's free list. - if (g_free_list != NULL) { + if (OrderAccess::load_acquire(&g_free_list) != NULL) { // Reprovision the thread's om_free_list. // Use bulk transfers to reduce the allocation rate and heat // on various locks. - Thread::muxAcquire(&gListLock, "om_alloc(1)"); - for (int i = self->om_free_provision; --i >= 0 && g_free_list != NULL;) { - g_om_free_count--; - ObjectMonitor* take = g_free_list; - g_free_list = take->_next_om; + for (int i = self->om_free_provision; --i >= 0;) { + ObjectMonitor* take = take_from_start_of_g_free_list(); + if (take == NULL) { + break; // No more are available. + } guarantee(take->object() == NULL, "invariant"); + if (AsyncDeflateIdleMonitors) { + // We allowed 3 field values to linger during async deflation. + // We clear header and restore ref_count here, but we leave + // owner == DEFLATER_MARKER so the simple C2 ObjectMonitor + // enter optimization can no longer race with async deflation + // and reuse. + take->set_header(markWord::zero()); + if (take->ref_count() < 0) { + // Add back max_jint to restore the ref_count field to its + // proper value. + Atomic::add(max_jint, &take->_ref_count); + + assert(take->ref_count() >= 0, "must not be negative: ref_count=%d", + take->ref_count()); + } + } take->Recycle(); + // Since we're taking from the global free-list, take must be Free. + // om_release() also sets the allocation state to Free because it + // is called from other code paths. 
+ assert(take->is_free(), "invariant"); om_release(self, take, false); } - Thread::muxRelease(&gListLock); self->om_free_provision += 1 + (self->om_free_provision/2); if (self->om_free_provision > MAXPRIVATE) self->om_free_provision = MAXPRIVATE; - const int mx = MonitorBound; - if (mx > 0 && (g_om_population-g_om_free_count) > mx) { + if (!AsyncDeflateIdleMonitors && + is_MonitorBound_exceeded(OrderAccess::load_acquire(&g_om_population) - + OrderAccess::load_acquire(&g_om_free_count))) { // Not enough ObjectMonitors on the global free list. // We can't safely induce a STW safepoint from om_alloc() as our thread // state may not be appropriate for such activities and callers may hold @@ -1074,9 +1502,9 @@ assert(_BLOCKSIZE > 1, "invariant"); size_t neededsize = sizeof(PaddedObjectMonitor) * _BLOCKSIZE; PaddedObjectMonitor* temp; - size_t aligned_size = neededsize + (DEFAULT_CACHE_LINE_SIZE - 1); + size_t aligned_size = neededsize + (OM_CACHE_LINE_SIZE - 1); void* real_malloc_addr = NEW_C_HEAP_ARRAY(char, aligned_size, mtInternal); - temp = (PaddedObjectMonitor*)align_up(real_malloc_addr, DEFAULT_CACHE_LINE_SIZE); + temp = (PaddedObjectMonitor*)align_up(real_malloc_addr, OM_CACHE_LINE_SIZE); (void)memset((void *) temp, 0, neededsize); // Format the block. @@ -1088,11 +1516,12 @@ // look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; } for (int i = 1; i < _BLOCKSIZE; i++) { - temp[i]._next_om = (ObjectMonitor *)&temp[i+1]; + OrderAccess::release_store(&temp[i]._next_om, (ObjectMonitor*)&temp[i+1]); + assert(temp[i].is_free(), "invariant"); } // terminate the last monitor as the end of list - temp[_BLOCKSIZE - 1]._next_om = NULL; + OrderAccess::release_store(&temp[_BLOCKSIZE - 1]._next_om, (ObjectMonitor*)NULL); // Element [0] is reserved for global list linkage temp[0].set_object(CHAINMARKER); @@ -1101,24 +1530,7 @@ // block in hand. This avoids some lock traffic and redundant // list activity. - // Acquire the gListLock to manipulate g_block_list and g_free_list. - // An Oyama-Taura-Yonezawa scheme might be more efficient. - Thread::muxAcquire(&gListLock, "om_alloc(2)"); - g_om_population += _BLOCKSIZE-1; - g_om_free_count += _BLOCKSIZE-1; - - // Add the new block to the list of extant blocks (g_block_list). - // The very first ObjectMonitor in a block is reserved and dedicated. - // It serves as blocklist "next" linkage. - temp[0]._next_om = g_block_list; - // There are lock-free uses of g_block_list so make sure that - // the previous stores happen before we update g_block_list. - OrderAccess::release_store(&g_block_list, temp); - - // Add the new string of ObjectMonitors to the global free list - temp[_BLOCKSIZE - 1]._next_om = g_free_list; - g_free_list = temp + 1; - Thread::muxRelease(&gListLock); + prepend_block_to_lists(temp); } } @@ -1131,8 +1543,8 @@ // // Key constraint: all ObjectMonitors on a thread's free list and the global // free list must have their object field set to null. This prevents the -// scavenger -- deflate_monitor_list() -- from reclaiming them while we -// are trying to release them. +// scavenger -- deflate_monitor_list() or deflate_monitor_list_using_JT() +// -- from reclaiming them while we are trying to release them. 
void ObjectSynchronizer::om_release(Thread* self, ObjectMonitor* m, bool from_per_thread_alloc) { guarantee(m->object() == NULL, "invariant"); stringStream ss; guarantee((m->is_busy() | m->_recursions) == 0, "freeing in-use monitor: " - "%s, recursions=" INTPTR_FORMAT, m->is_busy_to_string(&ss), + "%s, recursions=" INTX_FORMAT, m->is_busy_to_string(&ss), m->_recursions); + m->set_allocation_state(ObjectMonitor::Free); // _next_om is used for both per-thread in-use and free lists so // we have to remove 'm' from the in-use list first (as needed). if (from_per_thread_alloc) { // Need to remove 'm' from om_in_use_list. + // We use the more complicated mark-cur_mid_in_use-and-mid-as-we-go + // protocol because async deflation can do list deletions in parallel. ObjectMonitor* cur_mid_in_use = NULL; + ObjectMonitor* mid = NULL; + ObjectMonitor* next = NULL; bool extracted = false; - for (ObjectMonitor* mid = self->om_in_use_list; mid != NULL; cur_mid_in_use = mid, mid = mid->_next_om) { + + if (!mark_list_head(&self->om_in_use_list, &mid, &next)) { + fatal("thread=" INTPTR_FORMAT " in-use list must not be empty.", p2i(self)); + } + while (true) { if (m == mid) { - // extract from per-thread in-use list - if (mid == self->om_in_use_list) { - self->om_in_use_list = mid->_next_om; - } else if (cur_mid_in_use != NULL) { - cur_mid_in_use->_next_om = mid->_next_om; // maintain the current thread in-use list + // We found 'm' on the per-thread in-use list so try to extract it. + if (cur_mid_in_use == NULL) { + // mid is the list head and it is marked. Switch the list head + // to next which unmarks the list head, but leaves mid marked: + OrderAccess::release_store(&self->om_in_use_list, next); + } else { + // mid and cur_mid_in_use are marked. Switch cur_mid_in_use's + // next field to next which unmarks cur_mid_in_use, but leaves + // mid marked: + OrderAccess::release_store(&cur_mid_in_use->_next_om, next); } extracted = true; - self->om_in_use_count--; + Atomic::dec(&self->om_in_use_count); + // Unmark mid, but leave the next value for any lagging list + // walkers. It will get cleaned up when mid is prepended to + // the thread's free list: + set_next(mid, next); break; } + if (cur_mid_in_use != NULL) { + set_next(cur_mid_in_use, mid); // unmark cur_mid_in_use + } + // The next cur_mid_in_use keeps mid's marked next field so + // that it is stable for a possible next field change. It + // cannot be deflated while it is marked. + cur_mid_in_use = mid; + mid = next; + if (mid == NULL) { + // Reached end of the list and didn't find m so: + fatal("must find m=" INTPTR_FORMAT " on om_in_use_list=" INTPTR_FORMAT, + p2i(m), p2i(self->om_in_use_list)); + } + // Mark mid's next field so we can possibly extract it: + next = mark_next_loop(mid); } - assert(extracted, "Should have extracted from in-use list"); } - m->_next_om = self->om_free_list; - self->om_free_list = m; - self->om_free_count++; + prepend_to_om_free_list(self, m); + guarantee(m->is_free(), "invariant"); } // Return ObjectMonitors on a moribund thread's free and in-use @@ -1181,62 +1624,108 @@ // scanned by a GC safepoint, either via Thread::oops_do() (before // om_flush() is called) or via ObjectSynchronizer::oops_do() (after // om_flush() is called). +// +// With AsyncDeflateIdleMonitors, deflate_global_idle_monitors_using_JT() +// and deflate_per_thread_idle_monitors_using_JT() (in another thread) can +// run at the same time as om_flush() so we have to follow a careful +// protocol to prevent list corruption.
void ObjectSynchronizer::om_flush(Thread* self) { - ObjectMonitor* free_list = self->om_free_list; - ObjectMonitor* free_tail = NULL; + // This function can race with an async deflater thread. Since + // deflation has to process the per-thread in-use list before + // prepending the deflated ObjectMonitors to the global free list, + // we process the per-thread lists in the same order to prevent + // ordering races. + int in_use_count = 0; + ObjectMonitor* in_use_list = NULL; + ObjectMonitor* in_use_tail = NULL; + ObjectMonitor* next = NULL; + + // An async deflation thread checks to see if the target thread + // is exiting, but if it has made it past that check before we + // started exiting, then it is racing to get to the in-use list. + if (mark_list_head(&self->om_in_use_list, &in_use_list, &next)) { + // At this point, we have marked the in-use list head so an + // async deflation thread cannot come in after us. If an async + // deflation thread is ahead of us, then we'll detect that and + // wait for it to finish its work. + // + // The thread is going away, however the ObjectMonitors on the + // om_in_use_list may still be in-use by other threads. Link + // them to in_use_tail, which will be linked into the global + // in-use list g_om_in_use_list below. + // + // Account for the in-use list head before the loop since it is + // already marked (by this thread): + in_use_tail = in_use_list; + in_use_count++; + for (ObjectMonitor* cur_om = unmarked_next(in_use_list); cur_om != NULL;) { + if (is_next_marked(cur_om)) { + // This next field is marked so there must be an async deflater + // thread ahead of us so we'll give it a chance to finish. + while (is_next_marked(cur_om)) { + os::naked_short_sleep(1); + } + // Refetch the possibly changed next field and try again. + cur_om = unmarked_next(in_use_tail); + continue; + } + if (!cur_om->is_active()) { + // cur_om was deflated and the allocation state was changed + // to Free while it was marked. We happened to see it just + // after it was unmarked (and added to the free list). + // Refetch the possibly changed next field and try again. + cur_om = unmarked_next(in_use_tail); + continue; + } + in_use_tail = cur_om; + in_use_count++; + cur_om = unmarked_next(cur_om); + } + guarantee(in_use_tail != NULL, "invariant"); + int l_om_in_use_count = OrderAccess::load_acquire(&self->om_in_use_count); + ADIM_guarantee(l_om_in_use_count == in_use_count, "in-use counts don't " + "match: l_om_in_use_count=%d, in_use_count=%d", + l_om_in_use_count, in_use_count); + // Clear the in-use count before unmarking the in-use list head + // to avoid races: + OrderAccess::release_store(&self->om_in_use_count, 0); + // Clear the in-use list head (which also unmarks it): + OrderAccess::release_store(&self->om_in_use_list, (ObjectMonitor*)NULL); + // Unmark the disconnected list head: + set_next(in_use_list, next); + } + int free_count = 0; + ObjectMonitor* free_list = OrderAccess::load_acquire(&self->om_free_list); + ObjectMonitor* free_tail = NULL; if (free_list != NULL) { - ObjectMonitor* s; // The thread is going away. Set 'free_tail' to the last per-thread free - // monitor which will be linked to g_free_list below under the gListLock. + // monitor which will be linked to g_free_list below. 
stringStream ss; - for (s = free_list; s != NULL; s = s->_next_om) { + for (ObjectMonitor* s = free_list; s != NULL; s = unmarked_next(s)) { free_count++; free_tail = s; guarantee(s->object() == NULL, "invariant"); guarantee(!s->is_busy(), "must be !is_busy: %s", s->is_busy_to_string(&ss)); } guarantee(free_tail != NULL, "invariant"); - assert(self->om_free_count == free_count, "free-count off"); - self->om_free_list = NULL; - self->om_free_count = 0; - } - - ObjectMonitor* in_use_list = self->om_in_use_list; - ObjectMonitor* in_use_tail = NULL; - int in_use_count = 0; - if (in_use_list != NULL) { - // The thread is going away, however the ObjectMonitors on the - // om_in_use_list may still be in-use by other threads. Link - // them to in_use_tail, which will be linked into the global - // in-use list g_om_in_use_list below, under the gListLock. - ObjectMonitor *cur_om; - for (cur_om = in_use_list; cur_om != NULL; cur_om = cur_om->_next_om) { - in_use_tail = cur_om; - in_use_count++; - } - guarantee(in_use_tail != NULL, "invariant"); - assert(self->om_in_use_count == in_use_count, "in-use count off"); - self->om_in_use_list = NULL; - self->om_in_use_count = 0; + int l_om_free_count = OrderAccess::load_acquire(&self->om_free_count); + ADIM_guarantee(l_om_free_count == free_count, "free counts don't match: " + "l_om_free_count=%d, free_count=%d", l_om_free_count, + free_count); + OrderAccess::release_store(&self->om_free_list, (ObjectMonitor*)NULL); + OrderAccess::release_store(&self->om_free_count, 0); } - Thread::muxAcquire(&gListLock, "om_flush"); if (free_tail != NULL) { - free_tail->_next_om = g_free_list; - g_free_list = free_list; - g_om_free_count += free_count; + prepend_list_to_g_free_list(free_list, free_tail, free_count); } if (in_use_tail != NULL) { - in_use_tail->_next_om = g_om_in_use_list; - g_om_in_use_list = in_use_list; - g_om_in_use_count += in_use_count; + prepend_list_to_g_om_in_use_list(in_use_list, in_use_tail, in_use_count); } - Thread::muxRelease(&gListLock); - LogStreamHandle(Debug, monitorinflation) lsh_debug; LogStreamHandle(Info, monitorinflation) lsh_info; LogStream* ls = NULL; @@ -1265,19 +1754,28 @@ } // Fast path code shared by multiple functions -void ObjectSynchronizer::inflate_helper(oop obj) { - markWord mark = obj->mark(); - if (mark.has_monitor()) { - assert(ObjectSynchronizer::verify_objmon_isinpool(mark.monitor()), "monitor is invalid"); - assert(mark.monitor()->header().is_neutral(), "monitor must record a good object header"); +void ObjectSynchronizer::inflate_helper(ObjectMonitorHandle* omh_p, oop obj) { + while (true) { + markWord mark = obj->mark(); + if (mark.has_monitor()) { + if (!omh_p->save_om_ptr(obj, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* monitor = omh_p->om_ptr(); + assert(ObjectSynchronizer::verify_objmon_isinpool(monitor), "monitor is invalid"); + markWord dmw = monitor->header(); + assert(dmw.is_neutral(), "sanity check: header=" INTPTR_FORMAT, dmw.value()); + return; + } + inflate(omh_p, Thread::current(), obj, inflate_cause_vm_internal); return; } - inflate(Thread::current(), obj, inflate_cause_vm_internal); } -ObjectMonitor* ObjectSynchronizer::inflate(Thread* self, - oop object, - const InflateCause cause) { +void ObjectSynchronizer::inflate(ObjectMonitorHandle* omh_p, Thread* self, + oop object, const InflateCause cause) { // Inflate mutates the heap ... // Relaxing assertion for bug 6320749. 
assert(Universe::verify_in_progress() || @@ -1298,12 +1796,17 @@ // CASE: inflated if (mark.has_monitor()) { - ObjectMonitor* inf = mark.monitor(); + if (!omh_p->save_om_ptr(object, mark)) { + // Lost a race with async deflation so try again. + assert(AsyncDeflateIdleMonitors, "sanity check"); + continue; + } + ObjectMonitor* inf = omh_p->om_ptr(); markWord dmw = inf->header(); assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); assert(inf->object() == object, "invariant"); assert(ObjectSynchronizer::verify_objmon_isinpool(inf), "monitor is invalid"); - return inf; + return; } // CASE: inflation in progress - inflating over a stack-lock. @@ -1339,7 +1842,7 @@ LogStreamHandle(Trace, monitorinflation) lsh; if (mark.has_locker()) { - ObjectMonitor* m = om_alloc(self); + ObjectMonitor* m = om_alloc(self, cause); // Optimistically prepare the objectmonitor - anticipate successful CAS // We do this before the CAS in order to minimize the length of time // in which INFLATING appears in the mark. @@ -1349,6 +1852,7 @@ markWord cmp = object->cas_set_mark(markWord::INFLATING(), mark); if (cmp != mark) { + // om_release() will reset the allocation state from New to Free. om_release(self, m, true); continue; // Interference -- just retry } @@ -1386,7 +1890,7 @@ markWord dmw = mark.displaced_mark_helper(); // Catch if the object's header is not neutral (not locked and // not marked is what we care about here). - assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); + ADIM_guarantee(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); // Setup monitor fields to proper values -- prepare the monitor m->set_header(dmw); @@ -1396,15 +1900,26 @@ // Note that a thread can inflate an object // that it has stack-locked -- as might happen in wait() -- directly // with CAS. That is, we can avoid the xchg-NULL .... ST idiom. - m->set_owner(mark.locker()); + if (AsyncDeflateIdleMonitors) { + m->set_owner_from(mark.locker(), NULL, DEFLATER_MARKER); + } else { + m->set_owner_from(mark.locker(), NULL); + } m->set_object(object); // TODO-FIXME: assert BasicLock->dhw != 0. + omh_p->set_om_ptr(m); + // Must preserve store ordering. The monitor state must // be stable at the time of publishing the monitor address. guarantee(object->mark() == markWord::INFLATING(), "invariant"); object->release_set_mark(markWord::encode(m)); + // Once ObjectMonitor is configured and the object is associated + // with the ObjectMonitor, it is safe to allow async deflation: + assert(m->is_new(), "freshly allocated monitor must be new"); + m->set_allocation_state(ObjectMonitor::Old); + // Hopefully the performance counters are allocated on distinct cache lines // to avoid false sharing on MP systems ... OM_PERFDATA_OP(Inflations, inc()); @@ -1417,7 +1932,8 @@ if (event.should_commit()) { post_monitor_inflate_event(&event, object, cause); } - return m; + ADIM_guarantee(!m->is_free(), "inflated monitor to be returned cannot be free"); + return; } // CASE: neutral @@ -1431,19 +1947,26 @@ // Catch if the object's header is not neutral (not locked and // not marked is what we care about here). 
- assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value()); - ObjectMonitor* m = om_alloc(self); + ADIM_guarantee(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value()); + ObjectMonitor* m = om_alloc(self, cause); // prepare m for installation - set monitor to initial state m->Recycle(); m->set_header(mark); + // If we leave _owner == DEFLATER_MARKER here, then the simple C2 + // ObjectMonitor enter optimization can no longer race with async + // deflation and reuse. m->set_object(object); m->_Responsible = NULL; m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // consider: keep metastats by type/class + omh_p->set_om_ptr(m); + if (object->cas_set_mark(markWord::encode(m), mark) != mark) { m->set_header(markWord::zero()); m->set_object(NULL); m->Recycle(); + omh_p->set_om_ptr(NULL); + // om_release() will reset the allocation state from New to Free. om_release(self, m, true); m = NULL; continue; @@ -1452,6 +1975,11 @@ // live-lock -- "Inflated" is an absorbing state. } + // Once the ObjectMonitor is configured and object is associated + // with the ObjectMonitor, it is safe to allow async deflation: + assert(m->is_new(), "freshly allocated monitor must be new"); + m->set_allocation_state(ObjectMonitor::Old); + // Hopefully the performance counters are allocated on distinct // cache lines to avoid false sharing on MP systems ... OM_PERFDATA_OP(Inflations, inc()); @@ -1464,13 +1992,15 @@ if (event.should_commit()) { post_monitor_inflate_event(&event, object, cause); } - return m; + ADIM_guarantee(!m->is_free(), "inflated monitor to be returned cannot be free"); + return; } } // We maintain a list of in-use monitors for each thread. // +// For safepoint based deflation: // deflate_thread_local_monitors() scans a single thread's in-use list, while // deflate_idle_monitors() scans only a global list of in-use monitors which // is populated only as a thread dies (see om_flush()). @@ -1489,6 +2019,31 @@ // typically drives the scavenge rate. Large heaps can mean infrequent GC, // which in turn can mean large(r) numbers of ObjectMonitors in circulation. // This is an unfortunate aspect of this design. +// +// For async deflation: +// If a special deflation request is made, then the safepoint based +// deflation mechanism is used. Otherwise, an async deflation request +// is registered with the ServiceThread and it is notified. + +void ObjectSynchronizer::do_safepoint_work(DeflateMonitorCounters* counters) { + assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + + // The per-thread in-use lists are handled in + // ParallelSPCleanupThreadClosure::do_thread(). + + if (!AsyncDeflateIdleMonitors || is_special_deflation_requested()) { + // Use the older mechanism for the global in-use list or if a + // special deflation has been requested before the safepoint.
+ ObjectSynchronizer::deflate_idle_monitors(counters); + return; + } + + log_debug(monitorinflation)("requesting async deflation of idle monitors."); + // Request deflation of idle monitors by the ServiceThread: + set_is_async_deflation_requested(true); + MonitorLocker ml(Service_lock, Mutex::_no_safepoint_check_flag); + ml.notify_all(); +} // Deflate a single monitor if not in-use // Return true if deflated, false if in-use @@ -1507,7 +2062,9 @@ const markWord dmw = mid->header(); guarantee(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); - if (mid->is_busy()) { + if (mid->is_busy() || mid->ref_count() != 0) { + // Easy checks are first - the ObjectMonitor is busy or ObjectMonitor* + // is in use so no deflation. deflated = false; } else { // Deflate the monitor if it is no longer being used @@ -1523,21 +2080,34 @@ // Restore the header back to obj obj->release_set_mark(dmw); + if (AsyncDeflateIdleMonitors) { + // clear() expects the owner field to be NULL and we won't race + // with the simple C2 ObjectMonitor enter optimization since + // we're at a safepoint. DEFLATER_MARKER is the only non-NULL + // value we should see here. + mid->try_set_owner_from(NULL, DEFLATER_MARKER); + } mid->clear(); assert(mid->object() == NULL, "invariant: object=" INTPTR_FORMAT, p2i(mid->object())); + assert(mid->is_free(), "invariant"); // Move the deflated ObjectMonitor to the working free list - // defined by free_head_p and free_tail_p. + // defined by free_head_p and free_tail_p. No races on this list + // so no need for load_acquire() or store_release(). if (*free_head_p == NULL) *free_head_p = mid; if (*free_tail_p != NULL) { // We append to the list so the caller can use mid->_next_om // to fix the linkages in its context. ObjectMonitor* prevtail = *free_tail_p; // Should have been cleaned up by the caller: - assert(prevtail->_next_om == NULL, "cleaned up deflated?"); - prevtail->_next_om = mid; + // Note: Should not have to mark prevtail here since we're at a + // safepoint and ObjectMonitors on the local free list should + // not be accessed in parallel. + assert(prevtail->_next_om == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(prevtail->_next_om)); + set_next(prevtail, mid); } *free_tail_p = mid; // At this point, mid->_next_om still refers to its current @@ -1549,9 +2119,150 @@ return deflated; } -// Walk a given monitor list, and deflate idle monitors -// The given list could be a per-thread list or a global list -// Caller acquires gListLock as needed. +// Deflate the specified ObjectMonitor if not in-use using a JavaThread. +// Returns true if it was deflated and false otherwise. +// +// The async deflation protocol sets owner to DEFLATER_MARKER and +// makes ref_count negative as signals to contending threads that +// an async deflation is in progress. There are a number of checks +// as part of the protocol to make sure that the calling thread has +// not lost the race to a contending thread or to a thread that just +// wants to use the ObjectMonitor*. +// +// The ObjectMonitor has been successfully async deflated when: +// (owner == DEFLATER_MARKER && ref_count < 0) +// Contending threads or ObjectMonitor* using threads that see those +// values know to retry their operation. 
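// The three-part protocol above can be read as three CAS steps. What follows
// is a minimal, self-contained sketch of that dance using std::atomic in
// place of HotSpot's Atomic and OrderAccess wrappers; the SimpleMonitor type
// and DEFLATER sentinel are illustrative assumptions, not HotSpot code. The
// real deflate_monitor_using_JT() follows below.

#include <atomic>
#include <climits>

struct SimpleMonitor {
  std::atomic<void*> owner{nullptr};
  std::atomic<int>   ref_count{0};
  std::atomic<int>   contentions{0};
  std::atomic<int>   waiters{0};
};

static void* const DEFLATER = reinterpret_cast<void*>(-1);

// Returns true if the monitor was async deflated, false if a racer won.
static bool try_async_deflate(SimpleMonitor* m) {
  // Part 1: swing owner from NULL to the sentinel so any contending
  // thread is forced through the slow path.
  void* exp = nullptr;
  if (!m->owner.compare_exchange_strong(exp, DEFLATER)) {
    return false;  // Owned by somebody, so busy.
  }
  if (m->contentions.load() != 0 || m->waiters.load() != 0) {
    exp = DEFLATER;  // A racer entered or waited; undo part 1.
    m->owner.compare_exchange_strong(exp, nullptr);
    return false;
  }
  // Part 2: force ref_count negative so ObjectMonitor* users retry.
  int zero = 0;
  if (!m->ref_count.compare_exchange_strong(zero, -INT_MAX)) {
    exp = DEFLATER;  // An ObjectMonitor* is in use; undo part 1.
    m->owner.compare_exchange_strong(exp, nullptr);
    return false;
  }
  // Part 3: owner must still be the sentinel; this re-check also closes
  // the ref_count ABA window described above.
  if (m->owner.load() != DEFLATER) {
    m->ref_count.fetch_add(INT_MAX);  // Lost the race; restore ref_count.
    return false;
  }
  return true;  // owner == DEFLATER && ref_count < 0: deflated.
}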
+// +bool ObjectSynchronizer::deflate_monitor_using_JT(ObjectMonitor* mid, + ObjectMonitor** free_head_p, + ObjectMonitor** free_tail_p) { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + // A newly allocated ObjectMonitor should not be seen here so we + // avoid an endless inflate/deflate cycle. + assert(mid->is_old(), "must be old: allocation_state=%d", + (int) mid->allocation_state()); + + if (mid->is_busy() || mid->ref_count() != 0) { + // Easy checks are first - the ObjectMonitor is busy or ObjectMonitor* + // is in use so no deflation. + return false; + } + + if (mid->try_set_owner_from(DEFLATER_MARKER, NULL) == NULL) { + // ObjectMonitor is not owned by another thread. Our setting + // owner to DEFLATER_MARKER forces any contending thread through + // the slow path. This is just the first part of the async + // deflation dance. + + if (mid->_contentions != 0 || mid->_waiters != 0) { + // Another thread has raced to enter the ObjectMonitor after + // mid->is_busy() above or has already entered and waited on + // it which makes it busy so no deflation. Restore owner to + // NULL if it is still DEFLATER_MARKER. + mid->try_set_owner_from(NULL, DEFLATER_MARKER); + return false; + } + + if (Atomic::cmpxchg(-max_jint, &mid->_ref_count, (jint)0) == 0) { + // Make ref_count negative to force any contending threads or + // ObjectMonitor* using threads to retry. This is the second + // part of the async deflation dance. + + if (mid->owner_is_DEFLATER_MARKER()) { + // If owner is still DEFLATER_MARKER, then we have successfully + // signaled any contending threads to retry. If it is not, then we + // have lost the race to an entering thread and the ObjectMonitor + // is now busy. This is the third and final part of the async + // deflation dance. + // Note: This owner check solves the ABA problem with ref_count + // where another thread acquired the ObjectMonitor, finished + // using it and restored the ref_count to zero. + + // Sanity checks for the races: + guarantee(mid->_contentions == 0, "must be 0: contentions=%d", + mid->_contentions); + guarantee(mid->_waiters == 0, "must be 0: waiters=%d", mid->_waiters); + guarantee(mid->_cxq == NULL, "must be no contending threads: cxq=" + INTPTR_FORMAT, p2i(mid->_cxq)); + guarantee(mid->_EntryList == NULL, + "must be no entering threads: EntryList=" INTPTR_FORMAT, + p2i(mid->_EntryList)); + + const oop obj = (oop) mid->object(); + if (log_is_enabled(Trace, monitorinflation)) { + ResourceMark rm; + log_trace(monitorinflation)("deflate_monitor_using_JT: " + "object=" INTPTR_FORMAT ", mark=" + INTPTR_FORMAT ", type='%s'", + p2i(obj), obj->mark().value(), + obj->klass()->external_name()); + } + + // Install the old mark word if nobody else has already done it. + mid->install_displaced_markword_in_object(obj); + mid->clear_using_JT(); + + assert(mid->object() == NULL, "must be NULL: object=" INTPTR_FORMAT, + p2i(mid->object())); + assert(mid->is_free(), "must be free: allocation_state=%d", + (int) mid->allocation_state()); + + // Move the deflated ObjectMonitor to the working free list + // defined by free_head_p and free_tail_p. No races on this list + // so no need for load_acquire() or store_release(). + if (*free_head_p == NULL) { + // First one on the list. + *free_head_p = mid; + } + if (*free_tail_p != NULL) { + // We append to the list so the caller can use mid->_next_om + // to fix the linkages in its context. 
+ ObjectMonitor* prevtail = *free_tail_p; + // Should have been cleaned up by the caller: + ObjectMonitor* next = mark_next_loop(prevtail); + assert(unmarked_next(prevtail) == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(unmarked_next(prevtail))); + set_next(prevtail, mid); // prevtail now points to mid (and is unmarked) + } + *free_tail_p = mid; + + // At this point, mid->_next_om still refers to its current + // value and another ObjectMonitor's _next_om field still + // refers to this ObjectMonitor. Those linkages have to be + // cleaned up by the caller who has the complete context. + + // We leave owner == DEFLATER_MARKER and ref_count < 0 + // to force any racing threads to retry. + return true; // Success, ObjectMonitor has been deflated. + } + + // The owner was changed from DEFLATER_MARKER so we lost the + // race since the ObjectMonitor is now busy. + + // Add back max_jint to restore the ref_count field to its + // proper value (which may not be what we saw above): + Atomic::add(max_jint, &mid->_ref_count); + + assert(mid->ref_count() >= 0, "must not be negative: ref_count=%d", + mid->ref_count()); + return false; + } + + // The ref_count was no longer 0 so we lost the race since the + // ObjectMonitor is now busy or the ObjectMonitor* is now in use. + // Restore owner to NULL if it is still DEFLATER_MARKER: + mid->try_set_owner_from(NULL, DEFLATER_MARKER); + } + + // The owner field is no longer NULL so we lost the race since the + // ObjectMonitor is now busy. + return false; +} + +// Walk a given monitor list, and deflate idle monitors. +// The given list could be a per-thread list or a global list. // // In the case of parallel processing of thread local monitor lists, // work is done by Threads::parallel_threads_do() which ensures that @@ -1562,47 +2273,219 @@ // See also ParallelSPCleanupTask and // SafepointSynchronize::do_cleanup_tasks() in safepoint.cpp and // Threads::parallel_java_threads_do() in thread.cpp. -int ObjectSynchronizer::deflate_monitor_list(ObjectMonitor** list_p, +int ObjectSynchronizer::deflate_monitor_list(ObjectMonitor* volatile * list_p, + int volatile * count_p, ObjectMonitor** free_head_p, ObjectMonitor** free_tail_p) { - ObjectMonitor* mid; - ObjectMonitor* next; ObjectMonitor* cur_mid_in_use = NULL; + ObjectMonitor* mid = NULL; + ObjectMonitor* next = NULL; int deflated_count = 0; - for (mid = *list_p; mid != NULL;) { + // We use the simpler mark-mid-as-we-go protocol since there are no + // parallel list deletions because we are at a safepoint. + if (!mark_list_head(list_p, &mid, &next)) { + return 0; // The list is empty so nothing to deflate. + } + + while (true) { oop obj = (oop) mid->object(); if (obj != NULL && deflate_monitor(mid, obj, free_head_p, free_tail_p)) { // Deflation succeeded and already updated free_head_p and // free_tail_p as needed. Finish the move to the local free list // by unlinking mid from the global or per-thread in-use list. - if (mid == *list_p) { - *list_p = mid->_next_om; - } else if (cur_mid_in_use != NULL) { - cur_mid_in_use->_next_om = mid->_next_om; // maintain the current thread in-use list + if (cur_mid_in_use == NULL) { + // mid is the list head and it is marked. Switch the list head + // to next which unmarks the list head, but leaves mid marked: + OrderAccess::release_store(list_p, next); + } else { + // mid is marked.
Switch cur_mid_in_use's next field to next + // which is safe because we have no parallel list deletions, + // but we leave mid marked: + OrderAccess::release_store(&cur_mid_in_use->_next_om, next); } - next = mid->_next_om; - mid->_next_om = NULL; // This mid is current tail in the free_head_p list + // At this point mid is disconnected from the in-use list so + // its marked next field no longer has any effects. + deflated_count++; + Atomic::dec(count_p); + // mid is current tail in the free_head_p list so NULL terminate it + // (which also unmarks it): + set_next(mid, NULL); + + // All the list management is done so move on to the next one: mid = next; + } else { + set_next(mid, next); // unmark next field + + // All the list management is done so move on to the next one: + cur_mid_in_use = mid; + mid = next; + } + if (mid == NULL) { + break; // Reached end of the list so nothing more to deflate. + } + // Mark mid's next field so we can possibly deflate it: + next = mark_next_loop(mid); + } + return deflated_count; +} + +// Walk a given ObjectMonitor list and deflate idle ObjectMonitors using +// a JavaThread. Returns the number of deflated ObjectMonitors. The given +// list could be a per-thread in-use list or the global in-use list. +// If a safepoint has started, then we save state via saved_mid_in_use_p +// and return to the caller to honor the safepoint. +// +int ObjectSynchronizer::deflate_monitor_list_using_JT(ObjectMonitor* volatile * list_p, + int volatile * count_p, + ObjectMonitor** free_head_p, + ObjectMonitor** free_tail_p, + ObjectMonitor** saved_mid_in_use_p) { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + + ObjectMonitor* cur_mid_in_use = NULL; + ObjectMonitor* mid = NULL; + ObjectMonitor* next = NULL; + ObjectMonitor* next_next = NULL; + int deflated_count = 0; + + // We use the more complicated mark-cur_mid_in_use-and-mid-as-we-go + // protocol because om_release() can do list deletions in parallel. + // We also mark-next-next-as-we-go to prevent an om_flush() that is + // behind this thread from passing us. + if (*saved_mid_in_use_p == NULL) { + // No saved state so start at the beginning. + // Mark the list head's next field so we can possibly deflate it: + if (!mark_list_head(list_p, &mid, &next)) { + return 0; // The list is empty so nothing to deflate. + } + } else { + // We're restarting after a safepoint so restore the necessary state + // before we resume. + cur_mid_in_use = *saved_mid_in_use_p; + // Mark cur_mid_in_use's next field so we can possibly update its + // next field to extract a deflated ObjectMonitor. + mid = mark_next_loop(cur_mid_in_use); + if (mid == NULL) { + set_next(cur_mid_in_use, NULL); // unmark next field + *saved_mid_in_use_p = NULL; + return 0; // The remainder is empty so nothing more to deflate. + } + // Mark mid's next field so we can possibly deflate it: + next = mark_next_loop(mid); + } + + while (true) { + // The current mid's next field is marked at this point. If we have + // a cur_mid_in_use, then its next field is also marked at this point. + + if (next != NULL) { + // We mark next's next field so that an om_flush() + // thread that is behind us cannot pass us when we + // unmark the current mid's next field. + next_next = mark_next_loop(next); + } + + // Only try to deflate if there is an associated Java object and if + // mid is old (is not newly allocated and is not newly freed). 
+ if (mid->object() != NULL && mid->is_old() && + deflate_monitor_using_JT(mid, free_head_p, free_tail_p)) { + // Deflation succeeded and already updated free_head_p and + // free_tail_p as needed. Finish the move to the local free list + // by unlinking mid from the global or per-thread in-use list. + if (cur_mid_in_use == NULL) { + // mid is the list head and it is marked. Switch the list head + // to next which is also marked (if not NULL) and also leave + // mid marked: + OrderAccess::release_store(list_p, next); + } else { + ObjectMonitor* marked_next = mark_om_ptr(next); + // mid and cur_mid_in_use are marked. Switch cur_mid_in_use's + // next field to marked_next and also leave mid marked: + OrderAccess::release_store(&cur_mid_in_use->_next_om, marked_next); + } + // At this point mid is disconnected from the in-use list so + // its marked next field no longer has any effects. deflated_count++; + Atomic::dec(count_p); + // mid is current tail in the free_head_p list so NULL terminate it + // (which also unmarks it): + set_next(mid, NULL); + + // All the list management is done so move on to the next one: + mid = next; // mid keeps non-NULL next's marked next field + next = next_next; } else { + // mid is considered in-use if it does not have an associated + // Java object or mid is not old or deflation did not succeed. + // A mid->is_new() node can be seen here when it is freshly + // returned by om_alloc() (and skips the deflation code path). + // A mid->is_old() node can be seen here when deflation failed. + // A mid->is_free() node can be seen here when a fresh node from + // om_alloc() is released by om_release() due to losing the race + // in inflate(). + + // All the list management is done so move on to the next one: + if (cur_mid_in_use != NULL) { + set_next(cur_mid_in_use, mid); // unmark cur_mid_in_use + } + // The next cur_mid_in_use keeps mid's marked next field so + // that it is stable for a possible next field change. It + // cannot be modified by om_release() while it is marked. cur_mid_in_use = mid; - mid = mid->_next_om; + mid = next; // mid keeps non-NULL next's marked next field + next = next_next; + + if (SafepointSynchronize::is_synchronizing() && + cur_mid_in_use != OrderAccess::load_acquire(list_p) && + cur_mid_in_use->is_old()) { + // If a safepoint has started and cur_mid_in_use is not the list + // head and is old, then it is safe to use as saved state. Return + // to the caller before blocking. + *saved_mid_in_use_p = cur_mid_in_use; + set_next(cur_mid_in_use, mid); // unmark cur_mid_in_use + if (mid != NULL) { + set_next(mid, next); // unmark mid + } + return deflated_count; + } } + if (mid == NULL) { + if (cur_mid_in_use != NULL) { + set_next(cur_mid_in_use, mid); // unmark cur_mid_in_use + } + break; // Reached end of the list so nothing more to deflate. + } + + // The current mid's next field is marked at this point. If we have + // a cur_mid_in_use, then its next field is also marked at this point. } + // We finished the list without a safepoint starting so there's + // no need to save state.
+ *saved_mid_in_use_p = NULL; return deflated_count; } void ObjectSynchronizer::prepare_deflate_idle_monitors(DeflateMonitorCounters* counters) { - counters->n_in_use = 0; // currently associated with objects - counters->n_in_circulation = 0; // extant - counters->n_scavenged = 0; // reclaimed (global and per-thread) - counters->per_thread_scavenged = 0; // per-thread scavenge total - counters->per_thread_times = 0.0; // per-thread scavenge times + OrderAccess::release_store(&counters->n_in_use, 0); // currently associated with objects + OrderAccess::release_store(&counters->n_in_circulation, 0); // extant + OrderAccess::release_store(&counters->n_scavenged, 0); // reclaimed (global and per-thread) + OrderAccess::release_store(&counters->per_thread_scavenged, 0); // per-thread scavenge total + counters->per_thread_times = 0.0; // per-thread scavenge times } void ObjectSynchronizer::deflate_idle_monitors(DeflateMonitorCounters* counters) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + + if (AsyncDeflateIdleMonitors) { + // Nothing to do when global idle ObjectMonitors are deflated using + // a JavaThread unless a special deflation has been requested. + if (!is_special_deflation_requested()) { + return; + } + } + bool deflated = false; ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors @@ -1613,33 +2496,28 @@ timer.start(); } - // Prevent om_flush from changing mids in Thread dtor's during deflation - // And in case the vm thread is acquiring a lock during a safepoint - // See e.g. 6320749 - Thread::muxAcquire(&gListLock, "deflate_idle_monitors"); - // Note: the thread-local monitors lists get deflated in // a separate pass. See deflate_thread_local_monitors(). // For moribund threads, scan g_om_in_use_list int deflated_count = 0; - if (g_om_in_use_list) { - counters->n_in_circulation += g_om_in_use_count; - deflated_count = deflate_monitor_list((ObjectMonitor **)&g_om_in_use_list, &free_head_p, &free_tail_p); - g_om_in_use_count -= deflated_count; - counters->n_scavenged += deflated_count; - counters->n_in_use += g_om_in_use_count; + if (OrderAccess::load_acquire(&g_om_in_use_list) != NULL) { + // Update n_in_circulation before g_om_in_use_count is updated by deflation. + Atomic::add(OrderAccess::load_acquire(&g_om_in_use_count), &counters->n_in_circulation); + + deflated_count = deflate_monitor_list(&g_om_in_use_list, &g_om_in_use_count, &free_head_p, &free_tail_p); + Atomic::add(OrderAccess::load_acquire(&g_om_in_use_count), &counters->n_in_use); } if (free_head_p != NULL) { // Move the deflated ObjectMonitors back to the global free list. - guarantee(free_tail_p != NULL && counters->n_scavenged > 0, "invariant"); - assert(free_tail_p->_next_om == NULL, "invariant"); - // constant-time list splice - prepend scavenged segment to g_free_list - free_tail_p->_next_om = g_free_list; - g_free_list = free_head_p; + // No races on the working free list so no need for load_acquire(). 
+ guarantee(free_tail_p != NULL && deflated_count > 0, "invariant"); + assert(free_tail_p->_next_om == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(free_tail_p->_next_om)); + prepend_list_to_g_free_list(free_head_p, free_tail_p, deflated_count); + Atomic::add(deflated_count, &counters->n_scavenged); } - Thread::muxRelease(&gListLock); timer.stop(); LogStreamHandle(Debug, monitorinflation) lsh_debug; @@ -1655,39 +2533,228 @@ } } +class HandshakeForDeflation : public ThreadClosure { + public: + void do_thread(Thread* thread) { + log_trace(monitorinflation)("HandshakeForDeflation::do_thread: thread=" + INTPTR_FORMAT, p2i(thread)); + } +}; + +void ObjectSynchronizer::deflate_idle_monitors_using_JT() { + assert(AsyncDeflateIdleMonitors, "sanity check"); + + // Deflate any global idle monitors. + deflate_global_idle_monitors_using_JT(); + + int count = 0; + for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { + if (jt->om_in_use_count > 0 && !jt->is_exiting()) { + // This JavaThread is using ObjectMonitors so deflate any that + // are idle unless this JavaThread is exiting; do not race with + // ObjectSynchronizer::om_flush(). + deflate_per_thread_idle_monitors_using_JT(jt); + count++; + } + } + if (count > 0) { + log_debug(monitorinflation)("did async deflation of idle monitors for %d thread(s).", count); + } + // The ServiceThread's async deflation request has been processed. + set_is_async_deflation_requested(false); + + if (HandshakeAfterDeflateIdleMonitors && g_om_wait_count > 0) { + // There are deflated ObjectMonitors waiting for a handshake + // (or a safepoint) for safety. + + // g_wait_list and g_om_wait_count are only updated by the calling + // thread so no need for load_acquire() or release_store(). + ObjectMonitor* list = g_wait_list; + ADIM_guarantee(list != NULL, "g_wait_list must not be NULL"); + int count = g_om_wait_count; + g_wait_list = NULL; + g_om_wait_count = 0; + + // Find the tail for prepend_list_to_common(). + int l_count = 0; + ObjectMonitor* tail = NULL; + for (ObjectMonitor* n = list; n != NULL; n = unmarked_next(n)) { + tail = n; + l_count++; + } + ADIM_guarantee(count == l_count, "count=%d != l_count=%d", count, l_count); + + // Will execute a safepoint if !ThreadLocalHandshakes: + HandshakeForDeflation hfd_tc; + Handshake::execute(&hfd_tc); + + prepend_list_to_common(list, tail, count, &g_free_list, &g_om_free_count); + + log_info(monitorinflation)("moved %d idle monitors from global waiting list to global free list", count); + } +} + +// Deflate global idle ObjectMonitors using a JavaThread. +// +void ObjectSynchronizer::deflate_global_idle_monitors_using_JT() { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + JavaThread* self = JavaThread::current(); + + deflate_common_idle_monitors_using_JT(true /* is_global */, self); +} + +// Deflate the specified JavaThread's idle ObjectMonitors using a JavaThread. +// +void ObjectSynchronizer::deflate_per_thread_idle_monitors_using_JT(JavaThread* target) { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + + deflate_common_idle_monitors_using_JT(false /* !is_global */, target); +} + +// Deflate global or per-thread idle ObjectMonitors using a JavaThread. 
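// The function below alternates deflation with safepoint cooperation: walk a
// chunk of the list, and if the walker parked a cursor because a safepoint
// started, block and then resume from that cursor. A hedged, self-contained
// sketch of that shape; ResumeCursor, walk_chunk() and yield_to_safepoint()
// are illustrative stand-ins, not HotSpot APIs (HotSpot blocks via
// ThreadBlockInVM).

struct ResumeCursor {
  void* saved_mid_in_use = nullptr;  // NULL means the walk ran to completion.
};

static int walk_chunk(ResumeCursor* c) {
  // Stub: a real walker deflates until the list is exhausted or a safepoint
  // starts, recording its position in c->saved_mid_in_use when interrupted.
  c->saved_mid_in_use = nullptr;
  return 0;
}

static void yield_to_safepoint() {
  // Stub: cooperate with the safepoint protocol before resuming.
}

static int deflate_with_safepoint_pauses() {
  ResumeCursor cursor;
  int deflated = 0;
  do {
    deflated += walk_chunk(&cursor);
    if (cursor.saved_mid_in_use != nullptr) {
      yield_to_safepoint();  // Honor the safepoint, then loop to resume.
    }
  } while (cursor.saved_mid_in_use != nullptr);
  return deflated;
}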
+// +void ObjectSynchronizer::deflate_common_idle_monitors_using_JT(bool is_global, JavaThread* target) { + JavaThread* self = JavaThread::current(); + + int deflated_count = 0; + ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged ObjectMonitors + ObjectMonitor* free_tail_p = NULL; + ObjectMonitor* saved_mid_in_use_p = NULL; + elapsedTimer timer; + + if (log_is_enabled(Info, monitorinflation)) { + timer.start(); + } + + if (is_global) { + OM_PERFDATA_OP(MonExtant, set_value(OrderAccess::load_acquire(&g_om_in_use_count))); + } else { + OM_PERFDATA_OP(MonExtant, inc(OrderAccess::load_acquire(&target->om_in_use_count))); + } + + do { + int local_deflated_count; + if (is_global) { + local_deflated_count = deflate_monitor_list_using_JT(&g_om_in_use_list, &g_om_in_use_count, &free_head_p, &free_tail_p, &saved_mid_in_use_p); + } else { + local_deflated_count = deflate_monitor_list_using_JT(&target->om_in_use_list, &target->om_in_use_count, &free_head_p, &free_tail_p, &saved_mid_in_use_p); + } + deflated_count += local_deflated_count; + + if (free_head_p != NULL) { + // Move the deflated ObjectMonitors to the global free list. + // No races on the working list so no need for load_acquire(). + guarantee(free_tail_p != NULL && local_deflated_count > 0, "free_tail_p=" INTPTR_FORMAT ", local_deflated_count=%d", p2i(free_tail_p), local_deflated_count); + // Note: The target thread can be doing an om_alloc() that + // is trying to prepend an ObjectMonitor on its in-use list + // at the same time that we have deflated the current in-use + // list head and put it on the local free list. prepend_list_to_common() + // will detect the race and retry, which avoids list corruption, + // but the next field in free_tail_p can flicker to marked + // and then unmarked while prepend_list_to_common() is sorting it + // all out. + assert(unmarked_next(free_tail_p) == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(unmarked_next(free_tail_p))); + + if (HandshakeAfterDeflateIdleMonitors) { + prepend_list_to_g_wait_list(free_head_p, free_tail_p, local_deflated_count); + } else { + prepend_list_to_g_free_list(free_head_p, free_tail_p, local_deflated_count); + } + + OM_PERFDATA_OP(Deflations, inc(local_deflated_count)); + } + + if (saved_mid_in_use_p != NULL) { + // deflate_monitor_list_using_JT() detected a safepoint starting. + timer.stop(); + { + if (is_global) { + log_debug(monitorinflation)("pausing deflation of global idle monitors for a safepoint."); + } else { + log_debug(monitorinflation)("jt=" INTPTR_FORMAT ": pausing deflation of per-thread idle monitors for a safepoint.", p2i(target)); + } + assert(SafepointSynchronize::is_synchronizing(), "sanity check"); + ThreadBlockInVM blocker(self); + } + // Prepare for another loop after the safepoint.
+ free_head_p = NULL; + free_tail_p = NULL; + if (log_is_enabled(Info, monitorinflation)) { + timer.start(); + } + } + } while (saved_mid_in_use_p != NULL); + timer.stop(); + + LogStreamHandle(Debug, monitorinflation) lsh_debug; + LogStreamHandle(Info, monitorinflation) lsh_info; + LogStream* ls = NULL; + if (log_is_enabled(Debug, monitorinflation)) { + ls = &lsh_debug; + } else if (deflated_count != 0 && log_is_enabled(Info, monitorinflation)) { + ls = &lsh_info; + } + if (ls != NULL) { + if (is_global) { + ls->print_cr("async-deflating global idle monitors, %3.7f secs, %d monitors", timer.seconds(), deflated_count); + } else { + ls->print_cr("jt=" INTPTR_FORMAT ": async-deflating per-thread idle monitors, %3.7f secs, %d monitors", p2i(target), timer.seconds(), deflated_count); + } + } +} + void ObjectSynchronizer::finish_deflate_idle_monitors(DeflateMonitorCounters* counters) { // Report the cumulative time for deflating each thread's idle // monitors. Note: if the work is split among more than one // worker thread, then the reported time will likely be more // than a beginning to end measurement of the phase. - log_info(safepoint, cleanup)("deflating per-thread idle monitors, %3.7f secs, monitors=%d", counters->per_thread_times, counters->per_thread_scavenged); + // Note: AsyncDeflateIdleMonitors only deflates per-thread idle + // monitors at a safepoint when a special deflation has been requested. + log_info(safepoint, cleanup)("deflating per-thread idle monitors, %3.7f secs, monitors=%d", + counters->per_thread_times, + OrderAccess::load_acquire(&counters->per_thread_scavenged)); + + bool needs_special_deflation = is_special_deflation_requested(); + if (!AsyncDeflateIdleMonitors || needs_special_deflation) { + // AsyncDeflateIdleMonitors does not use these counters unless + // there is a special deflation request. - g_om_free_count += counters->n_scavenged; + OM_PERFDATA_OP(Deflations, inc(counters->n_scavenged)); + OM_PERFDATA_OP(MonExtant, set_value(counters->n_in_circulation)); + } if (log_is_enabled(Debug, monitorinflation)) { // exit_globals()'s call to audit_and_print_stats() is done // at the Info level. ObjectSynchronizer::audit_and_print_stats(false /* on_exit */); } else if (log_is_enabled(Info, monitorinflation)) { - Thread::muxAcquire(&gListLock, "finish_deflate_idle_monitors"); log_info(monitorinflation)("g_om_population=%d, g_om_in_use_count=%d, " - "g_om_free_count=%d", g_om_population, - g_om_in_use_count, g_om_free_count); - Thread::muxRelease(&gListLock); + "g_om_free_count=%d, g_om_wait_count=%d", + OrderAccess::load_acquire(&g_om_population), + OrderAccess::load_acquire(&g_om_in_use_count), + OrderAccess::load_acquire(&g_om_free_count), + OrderAccess::load_acquire(&g_om_wait_count)); } ForceMonitorScavenge = 0; // Reset - - OM_PERFDATA_OP(Deflations, inc(counters->n_scavenged)); - OM_PERFDATA_OP(MonExtant, set_value(counters->n_in_circulation)); - GVars.stw_random = os::random(); GVars.stw_cycle++; + if (needs_special_deflation) { + set_is_special_deflation_requested(false); // special deflation is done + } } void ObjectSynchronizer::deflate_thread_local_monitors(Thread* thread, DeflateMonitorCounters* counters) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + if (AsyncDeflateIdleMonitors && !is_special_deflation_requested()) { + // Nothing to do if a special deflation has NOT been requested. 
+ return; + } + ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors ObjectMonitor* free_tail_p = NULL; elapsedTimer timer; @@ -1697,25 +2764,21 @@ timer.start(); } - int deflated_count = deflate_monitor_list(thread->om_in_use_list_addr(), &free_head_p, &free_tail_p); + // Update n_in_circulation before om_in_use_count is updated by deflation. + Atomic::add(OrderAccess::load_acquire(&thread->om_in_use_count), &counters->n_in_circulation); - Thread::muxAcquire(&gListLock, "deflate_thread_local_monitors"); - - // Adjust counters - counters->n_in_circulation += thread->om_in_use_count; - thread->om_in_use_count -= deflated_count; - counters->n_scavenged += deflated_count; - counters->n_in_use += thread->om_in_use_count; - counters->per_thread_scavenged += deflated_count; + int deflated_count = deflate_monitor_list(&thread->om_in_use_list, &thread->om_in_use_count, &free_head_p, &free_tail_p); + Atomic::add(OrderAccess::load_acquire(&thread->om_in_use_count), &counters->n_in_use); if (free_head_p != NULL) { // Move the deflated ObjectMonitors back to the global free list. + // No races on the working list so no need for load_acquire(). guarantee(free_tail_p != NULL && deflated_count > 0, "invariant"); - assert(free_tail_p->_next_om == NULL, "invariant"); - - // constant-time list splice - prepend scavenged segment to g_free_list - free_tail_p->_next_om = g_free_list; - g_free_list = free_head_p; + assert(free_tail_p->_next_om == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(free_tail_p->_next_om)); + prepend_list_to_g_free_list(free_head_p, free_tail_p, deflated_count); + Atomic::add(deflated_count, &counters->n_scavenged); + Atomic::add(deflated_count, &counters->per_thread_scavenged); } timer.stop(); @@ -1724,8 +2787,6 @@ // should be cheap. counters->per_thread_times += timer.seconds(); - Thread::muxRelease(&gListLock); - LogStreamHandle(Debug, monitorinflation) lsh_debug; LogStreamHandle(Info, monitorinflation) lsh_info; LogStream* ls = NULL; @@ -1776,9 +2837,7 @@ assert(THREAD == JavaThread::current(), "must be current Java thread"); NoSafepointVerifier nsv; ReleaseJavaMonitorsClosure rjmc(THREAD); - Thread::muxAcquire(&gListLock, "release_monitors_owned_by_thread"); ObjectSynchronizer::monitors_iterate(&rjmc); - Thread::muxRelease(&gListLock); THREAD->clear_pending_exception(); } @@ -1832,11 +2891,6 @@ } assert(ls != NULL, "sanity check"); - if (!on_exit) { - // Not at VM exit so grab the global list lock. - Thread::muxAcquire(&gListLock, "audit_and_print_stats"); - } - // Log counts for the global and per-thread monitor lists: int chk_om_population = log_monitor_list_counts(ls); int error_cnt = 0; @@ -1844,14 +2898,19 @@ ls->print_cr("Checking global lists:"); // Check g_om_population: - if (g_om_population == chk_om_population) { + if (OrderAccess::load_acquire(&g_om_population) == chk_om_population) { ls->print_cr("g_om_population=%d equals chk_om_population=%d", - g_om_population, chk_om_population); + OrderAccess::load_acquire(&g_om_population), + chk_om_population); } else { - ls->print_cr("ERROR: g_om_population=%d is not equal to " - "chk_om_population=%d", g_om_population, + // With lock free access to the monitor lists, it is possible for + // log_monitor_list_counts() to return a value that doesn't match + // g_om_population. So far a higher value has been seen in testing + // so something is being double counted by log_monitor_list_counts(). 
+ ls->print_cr("WARNING: g_om_population=%d is not equal to " + "chk_om_population=%d", + OrderAccess::load_acquire(&g_om_population), chk_om_population); - error_cnt++; } // Check g_om_in_use_list and g_om_in_use_count: @@ -1860,8 +2919,9 @@ // Check g_free_list and g_om_free_count: chk_global_free_list_and_count(ls, &error_cnt); - if (!on_exit) { - Thread::muxRelease(&gListLock); + if (HandshakeAfterDeflateIdleMonitors) { + // Check g_wait_list and g_om_wait_count: + chk_global_wait_list_and_count(ls, &error_cnt); } ls->print_cr("Checking per-thread lists:"); @@ -1885,7 +2945,7 @@ // When exiting this log output is at the Info level. When called // at a safepoint, this log output is at the Trace level since // there can be a lot of it. - log_in_use_monitor_details(ls, on_exit); + log_in_use_monitor_details(ls); } ls->flush(); @@ -1914,12 +2974,13 @@ ": free per-thread monitor must have NULL _header " "field: _header=" INTPTR_FORMAT, p2i(jt), p2i(n), n->header().value()); - } else { + *error_cnt_p = *error_cnt_p + 1; + } else if (!AsyncDeflateIdleMonitors) { out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": free global monitor " "must have NULL _header field: _header=" INTPTR_FORMAT, p2i(n), n->header().value()); + *error_cnt_p = *error_cnt_p + 1; } - *error_cnt_p = *error_cnt_p + 1; } if (n->object() != NULL) { if (jt != NULL) { @@ -1940,17 +3001,44 @@ void ObjectSynchronizer::chk_global_free_list_and_count(outputStream * out, int *error_cnt_p) { int chk_om_free_count = 0; - for (ObjectMonitor* n = g_free_list; n != NULL; n = n->_next_om) { + for (ObjectMonitor* n = OrderAccess::load_acquire(&g_free_list); n != NULL; n = unmarked_next(n)) { chk_free_entry(NULL /* jt */, n, out, error_cnt_p); chk_om_free_count++; } - if (g_om_free_count == chk_om_free_count) { + if (OrderAccess::load_acquire(&g_om_free_count) == chk_om_free_count) { out->print_cr("g_om_free_count=%d equals chk_om_free_count=%d", - g_om_free_count, chk_om_free_count); + OrderAccess::load_acquire(&g_om_free_count), + chk_om_free_count); } else { - out->print_cr("ERROR: g_om_free_count=%d is not equal to " - "chk_om_free_count=%d", g_om_free_count, + // With lock free access to g_free_list, it is possible for an + // ObjectMonitor to be prepended to g_free_list after we started + // calculating chk_om_free_count so g_om_free_count may not + // match anymore. + out->print_cr("WARNING: g_om_free_count=%d is not equal to " + "chk_om_free_count=%d", + OrderAccess::load_acquire(&g_om_free_count), chk_om_free_count); + } +} + +// Check the global wait list and count; log the results of the checks. 
+void ObjectSynchronizer::chk_global_wait_list_and_count(outputStream * out, + int *error_cnt_p) { + int chk_om_wait_count = 0; + for (ObjectMonitor* n = OrderAccess::load_acquire(&g_wait_list); n != NULL; n = unmarked_next(n)) { + // Rules for g_wait_list are the same as for g_free_list: + chk_free_entry(NULL /* jt */, n, out, error_cnt_p); + chk_om_wait_count++; + } + if (OrderAccess::load_acquire(&g_om_wait_count) == chk_om_wait_count) { + out->print_cr("g_om_wait_count=%d equals chk_om_wait_count=%d", + OrderAccess::load_acquire(&g_om_wait_count), + chk_om_wait_count); + } else { + out->print_cr("ERROR: g_om_wait_count=%d is not equal to " + "chk_om_wait_count=%d", + OrderAccess::load_acquire(&g_om_wait_count), + chk_om_wait_count); *error_cnt_p = *error_cnt_p + 1; } } @@ -1959,17 +3047,21 @@ void ObjectSynchronizer::chk_global_in_use_list_and_count(outputStream * out, int *error_cnt_p) { int chk_om_in_use_count = 0; - for (ObjectMonitor* n = g_om_in_use_list; n != NULL; n = n->_next_om) { + for (ObjectMonitor* n = OrderAccess::load_acquire(&g_om_in_use_list); n != NULL; n = unmarked_next(n)) { chk_in_use_entry(NULL /* jt */, n, out, error_cnt_p); chk_om_in_use_count++; } - if (g_om_in_use_count == chk_om_in_use_count) { - out->print_cr("g_om_in_use_count=%d equals chk_om_in_use_count=%d", g_om_in_use_count, + if (OrderAccess::load_acquire(&g_om_in_use_count) == chk_om_in_use_count) { + out->print_cr("g_om_in_use_count=%d equals chk_om_in_use_count=%d", + OrderAccess::load_acquire(&g_om_in_use_count), chk_om_in_use_count); } else { - out->print_cr("ERROR: g_om_in_use_count=%d is not equal to chk_om_in_use_count=%d", - g_om_in_use_count, chk_om_in_use_count); - *error_cnt_p = *error_cnt_p + 1; + // With lock free access to the monitor lists, it is possible for + // an exiting JavaThread to put its in-use ObjectMonitors on the + // global in-use list after chk_om_in_use_count is calculated above.
+ out->print_cr("WARNING: g_om_in_use_count=%d is not equal to chk_om_in_use_count=%d", + OrderAccess::load_acquire(&g_om_in_use_count), + chk_om_in_use_count); } } @@ -2037,16 +3129,19 @@ outputStream * out, int *error_cnt_p) { int chk_om_free_count = 0; - for (ObjectMonitor* n = jt->om_free_list; n != NULL; n = n->_next_om) { + for (ObjectMonitor* n = OrderAccess::load_acquire(&jt->om_free_list); n != NULL; n = unmarked_next(n)) { chk_free_entry(jt, n, out, error_cnt_p); chk_om_free_count++; } - if (jt->om_free_count == chk_om_free_count) { + if (OrderAccess::load_acquire(&jt->om_free_count) == chk_om_free_count) { out->print_cr("jt=" INTPTR_FORMAT ": om_free_count=%d equals " - "chk_om_free_count=%d", p2i(jt), jt->om_free_count, chk_om_free_count); + "chk_om_free_count=%d", p2i(jt), + OrderAccess::load_acquire(&jt->om_free_count), + chk_om_free_count); } else { out->print_cr("ERROR: jt=" INTPTR_FORMAT ": om_free_count=%d is not " - "equal to chk_om_free_count=%d", p2i(jt), jt->om_free_count, + "equal to chk_om_free_count=%d", p2i(jt), + OrderAccess::load_acquire(&jt->om_free_count), chk_om_free_count); *error_cnt_p = *error_cnt_p + 1; } @@ -2057,17 +3152,19 @@ outputStream * out, int *error_cnt_p) { int chk_om_in_use_count = 0; - for (ObjectMonitor* n = jt->om_in_use_list; n != NULL; n = n->_next_om) { + for (ObjectMonitor* n = OrderAccess::load_acquire(&jt->om_in_use_list); n != NULL; n = unmarked_next(n)) { chk_in_use_entry(jt, n, out, error_cnt_p); chk_om_in_use_count++; } - if (jt->om_in_use_count == chk_om_in_use_count) { + if (OrderAccess::load_acquire(&jt->om_in_use_count) == chk_om_in_use_count) { out->print_cr("jt=" INTPTR_FORMAT ": om_in_use_count=%d equals " - "chk_om_in_use_count=%d", p2i(jt), jt->om_in_use_count, + "chk_om_in_use_count=%d", p2i(jt), + OrderAccess::load_acquire(&jt->om_in_use_count), chk_om_in_use_count); } else { out->print_cr("ERROR: jt=" INTPTR_FORMAT ": om_in_use_count=%d is not " - "equal to chk_om_in_use_count=%d", p2i(jt), jt->om_in_use_count, + "equal to chk_om_in_use_count=%d", p2i(jt), + OrderAccess::load_acquire(&jt->om_in_use_count), chk_om_in_use_count); *error_cnt_p = *error_cnt_p + 1; } @@ -2076,27 +3173,22 @@ // Log details about ObjectMonitors on the in-use lists. The 'BHL' // flags indicate why the entry is in-use, 'object' and 'object type' // indicate the associated object and its type. -void ObjectSynchronizer::log_in_use_monitor_details(outputStream * out, - bool on_exit) { - if (!on_exit) { - // Not at VM exit so grab the global list lock. 
- Thread::muxAcquire(&gListLock, "log_in_use_monitor_details"); - } - +void ObjectSynchronizer::log_in_use_monitor_details(outputStream * out) { stringStream ss; - if (g_om_in_use_count > 0) { + if (OrderAccess::load_acquire(&g_om_in_use_count) > 0) { out->print_cr("In-use global monitor info:"); out->print_cr("(B -> is_busy, H -> has hash code, L -> lock status)"); - out->print_cr("%18s %s %18s %18s", - "monitor", "BHL", "object", "object type"); - out->print_cr("================== === ================== =================="); - for (ObjectMonitor* n = g_om_in_use_list; n != NULL; n = n->_next_om) { + out->print_cr("%18s %s %7s %18s %18s", + "monitor", "BHL", "ref_cnt", "object", "object type"); + out->print_cr("================== === ======= ================== =================="); + for (ObjectMonitor* n = OrderAccess::load_acquire(&g_om_in_use_list); n != NULL; n = unmarked_next(n)) { const oop obj = (oop) n->object(); const markWord mark = n->header(); ResourceMark rm; - out->print(INTPTR_FORMAT " %d%d%d " INTPTR_FORMAT " %s", p2i(n), - n->is_busy() != 0, mark.hash() != 0, n->owner() != NULL, - p2i(obj), obj->klass()->external_name()); + out->print(INTPTR_FORMAT " %d%d%d %7d " INTPTR_FORMAT " %s", + p2i(n), n->is_busy() != 0, mark.hash() != 0, + n->owner() != NULL, (int)n->ref_count(), p2i(obj), + obj->klass()->external_name()); if (n->is_busy() != 0) { out->print(" (%s)", n->is_busy_to_string(&ss)); ss.reset(); @@ -2105,24 +3197,20 @@ } } - if (!on_exit) { - Thread::muxRelease(&gListLock); - } - out->print_cr("In-use per-thread monitor info:"); out->print_cr("(B -> is_busy, H -> has hash code, L -> lock status)"); - out->print_cr("%18s %18s %s %18s %18s", - "jt", "monitor", "BHL", "object", "object type"); - out->print_cr("================== ================== === ================== =================="); + out->print_cr("%18s %18s %s %7s %18s %18s", + "jt", "monitor", "BHL", "ref_cnt", "object", "object type"); + out->print_cr("================== ================== === ======= ================== =================="); for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { - for (ObjectMonitor* n = jt->om_in_use_list; n != NULL; n = n->_next_om) { + for (ObjectMonitor* n = OrderAccess::load_acquire(&jt->om_in_use_list); n != NULL; n = unmarked_next(n)) { const oop obj = (oop) n->object(); const markWord mark = n->header(); ResourceMark rm; - out->print(INTPTR_FORMAT " " INTPTR_FORMAT " %d%d%d " INTPTR_FORMAT - " %s", p2i(jt), p2i(n), n->is_busy() != 0, - mark.hash() != 0, n->owner() != NULL, p2i(obj), - obj->klass()->external_name()); + out->print(INTPTR_FORMAT " " INTPTR_FORMAT " %d%d%d %7d " + INTPTR_FORMAT " %s", p2i(jt), p2i(n), n->is_busy() != 0, + mark.hash() != 0, n->owner() != NULL, (int)n->ref_count(), + p2i(obj), obj->klass()->external_name()); if (n->is_busy() != 0) { out->print(" (%s)", n->is_busy_to_string(&ss)); ss.reset(); @@ -2138,12 +3226,19 @@ // the population count. 
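// log_monitor_list_counts() below sums the global in-use, free and (when
// HandshakeAfterDeflateIdleMonitors is set) wait list counts plus every
// thread's in-use and free counts, and the result is later compared against
// g_om_population. A hedged sketch of that accounting identity; the Counts
// struct and its field names are illustrative assumptions, not HotSpot code.

struct Counts {
  int g_in_use;        // global in-use list
  int g_free;          // global free list
  int g_wait;          // global wait list (deflated, awaiting handshake)
  int threads_in_use;  // sum of per-thread in-use counts
  int threads_free;    // sum of per-thread free counts
  bool count_wait;     // HandshakeAfterDeflateIdleMonitors
};

static int expected_population(const Counts& c) {
  int pop = c.g_in_use + c.g_free + c.threads_in_use + c.threads_free;
  if (c.count_wait) {
    pop += c.g_wait;   // Wait-listed monitors are still in circulation.
  }
  return pop;          // Should match g_om_population, modulo races.
}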
int ObjectSynchronizer::log_monitor_list_counts(outputStream * out) { int pop_count = 0; - out->print_cr("%18s %10s %10s %10s", - "Global Lists:", "InUse", "Free", "Total"); - out->print_cr("================== ========== ========== =========="); - out->print_cr("%18s %10d %10d %10d", "", - g_om_in_use_count, g_om_free_count, g_om_population); - pop_count += g_om_in_use_count + g_om_free_count; + out->print_cr("%18s %10s %10s %10s %10s", + "Global Lists:", "InUse", "Free", "Wait", "Total"); + out->print_cr("================== ========== ========== ========== =========="); + out->print_cr("%18s %10d %10d %10d %10d", "", + OrderAccess::load_acquire(&g_om_in_use_count), + OrderAccess::load_acquire(&g_om_free_count), + OrderAccess::load_acquire(&g_om_wait_count), + OrderAccess::load_acquire(&g_om_population)); + pop_count += OrderAccess::load_acquire(&g_om_in_use_count) + + OrderAccess::load_acquire(&g_om_free_count); + if (HandshakeAfterDeflateIdleMonitors) { + pop_count += OrderAccess::load_acquire(&g_om_wait_count); + } out->print_cr("%18s %10s %10s %10s", "Per-Thread Lists:", "InUse", "Free", "Provision"); @@ -2151,8 +3246,11 @@ for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { out->print_cr(INTPTR_FORMAT " %10d %10d %10d", p2i(jt), - jt->om_in_use_count, jt->om_free_count, jt->om_free_provision); - pop_count += jt->om_in_use_count + jt->om_free_count; + OrderAccess::load_acquire(&jt->om_in_use_count), + OrderAccess::load_acquire(&jt->om_free_count), + jt->om_free_provision); + pop_count += OrderAccess::load_acquire(&jt->om_in_use_count) + + OrderAccess::load_acquire(&jt->om_free_count); } return pop_count; } @@ -2174,7 +3272,8 @@ assert((diff % sizeof(PaddedObjectMonitor)) == 0, "must be aligned"); return 1; } - block = (PaddedObjectMonitor*)block->_next_om; + // unmarked_next() is not needed with g_block_list (no next field marking). + block = (PaddedObjectMonitor*)OrderAccess::load_acquire(&block->_next_om); } return 0; }
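// The block walk above decides pool membership by chasing the chain of
// allocation blocks, range-checking the monitor address and verifying that
// the offset is slot-aligned. A hedged, self-contained sketch of that
// containment test on a toy block type; Block, Mon and SLOTS are illustrative
// assumptions, not HotSpot code.

#include <cstdint>

struct Mon { Mon* next = nullptr; };

static const int SLOTS = 128;  // monitors per allocation block

struct Block {
  Mon    slots[SLOTS];  // slot 0 is reserved for block linkage
  Block* next_block = nullptr;
};

static bool in_pool(const Block* head, const Mon* m) {
  for (const Block* b = head; b != nullptr; b = b->next_block) {
    uintptr_t lo = (uintptr_t)&b->slots[1];      // first usable slot
    uintptr_t hi = (uintptr_t)&b->slots[SLOTS];  // one past the last slot
    uintptr_t p  = (uintptr_t)m;
    if (p >= lo && p < hi) {
      // The address must also be slot-aligned within the block.
      return ((p - (uintptr_t)&b->slots[0]) % sizeof(Mon)) == 0;
    }
  }
  return false;
}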