
src/hotspot/share/runtime/synchronizer.cpp

rev 56634 : imported patch 8230876.patch
rev 56635 : v2.00 -> v2.05 (CR5/v2.05/8-for-jdk13) patches combined into one; merge with 8229212.patch; merge with jdk-14+11; merge with 8230184.patch; merge with 8230876.patch; merge with jdk-14+15; merge with jdk-14+18.
rev 56636 : renames, comment cleanups and additions, whitespace and indent fixes; add PaddedObjectMonitor typedef to make 'PaddedEnd<ObjectMonitor' cleanups easier; add a couple of missing 'private' decls; delete unused next() function; merge pieces from dcubed.monitor_deflate_conc.v2.06d in dcubed.monitor_deflate_conc.v2.06[ac]; merge with 8229212.patch; merge with jdk-14+11; merge with 8230184.patch.
rev 56637 : Add OM_CACHE_LINE_SIZE so that ObjectMonitor cache line sizes can be experimented with independently of DEFAULT_CACHE_LINE_SIZE; for SPARC and X64 configs that use 128 for DEFAULT_CACHE_LINE_SIZE, we are experimenting with 64; move _previous_owner_tid and _allocation_state fields to share the cache line with ObjectMonitor::_header; put ObjectMonitor::_ref_count on its own cache line after _owner; add 'int* count_p' parameter to deflate_monitor_list() and deflate_monitor_list_using_JT() and push counter updates down to where the ObjectMonitors are actually removed from the in-use lists; monitors_iterate() async deflation check should use negative ref_count; add 'JavaThread* target' param to deflate_per_thread_idle_monitors_using_JT() add deflate_common_idle_monitors_using_JT() to make it clear which JavaThread* is the target of the work and which is the calling JavaThread* (self); g_free_list, g_om_in_use_list and g_om_in_use_count are now static to synchronizer.cpp (reduce scope); add more diagnostic info to some assert()'s; minor code cleanups and code motion; save_om_ptr() should detect a race with a deflating thread that is bailing out and cause a retry when the ref_count field is not positive; merge with jdk-14+11; add special GC support for TestHumongousClassLoader.java; merge with 8230184.patch; merge with jdk-14+14; merge with jdk-14+18.
rev 56638 : Merge the remainder of the lock-free monitor list changes from v2.06 with v2.06a and v2.06b after running the changes through the edit scripts; merge pieces from dcubed.monitor_deflate_conc.v2.06d in dcubed.monitor_deflate_conc.v2.06[ac]; merge pieces from dcubed.monitor_deflate_conc.v2.06e into dcubed.monitor_deflate_conc.v2.06c; merge with jdk-14+11; the test workaround for test/jdk/tools/jlink/multireleasejar/JLinkMultiReleaseJarTest.java should no longer be needed; merge with jdk-14+18.
rev 56639 : loosen a couple more counter checks due to races observed in testing; simplify om_release() extraction of mid since list head or cur_mid_in_use is marked; simplify deflate_monitor_list() extraction of mid since there are no parallel deleters due to the safepoint; simplify deflate_monitor_list_using_JT() extraction of mid since list head or cur_mid_in_use is marked; prepend_block_to_lists() - simplify based on David H's comments; does not need load_acquire() or release_store() because of the cmpxchg(); prepend_to_common() - simplify to use mark_next_loop() for m and use mark_list_head() and release_store() for the non-empty list case; add more debugging for "Non-balanced monitor enter/exit" failure mode; fix race in inflate() in the "CASE: neutral" code path; install_displaced_markword_in_object() does not need to clear the header field since that is handled when the ObjectMonitor is moved from the global free list; LSuccess should clear boxReg to set ICC.ZF=1 to avoid depending on existing boxReg contents; update fast_unlock() to detect when object no longer refers to the same ObjectMonitor and take fast path exit instead; clarify fast_lock() code where we detect when object no longer refers to the same ObjectMonitor; add/update comments for movptr() calls where we move a literal into an Address; remove set_owner(); refactor setting of owner field into set_owner_from(2 versions), set_owner_from_BasicLock(), and try_set_owner_from(); the new functions include monitorinflation+owner logging; extract debug code from v2.06 and v2.07 and move to v2.07.debug; change 'jccb' -> 'jcc' and 'jmpb' -> 'jmp' as needed; checkpoint initial version of MacroAssembler::inc_om_ref_count(); update LP64 MacroAssembler::fast_lock() and fast_unlock() to use inc_om_ref_count(); fast_lock() return flag setting logic can use 'testptr(tmpReg, tmpReg)' instead of 'cmpptr(tmpReg, 0)' since that's more efficient; fast_unlock() LSuccess return flag setting logic can use 'testl (boxReg, 0)' instead of 'xorptr(boxReg, boxReg)' since that's more efficient; cleanup "fast-path" vs "fast path" and "slow-path" vs "slow path"; update MacroAssembler::rtm_inflated_locking() to use inc_om_ref_count(); update MacroAssembler::fast_lock() to preserve the flags before decrementing ref_count and restore the flags afterwards; this is more clean than depending on the contents of rax/tmpReg; coleenp CR - refactor async monitor deflation work from ServiceThread::service_thread_entry() to ObjectSynchronizer::deflate_idle_monitors_using_JT(); rehn,eosterlund CR - add support for HandshakeAfterDeflateIdleMonitors for platforms that don't have ObjectMonitor ref_count support implemented in C2 fast_lock() and fast_unlock().

*** 35,44 ****
--- 35,45 ----
  #include "oops/markWord.hpp"
  #include "oops/oop.inline.hpp"
  #include "runtime/atomic.hpp"
  #include "runtime/biasedLocking.hpp"
  #include "runtime/handles.inline.hpp"
+ #include "runtime/handshake.hpp"
  #include "runtime/interfaceSupport.inline.hpp"
  #include "runtime/mutexLocker.hpp"
  #include "runtime/objectMonitor.hpp"
  #include "runtime/objectMonitor.inline.hpp"
  #include "runtime/osThread.hpp"
*** 116,140 **** #define NINFLATIONLOCKS 256 static volatile intptr_t gInflationLocks[NINFLATIONLOCKS]; // global list of blocks of monitors PaddedObjectMonitor* volatile ObjectSynchronizer::g_block_list = NULL; // Global ObjectMonitor free list. Newly allocated and deflated // ObjectMonitors are prepended here. ! ObjectMonitor* volatile ObjectSynchronizer::g_free_list = NULL; // Global ObjectMonitor in-use list. When a JavaThread is exiting, // ObjectMonitors on its per-thread in-use list are prepended here. ! ObjectMonitor* volatile ObjectSynchronizer::g_om_in_use_list = NULL; ! int ObjectSynchronizer::g_om_in_use_count = 0; // # on g_om_in_use_list - static volatile intptr_t gListLock = 0; // protects global monitor lists static volatile int g_om_free_count = 0; // # on g_free_list static volatile int g_om_population = 0; // # Extant -- in circulation #define CHAINMARKER (cast_to_oop<intptr_t>(-1)) // =====================> Quick functions // The quick_* forms are special fast-path variants used to improve // performance. In the simplest case, a "quick_*" implementation could // simply return false, in which case the caller will perform the necessary --- 117,389 ---- #define NINFLATIONLOCKS 256 static volatile intptr_t gInflationLocks[NINFLATIONLOCKS]; // global list of blocks of monitors PaddedObjectMonitor* volatile ObjectSynchronizer::g_block_list = NULL; + bool volatile ObjectSynchronizer::_is_async_deflation_requested = false; + bool volatile ObjectSynchronizer::_is_special_deflation_requested = false; + jlong ObjectSynchronizer::_last_async_deflation_time_ns = 0; + // Global ObjectMonitor free list. Newly allocated and deflated // ObjectMonitors are prepended here. ! static ObjectMonitor* volatile g_free_list = NULL; // Global ObjectMonitor in-use list. When a JavaThread is exiting, // ObjectMonitors on its per-thread in-use list are prepended here. ! static ObjectMonitor* volatile g_om_in_use_list = NULL; ! // Global ObjectMonitor wait list. If HandshakeAfterDeflateIdleMonitors ! // is true, deflated ObjectMonitors wait on this list until after a ! // handshake or a safepoint for platforms that don't support handshakes. ! // After the handshake or safepoint, the deflated ObjectMonitors are ! // prepended to g_free_list. ! static ObjectMonitor* volatile g_wait_list = NULL; static volatile int g_om_free_count = 0; // # on g_free_list + static volatile int g_om_in_use_count = 0; // # on g_om_in_use_list static volatile int g_om_population = 0; // # Extant -- in circulation + static volatile int g_om_wait_count = 0; // # on g_wait_list #define CHAINMARKER (cast_to_oop<intptr_t>(-1)) + // =====================> List Management functions + + // Return true if the ObjectMonitor's next field is marked. + // Otherwise returns false. + static bool is_next_marked(ObjectMonitor* om) { + return ((intptr_t)OrderAccess::load_acquire(&om->_next_om) & 0x1) != 0; + } + + // Mark an ObjectMonitor* and return it. Note: the om parameter + // may or may not have been marked originally. + static ObjectMonitor* mark_om_ptr(ObjectMonitor* om) { + return (ObjectMonitor*)((intptr_t)om | 0x1); + } + + // Mark the next field in an ObjectMonitor. If marking was successful, + // then the unmarked next field is returned via parameter and true is + // returned. Otherwise false is returned. + static bool mark_next(ObjectMonitor* om, ObjectMonitor** next_p) { + // Get current next field without any marking value. 
+ ObjectMonitor* next = (ObjectMonitor*) + ((intptr_t)OrderAccess::load_acquire(&om->_next_om) & ~0x1); + if (Atomic::cmpxchg(mark_om_ptr(next), &om->_next_om, next) != next) { + return false; // Could not mark the next field or it was already marked. + } + *next_p = next; + return true; + } + + // Loop until we mark the next field in an ObjectMonitor. The unmarked + // next field is returned. + static ObjectMonitor* mark_next_loop(ObjectMonitor* om) { + ObjectMonitor* next; + while (true) { + if (mark_next(om, &next)) { + // Marked om's next field so return the unmarked value. + return next; + } + } + } + + // Set the next field in an ObjectMonitor to the specified value. + // The caller of set_next() must be the same thread that marked the + // ObjectMonitor. + static void set_next(ObjectMonitor* om, ObjectMonitor* value) { + OrderAccess::release_store(&om->_next_om, value); + } + + // Mark the next field in the list head ObjectMonitor. If marking was + // successful, then the mid and the unmarked next field are returned + // via parameter and true is returned. Otherwise false is returned. + static bool mark_list_head(ObjectMonitor* volatile * list_p, + ObjectMonitor** mid_p, ObjectMonitor** next_p) { + while (true) { + ObjectMonitor* mid = OrderAccess::load_acquire(list_p); + if (mid == NULL) { + return false; // The list is empty so nothing to mark. + } + if (mark_next(mid, next_p)) { + if (OrderAccess::load_acquire(list_p) != mid) { + // The list head changed so we have to retry. + set_next(mid, *next_p); // unmark mid + continue; + } + // We marked next field to guard against races. + *mid_p = mid; + return true; + } + } + } + + // Return the unmarked next field in an ObjectMonitor. Note: the next + // field may or may not have been marked originally. + static ObjectMonitor* unmarked_next(ObjectMonitor* om) { + return (ObjectMonitor*)((intptr_t)OrderAccess::load_acquire(&om->_next_om) & ~0x1); + } + + // Prepend a list of ObjectMonitors to the specified *list_p. 'tail' is + // the last ObjectMonitor in the list and there are 'count' on the list. + // Also updates the specified *count_p. + static void prepend_list_to_common(ObjectMonitor* list, ObjectMonitor* tail, + int count, ObjectMonitor* volatile* list_p, + volatile int* count_p) { + while (true) { + ObjectMonitor* cur = OrderAccess::load_acquire(list_p); + // Prepend list to *list_p. + ObjectMonitor* next = NULL; + if (!mark_next(tail, &next)) { + continue; // failed to mark next field so try it all again + } + set_next(tail, cur); // tail now points to cur (and unmarks tail) + if (cur == NULL) { + // No potential race with takers or other prependers since + // *list_p is empty. + if (Atomic::cmpxchg(list, list_p, cur) == cur) { + // Successfully switched *list_p to the list value. + Atomic::add(count, count_p); + break; + } + // Implied else: try it all again + } else { + // Try to mark next field to guard against races: + if (!mark_next(cur, &next)) { + continue; // failed to mark next field so try it all again + } + // We marked the next field so try to switch *list_p to the list value. + if (Atomic::cmpxchg(list, list_p, cur) != cur) { + // The list head has changed so unmark the next field and try again: + set_next(cur, next); + continue; + } + Atomic::add(count, count_p); + set_next(cur, next); // unmark next field + break; + } + } + } + + // Prepend a newly allocated block of ObjectMonitors to g_block_list and + // g_free_list. Also updates g_om_population and g_om_free_count. 
+ void ObjectSynchronizer::prepend_block_to_lists(PaddedObjectMonitor* new_blk) { + // First we handle g_block_list: + while (true) { + PaddedObjectMonitor* cur = g_block_list; + // Prepend new_blk to g_block_list. The first ObjectMonitor in + // a block is reserved for use as linkage to the next block. + new_blk[0]._next_om = cur; + if (Atomic::cmpxchg(new_blk, &g_block_list, cur) == cur) { + // Successfully switched g_block_list to the new_blk value. + Atomic::add(_BLOCKSIZE - 1, &g_om_population); + break; + } + // Implied else: try it all again + } + + // Second we handle g_free_list: + prepend_list_to_common(new_blk + 1, &new_blk[_BLOCKSIZE - 1], _BLOCKSIZE - 1, + &g_free_list, &g_om_free_count); + } + + // Prepend a list of ObjectMonitors to g_free_list. 'tail' is the last + // ObjectMonitor in the list and there are 'count' on the list. Also + // updates g_om_free_count. + static void prepend_list_to_g_free_list(ObjectMonitor* list, + ObjectMonitor* tail, int count) { + prepend_list_to_common(list, tail, count, &g_free_list, &g_om_free_count); + } + + // Prepend a list of ObjectMonitors to g_wait_list. 'tail' is the last + // ObjectMonitor in the list and there are 'count' on the list. Also + // updates g_om_wait_count. + static void prepend_list_to_g_wait_list(ObjectMonitor* list, + ObjectMonitor* tail, int count) { + assert(HandshakeAfterDeflateIdleMonitors, "sanity check"); + prepend_list_to_common(list, tail, count, &g_wait_list, &g_om_wait_count); + } + + // Prepend a list of ObjectMonitors to g_om_in_use_list. 'tail' is the last + // ObjectMonitor in the list and there are 'count' on the list. Also + // updates g_om_in_use_list. + static void prepend_list_to_g_om_in_use_list(ObjectMonitor* list, + ObjectMonitor* tail, int count) { + prepend_list_to_common(list, tail, count, &g_om_in_use_list, &g_om_in_use_count); + } + + // Prepend an ObjectMonitor to the specified list. Also updates + // the specified counter. + static void prepend_to_common(ObjectMonitor* m, ObjectMonitor* volatile * list_p, + int volatile * count_p) { + while (true) { + (void)mark_next_loop(m); // mark m so we can safely update its next field + ObjectMonitor* cur = NULL; + ObjectMonitor* next = NULL; + // Mark the list head to guard against A-B-A race: + if (mark_list_head(list_p, &cur, &next)) { + // List head is now marked so we can safely switch it. + set_next(m, cur); // m now points to cur (and unmarks m) + OrderAccess::release_store(list_p, m); // Switch list head to unmarked m. + set_next(cur, next); // Unmark the previous list head. + break; + } + // The list is empty so try to set the list head. + assert(cur == NULL, "cur must be NULL: cur=" INTPTR_FORMAT, p2i(cur)); + set_next(m, cur); // m now points to NULL (and unmarks m) + if (Atomic::cmpxchg(m, list_p, cur) == cur) { + // List head is now unmarked m. + break; + } + // Implied else: try it all again + } + Atomic::inc(count_p); + } + + // Prepend an ObjectMonitor to a per-thread om_free_list. + // Also updates the per-thread om_free_count. + static void prepend_to_om_free_list(Thread* self, ObjectMonitor* m) { + prepend_to_common(m, &self->om_free_list, &self->om_free_count); + } + + // Prepend an ObjectMonitor to a per-thread om_in_use_list. + // Also updates the per-thread om_in_use_count. + static void prepend_to_om_in_use_list(Thread* self, ObjectMonitor* m) { + prepend_to_common(m, &self->om_in_use_list, &self->om_in_use_count); + } + + // Take an ObjectMonitor from the start of the specified list. 
Also + // decrements the specified counter. Returns NULL if none are available. + static ObjectMonitor* take_from_start_of_common(ObjectMonitor* volatile * list_p, + int volatile * count_p) { + ObjectMonitor* next = NULL; + ObjectMonitor* take = NULL; + // Mark the list head to guard against A-B-A race: + if (!mark_list_head(list_p, &take, &next)) { + return NULL; // None are available. + } + // Switch marked list head to next (which unmarks the list head, but + // leaves take marked): + OrderAccess::release_store(list_p, next); + Atomic::dec(count_p); + // Unmark take, but leave the next value for any lagging list + // walkers. It will get cleaned up when take is prepended to + // the in-use list: + set_next(take, next); + return take; + } + + // Take an ObjectMonitor from the start of the global free-list. Also + // updates g_om_free_count. Returns NULL if none are available. + static ObjectMonitor* take_from_start_of_g_free_list() { + return take_from_start_of_common(&g_free_list, &g_om_free_count); + } + + // Take an ObjectMonitor from the start of a per-thread free-list. + // Also updates om_free_count. Returns NULL if none are available. + static ObjectMonitor* take_from_start_of_om_free_list(Thread* self) { + return take_from_start_of_common(&self->om_free_list, &self->om_free_count); + } + + // =====================> Quick functions // The quick_* forms are special fast-path variants used to improve // performance. In the simplest case, a "quick_*" implementation could // simply return false, in which case the caller will perform the necessary
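The list-management helpers introduced above treat the low bit of an ObjectMonitor's _next_om field as a mark (in effect a per-node lock) so that list heads and next pointers can be updated without a global list lock. A minimal, self-contained sketch of that marking idea, using std::atomic and stand-in types rather than the HotSpot OrderAccess/Atomic primitives, might look like:

    #include <atomic>
    #include <cstdint>

    struct Node {
      std::atomic<Node*> next{nullptr};
    };

    static Node* mark_ptr(Node* p) {
      return reinterpret_cast<Node*>(reinterpret_cast<uintptr_t>(p) | 0x1);
    }

    static Node* unmark_ptr(Node* p) {
      return reinterpret_cast<Node*>(reinterpret_cast<uintptr_t>(p) & ~uintptr_t(0x1));
    }

    // Try to mark (lock) n's next field. On success the unmarked successor is
    // returned via *next_out and true is returned; on failure (another thread
    // holds the mark, or next changed) false is returned and the caller retries.
    static bool try_mark_next(Node* n, Node** next_out) {
      Node* next = unmark_ptr(n->next.load(std::memory_order_acquire));
      Node* expected = next;
      if (!n->next.compare_exchange_strong(expected, mark_ptr(next))) {
        return false;
      }
      *next_out = next;
      return true;
    }

    // Storing a new (unmarked) value both updates the link and releases the mark.
    static void set_next(Node* n, Node* value) {
      n->next.store(value, std::memory_order_release);
    }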
*** 209,222 **** assert(!SafepointSynchronize::is_at_safepoint(), "invariant"); assert(self->is_Java_thread(), "invariant"); assert(((JavaThread *) self)->thread_state() == _thread_in_Java, "invariant"); NoSafepointVerifier nsv; if (obj == NULL) return false; // Need to throw NPE const markWord mark = obj->mark(); if (mark.has_monitor()) { ! ObjectMonitor* const m = mark.monitor(); assert(m->object() == obj, "invariant"); Thread* const owner = (Thread *) m->_owner; // Lock contention and Transactional Lock Elision (TLE) diagnostics // and observability --- 458,479 ---- assert(!SafepointSynchronize::is_at_safepoint(), "invariant"); assert(self->is_Java_thread(), "invariant"); assert(((JavaThread *) self)->thread_state() == _thread_in_Java, "invariant"); NoSafepointVerifier nsv; if (obj == NULL) return false; // Need to throw NPE + + while (true) { const markWord mark = obj->mark(); if (mark.has_monitor()) { ! ObjectMonitorHandle omh; ! if (!omh.save_om_ptr(obj, mark)) { ! // Lost a race with async deflation so try again. ! assert(AsyncDeflateIdleMonitors, "sanity check"); ! continue; ! } ! ObjectMonitor* const m = omh.om_ptr(); assert(m->object() == obj, "invariant"); Thread* const owner = (Thread *) m->_owner; // Lock contention and Transactional Lock Elision (TLE) diagnostics // and observability
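In the quick_enter() change above, save_om_ptr() succeeds only if it can pin the ObjectMonitor by bumping its ref_count before async deflation drives the count negative; otherwise the caller re-reads the mark word and retries. A simplified, self-contained sketch of that pin/unpin protocol (stand-in types, not the actual ObjectMonitorHandle code) could be:

    #include <atomic>

    struct Monitor {
      std::atomic<int> ref_count{0};
    };

    // Returns true if the monitor was pinned (ref_count incremented while it
    // was still non-negative). Returns false if async deflation already made
    // the count negative; the increment is backed out and the caller should
    // re-read the object's mark word and retry.
    bool try_pin(Monitor* m) {
      int after = m->ref_count.fetch_add(1) + 1;
      if (after <= 0) {
        m->ref_count.fetch_sub(1);   // back out; deflation won the race
        return false;
      }
      return true;
    }

    void unpin(Monitor* m) {
      m->ref_count.fetch_sub(1);     // done with the pinned monitor
    }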
*** 238,252 **** // stack-locking in the object's header, the third check is for // recursive stack-locking in the displaced header in the BasicLock, // and last are the inflated Java Monitor (ObjectMonitor) checks. lock->set_displaced_header(markWord::unused_mark()); ! if (owner == NULL && Atomic::replace_if_null(self, &(m->_owner))) { assert(m->_recursions == 0, "invariant"); return true; } } // Note that we could inflate in quick_enter. // This is likely a useful optimization // Critically, in quick_enter() we must not: // -- perform bias revocation, or --- 495,521 ---- // stack-locking in the object's header, the third check is for // recursive stack-locking in the displaced header in the BasicLock, // and last are the inflated Java Monitor (ObjectMonitor) checks. lock->set_displaced_header(markWord::unused_mark()); ! if (owner == NULL && m->try_set_owner_from(self, NULL) == NULL) { ! assert(m->_recursions == 0, "invariant"); ! return true; ! } ! ! if (AsyncDeflateIdleMonitors && ! m->try_set_owner_from(self, DEFLATER_MARKER) == DEFLATER_MARKER) { ! // The deflation protocol finished the first part (setting owner), ! // but it failed the second part (making ref_count negative) and ! // bailed. Or the ObjectMonitor was async deflated and reused. ! // Acquired the monitor. assert(m->_recursions == 0, "invariant"); return true; } } + break; + } // Note that we could inflate in quick_enter. // This is likely a useful optimization // Critically, in quick_enter() we must not: // -- perform bias revocation, or
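quick_enter() now uses try_set_owner_from() both for the NULL to self transition and for taking over from DEFLATER_MARKER when the deflater bailed out. The essence is a compare-and-swap on the owner field that reports the previously observed value; a simplified stand-alone sketch (parameter order and names here are illustrative, not necessarily the exact ObjectMonitor signature) is:

    #include <atomic>

    struct Monitor {
      std::atomic<void*> owner{nullptr};
    };

    // Atomically set owner to new_value only if it currently equals expected.
    // The observed value is returned, so the caller can test
    // "try_set_owner_from(...) == expected" to see whether it won the race.
    void* try_set_owner_from(Monitor* m, void* new_value, void* expected) {
      void* observed = expected;
      m->owner.compare_exchange_strong(observed, new_value);
      return observed;
    }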
*** 293,303 ****
    // The object header will never be displaced to this lock,
    // so it does not matter what the value is, except that it
    // must be non-zero to avoid looking like a re-entrant lock,
    // and must not look locked either.
    lock->set_displaced_header(markWord::unused_mark());
!   inflate(THREAD, obj(), inflate_cause_monitor_enter)->enter(THREAD);
  }
  
  void ObjectSynchronizer::exit(oop object, BasicLock* lock, TRAPS) {
    markWord mark = object->mark();
    // We cannot check for Biased Locking if we are racing an inflation.
--- 562,574 ----
    // The object header will never be displaced to this lock,
    // so it does not matter what the value is, except that it
    // must be non-zero to avoid looking like a re-entrant lock,
    // and must not look locked either.
    lock->set_displaced_header(markWord::unused_mark());
!   ObjectMonitorHandle omh;
!   inflate(&omh, THREAD, obj(), inflate_cause_monitor_enter);
!   omh.om_ptr()->enter(THREAD);
  }
  
  void ObjectSynchronizer::exit(oop object, BasicLock* lock, TRAPS) {
    markWord mark = object->mark();
    // We cannot check for Biased Locking if we are racing an inflation.
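enter() above, and the call sites in the hunks that follow, keep the inflated monitor alive across the call by holding an ObjectMonitorHandle for the duration of the operation. Conceptually this is an RAII guard around the pin/unpin protocol sketched earlier; a minimal stand-in version (not the HotSpot class) could be:

    #include <atomic>

    struct Monitor {
      std::atomic<int> ref_count{0};
    };

    // While a guard is alive the monitor's ref_count stays elevated, which the
    // async deflater treats as "in use" and therefore skips.
    class MonitorGuard {
      Monitor* _m;
     public:
      explicit MonitorGuard(Monitor* m) : _m(m) { _m->ref_count.fetch_add(1); }
      ~MonitorGuard() { _m->ref_count.fetch_sub(1); }
      Monitor* ptr() const { return _m; }
    };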
*** 342,352 **** return; } } // We have to take the slow-path of possible inflation and then exit. ! inflate(THREAD, object, inflate_cause_vm_internal)->exit(true, THREAD); } // ----------------------------------------------------------------------------- // Class Loader support to workaround deadlocks on the class loader lock objects // Also used by GC --- 613,625 ---- return; } } // We have to take the slow-path of possible inflation and then exit. ! ObjectMonitorHandle omh; ! inflate(&omh, THREAD, object, inflate_cause_vm_internal); ! omh.om_ptr()->exit(true, THREAD); } // ----------------------------------------------------------------------------- // Class Loader support to workaround deadlocks on the class loader lock objects // Also used by GC
*** 363,387 **** if (UseBiasedLocking) { BiasedLocking::revoke(obj, THREAD); assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } ! ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_vm_internal); ! ! return monitor->complete_exit(THREAD); } // NOTE: must use heavy weight monitor to handle complete_exit/reenter() void ObjectSynchronizer::reenter(Handle obj, intptr_t recursion, TRAPS) { if (UseBiasedLocking) { BiasedLocking::revoke(obj, THREAD); assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } ! ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_vm_internal); ! ! monitor->reenter(recursion, THREAD); } // ----------------------------------------------------------------------------- // JNI locks on java objects // NOTE: must use heavy weight monitor to handle jni monitor enter void ObjectSynchronizer::jni_enter(Handle obj, TRAPS) { --- 636,661 ---- if (UseBiasedLocking) { BiasedLocking::revoke(obj, THREAD); assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } ! ObjectMonitorHandle omh; ! inflate(&omh, THREAD, obj(), inflate_cause_vm_internal); ! intptr_t ret_code = omh.om_ptr()->complete_exit(THREAD); ! return ret_code; } // NOTE: must use heavy weight monitor to handle complete_exit/reenter() void ObjectSynchronizer::reenter(Handle obj, intptr_t recursion, TRAPS) { if (UseBiasedLocking) { BiasedLocking::revoke(obj, THREAD); assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } ! ObjectMonitorHandle omh; ! inflate(&omh, THREAD, obj(), inflate_cause_vm_internal); ! omh.om_ptr()->reenter(recursion, THREAD); } // ----------------------------------------------------------------------------- // JNI locks on java objects // NOTE: must use heavy weight monitor to handle jni monitor enter void ObjectSynchronizer::jni_enter(Handle obj, TRAPS) {
*** 389,399 **** if (UseBiasedLocking) { BiasedLocking::revoke(obj, THREAD); assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } THREAD->set_current_pending_monitor_is_from_java(false); ! inflate(THREAD, obj(), inflate_cause_jni_enter)->enter(THREAD); THREAD->set_current_pending_monitor_is_from_java(true); } // NOTE: must use heavy weight monitor to handle jni monitor exit void ObjectSynchronizer::jni_exit(oop obj, Thread* THREAD) { --- 663,675 ---- if (UseBiasedLocking) { BiasedLocking::revoke(obj, THREAD); assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } THREAD->set_current_pending_monitor_is_from_java(false); ! ObjectMonitorHandle omh; ! inflate(&omh, THREAD, obj(), inflate_cause_jni_enter); ! omh.om_ptr()->enter(THREAD); THREAD->set_current_pending_monitor_is_from_java(true); } // NOTE: must use heavy weight monitor to handle jni monitor exit void ObjectSynchronizer::jni_exit(oop obj, Thread* THREAD) {
*** 402,412 **** BiasedLocking::revoke(h_obj, THREAD); obj = h_obj(); } assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); ! ObjectMonitor* monitor = inflate(THREAD, obj, inflate_cause_jni_exit); // If this thread has locked the object, exit the monitor. We // intentionally do not use CHECK here because we must exit the // monitor even if an exception is pending. if (monitor->check_owner(THREAD)) { monitor->exit(true, THREAD); --- 678,690 ---- BiasedLocking::revoke(h_obj, THREAD); obj = h_obj(); } assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); ! ObjectMonitorHandle omh; ! inflate(&omh, THREAD, obj, inflate_cause_jni_exit); ! ObjectMonitor* monitor = omh.om_ptr(); // If this thread has locked the object, exit the monitor. We // intentionally do not use CHECK here because we must exit the // monitor even if an exception is pending. if (monitor->check_owner(THREAD)) { monitor->exit(true, THREAD);
*** 443,473 **** assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } if (millis < 0) { THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(), "timeout value is negative"); } ! ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_wait); DTRACE_MONITOR_WAIT_PROBE(monitor, obj(), THREAD, millis); monitor->wait(millis, true, THREAD); // This dummy call is in place to get around dtrace bug 6254741. Once // that's fixed we can uncomment the following line, remove the call // and change this function back into a "void" func. // DTRACE_MONITOR_PROBE(waited, monitor, obj(), THREAD); ! return dtrace_waited_probe(monitor, obj, THREAD); } void ObjectSynchronizer::wait_uninterruptibly(Handle obj, jlong millis, TRAPS) { if (UseBiasedLocking) { BiasedLocking::revoke(obj, THREAD); assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } if (millis < 0) { THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), "timeout value is negative"); } ! inflate(THREAD, obj(), inflate_cause_wait)->wait(millis, false, THREAD); } void ObjectSynchronizer::notify(Handle obj, TRAPS) { if (UseBiasedLocking) { BiasedLocking::revoke(obj, THREAD); --- 721,756 ---- assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } if (millis < 0) { THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(), "timeout value is negative"); } ! ObjectMonitorHandle omh; ! inflate(&omh, THREAD, obj(), inflate_cause_wait); ! ObjectMonitor* monitor = omh.om_ptr(); DTRACE_MONITOR_WAIT_PROBE(monitor, obj(), THREAD, millis); monitor->wait(millis, true, THREAD); // This dummy call is in place to get around dtrace bug 6254741. Once // that's fixed we can uncomment the following line, remove the call // and change this function back into a "void" func. // DTRACE_MONITOR_PROBE(waited, monitor, obj(), THREAD); ! int ret_code = dtrace_waited_probe(monitor, obj, THREAD); ! return ret_code; } void ObjectSynchronizer::wait_uninterruptibly(Handle obj, jlong millis, TRAPS) { if (UseBiasedLocking) { BiasedLocking::revoke(obj, THREAD); assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now"); } if (millis < 0) { THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), "timeout value is negative"); } ! ObjectMonitorHandle omh; ! inflate(&omh, THREAD, obj(), inflate_cause_wait); ! omh.om_ptr()->wait(millis, false, THREAD); } void ObjectSynchronizer::notify(Handle obj, TRAPS) { if (UseBiasedLocking) { BiasedLocking::revoke(obj, THREAD);
*** 476,486 **** markWord mark = obj->mark(); if (mark.has_locker() && THREAD->is_lock_owned((address)mark.locker())) { return; } ! inflate(THREAD, obj(), inflate_cause_notify)->notify(THREAD); } // NOTE: see comment of notify() void ObjectSynchronizer::notifyall(Handle obj, TRAPS) { if (UseBiasedLocking) { --- 759,771 ---- markWord mark = obj->mark(); if (mark.has_locker() && THREAD->is_lock_owned((address)mark.locker())) { return; } ! ObjectMonitorHandle omh; ! inflate(&omh, THREAD, obj(), inflate_cause_notify); ! omh.om_ptr()->notify(THREAD); } // NOTE: see comment of notify() void ObjectSynchronizer::notifyall(Handle obj, TRAPS) { if (UseBiasedLocking) {
*** 490,500 **** markWord mark = obj->mark(); if (mark.has_locker() && THREAD->is_lock_owned((address)mark.locker())) { return; } ! inflate(THREAD, obj(), inflate_cause_notify)->notifyAll(THREAD); } // ----------------------------------------------------------------------------- // Hash Code handling // --- 775,787 ---- markWord mark = obj->mark(); if (mark.has_locker() && THREAD->is_lock_owned((address)mark.locker())) { return; } ! ObjectMonitorHandle omh; ! inflate(&omh, THREAD, obj(), inflate_cause_notify); ! omh.om_ptr()->notifyAll(THREAD); } // ----------------------------------------------------------------------------- // Hash Code handling //
*** 515,533 **** // As a general policy we use "volatile" to control compiler-based reordering // and explicit fences (barriers) to control for architectural reordering // performed by the CPU(s) or platform. struct SharedGlobals { ! char _pad_prefix[DEFAULT_CACHE_LINE_SIZE]; // These are highly shared mostly-read variables. // To avoid false-sharing they need to be the sole occupants of a cache line. volatile int stw_random; volatile int stw_cycle; ! DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int) * 2); // Hot RW variable -- Sequester to avoid false-sharing volatile int hc_sequence; ! DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile int)); }; static SharedGlobals GVars; static int MonitorScavengeThreshold = 1000000; static volatile int ForceMonitorScavenge = 0; // Scavenge required and pending --- 802,820 ---- // As a general policy we use "volatile" to control compiler-based reordering // and explicit fences (barriers) to control for architectural reordering // performed by the CPU(s) or platform. struct SharedGlobals { ! char _pad_prefix[OM_CACHE_LINE_SIZE]; // These are highly shared mostly-read variables. // To avoid false-sharing they need to be the sole occupants of a cache line. volatile int stw_random; volatile int stw_cycle; ! DEFINE_PAD_MINUS_SIZE(1, OM_CACHE_LINE_SIZE, sizeof(volatile int) * 2); // Hot RW variable -- Sequester to avoid false-sharing volatile int hc_sequence; ! DEFINE_PAD_MINUS_SIZE(2, OM_CACHE_LINE_SIZE, sizeof(volatile int)); }; static SharedGlobals GVars; static int MonitorScavengeThreshold = 1000000; static volatile int ForceMonitorScavenge = 0; // Scavenge required and pending
*** 684,693 **** --- 971,981 ---- assert(Universe::verify_in_progress() || DumpSharedSpaces || self->is_Java_thread() , "invariant"); assert(Universe::verify_in_progress() || DumpSharedSpaces || ((JavaThread *)self)->thread_state() != _thread_blocked, "invariant"); + while (true) { ObjectMonitor* monitor = NULL; markWord temp, test; intptr_t hash; markWord mark = read_stable_mark(obj);
*** 708,718 **** } // If atomic operation failed, we must inflate the header // into heavy weight monitor. We could add more code here // for fast path, but it does not worth the complexity. } else if (mark.has_monitor()) { ! monitor = mark.monitor(); temp = monitor->header(); assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); hash = temp.hash(); if (hash != 0) { return hash; --- 996,1012 ---- } // If atomic operation failed, we must inflate the header // into heavy weight monitor. We could add more code here // for fast path, but it does not worth the complexity. } else if (mark.has_monitor()) { ! ObjectMonitorHandle omh; ! if (!omh.save_om_ptr(obj, mark)) { ! // Lost a race with async deflation so try again. ! assert(AsyncDeflateIdleMonitors, "sanity check"); ! continue; ! } ! monitor = omh.om_ptr(); temp = monitor->header(); assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); hash = temp.hash(); if (hash != 0) { return hash;
*** 734,744 **** // during an inflate() call so any change to that stack memory // may not propagate to other threads correctly. } // Inflate the monitor to set hash code ! monitor = inflate(self, obj, inflate_cause_hash_code); // Load displaced header and check it has hash code mark = monitor->header(); assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value()); hash = mark.hash(); if (hash == 0) { --- 1028,1040 ---- // during an inflate() call so any change to that stack memory // may not propagate to other threads correctly. } // Inflate the monitor to set hash code ! ObjectMonitorHandle omh; ! inflate(&omh, self, obj, inflate_cause_hash_code); ! monitor = omh.om_ptr(); // Load displaced header and check it has hash code mark = monitor->header(); assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value()); hash = mark.hash(); if (hash == 0) {
*** 750,766 **** --- 1046,1067 ---- if (test != mark) { // The only non-deflation update to the ObjectMonitor's // header/dmw field is to merge in the hash code. If someone // adds a new usage of the header/dmw field, please update // this code. + // ObjectMonitor::install_displaced_markword_in_object() + // does mark the header/dmw field as part of async deflation, + // but that protocol cannot happen now due to the + // ObjectMonitorHandle above. hash = test.hash(); assert(test.is_neutral(), "invariant: header=" INTPTR_FORMAT, test.value()); assert(hash != 0, "Trivial unexpected object/monitor header usage."); } } // We finally get the hash return hash; + } } // Deprecated -- use FastHashCode() instead. intptr_t ObjectSynchronizer::identity_hash_value_for(Handle obj) {
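As the new comment in the FastHashCode() hunk above notes, the only non-deflation update to the monitor's header/dmw field is merging in the hash code: the hash is installed with a compare-and-swap, and if another thread installs a hash first, that value wins and is returned. Stripped of the markWord details, the idiom looks like the following sketch (the bit layout here is made up purely for illustration):

    #include <atomic>
    #include <cstdint>

    constexpr uint64_t kHashShift = 8;              // illustrative layout only
    constexpr uint64_t kHashMask  = 0x1FFFFFFFFFFULL;

    // Install 'hash' into *header if no hash is present yet; return the hash
    // that ends up in the header (ours, or the one a racing thread installed).
    uint64_t merge_hash(std::atomic<uint64_t>* header, uint64_t hash) {
      uint64_t old_word = header->load();
      uint64_t new_word = old_word | ((hash & kHashMask) << kHashShift);
      if (header->compare_exchange_strong(old_word, new_word)) {
        return hash;
      }
      // CAS failed: old_word now holds the current header; in this simplified
      // model the only concurrent update is another thread's hash install.
      return (old_word >> kHashShift) & kHashMask;
    }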
*** 776,799 **** } assert(thread == JavaThread::current(), "Can only be called on current thread"); oop obj = h_obj(); markWord mark = read_stable_mark(obj); // Uncontended case, header points to stack if (mark.has_locker()) { return thread->is_lock_owned((address)mark.locker()); } // Contended case, header points to ObjectMonitor (tagged pointer) if (mark.has_monitor()) { ! ObjectMonitor* monitor = mark.monitor(); ! return monitor->is_entered(thread) != 0; } // Unlocked case, header in place assert(mark.is_neutral(), "sanity check"); return false; } // Be aware of this method could revoke bias of the lock object. // This method queries the ownership of the lock handle specified by 'h_obj'. // If the current thread owns the lock, it returns owner_self. If no --- 1077,1108 ---- } assert(thread == JavaThread::current(), "Can only be called on current thread"); oop obj = h_obj(); + while (true) { markWord mark = read_stable_mark(obj); // Uncontended case, header points to stack if (mark.has_locker()) { return thread->is_lock_owned((address)mark.locker()); } // Contended case, header points to ObjectMonitor (tagged pointer) if (mark.has_monitor()) { ! ObjectMonitorHandle omh; ! if (!omh.save_om_ptr(obj, mark)) { ! // Lost a race with async deflation so try again. ! assert(AsyncDeflateIdleMonitors, "sanity check"); ! continue; ! } ! bool ret_code = omh.om_ptr()->is_entered(thread) != 0; ! return ret_code; } // Unlocked case, header in place assert(mark.is_neutral(), "sanity check"); return false; + } } // Be aware of this method could revoke bias of the lock object. // This method queries the ownership of the lock handle specified by 'h_obj'. // If the current thread owns the lock, it returns owner_self. If no
*** 815,845 **** "biases should be revoked by now"); } assert(self == JavaThread::current(), "Can only be called on current thread"); oop obj = h_obj(); markWord mark = read_stable_mark(obj); // CASE: stack-locked. Mark points to a BasicLock on the owner's stack. if (mark.has_locker()) { return self->is_lock_owned((address)mark.locker()) ? owner_self : owner_other; } // CASE: inflated. Mark (tagged pointer) points to an ObjectMonitor. // The Object:ObjectMonitor relationship is stable as long as we're ! // not at a safepoint. if (mark.has_monitor()) { ! void* owner = mark.monitor()->_owner; if (owner == NULL) return owner_none; return (owner == self || self->is_lock_owned((address)owner)) ? owner_self : owner_other; } // CASE: neutral assert(mark.is_neutral(), "sanity check"); return owner_none; // it's unlocked } // FIXME: jvmti should call this JavaThread* ObjectSynchronizer::get_lock_owner(ThreadsList * t_list, Handle h_obj) { if (UseBiasedLocking) { --- 1124,1164 ---- "biases should be revoked by now"); } assert(self == JavaThread::current(), "Can only be called on current thread"); oop obj = h_obj(); + + while (true) { markWord mark = read_stable_mark(obj); // CASE: stack-locked. Mark points to a BasicLock on the owner's stack. if (mark.has_locker()) { return self->is_lock_owned((address)mark.locker()) ? owner_self : owner_other; } // CASE: inflated. Mark (tagged pointer) points to an ObjectMonitor. // The Object:ObjectMonitor relationship is stable as long as we're ! // not at a safepoint and AsyncDeflateIdleMonitors is false. if (mark.has_monitor()) { ! ObjectMonitorHandle omh; ! if (!omh.save_om_ptr(obj, mark)) { ! // Lost a race with async deflation so try again. ! assert(AsyncDeflateIdleMonitors, "sanity check"); ! continue; ! } ! ObjectMonitor* monitor = omh.om_ptr(); ! void* owner = monitor->_owner; if (owner == NULL) return owner_none; return (owner == self || self->is_lock_owned((address)owner)) ? owner_self : owner_other; } // CASE: neutral assert(mark.is_neutral(), "sanity check"); return owner_none; // it's unlocked + } } // FIXME: jvmti should call this JavaThread* ObjectSynchronizer::get_lock_owner(ThreadsList * t_list, Handle h_obj) { if (UseBiasedLocking) {
*** 850,871 **** } assert(!h_obj->mark().has_bias_pattern(), "biases should be revoked by now"); } oop obj = h_obj(); - address owner = NULL; markWord mark = read_stable_mark(obj); // Uncontended case, header points to stack if (mark.has_locker()) { owner = (address) mark.locker(); } // Contended case, header points to ObjectMonitor (tagged pointer) else if (mark.has_monitor()) { ! ObjectMonitor* monitor = mark.monitor(); assert(monitor != NULL, "monitor should be non-null"); owner = (address) monitor->owner(); } if (owner != NULL) { --- 1169,1197 ---- } assert(!h_obj->mark().has_bias_pattern(), "biases should be revoked by now"); } oop obj = h_obj(); + while (true) { + address owner = NULL; markWord mark = read_stable_mark(obj); // Uncontended case, header points to stack if (mark.has_locker()) { owner = (address) mark.locker(); } // Contended case, header points to ObjectMonitor (tagged pointer) else if (mark.has_monitor()) { ! ObjectMonitorHandle omh; ! if (!omh.save_om_ptr(obj, mark)) { ! // Lost a race with async deflation so try again. ! assert(AsyncDeflateIdleMonitors, "sanity check"); ! continue; ! } ! ObjectMonitor* monitor = omh.om_ptr(); assert(monitor != NULL, "monitor should be non-null"); owner = (address) monitor->owner(); } if (owner != NULL) {
*** 877,942 **** // Cannot have assertion since this object may have been // locked by another thread when reaching here. // assert(mark.is_neutral(), "sanity check"); return NULL; } // Visitors ... void ObjectSynchronizer::monitors_iterate(MonitorClosure* closure) { PaddedObjectMonitor* block = OrderAccess::load_acquire(&g_block_list); while (block != NULL) { assert(block->object() == CHAINMARKER, "must be a block header"); for (int i = _BLOCKSIZE - 1; i > 0; i--) { ObjectMonitor* mid = (ObjectMonitor *)(block + i); ! oop object = (oop)mid->object(); ! if (object != NULL) { // Only process with closure if the object is set. closure->do_monitor(mid); } } ! block = (PaddedObjectMonitor*)block->_next_om; } } static bool monitors_used_above_threshold() { ! if (g_om_population == 0) { return false; } ! int monitors_used = g_om_population - g_om_free_count; ! int monitor_usage = (monitors_used * 100LL) / g_om_population; return monitor_usage > MonitorUsedDeflationThreshold; } ! bool ObjectSynchronizer::is_cleanup_needed() { ! if (MonitorUsedDeflationThreshold > 0) { ! return monitors_used_above_threshold(); } return false; } void ObjectSynchronizer::oops_do(OopClosure* f) { // We only scan the global used list here (for moribund threads), and // the thread-local monitors in Thread::oops_do(). global_used_oops_do(f); } void ObjectSynchronizer::global_used_oops_do(OopClosure* f) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); ! list_oops_do(g_om_in_use_list, f); } void ObjectSynchronizer::thread_local_used_oops_do(Thread* thread, OopClosure* f) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); ! list_oops_do(thread->om_in_use_list, f); } ! void ObjectSynchronizer::list_oops_do(ObjectMonitor* list, OopClosure* f) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); ! ObjectMonitor* mid; ! for (mid = list; mid != NULL; mid = mid->_next_om) { if (mid->object() != NULL) { f->do_oop((oop*)mid->object_addr()); } } } --- 1203,1340 ---- // Cannot have assertion since this object may have been // locked by another thread when reaching here. // assert(mark.is_neutral(), "sanity check"); return NULL; + } } // Visitors ... void ObjectSynchronizer::monitors_iterate(MonitorClosure* closure) { PaddedObjectMonitor* block = OrderAccess::load_acquire(&g_block_list); while (block != NULL) { assert(block->object() == CHAINMARKER, "must be a block header"); for (int i = _BLOCKSIZE - 1; i > 0; i--) { ObjectMonitor* mid = (ObjectMonitor *)(block + i); ! if (mid->is_active()) { ! ObjectMonitorHandle omh(mid); ! ! if (mid->object() == NULL || ! (AsyncDeflateIdleMonitors && mid->ref_count() < 0)) { // Only process with closure if the object is set. + // For async deflation, race here if monitor is not owned! + // The above ref_count bump (in ObjectMonitorHandle ctr) + // will cause subsequent async deflation to skip it. + // However, previous or concurrent async deflation is a race + // so skip this ObjectMonitor if it is being async deflated. + continue; + } closure->do_monitor(mid); } } ! // unmarked_next() is not needed with g_block_list (no next field marking). ! block = (PaddedObjectMonitor*)OrderAccess::load_acquire(&block->_next_om); } } static bool monitors_used_above_threshold() { ! if (OrderAccess::load_acquire(&g_om_population) == 0) { return false; } ! if (MonitorUsedDeflationThreshold > 0) { ! int monitors_used = OrderAccess::load_acquire(&g_om_population) - ! OrderAccess::load_acquire(&g_om_free_count); ! 
if (HandshakeAfterDeflateIdleMonitors) { ! monitors_used -= OrderAccess::load_acquire(&g_om_wait_count); ! } ! int monitor_usage = (monitors_used * 100LL) / ! OrderAccess::load_acquire(&g_om_population); return monitor_usage > MonitorUsedDeflationThreshold; + } + return false; } ! // Returns true if MonitorBound is set (> 0) and if the specified ! // cnt is > MonitorBound. Otherwise returns false. ! static bool is_MonitorBound_exceeded(const int cnt) { ! const int mx = MonitorBound; ! return mx > 0 && cnt > mx; ! } ! ! bool ObjectSynchronizer::is_async_deflation_needed() { ! if (!AsyncDeflateIdleMonitors) { ! return false; ! } ! if (is_async_deflation_requested()) { ! // Async deflation request. ! return true; ! } ! if (AsyncDeflationInterval > 0 && ! time_since_last_async_deflation_ms() > AsyncDeflationInterval && ! monitors_used_above_threshold()) { ! // It's been longer than our specified deflate interval and there ! // are too many monitors in use. We don't deflate more frequently ! // than AsyncDeflationInterval (unless is_async_deflation_requested) ! // in order to not swamp the ServiceThread. ! _last_async_deflation_time_ns = os::javaTimeNanos(); ! return true; ! } ! int monitors_used = OrderAccess::load_acquire(&g_om_population) - ! OrderAccess::load_acquire(&g_om_free_count); ! if (HandshakeAfterDeflateIdleMonitors) { ! monitors_used -= OrderAccess::load_acquire(&g_om_wait_count); ! } ! if (is_MonitorBound_exceeded(monitors_used)) { ! // Not enough ObjectMonitors on the global free list. ! return true; } return false; } + bool ObjectSynchronizer::is_safepoint_deflation_needed() { + if (!AsyncDeflateIdleMonitors) { + if (monitors_used_above_threshold()) { + // Too many monitors in use. + return true; + } + return false; + } + if (is_special_deflation_requested()) { + // For AsyncDeflateIdleMonitors only do a safepoint deflation + // if there is a special deflation request. + return true; + } + return false; + } + + jlong ObjectSynchronizer::time_since_last_async_deflation_ms() { + return (os::javaTimeNanos() - _last_async_deflation_time_ns) / (NANOUNITS / MILLIUNITS); + } + void ObjectSynchronizer::oops_do(OopClosure* f) { // We only scan the global used list here (for moribund threads), and // the thread-local monitors in Thread::oops_do(). global_used_oops_do(f); } void ObjectSynchronizer::global_used_oops_do(OopClosure* f) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); ! list_oops_do(OrderAccess::load_acquire(&g_om_in_use_list), OrderAccess::load_acquire(&g_om_in_use_count), f); } void ObjectSynchronizer::thread_local_used_oops_do(Thread* thread, OopClosure* f) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); ! list_oops_do(OrderAccess::load_acquire(&thread->om_in_use_list), OrderAccess::load_acquire(&thread->om_in_use_count), f); } ! void ObjectSynchronizer::list_oops_do(ObjectMonitor* list, int count, OopClosure* f) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); ! // The oops_do() phase does not overlap with monitor deflation ! // so no need to update the ObjectMonitor's ref_count for this ! // ObjectMonitor* use. ! for (ObjectMonitor* mid = list; mid != NULL; mid = unmarked_next(mid)) { if (mid->object() != NULL) { f->do_oop((oop*)mid->object_addr()); } } }
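monitors_used_above_threshold() in the hunk above treats MonitorUsedDeflationThreshold as a percentage of the monitor population. For example, with g_om_population = 10000 and g_om_free_count = 9000, usage is 1000 * 100 / 10000 = 10%, so a threshold of 90 would not trigger deflation. A self-contained sketch of the same arithmetic (plain ints in place of the OrderAccess loads) is:

    #include <cstdint>

    bool used_above_threshold(int population, int free_count, int wait_count,
                              bool subtract_wait_list, int threshold_percent) {
      if (population == 0 || threshold_percent <= 0) {
        return false;
      }
      int monitors_used = population - free_count;
      if (subtract_wait_list) {
        // Deflated monitors parked on the wait list are not "in use" either.
        monitors_used -= wait_count;
      }
      // 100LL keeps the multiplication in 64 bits, as in the patch.
      int64_t usage_percent = (monitors_used * 100LL) / population;
      return usage_percent > threshold_percent;
    }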
*** 948,960 ****
  // Inflation unlinks monitors from the global g_free_list and
  // associates them with objects. Deflation -- which occurs at
  // STW-time -- disassociates idle monitors from objects. Such
  // scavenged monitors are returned to the g_free_list.
  //
- // The global list is protected by gListLock. All the critical sections
- // are short and operate in constant-time.
- //
  // ObjectMonitors reside in type-stable memory (TSM) and are immortal.
  //
  // Lifecycle:
  // -- unassigned and on the global free list
  // -- unassigned and on a thread's private om_free_list
--- 1346,1355 ----
*** 964,973 ****
--- 1359,1369 ----
  // Constraining monitor pool growth via MonitorBound ...
  //
  // If MonitorBound is not set (<= 0), MonitorBound checks are disabled.
  //
+ // When safepoint deflation is being used (!AsyncDeflateIdleMonitors):
  // The monitor pool is grow-only. We scavenge at STW safepoint-time, but the
  // the rate of scavenging is driven primarily by GC. As such, we can find
  // an inordinate number of monitors in circulation.
  // To avoid that scenario we can artificially induce a STW safepoint
  // if the pool appears to be growing past some reasonable bound.
*** 978,994 **** // we'll incur more safepoints, which are harmful to performance. // See also: GuaranteedSafepointInterval // // The current implementation uses asynchronous VM operations. // ! // If MonitorBound is set, the boundry applies to // (g_om_population - g_om_free_count) // i.e., if there are not enough ObjectMonitors on the global free list, // then a safepoint deflation is induced. Picking a good MonitorBound value // is non-trivial. static void InduceScavenge(Thread* self, const char * Whence) { // Induce STW safepoint to trim monitors // Ultimately, this results in a call to deflate_idle_monitors() in the near future. // More precisely, trigger an asynchronous STW safepoint as the number // of active monitors passes the specified threshold. // TODO: assert thread state is reasonable --- 1374,1403 ---- // we'll incur more safepoints, which are harmful to performance. // See also: GuaranteedSafepointInterval // // The current implementation uses asynchronous VM operations. // ! // When safepoint deflation is being used and MonitorBound is set, the ! // boundry applies to // (g_om_population - g_om_free_count) // i.e., if there are not enough ObjectMonitors on the global free list, // then a safepoint deflation is induced. Picking a good MonitorBound value // is non-trivial. + // + // When async deflation is being used: + // The monitor pool is still grow-only. Async deflation is requested + // by a safepoint's cleanup phase or by the ServiceThread at periodic + // intervals when is_async_deflation_needed() returns true. In + // addition to other policies that are checked, if there are not + // enough ObjectMonitors on the global free list, then + // is_async_deflation_needed() will return true. The ServiceThread + // calls deflate_global_idle_monitors_using_JT() and also calls + // deflate_per_thread_idle_monitors_using_JT() as needed. static void InduceScavenge(Thread* self, const char * Whence) { + assert(!AsyncDeflateIdleMonitors, "is not used by async deflation"); + // Induce STW safepoint to trim monitors // Ultimately, this results in a call to deflate_idle_monitors() in the near future. // More precisely, trigger an asynchronous STW safepoint as the number // of active monitors passes the specified threshold. // TODO: assert thread state is reasonable
*** 1000,1061 **** // The VMThread will delete the op when completed. VMThread::execute(new VM_ScavengeMonitors()); } } ! ObjectMonitor* ObjectSynchronizer::om_alloc(Thread* self) { // A large MAXPRIVATE value reduces both list lock contention // and list coherency traffic, but also tends to increase the // number of ObjectMonitors in circulation as well as the STW // scavenge costs. As usual, we lean toward time in space-time // tradeoffs. const int MAXPRIVATE = 1024; stringStream ss; for (;;) { ObjectMonitor* m; // 1: try to allocate from the thread's local om_free_list. // Threads will attempt to allocate first from their local list, then ! // from the global list, and only after those attempts fail will the thread ! // attempt to instantiate new monitors. Thread-local free lists take ! // heat off the gListLock and improve allocation latency, as well as reducing ! // coherency traffic on the shared global list. ! m = self->om_free_list; if (m != NULL) { - self->om_free_list = m->_next_om; - self->om_free_count--; guarantee(m->object() == NULL, "invariant"); ! m->_next_om = self->om_in_use_list; ! self->om_in_use_list = m; ! self->om_in_use_count++; return m; } // 2: try to allocate from the global g_free_list // CONSIDER: use muxTry() instead of muxAcquire(). // If the muxTry() fails then drop immediately into case 3. // If we're using thread-local free lists then try // to reprovision the caller's free list. ! if (g_free_list != NULL) { // Reprovision the thread's om_free_list. // Use bulk transfers to reduce the allocation rate and heat // on various locks. ! Thread::muxAcquire(&gListLock, "om_alloc(1)"); ! for (int i = self->om_free_provision; --i >= 0 && g_free_list != NULL;) { ! g_om_free_count--; ! ObjectMonitor* take = g_free_list; ! g_free_list = take->_next_om; guarantee(take->object() == NULL, "invariant"); take->Recycle(); om_release(self, take, false); } - Thread::muxRelease(&gListLock); self->om_free_provision += 1 + (self->om_free_provision/2); if (self->om_free_provision > MAXPRIVATE) self->om_free_provision = MAXPRIVATE; ! const int mx = MonitorBound; ! if (mx > 0 && (g_om_population-g_om_free_count) > mx) { // Not enough ObjectMonitors on the global free list. // We can't safely induce a STW safepoint from om_alloc() as our thread // state may not be appropriate for such activities and callers may hold // naked oops, so instead we defer the action. InduceScavenge(self, "om_alloc"); --- 1409,1489 ---- // The VMThread will delete the op when completed. VMThread::execute(new VM_ScavengeMonitors()); } } ! ObjectMonitor* ObjectSynchronizer::om_alloc(Thread* self, ! const InflateCause cause) { // A large MAXPRIVATE value reduces both list lock contention // and list coherency traffic, but also tends to increase the // number of ObjectMonitors in circulation as well as the STW // scavenge costs. As usual, we lean toward time in space-time // tradeoffs. const int MAXPRIVATE = 1024; + stringStream ss; for (;;) { ObjectMonitor* m; // 1: try to allocate from the thread's local om_free_list. // Threads will attempt to allocate first from their local list, then ! // from the global list, and only after those attempts fail will the ! // thread attempt to instantiate new monitors. Thread-local free lists ! // improve allocation latency, as well as reducing coherency traffic ! // on the shared global list. ! m = take_from_start_of_om_free_list(self); if (m != NULL) { guarantee(m->object() == NULL, "invariant"); ! m->set_allocation_state(ObjectMonitor::New); ! 
prepend_to_om_in_use_list(self, m); return m; } // 2: try to allocate from the global g_free_list // CONSIDER: use muxTry() instead of muxAcquire(). // If the muxTry() fails then drop immediately into case 3. // If we're using thread-local free lists then try // to reprovision the caller's free list. ! if (OrderAccess::load_acquire(&g_free_list) != NULL) { // Reprovision the thread's om_free_list. // Use bulk transfers to reduce the allocation rate and heat // on various locks. ! for (int i = self->om_free_provision; --i >= 0;) { ! ObjectMonitor* take = take_from_start_of_g_free_list(); ! if (take == NULL) { ! break; // No more are available. ! } guarantee(take->object() == NULL, "invariant"); + if (AsyncDeflateIdleMonitors) { + // We allowed 3 field values to linger during async deflation. + // We clear header and restore ref_count here, but we leave + // owner == DEFLATER_MARKER so the simple C2 ObjectMonitor + // enter optimization can no longer race with async deflation + // and reuse. + take->set_header(markWord::zero()); + if (take->ref_count() < 0) { + // Add back max_jint to restore the ref_count field to its + // proper value. + Atomic::add(max_jint, &take->_ref_count); + + assert(take->ref_count() >= 0, "must not be negative: ref_count=%d", + take->ref_count()); + } + } take->Recycle(); + // Since we're taking from the global free-list, take must be Free. + // om_release() also sets the allocation state to Free because it + // is called from other code paths. + assert(take->is_free(), "invariant"); om_release(self, take, false); } self->om_free_provision += 1 + (self->om_free_provision/2); if (self->om_free_provision > MAXPRIVATE) self->om_free_provision = MAXPRIVATE; ! if (!AsyncDeflateIdleMonitors && ! is_MonitorBound_exceeded(OrderAccess::load_acquire(&g_om_population) - ! OrderAccess::load_acquire(&g_om_free_count))) { // Not enough ObjectMonitors on the global free list. // We can't safely induce a STW safepoint from om_alloc() as our thread // state may not be appropriate for such activities and callers may hold // naked oops, so instead we defer the action. InduceScavenge(self, "om_alloc");
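When om_alloc() takes a monitor from the global free list under AsyncDeflateIdleMonitors, a negative ref_count left behind by the deflater is repaired by adding max_jint back, as the hunk above shows. In isolation the restore step is just the following (stand-in types; the deflater-side bookkeeping is not part of this hunk):

    #include <atomic>
    #include <climits>   // INT_MAX stands in for max_jint

    // A deflated monitor can reach the free list with ref_count driven negative
    // by max_jint; adding max_jint back restores the original small value.
    void restore_ref_count(std::atomic<int>* ref_count) {
      if (ref_count->load() < 0) {
        ref_count->fetch_add(INT_MAX);   // e.g. -INT_MAX -> 0
      }
    }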
*** 1072,1084 **** // A better solution would be to use C++ placement-new. // BEWARE: As it stands currently, we don't run the ctors! assert(_BLOCKSIZE > 1, "invariant"); size_t neededsize = sizeof(PaddedObjectMonitor) * _BLOCKSIZE; PaddedObjectMonitor* temp; ! size_t aligned_size = neededsize + (DEFAULT_CACHE_LINE_SIZE - 1); void* real_malloc_addr = NEW_C_HEAP_ARRAY(char, aligned_size, mtInternal); ! temp = (PaddedObjectMonitor*)align_up(real_malloc_addr, DEFAULT_CACHE_LINE_SIZE); (void)memset((void *) temp, 0, neededsize); // Format the block. // initialize the linked list, each monitor points to its next // forming the single linked free list, the very first monitor --- 1500,1512 ---- // A better solution would be to use C++ placement-new. // BEWARE: As it stands currently, we don't run the ctors! assert(_BLOCKSIZE > 1, "invariant"); size_t neededsize = sizeof(PaddedObjectMonitor) * _BLOCKSIZE; PaddedObjectMonitor* temp; ! size_t aligned_size = neededsize + (OM_CACHE_LINE_SIZE - 1); void* real_malloc_addr = NEW_C_HEAP_ARRAY(char, aligned_size, mtInternal); ! temp = (PaddedObjectMonitor*)align_up(real_malloc_addr, OM_CACHE_LINE_SIZE); (void)memset((void *) temp, 0, neededsize); // Format the block. // initialize the linked list, each monitor points to its next // forming the single linked free list, the very first monitor
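The block allocation above over-allocates by OM_CACHE_LINE_SIZE - 1 bytes and then rounds the start address up to the cache-line boundary so every ObjectMonitor in the block is line-aligned. The same idiom in stand-alone form (malloc in place of NEW_C_HEAP_ARRAY, with an explicit align-up) is:

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>
    #include <cstring>

    // Returns a zeroed, line_size-aligned region of at least 'needed' bytes.
    // '*raw_out' receives the unaligned pointer that must be passed to free().
    void* alloc_cache_line_aligned(std::size_t needed, std::size_t line_size,
                                   void** raw_out) {
      void* raw = std::malloc(needed + (line_size - 1));
      if (raw == nullptr) {
        *raw_out = nullptr;
        return nullptr;
      }
      uintptr_t aligned = (reinterpret_cast<uintptr_t>(raw) + (line_size - 1))
                          & ~static_cast<uintptr_t>(line_size - 1);
      std::memset(reinterpret_cast<void*>(aligned), 0, needed);
      *raw_out = raw;
      return reinterpret_cast<void*>(aligned);
    }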
*** 1086,1126 **** // The trick of using the 1st element in the block as g_block_list // linkage should be reconsidered. A better implementation would // look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; } for (int i = 1; i < _BLOCKSIZE; i++) { ! temp[i]._next_om = (ObjectMonitor *)&temp[i+1]; } // terminate the last monitor as the end of list ! temp[_BLOCKSIZE - 1]._next_om = NULL; // Element [0] is reserved for global list linkage temp[0].set_object(CHAINMARKER); // Consider carving out this thread's current request from the // block in hand. This avoids some lock traffic and redundant // list activity. ! // Acquire the gListLock to manipulate g_block_list and g_free_list. ! // An Oyama-Taura-Yonezawa scheme might be more efficient. ! Thread::muxAcquire(&gListLock, "om_alloc(2)"); ! g_om_population += _BLOCKSIZE-1; ! g_om_free_count += _BLOCKSIZE-1; ! ! // Add the new block to the list of extant blocks (g_block_list). ! // The very first ObjectMonitor in a block is reserved and dedicated. ! // It serves as blocklist "next" linkage. ! temp[0]._next_om = g_block_list; ! // There are lock-free uses of g_block_list so make sure that ! // the previous stores happen before we update g_block_list. ! OrderAccess::release_store(&g_block_list, temp); ! ! // Add the new string of ObjectMonitors to the global free list ! temp[_BLOCKSIZE - 1]._next_om = g_free_list; ! g_free_list = temp + 1; ! Thread::muxRelease(&gListLock); } } // Place "m" on the caller's private per-thread om_free_list. // In practice there's no need to clamp or limit the number of --- 1514,1538 ---- // The trick of using the 1st element in the block as g_block_list // linkage should be reconsidered. A better implementation would // look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; } for (int i = 1; i < _BLOCKSIZE; i++) { ! OrderAccess::release_store(&temp[i]._next_om, (ObjectMonitor*)&temp[i+1]); ! assert(temp[i].is_free(), "invariant"); } // terminate the last monitor as the end of list ! OrderAccess::release_store(&temp[_BLOCKSIZE - 1]._next_om, (ObjectMonitor*)NULL); // Element [0] is reserved for global list linkage temp[0].set_object(CHAINMARKER); // Consider carving out this thread's current request from the // block in hand. This avoids some lock traffic and redundant // list activity. ! prepend_block_to_lists(temp); } } // Place "m" on the caller's private per-thread om_free_list. // In practice there's no need to clamp or limit the number of
*** 1129,1174 **** // a CAS attempt failed. This doesn't allow unbounded #s of monitors to // accumulate on a thread's free list. // // Key constraint: all ObjectMonitors on a thread's free list and the global // free list must have their object field set to null. This prevents the ! // scavenger -- deflate_monitor_list() -- from reclaiming them while we ! // are trying to release them. void ObjectSynchronizer::om_release(Thread* self, ObjectMonitor* m, bool from_per_thread_alloc) { guarantee(m->header().value() == 0, "invariant"); guarantee(m->object() == NULL, "invariant"); stringStream ss; guarantee((m->is_busy() | m->_recursions) == 0, "freeing in-use monitor: " ! "%s, recursions=" INTPTR_FORMAT, m->is_busy_to_string(&ss), m->_recursions); // _next_om is used for both per-thread in-use and free lists so // we have to remove 'm' from the in-use list first (as needed). if (from_per_thread_alloc) { // Need to remove 'm' from om_in_use_list. ObjectMonitor* cur_mid_in_use = NULL; bool extracted = false; ! for (ObjectMonitor* mid = self->om_in_use_list; mid != NULL; cur_mid_in_use = mid, mid = mid->_next_om) { if (m == mid) { ! // extract from per-thread in-use list ! if (mid == self->om_in_use_list) { ! self->om_in_use_list = mid->_next_om; ! } else if (cur_mid_in_use != NULL) { ! cur_mid_in_use->_next_om = mid->_next_om; // maintain the current thread in-use list } extracted = true; ! self->om_in_use_count--; break; } } - assert(extracted, "Should have extracted from in-use list"); } ! m->_next_om = self->om_free_list; ! self->om_free_list = m; ! self->om_free_count++; } // Return ObjectMonitors on a moribund thread's free and in-use // lists to the appropriate global lists. The ObjectMonitors on the // per-thread in-use list may still be in use by other threads. --- 1541,1617 ---- // a CAS attempt failed. This doesn't allow unbounded #s of monitors to // accumulate on a thread's free list. // // Key constraint: all ObjectMonitors on a thread's free list and the global // free list must have their object field set to null. This prevents the ! // scavenger -- deflate_monitor_list() or deflate_monitor_list_using_JT() ! // -- from reclaiming them while we are trying to release them. void ObjectSynchronizer::om_release(Thread* self, ObjectMonitor* m, bool from_per_thread_alloc) { guarantee(m->header().value() == 0, "invariant"); guarantee(m->object() == NULL, "invariant"); stringStream ss; guarantee((m->is_busy() | m->_recursions) == 0, "freeing in-use monitor: " ! "%s, recursions=" INTX_FORMAT, m->is_busy_to_string(&ss), m->_recursions); + m->set_allocation_state(ObjectMonitor::Free); // _next_om is used for both per-thread in-use and free lists so // we have to remove 'm' from the in-use list first (as needed). if (from_per_thread_alloc) { // Need to remove 'm' from om_in_use_list. + // We use the more complicated mark-cur_mid_in_use-and-mid-as-we-go + // protocol because async deflation can do list deletions in parallel. ObjectMonitor* cur_mid_in_use = NULL; + ObjectMonitor* mid = NULL; + ObjectMonitor* next = NULL; bool extracted = false; ! ! if (!mark_list_head(&self->om_in_use_list, &mid, &next)) { ! fatal("thread=" INTPTR_FORMAT " in-use list must not be empty.", p2i(self)); ! } ! while (true) { if (m == mid) { ! // We found 'm' on the per-thread in-use list so try to extract it. ! if (cur_mid_in_use == NULL) { ! // mid is the list head and it is marked. Switch the list head ! // to next which unmarks the list head, but leaves mid marked: ! 
OrderAccess::release_store(&self->om_in_use_list, next); ! } else { ! // mid and cur_mid_in_use are marked. Switch cur_mid_in_use's ! // next field to next which unmarks cur_mid_in_use, but leaves ! // mid marked: ! OrderAccess::release_store(&cur_mid_in_use->_next_om, next); } extracted = true; ! Atomic::dec(&self->om_in_use_count); ! // Unmark mid, but leave the next value for any lagging list ! // walkers. It will get cleaned up when mid is prepended to ! // the thread's free list: ! set_next(mid, next); break; } + if (cur_mid_in_use != NULL) { + set_next(cur_mid_in_use, mid); // umark cur_mid_in_use + } + // The next cur_mid_in_use keeps mid's marked next field so + // that it is stable for a possible next field change. It + // cannot be deflated while it is marked. + cur_mid_in_use = mid; + mid = next; + if (mid == NULL) { + // Reached end of the list and didn't find m so: + fatal("must find m=" INTPTR_FORMAT "on om_in_use_list=" INTPTR_FORMAT, + p2i(m), p2i(self->om_in_use_list)); + } + // Mark mid's next field so we can possibly extract it: + next = mark_next_loop(mid); } } ! prepend_to_om_free_list(self, m); ! guarantee(m->is_free(), "invariant"); } // Return ObjectMonitors on a moribund thread's free and in-use // lists to the appropriate global lists. The ObjectMonitors on the // per-thread in-use list may still be in use by other threads.
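The marking primitives used throughout this function and the deflation code (mark_list_head(), mark_next_loop(), unmarked_next(), is_next_marked(), set_next(), mark_om_ptr()) are not visible in these hunks. The usual way to implement such a mark is to set the low-order bit of the next pointer, which is always zero for aligned ObjectMonitors, so a marked link can be detected and stripped cheaply. A minimal sketch of that tagging idea, with hypothetical names rather than the actual helpers:

    #include <cstdint>

    struct Node { Node* volatile next; };

    // Tag/untag the low-order bit of an aligned pointer. A set bit means the
    // link is "marked": the thread that set it is editing this part of the list.
    static inline Node* mark_ptr(Node* p)   { return (Node*)((uintptr_t)p |  (uintptr_t)1); }
    static inline Node* unmark_ptr(Node* p) { return (Node*)((uintptr_t)p & ~(uintptr_t)1); }
    static inline bool  is_marked(Node* p)  { return ((uintptr_t)p & 1) != 0; }

    // Read the successor of 'n' whether or not the link is marked
    // (the same idea as unmarked_next() in the patch).
    static inline Node* successor(const Node* n) { return unmark_ptr(n->next); }

Walkers that must not race with an in-progress edit either spin until the bit clears (as om_flush() below does with os::naked_short_sleep()) or strip the bit and continue read-only.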
*** 1179,1244 **** // a safepoint and interleave with deflate_idle_monitors(). In // particular, this ensures that the thread's in-use monitors are // scanned by a GC safepoint, either via Thread::oops_do() (before // om_flush() is called) or via ObjectSynchronizer::oops_do() (after // om_flush() is called). void ObjectSynchronizer::om_flush(Thread* self) { ! ObjectMonitor* free_list = self->om_free_list; ! ObjectMonitor* free_tail = NULL; int free_count = 0; if (free_list != NULL) { - ObjectMonitor* s; // The thread is going away. Set 'free_tail' to the last per-thread free ! // monitor which will be linked to g_free_list below under the gListLock. stringStream ss; ! for (s = free_list; s != NULL; s = s->_next_om) { free_count++; free_tail = s; guarantee(s->object() == NULL, "invariant"); guarantee(!s->is_busy(), "must be !is_busy: %s", s->is_busy_to_string(&ss)); } guarantee(free_tail != NULL, "invariant"); ! assert(self->om_free_count == free_count, "free-count off"); ! self->om_free_list = NULL; ! self->om_free_count = 0; } - ObjectMonitor* in_use_list = self->om_in_use_list; - ObjectMonitor* in_use_tail = NULL; - int in_use_count = 0; - if (in_use_list != NULL) { - // The thread is going away, however the ObjectMonitors on the - // om_in_use_list may still be in-use by other threads. Link - // them to in_use_tail, which will be linked into the global - // in-use list g_om_in_use_list below, under the gListLock. - ObjectMonitor *cur_om; - for (cur_om = in_use_list; cur_om != NULL; cur_om = cur_om->_next_om) { - in_use_tail = cur_om; - in_use_count++; - } - guarantee(in_use_tail != NULL, "invariant"); - assert(self->om_in_use_count == in_use_count, "in-use count off"); - self->om_in_use_list = NULL; - self->om_in_use_count = 0; - } - - Thread::muxAcquire(&gListLock, "om_flush"); if (free_tail != NULL) { ! free_tail->_next_om = g_free_list; ! g_free_list = free_list; ! g_om_free_count += free_count; } if (in_use_tail != NULL) { ! in_use_tail->_next_om = g_om_in_use_list; ! g_om_in_use_list = in_use_list; ! g_om_in_use_count += in_use_count; } - Thread::muxRelease(&gListLock); - LogStreamHandle(Debug, monitorinflation) lsh_debug; LogStreamHandle(Info, monitorinflation) lsh_info; LogStream* ls = NULL; if (log_is_enabled(Debug, monitorinflation)) { ls = &lsh_debug; --- 1622,1733 ---- // a safepoint and interleave with deflate_idle_monitors(). In // particular, this ensures that the thread's in-use monitors are // scanned by a GC safepoint, either via Thread::oops_do() (before // om_flush() is called) or via ObjectSynchronizer::oops_do() (after // om_flush() is called). + // + // With AsyncDeflateIdleMonitors, deflate_global_idle_monitors_using_JT() + // and deflate_per_thread_idle_monitors_using_JT() (in another thread) can + // run at the same time as om_flush() so we have to follow a careful + // protocol to prevent list corruption. void ObjectSynchronizer::om_flush(Thread* self) { ! // This function can race with an async deflater thread. Since ! // deflation has to process the per-thread in-use list before ! // prepending the deflated ObjectMonitors to the global free list, ! // we process the per-thread lists in the same order to prevent ! // ordering races. ! int in_use_count = 0; ! ObjectMonitor* in_use_list = NULL; ! ObjectMonitor* in_use_tail = NULL; ! ObjectMonitor* next = NULL; ! ! // An async deflation thread checks to see if the target thread ! // is exiting, but if it has made it past that check before we ! // started exiting, then it is racing to get to the in-use list. ! 
if (mark_list_head(&self->om_in_use_list, &in_use_list, &next)) { ! // At this point, we have marked the in-use list head so an ! // async deflation thread cannot come in after us. If an async ! // deflation thread is ahead of us, then we'll detect that and ! // wait for it to finish its work. ! // ! // The thread is going away, however the ObjectMonitors on the ! // om_in_use_list may still be in-use by other threads. Link ! // them to in_use_tail, which will be linked into the global ! // in-use list g_om_in_use_list below. ! // ! // Account for the in-use list head before the loop since it is ! // already marked (by this thread): ! in_use_tail = in_use_list; ! in_use_count++; ! for (ObjectMonitor* cur_om = unmarked_next(in_use_list); cur_om != NULL;) { ! if (is_next_marked(cur_om)) { ! // This next field is marked so there must be an async deflater ! // thread ahead of us so we'll give it a chance to finish. ! while (is_next_marked(cur_om)) { ! os::naked_short_sleep(1); ! } ! // Refetch the possibly changed next field and try again. ! cur_om = unmarked_next(in_use_tail); ! continue; ! } ! if (!cur_om->is_active()) { ! // cur_om was deflated and the allocation state was changed ! // to Free while it was marked. We happened to see it just ! // after it was unmarked (and added to the free list). ! // Refetch the possibly changed next field and try again. ! cur_om = unmarked_next(in_use_tail); ! continue; ! } ! in_use_tail = cur_om; ! in_use_count++; ! cur_om = unmarked_next(cur_om); ! } ! guarantee(in_use_tail != NULL, "invariant"); ! int l_om_in_use_count = OrderAccess::load_acquire(&self->om_in_use_count); ! ADIM_guarantee(l_om_in_use_count == in_use_count, "in-use counts don't " ! "match: l_om_in_use_count=%d, in_use_count=%d", ! l_om_in_use_count, in_use_count); ! // Clear the in-use count before unmarking the in-use list head ! // to avoid races: ! OrderAccess::release_store(&self->om_in_use_count, 0); ! // Clear the in-use list head (which also unmarks it): ! OrderAccess::release_store(&self->om_in_use_list, (ObjectMonitor*)NULL); ! // Unmark the disconnected list head: ! set_next(in_use_list, next); ! } ! int free_count = 0; + ObjectMonitor* free_list = OrderAccess::load_acquire(&self->om_free_list); + ObjectMonitor* free_tail = NULL; if (free_list != NULL) { // The thread is going away. Set 'free_tail' to the last per-thread free ! // monitor which will be linked to g_free_list below. stringStream ss; ! for (ObjectMonitor* s = free_list; s != NULL; s = unmarked_next(s)) { free_count++; free_tail = s; guarantee(s->object() == NULL, "invariant"); guarantee(!s->is_busy(), "must be !is_busy: %s", s->is_busy_to_string(&ss)); } guarantee(free_tail != NULL, "invariant"); ! int l_om_free_count = OrderAccess::load_acquire(&self->om_free_count); ! ADIM_guarantee(l_om_free_count == free_count, "free counts don't match: " ! "l_om_free_count=%d, free_count=%d", l_om_free_count, ! free_count); ! OrderAccess::release_store(&self->om_free_list, (ObjectMonitor*)NULL); ! OrderAccess::release_store(&self->om_free_count, 0); } if (free_tail != NULL) { ! prepend_list_to_g_free_list(free_list, free_tail, free_count); } if (in_use_tail != NULL) { ! prepend_list_to_g_om_in_use_list(in_use_list, in_use_tail, in_use_count); } LogStreamHandle(Debug, monitorinflation) lsh_debug; LogStreamHandle(Info, monitorinflation) lsh_info; LogStream* ls = NULL; if (log_is_enabled(Debug, monitorinflation)) { ls = &lsh_debug;
*** 1263,1285 **** event->set_cause((u1)cause); event->commit(); } // Fast path code shared by multiple functions ! void ObjectSynchronizer::inflate_helper(oop obj) { markWord mark = obj->mark(); if (mark.has_monitor()) { ! assert(ObjectSynchronizer::verify_objmon_isinpool(mark.monitor()), "monitor is invalid"); ! assert(mark.monitor()->header().is_neutral(), "monitor must record a good object header"); return; } - inflate(Thread::current(), obj, inflate_cause_vm_internal); } ! ObjectMonitor* ObjectSynchronizer::inflate(Thread* self, ! oop object, ! const InflateCause cause) { // Inflate mutates the heap ... // Relaxing assertion for bug 6320749. assert(Universe::verify_in_progress() || !SafepointSynchronize::is_at_safepoint(), "invariant"); --- 1752,1783 ---- event->set_cause((u1)cause); event->commit(); } // Fast path code shared by multiple functions ! void ObjectSynchronizer::inflate_helper(ObjectMonitorHandle* omh_p, oop obj) { ! while (true) { markWord mark = obj->mark(); if (mark.has_monitor()) { ! if (!omh_p->save_om_ptr(obj, mark)) { ! // Lost a race with async deflation so try again. ! assert(AsyncDeflateIdleMonitors, "sanity check"); ! continue; ! } ! ObjectMonitor* monitor = omh_p->om_ptr(); ! assert(ObjectSynchronizer::verify_objmon_isinpool(monitor), "monitor is invalid"); ! markWord dmw = monitor->header(); ! assert(dmw.is_neutral(), "sanity check: header=" INTPTR_FORMAT, dmw.value()); ! return; ! } ! inflate(omh_p, Thread::current(), obj, inflate_cause_vm_internal); return; } } ! void ObjectSynchronizer::inflate(ObjectMonitorHandle* omh_p, Thread* self, ! oop object, const InflateCause cause) { // Inflate mutates the heap ... // Relaxing assertion for bug 6320749. assert(Universe::verify_in_progress() || !SafepointSynchronize::is_at_safepoint(), "invariant");
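ObjectMonitorHandle::save_om_ptr() is what makes these retry loops necessary: it tries to pin the ObjectMonitor found in the mark word and reports failure when async deflation got there first. A rough sketch of that kind of ref_count pin, under the assumption (consistent with the comments in this patch) that a negative ref_count means deflation is in progress; hypothetical types, not the real ObjectMonitorHandle:

    #include <atomic>

    // Hypothetical monitor with a reference count; a negative value signals
    // that an asynchronous deflater has claimed the monitor.
    struct Monitor { std::atomic<int> ref_count{0}; };

    // Try to pin 'm' for use: increment ref_count only while it is non-negative.
    static bool try_pin(Monitor* m) {
      int rc = m->ref_count.load();
      while (rc >= 0) {
        if (m->ref_count.compare_exchange_weak(rc, rc + 1)) {
          return true;   // pinned; a deflater now sees ref_count != 0 and bails
        }
        // CAS failed: rc was refreshed; re-check for a negative value.
      }
      return false;      // ref_count is negative: async deflation in progress
    }

    static void unpin(Monitor* m) {
      m->ref_count.fetch_sub(1);
    }

On failure the caller re-reads the object's mark word, which by then normally no longer refers to the deflated monitor, and retries; that is exactly the shape of the loops in inflate_helper() and inflate().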
*** 1296,1311 **** // * Neutral - aggressively inflate the object. // * BIASED - Illegal. We should never see this // CASE: inflated if (mark.has_monitor()) { ! ObjectMonitor* inf = mark.monitor(); markWord dmw = inf->header(); assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); assert(inf->object() == object, "invariant"); assert(ObjectSynchronizer::verify_objmon_isinpool(inf), "monitor is invalid"); ! return inf; } // CASE: inflation in progress - inflating over a stack-lock. // Some other thread is converting from stack-locked to inflated. // Only that thread can complete inflation -- other threads must wait. --- 1794,1814 ---- // * Neutral - aggressively inflate the object. // * BIASED - Illegal. We should never see this // CASE: inflated if (mark.has_monitor()) { ! if (!omh_p->save_om_ptr(object, mark)) { ! // Lost a race with async deflation so try again. ! assert(AsyncDeflateIdleMonitors, "sanity check"); ! continue; ! } ! ObjectMonitor* inf = omh_p->om_ptr(); markWord dmw = inf->header(); assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); assert(inf->object() == object, "invariant"); assert(ObjectSynchronizer::verify_objmon_isinpool(inf), "monitor is invalid"); ! return; } // CASE: inflation in progress - inflating over a stack-lock. // Some other thread is converting from stack-locked to inflated. // Only that thread can complete inflation -- other threads must wait.
*** 1337,1356 **** // See the comments in om_alloc(). LogStreamHandle(Trace, monitorinflation) lsh; if (mark.has_locker()) { ! ObjectMonitor* m = om_alloc(self); // Optimistically prepare the objectmonitor - anticipate successful CAS // We do this before the CAS in order to minimize the length of time // in which INFLATING appears in the mark. m->Recycle(); m->_Responsible = NULL; m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // Consider: maintain by type/class markWord cmp = object->cas_set_mark(markWord::INFLATING(), mark); if (cmp != mark) { om_release(self, m, true); continue; // Interference -- just retry } // We've successfully installed INFLATING (0) into the mark-word. --- 1840,1860 ---- // See the comments in om_alloc(). LogStreamHandle(Trace, monitorinflation) lsh; if (mark.has_locker()) { ! ObjectMonitor* m = om_alloc(self, cause); // Optimistically prepare the objectmonitor - anticipate successful CAS // We do this before the CAS in order to minimize the length of time // in which INFLATING appears in the mark. m->Recycle(); m->_Responsible = NULL; m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // Consider: maintain by type/class markWord cmp = object->cas_set_mark(markWord::INFLATING(), mark); if (cmp != mark) { + // om_release() will reset the allocation state from New to Free. om_release(self, m, true); continue; // Interference -- just retry } // We've successfully installed INFLATING (0) into the mark-word.
*** 1384,1412 **** // object is in the mark. Furthermore the owner can't complete // an unlock on the object, either. markWord dmw = mark.displaced_mark_helper(); // Catch if the object's header is not neutral (not locked and // not marked is what we care about here). ! assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); // Setup monitor fields to proper values -- prepare the monitor m->set_header(dmw); // Optimization: if the mark.locker stack address is associated // with this thread we could simply set m->_owner = self. // Note that a thread can inflate an object // that it has stack-locked -- as might happen in wait() -- directly // with CAS. That is, we can avoid the xchg-NULL .... ST idiom. ! m->set_owner(mark.locker()); m->set_object(object); // TODO-FIXME: assert BasicLock->dhw != 0. // Must preserve store ordering. The monitor state must // be stable at the time of publishing the monitor address. guarantee(object->mark() == markWord::INFLATING(), "invariant"); object->release_set_mark(markWord::encode(m)); // Hopefully the performance counters are allocated on distinct cache lines // to avoid false sharing on MP systems ... OM_PERFDATA_OP(Inflations, inc()); if (log_is_enabled(Trace, monitorinflation)) { ResourceMark rm(self); --- 1888,1927 ---- // object is in the mark. Furthermore the owner can't complete // an unlock on the object, either. markWord dmw = mark.displaced_mark_helper(); // Catch if the object's header is not neutral (not locked and // not marked is what we care about here). ! ADIM_guarantee(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); // Setup monitor fields to proper values -- prepare the monitor m->set_header(dmw); // Optimization: if the mark.locker stack address is associated // with this thread we could simply set m->_owner = self. // Note that a thread can inflate an object // that it has stack-locked -- as might happen in wait() -- directly // with CAS. That is, we can avoid the xchg-NULL .... ST idiom. ! if (AsyncDeflateIdleMonitors) { ! m->set_owner_from(mark.locker(), NULL, DEFLATER_MARKER); ! } else { ! m->set_owner_from(mark.locker(), NULL); ! } m->set_object(object); // TODO-FIXME: assert BasicLock->dhw != 0. + omh_p->set_om_ptr(m); + // Must preserve store ordering. The monitor state must // be stable at the time of publishing the monitor address. guarantee(object->mark() == markWord::INFLATING(), "invariant"); object->release_set_mark(markWord::encode(m)); + // Once ObjectMonitor is configured and the object is associated + // with the ObjectMonitor, it is safe to allow async deflation: + assert(m->is_new(), "freshly allocated monitor must be new"); + m->set_allocation_state(ObjectMonitor::Old); + // Hopefully the performance counters are allocated on distinct cache lines // to avoid false sharing on MP systems ... OM_PERFDATA_OP(Inflations, inc()); if (log_is_enabled(Trace, monitorinflation)) { ResourceMark rm(self);
*** 1415,1425 **** object->mark().value(), object->klass()->external_name()); } if (event.should_commit()) { post_monitor_inflate_event(&event, object, cause); } ! return m; } // CASE: neutral // TODO-FIXME: for entry we currently inflate and then try to CAS _owner. // If we know we're inflating for entry it's better to inflate by swinging a --- 1930,1941 ---- object->mark().value(), object->klass()->external_name()); } if (event.should_commit()) { post_monitor_inflate_event(&event, object, cause); } ! ADIM_guarantee(!m->is_free(), "inflated monitor to be returned cannot be free"); ! return; } // CASE: neutral // TODO-FIXME: for entry we currently inflate and then try to CAS _owner. // If we know we're inflating for entry it's better to inflate by swinging a
*** 1429,1459 **** // to inflate and then CAS() again to try to swing _owner from NULL to self. // An inflateTry() method that we could call from enter() would be useful. // Catch if the object's header is not neutral (not locked and // not marked is what we care about here). ! assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value()); ! ObjectMonitor* m = om_alloc(self); // prepare m for installation - set monitor to initial state m->Recycle(); m->set_header(mark); m->set_object(object); m->_Responsible = NULL; m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // consider: keep metastats by type/class if (object->cas_set_mark(markWord::encode(m), mark) != mark) { m->set_header(markWord::zero()); m->set_object(NULL); m->Recycle(); om_release(self, m, true); m = NULL; continue; // interference - the markword changed - just retry. // The state-transitions are one-way, so there's no chance of // live-lock -- "Inflated" is an absorbing state. } // Hopefully the performance counters are allocated on distinct // cache lines to avoid false sharing on MP systems ... OM_PERFDATA_OP(Inflations, inc()); if (log_is_enabled(Trace, monitorinflation)) { ResourceMark rm(self); --- 1945,1987 ---- // to inflate and then CAS() again to try to swing _owner from NULL to self. // An inflateTry() method that we could call from enter() would be useful. // Catch if the object's header is not neutral (not locked and // not marked is what we care about here). ! ADIM_guarantee(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT,mark.value()); ! ObjectMonitor* m = om_alloc(self, cause); // prepare m for installation - set monitor to initial state m->Recycle(); m->set_header(mark); + // If we leave _owner == DEFLATER_MARKER here, then the simple C2 + // ObjectMonitor enter optimization can no longer race with async + // deflation and reuse. m->set_object(object); m->_Responsible = NULL; m->_SpinDuration = ObjectMonitor::Knob_SpinLimit; // consider: keep metastats by type/class + omh_p->set_om_ptr(m); + if (object->cas_set_mark(markWord::encode(m), mark) != mark) { m->set_header(markWord::zero()); m->set_object(NULL); m->Recycle(); + omh_p->set_om_ptr(NULL); + // om_release() will reset the allocation state from New to Free. om_release(self, m, true); m = NULL; continue; // interference - the markword changed - just retry. // The state-transitions are one-way, so there's no chance of // live-lock -- "Inflated" is an absorbing state. } + // Once the ObjectMonitor is configured and object is associated + // with the ObjectMonitor, it is safe to allow async deflation: + assert(m->is_new(), "freshly allocated monitor must be new"); + m->set_allocation_state(ObjectMonitor::Old); + // Hopefully the performance counters are allocated on distinct // cache lines to avoid false sharing on MP systems ... OM_PERFDATA_OP(Inflations, inc()); if (log_is_enabled(Trace, monitorinflation)) { ResourceMark rm(self);
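Both inflation paths above end the same way: the monitor is moved from the New to the Old allocation state only after it has been fully initialized and published in the object's mark word, and only Old monitors are eligible for async deflation. A hedged sketch of that lifecycle (the real enum and checks live in ObjectMonitor, with names like is_new(), is_old() and is_free() as used in this patch):

    // Illustrative lifecycle only; values and ordering mirror how the patch
    // uses them, not the actual ObjectMonitor declaration.
    enum class AllocState { Free,   // on a free list (om_release()/deflation)
                            New,    // handed out by om_alloc(), not yet published
                            Old };  // published via the mark word; may be deflated

    struct MonitorSketch {
      AllocState allocation_state = AllocState::Free;

      // deflate_monitor_list_using_JT() skips anything that is not Old, so a
      // freshly allocated monitor cannot be deflated out from under inflate()
      // before object->release_set_mark() has made it visible.
      bool is_async_deflation_candidate() const {
        return allocation_state == AllocState::Old;  // busy/ref_count checks also apply
      }
    };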
*** 1462,1478 **** object->mark().value(), object->klass()->external_name()); } if (event.should_commit()) { post_monitor_inflate_event(&event, object, cause); } ! return m; } } // We maintain a list of in-use monitors for each thread. // // deflate_thread_local_monitors() scans a single thread's in-use list, while // deflate_idle_monitors() scans only a global list of in-use monitors which // is populated only as a thread dies (see om_flush()). // // These operations are called at all safepoints, immediately after mutators --- 1990,2008 ---- object->mark().value(), object->klass()->external_name()); } if (event.should_commit()) { post_monitor_inflate_event(&event, object, cause); } ! ADIM_guarantee(!m->is_free(), "inflated monitor to be returned cannot be free"); ! return; } } // We maintain a list of in-use monitors for each thread. // + // For safepoint based deflation: // deflate_thread_local_monitors() scans a single thread's in-use list, while // deflate_idle_monitors() scans only a global list of in-use monitors which // is populated only as a thread dies (see om_flush()). // // These operations are called at all safepoints, immediately after mutators
*** 1487,1496 **** --- 2017,2051 ---- // // Perversely, the heap size -- and thus the STW safepoint rate -- // typically drives the scavenge rate. Large heaps can mean infrequent GC, // which in turn can mean large(r) numbers of ObjectMonitors in circulation. // This is an unfortunate aspect of this design. + // + // For async deflation: + // If a special deflation request is made, then the safepoint based + // deflation mechanism is used. Otherwise, an async deflation request + // is registered with the ServiceThread and it is notified. + + void ObjectSynchronizer::do_safepoint_work(DeflateMonitorCounters* counters) { + assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + + // The per-thread in-use lists are handled in + // ParallelSPCleanupThreadClosure::do_thread(). + + if (!AsyncDeflateIdleMonitors || is_special_deflation_requested()) { + // Use the older mechanism for the global in-use list or if a + // special deflation has been requested before the safepoint. + ObjectSynchronizer::deflate_idle_monitors(counters); + return; + } + + log_debug(monitorinflation)("requesting async deflation of idle monitors."); + // Request deflation of idle monitors by the ServiceThread: + set_is_async_deflation_requested(true); + MonitorLocker ml(Service_lock, Mutex::_no_safepoint_check_flag); + ml.notify_all(); + } // Deflate a single monitor if not in-use // Return true if deflated, false if in-use bool ObjectSynchronizer::deflate_monitor(ObjectMonitor* mid, oop obj, ObjectMonitor** free_head_p,
*** 1505,1515 **** guarantee(mark.monitor() == mid, "should match: monitor()=" INTPTR_FORMAT ", mid=" INTPTR_FORMAT, p2i(mark.monitor()), p2i(mid)); const markWord dmw = mid->header(); guarantee(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); ! if (mid->is_busy()) { deflated = false; } else { // Deflate the monitor if it is no longer being used // It's idle - scavenge and return to the global free list // plain old deflation ... --- 2060,2072 ---- guarantee(mark.monitor() == mid, "should match: monitor()=" INTPTR_FORMAT ", mid=" INTPTR_FORMAT, p2i(mark.monitor()), p2i(mid)); const markWord dmw = mid->header(); guarantee(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); ! if (mid->is_busy() || mid->ref_count() != 0) { ! // Easy checks are first - the ObjectMonitor is busy or ObjectMonitor* ! // is in use so no deflation. deflated = false; } else { // Deflate the monitor if it is no longer being used // It's idle - scavenge and return to the global free list // plain old deflation ...
*** 1521,1545 **** mark.value(), obj->klass()->external_name()); } // Restore the header back to obj obj->release_set_mark(dmw); mid->clear(); assert(mid->object() == NULL, "invariant: object=" INTPTR_FORMAT, p2i(mid->object())); // Move the deflated ObjectMonitor to the working free list ! // defined by free_head_p and free_tail_p. if (*free_head_p == NULL) *free_head_p = mid; if (*free_tail_p != NULL) { // We append to the list so the caller can use mid->_next_om // to fix the linkages in its context. ObjectMonitor* prevtail = *free_tail_p; // Should have been cleaned up by the caller: ! assert(prevtail->_next_om == NULL, "cleaned up deflated?"); ! prevtail->_next_om = mid; } *free_tail_p = mid; // At this point, mid->_next_om still refers to its current // value and another ObjectMonitor's _next_om field still // refers to this ObjectMonitor. Those linkages have to be --- 2078,2115 ---- mark.value(), obj->klass()->external_name()); } // Restore the header back to obj obj->release_set_mark(dmw); + if (AsyncDeflateIdleMonitors) { + // clear() expects the owner field to be NULL and we won't race + // with the simple C2 ObjectMonitor enter optimization since + // we're at a safepoint. DEFLATER_MARKER is the only non-NULL + // value we should see here. + mid->try_set_owner_from(NULL, DEFLATER_MARKER); + } mid->clear(); assert(mid->object() == NULL, "invariant: object=" INTPTR_FORMAT, p2i(mid->object())); + assert(mid->is_free(), "invariant"); // Move the deflated ObjectMonitor to the working free list ! // defined by free_head_p and free_tail_p. No races on this list ! // so no need for load_acquire() or store_release(). if (*free_head_p == NULL) *free_head_p = mid; if (*free_tail_p != NULL) { // We append to the list so the caller can use mid->_next_om // to fix the linkages in its context. ObjectMonitor* prevtail = *free_tail_p; // Should have been cleaned up by the caller: ! // Note: Should not have to mark prevtail here since we're at a ! // safepoint and ObjectMonitors on the local free list should ! // not be accessed in parallel. ! assert(prevtail->_next_om == NULL, "must be NULL: _next_om=" ! INTPTR_FORMAT, p2i(prevtail->_next_om)); ! set_next(prevtail, mid); } *free_tail_p = mid; // At this point, mid->_next_om still refers to its current // value and another ObjectMonitor's _next_om field still // refers to this ObjectMonitor. Those linkages have to be
*** 1547,1647 **** deflated = true; } return deflated; } ! // Walk a given monitor list, and deflate idle monitors ! // The given list could be a per-thread list or a global list ! // Caller acquires gListLock as needed. // // In the case of parallel processing of thread local monitor lists, // work is done by Threads::parallel_threads_do() which ensures that // each Java thread is processed by exactly one worker thread, and // thus avoid conflicts that would arise when worker threads would // process the same monitor lists concurrently. // // See also ParallelSPCleanupTask and // SafepointSynchronize::do_cleanup_tasks() in safepoint.cpp and // Threads::parallel_java_threads_do() in thread.cpp. ! int ObjectSynchronizer::deflate_monitor_list(ObjectMonitor** list_p, ObjectMonitor** free_head_p, ObjectMonitor** free_tail_p) { - ObjectMonitor* mid; - ObjectMonitor* next; ObjectMonitor* cur_mid_in_use = NULL; int deflated_count = 0; ! for (mid = *list_p; mid != NULL;) { oop obj = (oop) mid->object(); if (obj != NULL && deflate_monitor(mid, obj, free_head_p, free_tail_p)) { // Deflation succeeded and already updated free_head_p and // free_tail_p as needed. Finish the move to the local free list // by unlinking mid from the global or per-thread in-use list. ! if (mid == *list_p) { ! *list_p = mid->_next_om; ! } else if (cur_mid_in_use != NULL) { ! cur_mid_in_use->_next_om = mid->_next_om; // maintain the current thread in-use list } ! next = mid->_next_om; ! mid->_next_om = NULL; // This mid is current tail in the free_head_p list ! mid = next; deflated_count++; } else { cur_mid_in_use = mid; ! mid = mid->_next_om; } } return deflated_count; } void ObjectSynchronizer::prepare_deflate_idle_monitors(DeflateMonitorCounters* counters) { ! counters->n_in_use = 0; // currently associated with objects ! counters->n_in_circulation = 0; // extant ! counters->n_scavenged = 0; // reclaimed (global and per-thread) ! counters->per_thread_scavenged = 0; // per-thread scavenge total counters->per_thread_times = 0.0; // per-thread scavenge times } void ObjectSynchronizer::deflate_idle_monitors(DeflateMonitorCounters* counters) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); bool deflated = false; ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors ObjectMonitor* free_tail_p = NULL; elapsedTimer timer; if (log_is_enabled(Info, monitorinflation)) { timer.start(); } - // Prevent om_flush from changing mids in Thread dtor's during deflation - // And in case the vm thread is acquiring a lock during a safepoint - // See e.g. 6320749 - Thread::muxAcquire(&gListLock, "deflate_idle_monitors"); - // Note: the thread-local monitors lists get deflated in // a separate pass. See deflate_thread_local_monitors(). // For moribund threads, scan g_om_in_use_list int deflated_count = 0; ! if (g_om_in_use_list) { ! counters->n_in_circulation += g_om_in_use_count; ! deflated_count = deflate_monitor_list((ObjectMonitor **)&g_om_in_use_list, &free_head_p, &free_tail_p); ! g_om_in_use_count -= deflated_count; ! counters->n_scavenged += deflated_count; ! counters->n_in_use += g_om_in_use_count; } if (free_head_p != NULL) { // Move the deflated ObjectMonitors back to the global free list. ! guarantee(free_tail_p != NULL && counters->n_scavenged > 0, "invariant"); ! assert(free_tail_p->_next_om == NULL, "invariant"); ! // constant-time list splice - prepend scavenged segment to g_free_list ! free_tail_p->_next_om = g_free_list; ! 
g_free_list = free_head_p; } - Thread::muxRelease(&gListLock); timer.stop(); LogStreamHandle(Debug, monitorinflation) lsh_debug; LogStreamHandle(Info, monitorinflation) lsh_info; LogStream* ls = NULL; --- 2117,2525 ---- deflated = true; } return deflated; } ! // Deflate the specified ObjectMonitor if not in-use using a JavaThread. ! // Returns true if it was deflated and false otherwise. ! // ! // The async deflation protocol sets owner to DEFLATER_MARKER and ! // makes ref_count negative as signals to contending threads that ! // an async deflation is in progress. There are a number of checks ! // as part of the protocol to make sure that the calling thread has ! // not lost the race to a contending thread or to a thread that just ! // wants to use the ObjectMonitor*. ! // ! // The ObjectMonitor has been successfully async deflated when: ! // (owner == DEFLATER_MARKER && ref_count < 0) ! // Contending threads or ObjectMonitor* using threads that see those ! // values know to retry their operation. ! // ! bool ObjectSynchronizer::deflate_monitor_using_JT(ObjectMonitor* mid, ! ObjectMonitor** free_head_p, ! ObjectMonitor** free_tail_p) { ! assert(AsyncDeflateIdleMonitors, "sanity check"); ! assert(Thread::current()->is_Java_thread(), "precondition"); ! // A newly allocated ObjectMonitor should not be seen here so we ! // avoid an endless inflate/deflate cycle. ! assert(mid->is_old(), "must be old: allocation_state=%d", ! (int) mid->allocation_state()); ! ! if (mid->is_busy() || mid->ref_count() != 0) { ! // Easy checks are first - the ObjectMonitor is busy or ObjectMonitor* ! // is in use so no deflation. ! return false; ! } ! ! if (mid->try_set_owner_from(DEFLATER_MARKER, NULL) == NULL) { ! // ObjectMonitor is not owned by another thread. Our setting ! // owner to DEFLATER_MARKER forces any contending thread through ! // the slow path. This is just the first part of the async ! // deflation dance. ! ! if (mid->_contentions != 0 || mid->_waiters != 0) { ! // Another thread has raced to enter the ObjectMonitor after ! // mid->is_busy() above or has already entered and waited on ! // it which makes it busy so no deflation. Restore owner to ! // NULL if it is still DEFLATER_MARKER. ! mid->try_set_owner_from(NULL, DEFLATER_MARKER); ! return false; ! } ! ! if (Atomic::cmpxchg(-max_jint, &mid->_ref_count, (jint)0) == 0) { ! // Make ref_count negative to force any contending threads or ! // ObjectMonitor* using threads to retry. This is the second ! // part of the async deflation dance. ! ! if (mid->owner_is_DEFLATER_MARKER()) { ! // If owner is still DEFLATER_MARKER, then we have successfully ! // signaled any contending threads to retry. If it is not, then we ! // have lost the race to an entering thread and the ObjectMonitor ! // is now busy. This is the third and final part of the async ! // deflation dance. ! // Note: This owner check solves the ABA problem with ref_count ! // where another thread acquired the ObjectMonitor, finished ! // using it and restored the ref_count to zero. ! ! // Sanity checks for the races: ! guarantee(mid->_contentions == 0, "must be 0: contentions=%d", ! mid->_contentions); ! guarantee(mid->_waiters == 0, "must be 0: waiters=%d", mid->_waiters); ! guarantee(mid->_cxq == NULL, "must be no contending threads: cxq=" ! INTPTR_FORMAT, p2i(mid->_cxq)); ! guarantee(mid->_EntryList == NULL, ! "must be no entering threads: EntryList=" INTPTR_FORMAT, ! p2i(mid->_EntryList)); ! ! const oop obj = (oop) mid->object(); ! if (log_is_enabled(Trace, monitorinflation)) { ! 
ResourceMark rm; ! log_trace(monitorinflation)("deflate_monitor_using_JT: " ! "object=" INTPTR_FORMAT ", mark=" ! INTPTR_FORMAT ", type='%s'", ! p2i(obj), obj->mark().value(), ! obj->klass()->external_name()); ! } ! ! // Install the old mark word if nobody else has already done it. ! mid->install_displaced_markword_in_object(obj); ! mid->clear_using_JT(); ! ! assert(mid->object() == NULL, "must be NULL: object=" INTPTR_FORMAT, ! p2i(mid->object())); ! assert(mid->is_free(), "must be free: allocation_state=%d", ! (int) mid->allocation_state()); ! ! // Move the deflated ObjectMonitor to the working free list ! // defined by free_head_p and free_tail_p. No races on this list ! // so no need for load_acquire() or store_release(). ! if (*free_head_p == NULL) { ! // First one on the list. ! *free_head_p = mid; ! } ! if (*free_tail_p != NULL) { ! // We append to the list so the caller can use mid->_next_om ! // to fix the linkages in its context. ! ObjectMonitor* prevtail = *free_tail_p; ! // Should have been cleaned up by the caller: ! ObjectMonitor* next = mark_next_loop(prevtail); ! assert(unmarked_next(prevtail) == NULL, "must be NULL: _next_om=" ! INTPTR_FORMAT, p2i(unmarked_next(prevtail))); ! set_next(prevtail, mid); // prevtail now points to mid (and is unmarked) ! } ! *free_tail_p = mid; ! ! // At this point, mid->_next_om still refers to its current ! // value and another ObjectMonitor's _next_om field still ! // refers to this ObjectMonitor. Those linkages have to be ! // cleaned up by the caller who has the complete context. ! ! // We leave owner == DEFLATER_MARKER and ref_count < 0 ! // to force any racing threads to retry. ! return true; // Success, ObjectMonitor has been deflated. ! } ! ! // The owner was changed from DEFLATER_MARKER so we lost the ! // race since the ObjectMonitor is now busy. ! ! // Add back max_jint to restore the ref_count field to its ! // proper value (which may not be what we saw above): ! Atomic::add(max_jint, &mid->_ref_count); ! ! assert(mid->ref_count() >= 0, "must not be negative: ref_count=%d", ! mid->ref_count()); ! return false; ! } ! ! // The ref_count was no longer 0 so we lost the race since the ! // ObjectMonitor is now busy or the ObjectMonitor* is now is use. ! // Restore owner to NULL if it is still DEFLATER_MARKER: ! mid->try_set_owner_from(NULL, DEFLATER_MARKER); ! } ! ! // The owner field is no longer NULL so we lost the race since the ! // ObjectMonitor is now busy. ! return false; ! } ! ! // Walk a given monitor list, and deflate idle monitors. ! // The given list could be a per-thread list or a global list. // // In the case of parallel processing of thread local monitor lists, // work is done by Threads::parallel_threads_do() which ensures that // each Java thread is processed by exactly one worker thread, and // thus avoid conflicts that would arise when worker threads would // process the same monitor lists concurrently. // // See also ParallelSPCleanupTask and // SafepointSynchronize::do_cleanup_tasks() in safepoint.cpp and // Threads::parallel_java_threads_do() in thread.cpp. ! int ObjectSynchronizer::deflate_monitor_list(ObjectMonitor* volatile * list_p, ! int volatile * count_p, ObjectMonitor** free_head_p, ObjectMonitor** free_tail_p) { ObjectMonitor* cur_mid_in_use = NULL; + ObjectMonitor* mid = NULL; + ObjectMonitor* next = NULL; int deflated_count = 0; ! // We use the simpler mark-mid-as-we-go protocol since there are no ! // parallel list deletions since we are at a safepoint. ! if (!mark_list_head(list_p, &mid, &next)) { ! 
return 0; // The list is empty so nothing to deflate. ! } ! ! while (true) { oop obj = (oop) mid->object(); if (obj != NULL && deflate_monitor(mid, obj, free_head_p, free_tail_p)) { // Deflation succeeded and already updated free_head_p and // free_tail_p as needed. Finish the move to the local free list // by unlinking mid from the global or per-thread in-use list. ! if (cur_mid_in_use == NULL) { ! // mid is the list head and it is marked. Switch the list head ! // to next which unmarks the list head, but leaves mid marked: ! OrderAccess::release_store(list_p, next); ! } else { ! // mid is marked. Switch cur_mid_in_use's next field to next ! // which is safe because we have no parallel list deletions, ! // but we leave mid marked: ! OrderAccess::release_store(&cur_mid_in_use->_next_om, next); } ! // At this point mid is disconnected from the in-use list so ! // its marked next field no longer has any effects. deflated_count++; + Atomic::dec(count_p); + // mid is current tail in the free_head_p list so NULL terminate it + // (which also unmarks it): + set_next(mid, NULL); + + // All the list management is done so move on to the next one: + mid = next; } else { + set_next(mid, next); // unmark next field + + // All the list management is done so move on to the next one: + cur_mid_in_use = mid; + mid = next; + } + if (mid == NULL) { + break; // Reached end of the list so nothing more to deflate. + } + // Mark mid's next field so we can possibly deflate it: + next = mark_next_loop(mid); + } + return deflated_count; + } + + // Walk a given ObjectMonitor list and deflate idle ObjectMonitors using + // a JavaThread. Returns the number of deflated ObjectMonitors. The given + // list could be a per-thread in-use list or the global in-use list. + // If a safepoint has started, then we save state via saved_mid_in_use_p + // and return to the caller to honor the safepoint. + // + int ObjectSynchronizer::deflate_monitor_list_using_JT(ObjectMonitor* volatile * list_p, + int volatile * count_p, + ObjectMonitor** free_head_p, + ObjectMonitor** free_tail_p, + ObjectMonitor** saved_mid_in_use_p) { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + + ObjectMonitor* cur_mid_in_use = NULL; + ObjectMonitor* mid = NULL; + ObjectMonitor* next = NULL; + ObjectMonitor* next_next = NULL; + int deflated_count = 0; + + // We use the more complicated mark-cur_mid_in_use-and-mid-as-we-go + // protocol because om_release() can do list deletions in parallel. + // We also mark-next-next-as-we-go to prevent an om_flush() that is + // behind this thread from passing us. + if (*saved_mid_in_use_p == NULL) { + // No saved state so start at the beginning. + // Mark the list head's next field so we can possibly deflate it: + if (!mark_list_head(list_p, &mid, &next)) { + return 0; // The list is empty so nothing to deflate. + } + } else { + // We're restarting after a safepoint so restore the necessary state + // before we resume. + cur_mid_in_use = *saved_mid_in_use_p; + // Mark cur_mid_in_use's next field so we can possibly update its + // next field to extract a deflated ObjectMonitor. + mid = mark_next_loop(cur_mid_in_use); + if (mid == NULL) { + set_next(cur_mid_in_use, NULL); // unmark next field + *saved_mid_in_use_p = NULL; + return 0; // The remainder is empty so nothing more to deflate. 
+ } + // Mark mid's next field so we can possibly deflate it: + next = mark_next_loop(mid); + } + + while (true) { + // The current mid's next field is marked at this point. If we have + // a cur_mid_in_use, then its next field is also marked at this point. + + if (next != NULL) { + // We mark next's next field so that an om_flush() + // thread that is behind us cannot pass us when we + // unmark the current mid's next field. + next_next = mark_next_loop(next); + } + + // Only try to deflate if there is an associated Java object and if + // mid is old (is not newly allocated and is not newly freed). + if (mid->object() != NULL && mid->is_old() && + deflate_monitor_using_JT(mid, free_head_p, free_tail_p)) { + // Deflation succeeded and already updated free_head_p and + // free_tail_p as needed. Finish the move to the local free list + // by unlinking mid from the global or per-thread in-use list. + if (cur_mid_in_use == NULL) { + // mid is the list head and it is marked. Switch the list head + // to next which is also marked (if not NULL) and also leave + // mid marked: + OrderAccess::release_store(list_p, next); + } else { + ObjectMonitor* marked_next = mark_om_ptr(next); + // mid and cur_mid_in_use are marked. Switch cur_mid_in_use's + // next field to marked_next and also leave mid marked: + OrderAccess::release_store(&cur_mid_in_use->_next_om, marked_next); + } + // At this point mid is disconnected from the in-use list so + // its marked next field no longer has any effects. + deflated_count++; + Atomic::dec(count_p); + // mid is current tail in the free_head_p list so NULL terminate it + // (which also unmarks it): + set_next(mid, NULL); + + // All the list management is done so move on to the next one: + mid = next; // mid keeps non-NULL next's marked next field + next = next_next; + } else { + // mid is considered in-use if it does not have an associated + // Java object or mid is not old or deflation did not succeed. + // A mid->is_new() node can be seen here when it is freshly + // returned by om_alloc() (and skips the deflation code path). + // A mid->is_old() node can be seen here when deflation failed. + // A mid->is_free() node can be seen here when a fresh node from + // om_alloc() is released by om_release() due to losing the race + // in inflate(). + + // All the list management is done so move on to the next one: + if (cur_mid_in_use != NULL) { + set_next(cur_mid_in_use, mid); // umark cur_mid_in_use + } + // The next cur_mid_in_use keeps mid's marked next field so + // that it is stable for a possible next field change. It + // cannot be modified by om_release() while it is marked. cur_mid_in_use = mid; ! mid = next; // mid keeps non-NULL next's marked next field ! next = next_next; ! ! if (SafepointSynchronize::is_synchronizing() && ! cur_mid_in_use != OrderAccess::load_acquire(list_p) && ! cur_mid_in_use->is_old()) { ! // If a safepoint has started and cur_mid_in_use is not the list ! // head and is old, then it is safe to use as saved state. Return ! // to the caller before blocking. ! *saved_mid_in_use_p = cur_mid_in_use; ! set_next(cur_mid_in_use, mid); // umark cur_mid_in_use ! if (mid != NULL) { ! set_next(mid, next); // umark mid ! } ! return deflated_count; ! } } + if (mid == NULL) { + if (cur_mid_in_use != NULL) { + set_next(cur_mid_in_use, mid); // umark cur_mid_in_use } + break; // Reached end of the list so nothing more to deflate. + } + + // The current mid's next field is marked at this point. 
If we have + // a cur_mid_in_use, then its next field is also marked at this point. + } + // We finished the list without a safepoint starting so there's + // no need to save state. + *saved_mid_in_use_p = NULL; return deflated_count; } void ObjectSynchronizer::prepare_deflate_idle_monitors(DeflateMonitorCounters* counters) { ! OrderAccess::release_store(&counters->n_in_use, 0); // currently associated with objects ! OrderAccess::release_store(&counters->n_in_circulation, 0); // extant ! OrderAccess::release_store(&counters->n_scavenged, 0); // reclaimed (global and per-thread) ! OrderAccess::release_store(&counters->per_thread_scavenged, 0); // per-thread scavenge total counters->per_thread_times = 0.0; // per-thread scavenge times } void ObjectSynchronizer::deflate_idle_monitors(DeflateMonitorCounters* counters) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + + if (AsyncDeflateIdleMonitors) { + // Nothing to do when global idle ObjectMonitors are deflated using + // a JavaThread unless a special deflation has been requested. + if (!is_special_deflation_requested()) { + return; + } + } + bool deflated = false; ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors ObjectMonitor* free_tail_p = NULL; elapsedTimer timer; if (log_is_enabled(Info, monitorinflation)) { timer.start(); } // Note: the thread-local monitors lists get deflated in // a separate pass. See deflate_thread_local_monitors(). // For moribund threads, scan g_om_in_use_list int deflated_count = 0; ! if (OrderAccess::load_acquire(&g_om_in_use_list) != NULL) { ! // Update n_in_circulation before g_om_in_use_count is updated by deflation. ! Atomic::add(OrderAccess::load_acquire(&g_om_in_use_count), &counters->n_in_circulation); ! ! deflated_count = deflate_monitor_list(&g_om_in_use_list, &g_om_in_use_count, &free_head_p, &free_tail_p); ! Atomic::add(OrderAccess::load_acquire(&g_om_in_use_count), &counters->n_in_use); } if (free_head_p != NULL) { // Move the deflated ObjectMonitors back to the global free list. ! // No races on the working free list so no need for load_acquire(). ! guarantee(free_tail_p != NULL && deflated_count > 0, "invariant"); ! assert(free_tail_p->_next_om == NULL, "must be NULL: _next_om=" ! INTPTR_FORMAT, p2i(free_tail_p->_next_om)); ! prepend_list_to_g_free_list(free_head_p, free_tail_p, deflated_count); ! Atomic::add(deflated_count, &counters->n_scavenged); } timer.stop(); LogStreamHandle(Debug, monitorinflation) lsh_debug; LogStreamHandle(Info, monitorinflation) lsh_info; LogStream* ls = NULL;
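The header comment of deflate_monitor_using_JT() above describes the protocol in prose; stripped of the HotSpot list management and logging, its control flow reduces to roughly the following sketch (hypothetical types, default memory ordering, not the actual ObjectMonitor code):

    #include <atomic>
    #include <climits>
    #include <cstdint>

    static void* const DEFLATER = reinterpret_cast<void*>(intptr_t(-1));  // stand-in for DEFLATER_MARKER

    struct Mon {
      std::atomic<void*> owner{nullptr};
      std::atomic<int>   ref_count{0};
      std::atomic<int>   contentions{0};
      std::atomic<int>   waiters{0};
    };

    // Returns true if the idle monitor was claimed for deflation.
    static bool try_deflate(Mon* m) {
      // Part 1: CAS owner from NULL to the deflater marker; contending threads
      // that see the marker are forced through their slow path.
      void* expected = nullptr;
      if (!m->owner.compare_exchange_strong(expected, DEFLATER)) {
        return false;                                         // owned, so busy
      }
      if (m->contentions.load() != 0 || m->waiters.load() != 0) {
        expected = DEFLATER;                                  // raced with enter/wait
        m->owner.compare_exchange_strong(expected, nullptr);  // undo part 1
        return false;
      }
      // Part 2: CAS ref_count from 0 to a large negative value so that threads
      // trying to pin the monitor (see the try_pin() sketch earlier) back off.
      int zero = 0;
      if (!m->ref_count.compare_exchange_strong(zero, -INT_MAX)) {
        expected = DEFLATER;
        m->owner.compare_exchange_strong(expected, nullptr);  // undo part 1
        return false;                                         // a pinning thread won
      }
      // Part 3: if owner is still the marker, nothing slipped in between the
      // two CASes and the monitor is ours; otherwise restore ref_count.
      if (m->owner.load() != DEFLATER) {
        m->ref_count.fetch_add(INT_MAX);                      // undo part 2
        return false;
      }
      // Success: owner stays DEFLATER and ref_count stays negative while the
      // caller detaches the object and moves the monitor to a free list.
      return true;
    }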
*** 1653,1733 **** if (ls != NULL) { ls->print_cr("deflating global idle monitors, %3.7f secs, %d monitors", timer.seconds(), deflated_count); } } void ObjectSynchronizer::finish_deflate_idle_monitors(DeflateMonitorCounters* counters) { // Report the cumulative time for deflating each thread's idle // monitors. Note: if the work is split among more than one // worker thread, then the reported time will likely be more // than a beginning to end measurement of the phase. ! log_info(safepoint, cleanup)("deflating per-thread idle monitors, %3.7f secs, monitors=%d", counters->per_thread_times, counters->per_thread_scavenged); ! g_om_free_count += counters->n_scavenged; if (log_is_enabled(Debug, monitorinflation)) { // exit_globals()'s call to audit_and_print_stats() is done // at the Info level. ObjectSynchronizer::audit_and_print_stats(false /* on_exit */); } else if (log_is_enabled(Info, monitorinflation)) { - Thread::muxAcquire(&gListLock, "finish_deflate_idle_monitors"); log_info(monitorinflation)("g_om_population=%d, g_om_in_use_count=%d, " ! "g_om_free_count=%d", g_om_population, ! g_om_in_use_count, g_om_free_count); ! Thread::muxRelease(&gListLock); } ForceMonitorScavenge = 0; // Reset - - OM_PERFDATA_OP(Deflations, inc(counters->n_scavenged)); - OM_PERFDATA_OP(MonExtant, set_value(counters->n_in_circulation)); - GVars.stw_random = os::random(); GVars.stw_cycle++; } void ObjectSynchronizer::deflate_thread_local_monitors(Thread* thread, DeflateMonitorCounters* counters) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors ObjectMonitor* free_tail_p = NULL; elapsedTimer timer; if (log_is_enabled(Info, safepoint, cleanup) || log_is_enabled(Info, monitorinflation)) { timer.start(); } ! int deflated_count = deflate_monitor_list(thread->om_in_use_list_addr(), &free_head_p, &free_tail_p); ! ! Thread::muxAcquire(&gListLock, "deflate_thread_local_monitors"); ! // Adjust counters ! counters->n_in_circulation += thread->om_in_use_count; ! thread->om_in_use_count -= deflated_count; ! counters->n_scavenged += deflated_count; ! counters->n_in_use += thread->om_in_use_count; ! counters->per_thread_scavenged += deflated_count; if (free_head_p != NULL) { // Move the deflated ObjectMonitors back to the global free list. guarantee(free_tail_p != NULL && deflated_count > 0, "invariant"); ! assert(free_tail_p->_next_om == NULL, "invariant"); ! ! // constant-time list splice - prepend scavenged segment to g_free_list ! free_tail_p->_next_om = g_free_list; ! g_free_list = free_head_p; } timer.stop(); // Safepoint logging cares about cumulative per_thread_times and // we'll capture most of the cost, but not the muxRelease() which // should be cheap. 
counters->per_thread_times += timer.seconds(); - Thread::muxRelease(&gListLock); - LogStreamHandle(Debug, monitorinflation) lsh_debug; LogStreamHandle(Info, monitorinflation) lsh_info; LogStream* ls = NULL; if (log_is_enabled(Debug, monitorinflation)) { ls = &lsh_debug; --- 2531,2794 ---- if (ls != NULL) { ls->print_cr("deflating global idle monitors, %3.7f secs, %d monitors", timer.seconds(), deflated_count); } } + class HandshakeForDeflation : public ThreadClosure { + public: + void do_thread(Thread* thread) { + log_trace(monitorinflation)("HandshakeForDeflation::do_thread: thread=" + INTPTR_FORMAT, p2i(thread)); + } + }; + + void ObjectSynchronizer::deflate_idle_monitors_using_JT() { + assert(AsyncDeflateIdleMonitors, "sanity check"); + + // Deflate any global idle monitors. + deflate_global_idle_monitors_using_JT(); + + int count = 0; + for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { + if (jt->om_in_use_count > 0 && !jt->is_exiting()) { + // This JavaThread is using ObjectMonitors so deflate any that + // are idle unless this JavaThread is exiting; do not race with + // ObjectSynchronizer::om_flush(). + deflate_per_thread_idle_monitors_using_JT(jt); + count++; + } + } + if (count > 0) { + log_debug(monitorinflation)("did async deflation of idle monitors for %d thread(s).", count); + } + // The ServiceThread's async deflation request has been processed. + set_is_async_deflation_requested(false); + + if (HandshakeAfterDeflateIdleMonitors && g_om_wait_count > 0) { + // There are deflated ObjectMonitors waiting for a handshake + // (or a safepoint) for safety. + + // g_wait_list and g_om_wait_count are only updated by the calling + // thread so no need for load_acquire() or release_store(). + ObjectMonitor* list = g_wait_list; + ADIM_guarantee(list != NULL, "g_wait_list must not be NULL"); + int count = g_om_wait_count; + g_wait_list = NULL; + g_om_wait_count = 0; + + // Find the tail for prepend_list_to_common(). + int l_count = 0; + ObjectMonitor* tail = NULL; + for (ObjectMonitor* n = list; n != NULL; n = unmarked_next(n)) { + tail = n; + l_count++; + } + ADIM_guarantee(count == l_count, "count=%d != l_count=%d", count, l_count); + + // Will execute a safepoint if !ThreadLocalHandshakes: + HandshakeForDeflation hfd_tc; + Handshake::execute(&hfd_tc); + + prepend_list_to_common(list, tail, count, &g_free_list, &g_om_free_count); + + log_info(monitorinflation)("moved %d idle monitors from global waiting list to global free list", count); + } + } + + // Deflate global idle ObjectMonitors using a JavaThread. + // + void ObjectSynchronizer::deflate_global_idle_monitors_using_JT() { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + JavaThread* self = JavaThread::current(); + + deflate_common_idle_monitors_using_JT(true /* is_global */, self); + } + + // Deflate the specified JavaThread's idle ObjectMonitors using a JavaThread. + // + void ObjectSynchronizer::deflate_per_thread_idle_monitors_using_JT(JavaThread* target) { + assert(AsyncDeflateIdleMonitors, "sanity check"); + assert(Thread::current()->is_Java_thread(), "precondition"); + + deflate_common_idle_monitors_using_JT(false /* !is_global */, target); + } + + // Deflate global or per-thread idle ObjectMonitors using a JavaThread. 
+ // + void ObjectSynchronizer::deflate_common_idle_monitors_using_JT(bool is_global, JavaThread* target) { + JavaThread* self = JavaThread::current(); + + int deflated_count = 0; + ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged ObjectMonitors + ObjectMonitor* free_tail_p = NULL; + ObjectMonitor* saved_mid_in_use_p = NULL; + elapsedTimer timer; + + if (log_is_enabled(Info, monitorinflation)) { + timer.start(); + } + + if (is_global) { + OM_PERFDATA_OP(MonExtant, set_value(OrderAccess::load_acquire(&g_om_in_use_count))); + } else { + OM_PERFDATA_OP(MonExtant, inc(OrderAccess::load_acquire(&target->om_in_use_count))); + } + + do { + int local_deflated_count; + if (is_global) { + local_deflated_count = deflate_monitor_list_using_JT(&g_om_in_use_list, &g_om_in_use_count, &free_head_p, &free_tail_p, &saved_mid_in_use_p); + } else { + local_deflated_count = deflate_monitor_list_using_JT(&target->om_in_use_list, &target->om_in_use_count, &free_head_p, &free_tail_p, &saved_mid_in_use_p); + } + deflated_count += local_deflated_count; + + if (free_head_p != NULL) { + // Move the deflated ObjectMonitors to the global free list. + // No races on the working list so no need for load_acquire(). + guarantee(free_tail_p != NULL && local_deflated_count > 0, "free_tail_p=" INTPTR_FORMAT ", local_deflated_count=%d", p2i(free_tail_p), local_deflated_count); + // Note: The target thread can be doing an om_alloc() that + // is trying to prepend an ObjectMonitor on its in-use list + // at the same time that we have deflated the current in-use + // list head and put it on the local free list. prepend_to_common() + // will detect the race and retry which avoids list corruption, + // but the next field in free_tail_p can flicker to marked + // and then unmarked while prepend_to_common() is sorting it + // all out. + assert(unmarked_next(free_tail_p) == NULL, "must be NULL: _next_om=" + INTPTR_FORMAT, p2i(unmarked_next(free_tail_p))); + + if (HandshakeAfterDeflateIdleMonitors) { + prepend_list_to_g_wait_list(free_head_p, free_tail_p, local_deflated_count); + } else { + prepend_list_to_g_free_list(free_head_p, free_tail_p, local_deflated_count); + } + + OM_PERFDATA_OP(Deflations, inc(local_deflated_count)); + } + + if (saved_mid_in_use_p != NULL) { + // deflate_monitor_list_using_JT() detected a safepoint starting. + timer.stop(); + { + if (is_global) { + log_debug(monitorinflation)("pausing deflation of global idle monitors for a safepoint."); + } else { + log_debug(monitorinflation)("jt=" INTPTR_FORMAT ": pausing deflation of per-thread idle monitors for a safepoint.", p2i(target)); + } + assert(SafepointSynchronize::is_synchronizing(), "sanity check"); + ThreadBlockInVM blocker(self); + } + // Prepare for another loop after the safepoint. 
+ free_head_p = NULL; + free_tail_p = NULL; + if (log_is_enabled(Info, monitorinflation)) { + timer.start(); + } + } + } while (saved_mid_in_use_p != NULL); + timer.stop(); + + LogStreamHandle(Debug, monitorinflation) lsh_debug; + LogStreamHandle(Info, monitorinflation) lsh_info; + LogStream* ls = NULL; + if (log_is_enabled(Debug, monitorinflation)) { + ls = &lsh_debug; + } else if (deflated_count != 0 && log_is_enabled(Info, monitorinflation)) { + ls = &lsh_info; + } + if (ls != NULL) { + if (is_global) { + ls->print_cr("async-deflating global idle monitors, %3.7f secs, %d monitors", timer.seconds(), deflated_count); + } else { + ls->print_cr("jt=" INTPTR_FORMAT ": async-deflating per-thread idle monitors, %3.7f secs, %d monitors", p2i(target), timer.seconds(), deflated_count); + } + } + } + void ObjectSynchronizer::finish_deflate_idle_monitors(DeflateMonitorCounters* counters) { // Report the cumulative time for deflating each thread's idle // monitors. Note: if the work is split among more than one // worker thread, then the reported time will likely be more // than a beginning to end measurement of the phase. ! // Note: AsyncDeflateIdleMonitors only deflates per-thread idle ! // monitors at a safepoint when a special deflation has been requested. ! log_info(safepoint, cleanup)("deflating per-thread idle monitors, %3.7f secs, monitors=%d", ! counters->per_thread_times, ! OrderAccess::load_acquire(&counters->per_thread_scavenged)); ! ! bool needs_special_deflation = is_special_deflation_requested(); ! if (!AsyncDeflateIdleMonitors || needs_special_deflation) { ! // AsyncDeflateIdleMonitors does not use these counters unless ! // there is a special deflation request. ! OM_PERFDATA_OP(Deflations, inc(counters->n_scavenged)); ! OM_PERFDATA_OP(MonExtant, set_value(counters->n_in_circulation)); ! } if (log_is_enabled(Debug, monitorinflation)) { // exit_globals()'s call to audit_and_print_stats() is done // at the Info level. ObjectSynchronizer::audit_and_print_stats(false /* on_exit */); } else if (log_is_enabled(Info, monitorinflation)) { log_info(monitorinflation)("g_om_population=%d, g_om_in_use_count=%d, " ! "g_om_free_count=%d, g_om_wait_count=%d", ! OrderAccess::load_acquire(&g_om_population), ! OrderAccess::load_acquire(&g_om_in_use_count), ! OrderAccess::load_acquire(&g_om_free_count), ! OrderAccess::load_acquire(&g_om_wait_count)); } ForceMonitorScavenge = 0; // Reset GVars.stw_random = os::random(); GVars.stw_cycle++; + if (needs_special_deflation) { + set_is_special_deflation_requested(false); // special deflation is done + } } void ObjectSynchronizer::deflate_thread_local_monitors(Thread* thread, DeflateMonitorCounters* counters) { assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + if (AsyncDeflateIdleMonitors && !is_special_deflation_requested()) { + // Nothing to do if a special deflation has NOT been requested. + return; + } + ObjectMonitor* free_head_p = NULL; // Local SLL of scavenged monitors ObjectMonitor* free_tail_p = NULL; elapsedTimer timer; if (log_is_enabled(Info, safepoint, cleanup) || log_is_enabled(Info, monitorinflation)) { timer.start(); } ! // Update n_in_circulation before om_in_use_count is updated by deflation. ! Atomic::add(OrderAccess::load_acquire(&thread->om_in_use_count), &counters->n_in_circulation); ! int deflated_count = deflate_monitor_list(&thread->om_in_use_list, &thread->om_in_use_count, &free_head_p, &free_tail_p); ! 
Atomic::add(OrderAccess::load_acquire(&thread->om_in_use_count), &counters->n_in_use); if (free_head_p != NULL) { // Move the deflated ObjectMonitors back to the global free list. + // No races on the working list so no need for load_acquire(). guarantee(free_tail_p != NULL && deflated_count > 0, "invariant"); ! assert(free_tail_p->_next_om == NULL, "must be NULL: _next_om=" ! INTPTR_FORMAT, p2i(free_tail_p->_next_om)); ! prepend_list_to_g_free_list(free_head_p, free_tail_p, deflated_count); ! Atomic::add(deflated_count, &counters->n_scavenged); ! Atomic::add(deflated_count, &counters->per_thread_scavenged); } timer.stop(); // Safepoint logging cares about cumulative per_thread_times and // we'll capture most of the cost, but not the muxRelease() which // should be cheap. counters->per_thread_times += timer.seconds(); LogStreamHandle(Debug, monitorinflation) lsh_debug; LogStreamHandle(Info, monitorinflation) lsh_info; LogStream* ls = NULL; if (log_is_enabled(Debug, monitorinflation)) { ls = &lsh_debug;
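The async path in the hunk above first parks deflated ObjectMonitors on g_wait_list and only splices them onto g_free_list after Handshake::execute() (or a safepoint when !ThreadLocalHandshakes) has rendezvoused with every JavaThread, so no racing reader can still hold a reference taken before deflation. A minimal standalone sketch of that ordering, using std::atomic stand-ins and invented helper names (this is not HotSpot code):

#include <atomic>
#include <cstdio>

struct Node { Node* next = nullptr; };             // stand-in for ObjectMonitor

static std::atomic<Node*> g_wait_head{nullptr};    // models g_wait_list
static std::atomic<Node*> g_free_head{nullptr};    // models g_free_list

// Park a deflated node; it must not be reused until after the rendezvous.
static void park_on_wait_list(Node* n) {
  Node* old = g_wait_head.load(std::memory_order_relaxed);
  do {
    n->next = old;
  } while (!g_wait_head.compare_exchange_weak(old, n,
                                              std::memory_order_release,
                                              std::memory_order_relaxed));
}

// Placeholder for Handshake::execute()/a safepoint: after it returns, no
// thread can still hold a stale reference obtained before deflation.
static void rendezvous_with_all_threads() { /* platform specific */ }

// Models the g_wait_list -> g_free_list transfer done by the deflater thread.
static void flush_wait_list_to_free_list() {
  Node* list = g_wait_head.exchange(nullptr, std::memory_order_acquire);
  if (list == nullptr) return;

  rendezvous_with_all_threads();

  Node* tail = list;                               // find the tail for the splice
  int count = 1;
  while (tail->next != nullptr) { tail = tail->next; count++; }

  Node* old = g_free_head.load(std::memory_order_relaxed);
  do {
    tail->next = old;                              // constant-time prepend of the chain
  } while (!g_free_head.compare_exchange_weak(old, list,
                                              std::memory_order_release,
                                              std::memory_order_relaxed));
  std::printf("moved %d idle nodes from the wait list to the free list\n", count);
}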
*** 1774,1786 ****
  
  void ObjectSynchronizer::release_monitors_owned_by_thread(TRAPS) {
    assert(THREAD == JavaThread::current(), "must be current Java thread");
    NoSafepointVerifier nsv;
    ReleaseJavaMonitorsClosure rjmc(THREAD);
-   Thread::muxAcquire(&gListLock, "release_monitors_owned_by_thread");
    ObjectSynchronizer::monitors_iterate(&rjmc);
-   Thread::muxRelease(&gListLock);
    THREAD->clear_pending_exception();
  }
  
  const char* ObjectSynchronizer::inflate_cause_name(const InflateCause cause) {
    switch (cause) {
--- 2835,2845 ----
*** 1830,1869 **** } else if (log_is_enabled(Info, monitorinflation)) { ls = &lsh_info; } assert(ls != NULL, "sanity check"); - if (!on_exit) { - // Not at VM exit so grab the global list lock. - Thread::muxAcquire(&gListLock, "audit_and_print_stats"); - } - // Log counts for the global and per-thread monitor lists: int chk_om_population = log_monitor_list_counts(ls); int error_cnt = 0; ls->print_cr("Checking global lists:"); // Check g_om_population: ! if (g_om_population == chk_om_population) { ls->print_cr("g_om_population=%d equals chk_om_population=%d", ! g_om_population, chk_om_population); } else { ! ls->print_cr("ERROR: g_om_population=%d is not equal to " ! "chk_om_population=%d", g_om_population, chk_om_population); - error_cnt++; } // Check g_om_in_use_list and g_om_in_use_count: chk_global_in_use_list_and_count(ls, &error_cnt); // Check g_free_list and g_om_free_count: chk_global_free_list_and_count(ls, &error_cnt); ! if (!on_exit) { ! Thread::muxRelease(&gListLock); } ls->print_cr("Checking per-thread lists:"); for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { --- 2889,2929 ---- } else if (log_is_enabled(Info, monitorinflation)) { ls = &lsh_info; } assert(ls != NULL, "sanity check"); // Log counts for the global and per-thread monitor lists: int chk_om_population = log_monitor_list_counts(ls); int error_cnt = 0; ls->print_cr("Checking global lists:"); // Check g_om_population: ! if (OrderAccess::load_acquire(&g_om_population) == chk_om_population) { ls->print_cr("g_om_population=%d equals chk_om_population=%d", ! OrderAccess::load_acquire(&g_om_population), ! chk_om_population); } else { ! // With lock free access to the monitor lists, it is possible for ! // log_monitor_list_counts() to return a value that doesn't match ! // g_om_population. So far a higher value has been seen in testing ! // so something is being double counted by log_monitor_list_counts(). ! ls->print_cr("WARNING: g_om_population=%d is not equal to " ! "chk_om_population=%d", ! OrderAccess::load_acquire(&g_om_population), chk_om_population); } // Check g_om_in_use_list and g_om_in_use_count: chk_global_in_use_list_and_count(ls, &error_cnt); // Check g_free_list and g_om_free_count: chk_global_free_list_and_count(ls, &error_cnt); ! if (HandshakeAfterDeflateIdleMonitors) { ! // Check g_wait_list and g_om_wait_count: ! chk_global_wait_list_and_count(ls, &error_cnt); } ls->print_cr("Checking per-thread lists:"); for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) {
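audit_and_print_stats() now tolerates a mismatch between g_om_population and the freshly walked count, because with lock-free lists the shared counter and the list links are updated in separate steps. A contrived sketch of that window, using std::atomic and invented names rather than the real list code:

#include <atomic>

struct Mon { std::atomic<Mon*> next{nullptr}; };   // stand-in for ObjectMonitor

static std::atomic<Mon*> head{nullptr};            // models g_om_in_use_list
static std::atomic<int>  population{0};            // models g_om_population

static void prepend(Mon* m) {
  Mon* old = head.load(std::memory_order_relaxed);
  do {
    m->next.store(old, std::memory_order_relaxed);
  } while (!head.compare_exchange_weak(old, m, std::memory_order_release,
                                       std::memory_order_relaxed));
  population.fetch_add(1, std::memory_order_relaxed);  // counter trails the list
}

// A concurrent auditor can run between the CAS and the fetch_add above and
// count one more node than 'population' reports: a warning, not corruption.
static int walk_and_count() {
  int n = 0;
  for (Mon* m = head.load(std::memory_order_acquire); m != nullptr;
       m = m->next.load(std::memory_order_acquire)) {
    n++;
  }
  return n;
}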
*** 1883,1893 **** if ((on_exit && log_is_enabled(Info, monitorinflation)) || (!on_exit && log_is_enabled(Trace, monitorinflation))) { // When exiting this log output is at the Info level. When called // at a safepoint, this log output is at the Trace level since // there can be a lot of it. ! log_in_use_monitor_details(ls, on_exit); } ls->flush(); guarantee(error_cnt == 0, "ERROR: found monitor list errors: error_cnt=%d", error_cnt); --- 2943,2953 ---- if ((on_exit && log_is_enabled(Info, monitorinflation)) || (!on_exit && log_is_enabled(Trace, monitorinflation))) { // When exiting this log output is at the Info level. When called // at a safepoint, this log output is at the Trace level since // there can be a lot of it. ! log_in_use_monitor_details(ls); } ls->flush(); guarantee(error_cnt == 0, "ERROR: found monitor list errors: error_cnt=%d", error_cnt);
*** 1912,1928 **** if (jt != NULL) { out->print_cr("ERROR: jt=" INTPTR_FORMAT ", monitor=" INTPTR_FORMAT ": free per-thread monitor must have NULL _header " "field: _header=" INTPTR_FORMAT, p2i(jt), p2i(n), n->header().value()); ! } else { out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": free global monitor " "must have NULL _header field: _header=" INTPTR_FORMAT, p2i(n), n->header().value()); - } *error_cnt_p = *error_cnt_p + 1; } if (n->object() != NULL) { if (jt != NULL) { out->print_cr("ERROR: jt=" INTPTR_FORMAT ", monitor=" INTPTR_FORMAT ": free per-thread monitor must have NULL _object " "field: _object=" INTPTR_FORMAT, p2i(jt), p2i(n), --- 2972,2989 ---- if (jt != NULL) { out->print_cr("ERROR: jt=" INTPTR_FORMAT ", monitor=" INTPTR_FORMAT ": free per-thread monitor must have NULL _header " "field: _header=" INTPTR_FORMAT, p2i(jt), p2i(n), n->header().value()); ! *error_cnt_p = *error_cnt_p + 1; ! } else if (!AsyncDeflateIdleMonitors) { out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": free global monitor " "must have NULL _header field: _header=" INTPTR_FORMAT, p2i(n), n->header().value()); *error_cnt_p = *error_cnt_p + 1; } + } if (n->object() != NULL) { if (jt != NULL) { out->print_cr("ERROR: jt=" INTPTR_FORMAT ", monitor=" INTPTR_FORMAT ": free per-thread monitor must have NULL _object " "field: _object=" INTPTR_FORMAT, p2i(jt), p2i(n),
*** 1938,1977 **** // Check the global free list and count; log the results of the checks. void ObjectSynchronizer::chk_global_free_list_and_count(outputStream * out, int *error_cnt_p) { int chk_om_free_count = 0; ! for (ObjectMonitor* n = g_free_list; n != NULL; n = n->_next_om) { chk_free_entry(NULL /* jt */, n, out, error_cnt_p); chk_om_free_count++; } ! if (g_om_free_count == chk_om_free_count) { out->print_cr("g_om_free_count=%d equals chk_om_free_count=%d", ! g_om_free_count, chk_om_free_count); } else { ! out->print_cr("ERROR: g_om_free_count=%d is not equal to " ! "chk_om_free_count=%d", g_om_free_count, chk_om_free_count); *error_cnt_p = *error_cnt_p + 1; } } // Check the global in-use list and count; log the results of the checks. void ObjectSynchronizer::chk_global_in_use_list_and_count(outputStream * out, int *error_cnt_p) { int chk_om_in_use_count = 0; ! for (ObjectMonitor* n = g_om_in_use_list; n != NULL; n = n->_next_om) { chk_in_use_entry(NULL /* jt */, n, out, error_cnt_p); chk_om_in_use_count++; } ! if (g_om_in_use_count == chk_om_in_use_count) { ! out->print_cr("g_om_in_use_count=%d equals chk_om_in_use_count=%d", g_om_in_use_count, chk_om_in_use_count); } else { ! out->print_cr("ERROR: g_om_in_use_count=%d is not equal to chk_om_in_use_count=%d", ! g_om_in_use_count, chk_om_in_use_count); ! *error_cnt_p = *error_cnt_p + 1; } } // Check an in-use monitor entry; log any errors. void ObjectSynchronizer::chk_in_use_entry(JavaThread* jt, ObjectMonitor* n, --- 2999,3069 ---- // Check the global free list and count; log the results of the checks. void ObjectSynchronizer::chk_global_free_list_and_count(outputStream * out, int *error_cnt_p) { int chk_om_free_count = 0; ! for (ObjectMonitor* n = OrderAccess::load_acquire(&g_free_list); n != NULL; n = unmarked_next(n)) { chk_free_entry(NULL /* jt */, n, out, error_cnt_p); chk_om_free_count++; } ! if (OrderAccess::load_acquire(&g_om_free_count) == chk_om_free_count) { out->print_cr("g_om_free_count=%d equals chk_om_free_count=%d", ! OrderAccess::load_acquire(&g_om_free_count), ! chk_om_free_count); } else { ! // With lock free access to g_free_list, it is possible for an ! // ObjectMonitor to be prepended to g_free_list after we started ! // calculating chk_om_free_count so g_om_free_count may not ! // match anymore. ! out->print_cr("WARNING: g_om_free_count=%d is not equal to " ! "chk_om_free_count=%d", ! OrderAccess::load_acquire(&g_om_free_count), chk_om_free_count); + } + } + + // Check the global wait list and count; log the results of the checks. + void ObjectSynchronizer::chk_global_wait_list_and_count(outputStream * out, + int *error_cnt_p) { + int chk_om_wait_count = 0; + for (ObjectMonitor* n = OrderAccess::load_acquire(&g_wait_list); n != NULL; n = unmarked_next(n)) { + // Rules for g_wait_list are the same as of g_free_list: + chk_free_entry(NULL /* jt */, n, out, error_cnt_p); + chk_om_wait_count++; + } + if (OrderAccess::load_acquire(&g_om_wait_count) == chk_om_wait_count) { + out->print_cr("g_om_wait_count=%d equals chk_om_wait_count=%d", + OrderAccess::load_acquire(&g_om_wait_count), + chk_om_wait_count); + } else { + out->print_cr("ERROR: g_om_wait_count=%d is not equal to " + "chk_om_wait_count=%d", + OrderAccess::load_acquire(&g_om_wait_count), + chk_om_wait_count); *error_cnt_p = *error_cnt_p + 1; } } // Check the global in-use list and count; log the results of the checks. void ObjectSynchronizer::chk_global_in_use_list_and_count(outputStream * out, int *error_cnt_p) { int chk_om_in_use_count = 0; ! 
for (ObjectMonitor* n = OrderAccess::load_acquire(&g_om_in_use_list); n != NULL; n = unmarked_next(n)) { chk_in_use_entry(NULL /* jt */, n, out, error_cnt_p); chk_om_in_use_count++; } ! if (OrderAccess::load_acquire(&g_om_in_use_count) == chk_om_in_use_count) { ! out->print_cr("g_om_in_use_count=%d equals chk_om_in_use_count=%d", ! OrderAccess::load_acquire(&g_om_in_use_count), chk_om_in_use_count); } else { ! // With lock free access to the monitor lists, it is possible for ! // an exiting JavaThread to put its in-use ObjectMonitors on the ! // global in-use list after chk_om_in_use_count is calculated above. ! out->print_cr("WARNING: g_om_in_use_count=%d is not equal to chk_om_in_use_count=%d", ! OrderAccess::load_acquire(&g_om_in_use_count), ! chk_om_in_use_count); } } // Check an in-use monitor entry; log any errors. void ObjectSynchronizer::chk_in_use_entry(JavaThread* jt, ObjectMonitor* n,
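The checkers above walk lists whose next fields can be transiently "marked" by a thread that is editing the list, so they go through unmarked_next() instead of reading _next_om directly. A rough standalone model, under the assumption that the mark is the pointer's low bit (the helper and type names here are invented):

#include <atomic>
#include <cstdint>

struct Mon { std::atomic<Mon*> next{nullptr}; };   // stand-in for ObjectMonitor

static Mon* strip_mark(Mon* p) {
  return reinterpret_cast<Mon*>(reinterpret_cast<uintptr_t>(p) & ~uintptr_t(1));
}

// Models unmarked_next(): read the next field and ignore a transient mark.
static Mon* unmarked_next_of(const Mon* m) {
  return strip_mark(m->next.load(std::memory_order_acquire));
}

// Count a list the way the chk_*_list_and_count() functions do.
static int count_list(const std::atomic<Mon*>& head) {
  int n = 0;
  for (Mon* m = strip_mark(head.load(std::memory_order_acquire));
       m != nullptr; m = unmarked_next_of(m)) {
    n++;
  }
  return n;
}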
*** 2035,2130 **** // Check the thread's free list and count; log the results of the checks. void ObjectSynchronizer::chk_per_thread_free_list_and_count(JavaThread *jt, outputStream * out, int *error_cnt_p) { int chk_om_free_count = 0; ! for (ObjectMonitor* n = jt->om_free_list; n != NULL; n = n->_next_om) { chk_free_entry(jt, n, out, error_cnt_p); chk_om_free_count++; } ! if (jt->om_free_count == chk_om_free_count) { out->print_cr("jt=" INTPTR_FORMAT ": om_free_count=%d equals " ! "chk_om_free_count=%d", p2i(jt), jt->om_free_count, chk_om_free_count); } else { out->print_cr("ERROR: jt=" INTPTR_FORMAT ": om_free_count=%d is not " ! "equal to chk_om_free_count=%d", p2i(jt), jt->om_free_count, chk_om_free_count); *error_cnt_p = *error_cnt_p + 1; } } // Check the thread's in-use list and count; log the results of the checks. void ObjectSynchronizer::chk_per_thread_in_use_list_and_count(JavaThread *jt, outputStream * out, int *error_cnt_p) { int chk_om_in_use_count = 0; ! for (ObjectMonitor* n = jt->om_in_use_list; n != NULL; n = n->_next_om) { chk_in_use_entry(jt, n, out, error_cnt_p); chk_om_in_use_count++; } ! if (jt->om_in_use_count == chk_om_in_use_count) { out->print_cr("jt=" INTPTR_FORMAT ": om_in_use_count=%d equals " ! "chk_om_in_use_count=%d", p2i(jt), jt->om_in_use_count, chk_om_in_use_count); } else { out->print_cr("ERROR: jt=" INTPTR_FORMAT ": om_in_use_count=%d is not " ! "equal to chk_om_in_use_count=%d", p2i(jt), jt->om_in_use_count, chk_om_in_use_count); *error_cnt_p = *error_cnt_p + 1; } } // Log details about ObjectMonitors on the in-use lists. The 'BHL' // flags indicate why the entry is in-use, 'object' and 'object type' // indicate the associated object and its type. ! void ObjectSynchronizer::log_in_use_monitor_details(outputStream * out, ! bool on_exit) { ! if (!on_exit) { ! // Not at VM exit so grab the global list lock. ! Thread::muxAcquire(&gListLock, "log_in_use_monitor_details"); ! } ! stringStream ss; ! if (g_om_in_use_count > 0) { out->print_cr("In-use global monitor info:"); out->print_cr("(B -> is_busy, H -> has hash code, L -> lock status)"); ! out->print_cr("%18s %s %18s %18s", ! "monitor", "BHL", "object", "object type"); ! out->print_cr("================== === ================== =================="); ! for (ObjectMonitor* n = g_om_in_use_list; n != NULL; n = n->_next_om) { const oop obj = (oop) n->object(); const markWord mark = n->header(); ResourceMark rm; ! out->print(INTPTR_FORMAT " %d%d%d " INTPTR_FORMAT " %s", p2i(n), ! n->is_busy() != 0, mark.hash() != 0, n->owner() != NULL, ! p2i(obj), obj->klass()->external_name()); if (n->is_busy() != 0) { out->print(" (%s)", n->is_busy_to_string(&ss)); ss.reset(); } out->cr(); } } - if (!on_exit) { - Thread::muxRelease(&gListLock); - } - out->print_cr("In-use per-thread monitor info:"); out->print_cr("(B -> is_busy, H -> has hash code, L -> lock status)"); ! out->print_cr("%18s %18s %s %18s %18s", ! "jt", "monitor", "BHL", "object", "object type"); ! out->print_cr("================== ================== === ================== =================="); for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { ! for (ObjectMonitor* n = jt->om_in_use_list; n != NULL; n = n->_next_om) { const oop obj = (oop) n->object(); const markWord mark = n->header(); ResourceMark rm; ! out->print(INTPTR_FORMAT " " INTPTR_FORMAT " %d%d%d " INTPTR_FORMAT ! " %s", p2i(jt), p2i(n), n->is_busy() != 0, ! mark.hash() != 0, n->owner() != NULL, p2i(obj), ! 
obj->klass()->external_name()); if (n->is_busy() != 0) { out->print(" (%s)", n->is_busy_to_string(&ss)); ss.reset(); } out->cr(); --- 3127,3218 ---- // Check the thread's free list and count; log the results of the checks. void ObjectSynchronizer::chk_per_thread_free_list_and_count(JavaThread *jt, outputStream * out, int *error_cnt_p) { int chk_om_free_count = 0; ! for (ObjectMonitor* n = OrderAccess::load_acquire(&jt->om_free_list); n != NULL; n = unmarked_next(n)) { chk_free_entry(jt, n, out, error_cnt_p); chk_om_free_count++; } ! if (OrderAccess::load_acquire(&jt->om_free_count) == chk_om_free_count) { out->print_cr("jt=" INTPTR_FORMAT ": om_free_count=%d equals " ! "chk_om_free_count=%d", p2i(jt), ! OrderAccess::load_acquire(&jt->om_free_count), ! chk_om_free_count); } else { out->print_cr("ERROR: jt=" INTPTR_FORMAT ": om_free_count=%d is not " ! "equal to chk_om_free_count=%d", p2i(jt), ! OrderAccess::load_acquire(&jt->om_free_count), chk_om_free_count); *error_cnt_p = *error_cnt_p + 1; } } // Check the thread's in-use list and count; log the results of the checks. void ObjectSynchronizer::chk_per_thread_in_use_list_and_count(JavaThread *jt, outputStream * out, int *error_cnt_p) { int chk_om_in_use_count = 0; ! for (ObjectMonitor* n = OrderAccess::load_acquire(&jt->om_in_use_list); n != NULL; n = unmarked_next(n)) { chk_in_use_entry(jt, n, out, error_cnt_p); chk_om_in_use_count++; } ! if (OrderAccess::load_acquire(&jt->om_in_use_count) == chk_om_in_use_count) { out->print_cr("jt=" INTPTR_FORMAT ": om_in_use_count=%d equals " ! "chk_om_in_use_count=%d", p2i(jt), ! OrderAccess::load_acquire(&jt->om_in_use_count), chk_om_in_use_count); } else { out->print_cr("ERROR: jt=" INTPTR_FORMAT ": om_in_use_count=%d is not " ! "equal to chk_om_in_use_count=%d", p2i(jt), ! OrderAccess::load_acquire(&jt->om_in_use_count), chk_om_in_use_count); *error_cnt_p = *error_cnt_p + 1; } } // Log details about ObjectMonitors on the in-use lists. The 'BHL' // flags indicate why the entry is in-use, 'object' and 'object type' // indicate the associated object and its type. ! void ObjectSynchronizer::log_in_use_monitor_details(outputStream * out) { stringStream ss; ! if (OrderAccess::load_acquire(&g_om_in_use_count) > 0) { out->print_cr("In-use global monitor info:"); out->print_cr("(B -> is_busy, H -> has hash code, L -> lock status)"); ! out->print_cr("%18s %s %7s %18s %18s", ! "monitor", "BHL", "ref_cnt", "object", "object type"); ! out->print_cr("================== === ======= ================== =================="); ! for (ObjectMonitor* n = OrderAccess::load_acquire(&g_om_in_use_list); n != NULL; n = unmarked_next(n)) { const oop obj = (oop) n->object(); const markWord mark = n->header(); ResourceMark rm; ! out->print(INTPTR_FORMAT " %d%d%d %7d " INTPTR_FORMAT " %s", ! p2i(n), n->is_busy() != 0, mark.hash() != 0, ! n->owner() != NULL, (int)n->ref_count(), p2i(obj), ! obj->klass()->external_name()); if (n->is_busy() != 0) { out->print(" (%s)", n->is_busy_to_string(&ss)); ss.reset(); } out->cr(); } } out->print_cr("In-use per-thread monitor info:"); out->print_cr("(B -> is_busy, H -> has hash code, L -> lock status)"); ! out->print_cr("%18s %18s %s %7s %18s %18s", ! "jt", "monitor", "BHL", "ref_cnt", "object", "object type"); ! out->print_cr("================== ================== === ======= ================== =================="); for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { ! 
for (ObjectMonitor* n = OrderAccess::load_acquire(&jt->om_in_use_list); n != NULL; n = unmarked_next(n)) { const oop obj = (oop) n->object(); const markWord mark = n->header(); ResourceMark rm; ! out->print(INTPTR_FORMAT " " INTPTR_FORMAT " %d%d%d %7d " ! INTPTR_FORMAT " %s", p2i(jt), p2i(n), n->is_busy() != 0, ! mark.hash() != 0, n->owner() != NULL, (int)n->ref_count(), ! p2i(obj), obj->klass()->external_name()); if (n->is_busy() != 0) { out->print(" (%s)", n->is_busy_to_string(&ss)); ss.reset(); } out->cr();
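For reference, the 'BHL' columns printed above encode is_busy / has hash code / lock status (owner set) as single digits, and the new ref_cnt column shows the monitor's reference count. A tiny illustrative formatter with stand-in fields, not the actual ObjectMonitor accessors:

#include <cstdio>

struct MonRow {            // stand-ins for the fields the log derives B/H/L from
  bool  busy;              // B: is_busy() != 0
  bool  has_hash;          // H: header hash != 0
  bool  owned;             // L: owner() != NULL
  int   ref_count;         // new ref_cnt column
  void* obj;               // associated object
};

static void print_bhl_row(const MonRow& m) {
  std::printf("%d%d%d %7d %p\n",
              m.busy ? 1 : 0, m.has_hash ? 1 : 0, m.owned ? 1 : 0,
              m.ref_count, m.obj);
}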
*** 2136,2160 **** // Log counts for the global and per-thread monitor lists and return // the population count. int ObjectSynchronizer::log_monitor_list_counts(outputStream * out) { int pop_count = 0; ! out->print_cr("%18s %10s %10s %10s", ! "Global Lists:", "InUse", "Free", "Total"); ! out->print_cr("================== ========== ========== =========="); ! out->print_cr("%18s %10d %10d %10d", "", ! g_om_in_use_count, g_om_free_count, g_om_population); ! pop_count += g_om_in_use_count + g_om_free_count; out->print_cr("%18s %10s %10s %10s", "Per-Thread Lists:", "InUse", "Free", "Provision"); out->print_cr("================== ========== ========== =========="); for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { out->print_cr(INTPTR_FORMAT " %10d %10d %10d", p2i(jt), ! jt->om_in_use_count, jt->om_free_count, jt->om_free_provision); ! pop_count += jt->om_in_use_count + jt->om_free_count; } return pop_count; } #ifndef PRODUCT --- 3224,3258 ---- // Log counts for the global and per-thread monitor lists and return // the population count. int ObjectSynchronizer::log_monitor_list_counts(outputStream * out) { int pop_count = 0; ! out->print_cr("%18s %10s %10s %10s %10s", ! "Global Lists:", "InUse", "Free", "Wait", "Total"); ! out->print_cr("================== ========== ========== ========== =========="); ! out->print_cr("%18s %10d %10d %10d %10d", "", ! OrderAccess::load_acquire(&g_om_in_use_count), ! OrderAccess::load_acquire(&g_om_free_count), ! OrderAccess::load_acquire(&g_om_wait_count), ! OrderAccess::load_acquire(&g_om_population)); ! pop_count += OrderAccess::load_acquire(&g_om_in_use_count) + ! OrderAccess::load_acquire(&g_om_free_count); ! if (HandshakeAfterDeflateIdleMonitors) { ! pop_count += OrderAccess::load_acquire(&g_om_wait_count); ! } out->print_cr("%18s %10s %10s %10s", "Per-Thread Lists:", "InUse", "Free", "Provision"); out->print_cr("================== ========== ========== =========="); for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) { out->print_cr(INTPTR_FORMAT " %10d %10d %10d", p2i(jt), ! OrderAccess::load_acquire(&jt->om_in_use_count), ! OrderAccess::load_acquire(&jt->om_free_count), ! jt->om_free_provision); ! pop_count += OrderAccess::load_acquire(&jt->om_in_use_count) + ! OrderAccess::load_acquire(&jt->om_free_count); } return pop_count; } #ifndef PRODUCT
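The population figure returned above is the sum of the global counters (in-use + free, plus wait when HandshakeAfterDeflateIdleMonitors is enabled) and each thread's in-use + free counters. Expressed as plain arithmetic on snapshot values (the real code reads the shared counters with load_acquire; the type and function names below are invented):

#include <vector>

struct PerThreadCounts { int in_use; int free_count; };

// pop = g_in_use + g_free [+ g_wait] + sum over threads of (in_use + free).
static int population_estimate(int g_in_use, int g_free, int g_wait,
                               bool count_wait_list,
                               const std::vector<PerThreadCounts>& threads) {
  int pop = g_in_use + g_free + (count_wait_list ? g_wait : 0);
  for (const PerThreadCounts& t : threads) {
    pop += t.in_use + t.free_count;
  }
  return pop;
}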
*** 2172,2182 ****
        address blk = (address)block;
        size_t diff = mon - blk;
        assert((diff % sizeof(PaddedObjectMonitor)) == 0, "must be aligned");
        return 1;
      }
!     block = (PaddedObjectMonitor*)block->_next_om;
    }
    return 0;
  }
  
  #endif
--- 3270,3281 ----
        address blk = (address)block;
        size_t diff = mon - blk;
        assert((diff % sizeof(PaddedObjectMonitor)) == 0, "must be aligned");
        return 1;
      }
!     // unmarked_next() is not needed with g_block_list (no next field marking).
!     block = (PaddedObjectMonitor*)OrderAccess::load_acquire(&block->_next_om);
    }
    return 0;
  }
  
  #endif
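The block check above relies on the monitor blocks being arrays of PaddedObjectMonitor in which slot 0 is the block header that links to the next block. A standalone sketch of that membership test (block size, padding, and field names are made up for illustration):

#include <cstdint>

struct FakeMonitor {                 // stand-in for PaddedObjectMonitor
  FakeMonitor* next_block = nullptr; // only meaningful in slot 0 of a block
  char         pad[56];
};

static const int kBlockSize = 128;   // monitors per block; slot 0 is the header

static bool block_list_contains(FakeMonitor* blocks, const void* addr) {
  for (FakeMonitor* b = blocks; b != nullptr; b = b->next_block) {
    uintptr_t lo = reinterpret_cast<uintptr_t>(&b[1]);           // first usable slot
    uintptr_t hi = reinterpret_cast<uintptr_t>(&b[kBlockSize]);  // one past the last
    uintptr_t p  = reinterpret_cast<uintptr_t>(addr);
    if (p >= lo && p < hi &&
        (p - reinterpret_cast<uintptr_t>(b)) % sizeof(FakeMonitor) == 0) {
      return true;                   // aligned address inside this block
    }
  }
  return false;
}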