< prev index next >

src/hotspot/share/runtime/synchronizer.cpp

Print this page
rev 59376 : 8153224.v2.10.patch merged with 8153224.v2.11.patch.
rev 59377 : CR1 changes from dcubed, dholmes, eosterlund and rehn.
rev 59378 : CR changes from dholmes, dcubed; fix is_being_async_deflated() race found by eosterlund; WB_ForceSafepoint() should request a special clean up with AsyncDeflateIdleMonitors; add a barrier in install_displaced_markword_in_object() to separate the header load from the preceding loads in is_being_async_deflated().
rev 59379 : eosterlund CR - Switch from three part async deflation protocol to a two part async deflation protocol where a negative contentions field is a linearization point.

@@ -35,15 +35,17 @@
 #include "oops/markWord.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/atomic.hpp"
 #include "runtime/biasedLocking.hpp"
 #include "runtime/handles.inline.hpp"
+#include "runtime/handshake.hpp"
 #include "runtime/interfaceSupport.inline.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/objectMonitor.hpp"
 #include "runtime/objectMonitor.inline.hpp"
 #include "runtime/osThread.hpp"
+#include "runtime/safepointMechanism.inline.hpp"
 #include "runtime/safepointVerifiers.hpp"
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/stubRoutines.hpp"
 #include "runtime/synchronizer.hpp"
 #include "runtime/thread.inline.hpp"

@@ -116,10 +118,13 @@
 #define NINFLATIONLOCKS 256
 static volatile intptr_t gInflationLocks[NINFLATIONLOCKS];
 
 // global list of blocks of monitors
 PaddedObjectMonitor* ObjectSynchronizer::g_block_list = NULL;
+bool volatile ObjectSynchronizer::_is_async_deflation_requested = false;
+bool volatile ObjectSynchronizer::_is_special_deflation_requested = false;
+jlong ObjectSynchronizer::_last_async_deflation_time_ns = 0;
 
 struct ObjectMonitorListGlobals {
   char         _pad_prefix[OM_CACHE_LINE_SIZE];
   // These are highly shared list related variables.
   // To avoid false-sharing they need to be the sole occupants of a cache line.

@@ -132,18 +137,28 @@
   // Global ObjectMonitor in-use list. When a JavaThread is exiting,
   // ObjectMonitors on its per-thread in-use list are prepended here.
   ObjectMonitor* _in_use_list;
   DEFINE_PAD_MINUS_SIZE(2, OM_CACHE_LINE_SIZE, sizeof(ObjectMonitor*));
 
+  // Global ObjectMonitor wait list. Deflated ObjectMonitors wait on
+  // this list until after a handshake (or, on platforms that don't
+  // support handshakes, a safepoint). After the handshake or safepoint,
+  // the deflated ObjectMonitors are prepended to free_list.
+  ObjectMonitor* _wait_list;
+  DEFINE_PAD_MINUS_SIZE(3, OM_CACHE_LINE_SIZE, sizeof(ObjectMonitor*));
+
   int _free_count;    // # on free_list
-  DEFINE_PAD_MINUS_SIZE(3, OM_CACHE_LINE_SIZE, sizeof(int));
+  DEFINE_PAD_MINUS_SIZE(4, OM_CACHE_LINE_SIZE, sizeof(int));
 
   int _in_use_count;  // # on in_use_list
-  DEFINE_PAD_MINUS_SIZE(4, OM_CACHE_LINE_SIZE, sizeof(int));
+  DEFINE_PAD_MINUS_SIZE(5, OM_CACHE_LINE_SIZE, sizeof(int));
 
   int _population;    // # Extant -- in circulation
-  DEFINE_PAD_MINUS_SIZE(5, OM_CACHE_LINE_SIZE, sizeof(int));
+  DEFINE_PAD_MINUS_SIZE(6, OM_CACHE_LINE_SIZE, sizeof(int));
+
+  int _wait_count;    // # on wait_list
+  DEFINE_PAD_MINUS_SIZE(7, OM_CACHE_LINE_SIZE, sizeof(int));
 };
 static ObjectMonitorListGlobals om_list_globals;
 
 #define CHAINMARKER (cast_to_oop<intptr_t>(-1))
 

@@ -297,10 +312,19 @@
                                              ObjectMonitor* tail, int count) {
   prepend_list_to_common(list, tail, count, &om_list_globals._free_list,
                          &om_list_globals._free_count);
 }
 
+// Prepend a list of ObjectMonitors to om_list_globals._wait_list.
+// 'tail' is the last ObjectMonitor in the list and there are 'count'
+// on the list. Also updates om_list_globals._wait_count.
+static void prepend_list_to_global_wait_list(ObjectMonitor* list,
+                                             ObjectMonitor* tail, int count) {
+  prepend_list_to_common(list, tail, count, &om_list_globals._wait_list,
+                         &om_list_globals._wait_count);
+}
+
 // Prepend a list of ObjectMonitors to om_list_globals._in_use_list.
 // 'tail' is the last ObjectMonitor in the list and there are 'count'
 // on the list. Also updates om_list_globals._in_use_count.
 static void prepend_list_to_global_in_use_list(ObjectMonitor* list,
                                                ObjectMonitor* tail, int count) {

@@ -314,11 +338,11 @@
                               int* count_p) {
   while (true) {
     om_lock(m);  // Lock m so we can safely update its next field.
     ObjectMonitor* cur = NULL;
     // Lock the list head to guard against races with a list walker
-    // thread:
+    // or async deflater thread (which only races in om_in_use_list):
     if ((cur = get_list_head_locked(list_p)) != NULL) {
       // List head is now locked so we can safely switch it.
       m->set_next_om(cur);  // m now points to cur (and unlocks m)
       Atomic::store(list_p, m);  // Switch list head to unlocked m.
       om_unlock(cur);

@@ -352,11 +376,11 @@
 // decrements the specified counter. Returns NULL if none are available.
 static ObjectMonitor* take_from_start_of_common(ObjectMonitor** list_p,
                                                 int* count_p) {
   ObjectMonitor* take = NULL;
   // Lock the list head to guard against races with a list walker
-  // thread:
+  // or async deflater thread (which only races in om_list_globals._free_list):
   if ((take = get_list_head_locked(list_p)) == NULL) {
     return NULL;  // None are available.
   }
   ObjectMonitor* next = unmarked_next(take);
   // Switch locked list head to next (which unlocks the list head, but

@@ -461,15 +485,25 @@
   assert(!SafepointSynchronize::is_at_safepoint(), "invariant");
   assert(self->is_Java_thread(), "invariant");
   assert(((JavaThread *) self)->thread_state() == _thread_in_Java, "invariant");
   NoSafepointVerifier nsv;
   if (obj == NULL) return false;       // Need to throw NPE
+
   const markWord mark = obj->mark();
 
   if (mark.has_monitor()) {
     ObjectMonitor* const m = mark.monitor();
+    if (AsyncDeflateIdleMonitors) {
+      // An async deflation can race us before we manage to make the
+      // ObjectMonitor busy by setting the owner below. If we detect
+      // that race we just bail out to the slow-path here.
+      if (m->object() == NULL) {
+        return false;
+      }
+    } else {
     assert(m->object() == obj, "invariant");
+    }
     Thread* const owner = (Thread *) m->_owner;
 
     // Lock contention and Transactional Lock Elision (TLE) diagnostics
     // and observability
     // Case: light contention possibly amenable to TLE

@@ -545,11 +579,19 @@
   // The object header will never be displaced to this lock,
   // so it does not matter what the value is, except that it
   // must be non-zero to avoid looking like a re-entrant lock,
   // and must not look locked either.
   lock->set_displaced_header(markWord::unused_mark());
-  inflate(THREAD, obj(), inflate_cause_monitor_enter)->enter(THREAD);
+  // An async deflation can race after the inflate() call and before
+  // enter() can make the ObjectMonitor busy. enter() returns false if
+  // we have lost the race to async deflation and we simply try again.
+  while (true) {
+    ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_monitor_enter);
+    if (monitor->enter(THREAD)) {
+      return;
+    }
+  }
 }
 
 void ObjectSynchronizer::exit(oop object, BasicLock* lock, TRAPS) {
   markWord mark = object->mark();
   // We cannot check for Biased Locking if we are racing an inflation.

@@ -594,11 +636,14 @@
       return;
     }
   }
 
   // We have to take the slow-path of possible inflation and then exit.
-  inflate(THREAD, object, inflate_cause_vm_internal)->exit(true, THREAD);
+  // The ObjectMonitor* can't be async deflated until ownership is
+  // dropped inside exit() and the ObjectMonitor* must be !is_busy().
+  ObjectMonitor* monitor = inflate(THREAD, object, inflate_cause_vm_internal);
+  monitor->exit(true, THREAD);
 }
 
 // -----------------------------------------------------------------------------
 // Class Loader  support to workaround deadlocks on the class loader lock objects
 // Also used by GC

@@ -615,37 +660,55 @@
   if (UseBiasedLocking) {
     BiasedLocking::revoke(obj, THREAD);
     assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now");
   }
 
+  // The ObjectMonitor* can't be async deflated until ownership is
+  // dropped inside exit() and the ObjectMonitor* must be !is_busy().
   ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_vm_internal);
-
-  return monitor->complete_exit(THREAD);
+  intptr_t ret_code = monitor->complete_exit(THREAD);
+  return ret_code;
 }
 
 // NOTE: must use heavy weight monitor to handle complete_exit/reenter()
 void ObjectSynchronizer::reenter(Handle obj, intx recursions, TRAPS) {
   if (UseBiasedLocking) {
     BiasedLocking::revoke(obj, THREAD);
     assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now");
   }
 
+  // An async deflation can race after the inflate() call and before
+  // reenter() -> enter() can make the ObjectMonitor busy. reenter() ->
+  // enter() returns false if we have lost the race to async deflation
+  // and we simply try again.
+  while (true) {
   ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_vm_internal);
-
-  monitor->reenter(recursions, THREAD);
+    if (monitor->reenter(recursions, THREAD)) {
+      return;
+    }
+  }
 }
+
 // -----------------------------------------------------------------------------
 // JNI locks on java objects
 // NOTE: must use heavy weight monitor to handle jni monitor enter
 void ObjectSynchronizer::jni_enter(Handle obj, TRAPS) {
   // the current locking is from JNI instead of Java code
   if (UseBiasedLocking) {
     BiasedLocking::revoke(obj, THREAD);
     assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now");
   }
   THREAD->set_current_pending_monitor_is_from_java(false);
-  inflate(THREAD, obj(), inflate_cause_jni_enter)->enter(THREAD);
+  // An async deflation can race after the inflate() call and before
+  // enter() can make the ObjectMonitor busy. enter() returns false if
+  // we have lost the race to async deflation and we simply try again.
+  while (true) {
+    ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_jni_enter);
+    if (monitor->enter(THREAD)) {
+      break;
+    }
+  }
   THREAD->set_current_pending_monitor_is_from_java(true);
 }
 
 // NOTE: must use heavy weight monitor to handle jni monitor exit
 void ObjectSynchronizer::jni_exit(oop obj, Thread* THREAD) {

@@ -654,10 +717,12 @@
     BiasedLocking::revoke(h_obj, THREAD);
     obj = h_obj();
   }
   assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now");
 
+  // The ObjectMonitor* can't be async deflated until ownership is
+  // dropped inside exit() and the ObjectMonitor* must be !is_busy().
   ObjectMonitor* monitor = inflate(THREAD, obj, inflate_cause_jni_exit);
   // If this thread has locked the object, exit the monitor. We
   // intentionally do not use CHECK here because we must exit the
   // monitor even if an exception is pending.
   if (monitor->check_owner(THREAD)) {

@@ -695,31 +760,39 @@
     assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now");
   }
   if (millis < 0) {
     THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(), "timeout value is negative");
   }
+  // The ObjectMonitor* can't be async deflated because the _waiters
+  // field is incremented before ownership is dropped and decremented
+  // after ownership is regained.
   ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_wait);
 
   DTRACE_MONITOR_WAIT_PROBE(monitor, obj(), THREAD, millis);
   monitor->wait(millis, true, THREAD);
 
   // This dummy call is in place to get around dtrace bug 6254741.  Once
   // that's fixed we can uncomment the following line, remove the call
   // and change this function back into a "void" func.
   // DTRACE_MONITOR_PROBE(waited, monitor, obj(), THREAD);
-  return dtrace_waited_probe(monitor, obj, THREAD);
+  int ret_code = dtrace_waited_probe(monitor, obj, THREAD);
+  return ret_code;
 }
 
 void ObjectSynchronizer::wait_uninterruptibly(Handle obj, jlong millis, TRAPS) {
   if (UseBiasedLocking) {
     BiasedLocking::revoke(obj, THREAD);
     assert(!obj->mark().has_bias_pattern(), "biases should be revoked by now");
   }
   if (millis < 0) {
     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), "timeout value is negative");
   }
-  inflate(THREAD, obj(), inflate_cause_wait)->wait(millis, false, THREAD);
+  // The ObjectMonitor* can't be async deflated because the _waiters
+  // field is incremented before ownership is dropped and decremented
+  // after ownership is regained.
+  ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_wait);
+  monitor->wait(millis, false, THREAD);
 }
 
 void ObjectSynchronizer::notify(Handle obj, TRAPS) {
   if (UseBiasedLocking) {
     BiasedLocking::revoke(obj, THREAD);

@@ -728,11 +801,14 @@
 
   markWord mark = obj->mark();
   if (mark.has_locker() && THREAD->is_lock_owned((address)mark.locker())) {
     return;
   }
-  inflate(THREAD, obj(), inflate_cause_notify)->notify(THREAD);
+  // The ObjectMonitor* can't be async deflated until ownership is
+  // dropped by the calling thread.
+  ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_notify);
+  monitor->notify(THREAD);
 }
 
 // NOTE: see comment of notify()
 void ObjectSynchronizer::notifyall(Handle obj, TRAPS) {
   if (UseBiasedLocking) {

@@ -742,11 +818,14 @@
 
   markWord mark = obj->mark();
   if (mark.has_locker() && THREAD->is_lock_owned((address)mark.locker())) {
     return;
   }
-  inflate(THREAD, obj(), inflate_cause_notify)->notifyAll(THREAD);
+  // The ObjectMonitor* can't be async deflated until ownership is
+  // dropped by the calling thread.
+  ObjectMonitor* monitor = inflate(THREAD, obj(), inflate_cause_notify);
+  monitor->notifyAll(THREAD);
 }
 
 // -----------------------------------------------------------------------------
 // Hash Code handling
 //

@@ -934,10 +1013,11 @@
   assert(Universe::verify_in_progress() || DumpSharedSpaces ||
          self->is_Java_thread() , "invariant");
   assert(Universe::verify_in_progress() || DumpSharedSpaces ||
          ((JavaThread *)self)->thread_state() != _thread_blocked, "invariant");
 
+  while (true) {
   ObjectMonitor* monitor = NULL;
   markWord temp, test;
   intptr_t hash;
   markWord mark = read_stable_mark(obj);
 

@@ -963,11 +1043,30 @@
   } else if (mark.has_monitor()) {
     monitor = mark.monitor();
     temp = monitor->header();
     assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value());
     hash = temp.hash();
-    if (hash != 0) {                  // if it has a hash, just return it
+      if (hash != 0) {
+        // It has a hash.
+
+        // Separate load of dmw/header above from the loads in
+        // is_being_async_deflated().
+        if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+          // A non-multiple copy atomic (nMCA) machine needs a bigger
+          // hammer to separate the load above and the loads below.
+          OrderAccess::fence();
+        } else {
+          OrderAccess::loadload();
+        }
+        if (monitor->is_being_async_deflated()) {
+          // But we can't safely use the hash if we detect that async
+          // deflation has occurred. So we attempt to restore the
+          // header/dmw to the object's header so that we only retry
+          // once if the deflater thread happens to be slow.
+          monitor->install_displaced_markword_in_object(obj);
+          continue;
+        }
       return hash;
     }
     // Fall thru so we only have one place that installs the hash in
     // the ObjectMonitor.
   } else if (self->is_lock_owned((address)mark.locker())) {

@@ -988,10 +1087,13 @@
     // during an inflate() call so any change to that stack memory
     // may not propagate to other threads correctly.
   }
 
   // Inflate the monitor to set the hash.
+
+    // An async deflation can race after the inflate() call and before we
+    // can update the ObjectMonitor's header with the hash value below.
   monitor = inflate(self, obj, inflate_cause_hash_code);
   // Load ObjectMonitor's header/dmw field and see if it has a hash.
   mark = monitor->header();
   assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value());
   hash = mark.hash();

@@ -1009,13 +1111,22 @@
       // will need to be updated.
       hash = test.hash();
       assert(test.is_neutral(), "invariant: header=" INTPTR_FORMAT, test.value());
       assert(hash != 0, "should only have lost the race to a thread that set a non-zero hash");
     }
+      if (monitor->is_being_async_deflated()) {
+        // If we detect that async deflation has occurred, then we
+        // attempt to restore the header/dmw to the object's header
+        // so that we only retry once if the deflater thread happens
+        // to be slow.
+        monitor->install_displaced_markword_in_object(obj);
+        continue;
+      }
   }
   // We finally get the hash.
   return hash;
+  }
 }
 
 // Deprecated -- use FastHashCode() instead.
 
 intptr_t ObjectSynchronizer::identity_hash_value_for(Handle obj) {

@@ -1039,10 +1150,12 @@
   if (mark.has_locker()) {
     return thread->is_lock_owned((address)mark.locker());
   }
   // Contended case, header points to ObjectMonitor (tagged pointer)
   if (mark.has_monitor()) {
+    // The first stage of async deflation does not affect any field
+    // used by this comparison so the ObjectMonitor* is usable here.
     ObjectMonitor* monitor = mark.monitor();
     return monitor->is_entered(thread) != 0;
   }
   // Unlocked case, header in place
   assert(mark.is_neutral(), "sanity check");

@@ -1080,13 +1193,16 @@
       owner_self : owner_other;
   }
 
   // CASE: inflated. Mark (tagged pointer) points to an ObjectMonitor.
   // The Object:ObjectMonitor relationship is stable as long as we're
-  // not at a safepoint.
+  // not at a safepoint and AsyncDeflateIdleMonitors is false.
   if (mark.has_monitor()) {
-    void* owner = mark.monitor()->_owner;
+    // The first stage of async deflation does not affect any field
+    // used by this comparison so the ObjectMonitor* is usable here.
+    ObjectMonitor* monitor = mark.monitor();
+    void* owner = monitor->owner();
     if (owner == NULL) return owner_none;
     return (owner == self ||
             self->is_lock_owned((address)owner)) ? owner_self : owner_other;
   }
 

@@ -1116,10 +1232,12 @@
     owner = (address) mark.locker();
   }
 
   // Contended case, header points to ObjectMonitor (tagged pointer)
   else if (mark.has_monitor()) {
+    // The first stage of async deflation does not affect any field
+    // used by this comparison so the ObjectMonitor* is usable here.
     ObjectMonitor* monitor = mark.monitor();
     assert(monitor != NULL, "monitor should be non-null");
     owner = (address) monitor->owner();
   }
 

@@ -1142,13 +1260,19 @@
   PaddedObjectMonitor* block = Atomic::load(&g_block_list);
   while (block != NULL) {
     assert(block->object() == CHAINMARKER, "must be a block header");
     for (int i = _BLOCKSIZE - 1; i > 0; i--) {
       ObjectMonitor* mid = (ObjectMonitor *)(block + i);
-      oop object = (oop)mid->object();
-      if (object != NULL) {
+      if (mid->object() != NULL) {
         // Only process with closure if the object is set.
+
+        // monitors_iterate() is only called at a safepoint or when the
+        // target thread is suspended or when the target thread is
+        // operating on itself. The closures currently in use are
+        // only interested in an owned ObjectMonitor, and ownership
+        // cannot be dropped in any of those calling contexts, so the
+        // ObjectMonitor cannot be async deflated.
         closure->do_monitor(mid);
       }
     }
     // unmarked_next() is not needed with g_block_list (no locking
     // used with block linkage _next_om fields).

@@ -1160,19 +1284,57 @@
   int population = Atomic::load(&om_list_globals._population);
   if (population == 0) {
     return false;
   }
   if (MonitorUsedDeflationThreshold > 0) {
-    int monitors_used = population - Atomic::load(&om_list_globals._free_count);
+    int monitors_used = population - Atomic::load(&om_list_globals._free_count) -
+                        Atomic::load(&om_list_globals._wait_count);
     int monitor_usage = (monitors_used * 100LL) / population;
     return monitor_usage > MonitorUsedDeflationThreshold;
   }
   return false;
 }
 
-bool ObjectSynchronizer::is_cleanup_needed() {
-  return monitors_used_above_threshold();
+bool ObjectSynchronizer::is_async_deflation_needed() {
+  if (!AsyncDeflateIdleMonitors) {
+    return false;
+  }
+  if (is_async_deflation_requested()) {
+    // Async deflation request.
+    return true;
+  }
+  if (AsyncDeflationInterval > 0 &&
+      time_since_last_async_deflation_ms() > AsyncDeflationInterval &&
+      monitors_used_above_threshold()) {
+    // It's been longer than our specified deflate interval and there
+    // are too many monitors in use. We don't deflate more frequently
+    // than AsyncDeflationInterval (unless is_async_deflation_requested)
+    // in order to not swamp the ServiceThread.
+    _last_async_deflation_time_ns = os::javaTimeNanos();
+    return true;
+  }
+  return false;
+}
+
+bool ObjectSynchronizer::is_safepoint_deflation_needed() {
+  if (!AsyncDeflateIdleMonitors) {
+    if (monitors_used_above_threshold()) {
+      // Too many monitors in use.
+      return true;
+    }
+    return false;
+  }
+  if (is_special_deflation_requested()) {
+    // For AsyncDeflateIdleMonitors only do a safepoint deflation
+    // if there is a special deflation request.
+    return true;
+  }
+  return false;
+}
+
+jlong ObjectSynchronizer::time_since_last_async_deflation_ms() {
+  return (os::javaTimeNanos() - _last_async_deflation_time_ns) / (NANOUNITS / MILLIUNITS);
 }
 
 void ObjectSynchronizer::oops_do(OopClosure* f) {
   // We only scan the global used list here (for moribund threads), and
   // the thread-local monitors in Thread::oops_do().

@@ -1204,11 +1366,11 @@
 // -----------------------------------------------------------------------------
 // ObjectMonitor Lifecycle
 // -----------------------
 // Inflation unlinks monitors from om_list_globals._free_list or a per-thread
 // free list and associates them with objects. Deflation -- which occurs at
-// STW-time -- disassociates idle monitors from objects.
+// STW-time or asynchronously -- disassociates idle monitors from objects.
 // Such scavenged monitors are returned to the om_list_globals._free_list.
 //
 // ObjectMonitors reside in type-stable memory (TSM) and are immortal.
 //
 // Lifecycle:

@@ -1236,10 +1398,11 @@
     // improve allocation latency, as well as reducing coherency traffic
     // on the shared global list.
     m = take_from_start_of_om_free_list(self);
     if (m != NULL) {
       guarantee(m->object() == NULL, "invariant");
+      m->set_allocation_state(ObjectMonitor::New);
       prepend_to_om_in_use_list(self, m);
       return m;
     }
 
     // 2: try to allocate from the global om_list_globals._free_list

@@ -1253,11 +1416,33 @@
         ObjectMonitor* take = take_from_start_of_global_free_list();
         if (take == NULL) {
           break;  // No more are available.
         }
         guarantee(take->object() == NULL, "invariant");
+        if (AsyncDeflateIdleMonitors) {
+          // We allowed 3 field values to linger during async deflation.
+          // Clear or restore them as appropriate.
+          take->set_header(markWord::zero());
+          // DEFLATER_MARKER is the only non-NULL value we should see here.
+          take->try_set_owner_from(DEFLATER_MARKER, NULL);
+          if (take->contentions() < 0) {
+            // Add back max_jint to restore the contentions field to its
+            // proper value.
+            take->add_to_contentions(max_jint);
+
+#ifdef ASSERT
+            jint l_contentions = take->contentions();
+#endif
+            assert(l_contentions >= 0, "must not be negative: l_contentions=%d, contentions=%d",
+                   l_contentions, take->contentions());
+          }
+        }
         take->Recycle();
+        // Since we're taking from the global free-list, take must be Free.
+        // om_release() also sets the allocation state to Free because it
+        // is called from other code paths.
+        assert(take->is_free(), "invariant");
         om_release(self, take, false);
       }
       self->om_free_provision += 1 + (self->om_free_provision / 2);
       if (self->om_free_provision > MAXPRIVATE) self->om_free_provision = MAXPRIVATE;
       continue;

@@ -1287,10 +1472,11 @@
     // linkage should be reconsidered.  A better implementation would
     // look like: class Block { Block * next; int N; ObjectMonitor Body [N] ; }
 
     for (int i = 1; i < _BLOCKSIZE; i++) {
       temp[i].set_next_om((ObjectMonitor*)&temp[i + 1]);
+      assert(temp[i].is_free(), "invariant");
     }
 
     // terminate the last monitor as the end of list
     temp[_BLOCKSIZE - 1].set_next_om((ObjectMonitor*)NULL);
 

@@ -1312,12 +1498,12 @@
 // a CAS attempt failed. This doesn't allow unbounded #s of monitors to
 // accumulate on a thread's free list.
 //
 // Key constraint: all ObjectMonitors on a thread's free list and the global
 // free list must have their object field set to null. This prevents the
-// scavenger -- deflate_monitor_list() -- from reclaiming them while we
-// are trying to release them.
+// scavenger -- deflate_monitor_list() or deflate_monitor_list_using_JT()
+// -- from reclaiming them while we are trying to release them.
 
 void ObjectSynchronizer::om_release(Thread* self, ObjectMonitor* m,
                                     bool from_per_thread_alloc) {
   guarantee(m->header().value() == 0, "invariant");
   guarantee(m->object() == NULL, "invariant");

@@ -1326,23 +1512,24 @@
   if ((m->is_busy() | m->_recursions) != 0) {
     stringStream ss;
     fatal("freeing in-use monitor: %s, recursions=" INTX_FORMAT,
           m->is_busy_to_string(&ss), m->_recursions);
   }
+  m->set_allocation_state(ObjectMonitor::Free);
   // _next_om is used for both per-thread in-use and free lists so
   // we have to remove 'm' from the in-use list first (as needed).
   if (from_per_thread_alloc) {
     // Need to remove 'm' from om_in_use_list.
     ObjectMonitor* mid = NULL;
     ObjectMonitor* next = NULL;
 
-    // This list walk can only race with another list walker since
-    // deflation can only happen at a safepoint so we don't have to
-    // worry about an ObjectMonitor being removed from this list
-    // while we are walking it.
+    // This list walk can race with another list walker or with async
+    // deflation so we have to worry about an ObjectMonitor being
+    // removed from this list while we are walking it.
 
-    // Lock the list head to avoid racing with another list walker.
+    // Lock the list head to avoid racing with another list walker
+    // or with async deflation.
     if ((mid = get_list_head_locked(&self->om_in_use_list)) == NULL) {
       fatal("thread=" INTPTR_FORMAT " in-use list must not be empty.", p2i(self));
     }
     next = unmarked_next(mid);
     if (m == mid) {

@@ -1354,37 +1541,48 @@
     } else if (m == next) {
       // Second special case:
       // 'm' matches next after the list head and we already have the list
       // head locked so set mid to what we are extracting:
       mid = next;
-      // Lock mid to prevent races with a list walker:
+      // Lock mid to prevent races with a list walker or an async
+      // deflater thread that's ahead of us. The locked list head
+      // prevents races from behind us.
       om_lock(mid);
       // Update next to what follows mid (if anything):
       next = unmarked_next(mid);
       // Switch next after the list head to new next which unlocks the
       // list head, but leaves the extracted mid locked:
       self->om_in_use_list->set_next_om(next);
     } else {
       // We have to search the list to find 'm'.
-      om_unlock(mid);  // unlock the list head
       guarantee(next != NULL, "thread=" INTPTR_FORMAT ": om_in_use_list=" INTPTR_FORMAT
                 " is too short.", p2i(self), p2i(self->om_in_use_list));
       // Our starting anchor is next after the list head which is the
       // last ObjectMonitor we checked:
       ObjectMonitor* anchor = next;
+      // Lock anchor to prevent races with a list walker or an async
+      // deflater thread that's ahead of us. The locked list head
+      // prevents races from behind us.
+      om_lock(anchor);
+      om_unlock(mid);  // Unlock the list head now that anchor is locked.
       while ((mid = unmarked_next(anchor)) != NULL) {
         if (m == mid) {
           // We found 'm' on the per-thread in-use list so extract it.
-          om_lock(anchor);  // Lock the anchor so we can safely modify it.
           // Update next to what follows mid (if anything):
           next = unmarked_next(mid);
           // Switch next after the anchor to new next which unlocks the
           // anchor, but leaves the extracted mid locked:
           anchor->set_next_om(next);
           break;
         } else {
-          anchor = mid;
+          // Lock the next anchor to prevent races with a list walker
+          // or an async deflater thread that's ahead of us. The locked
+          // current anchor prevents races from behind us.
+          om_lock(mid);
+          // Unlock current anchor now that next anchor is locked:
+          om_unlock(anchor);
+          anchor = mid;  // Advance to new anchor and try again.
         }
       }
     }
 
     if (mid == NULL) {

@@ -1401,10 +1599,11 @@
     // the thread's free list:
     om_unlock(mid);
   }
 
   prepend_to_om_free_list(self, m);
+  guarantee(m->is_free(), "invariant");
 }
 
 // Return ObjectMonitors on a moribund thread's free and in-use
 // lists to the appropriate global lists. The ObjectMonitors on the
 // per-thread in-use list may still be in use by other threads.

@@ -1415,20 +1614,28 @@
 // a safepoint and interleave with deflate_idle_monitors(). In
 // particular, this ensures that the thread's in-use monitors are
 // scanned by a GC safepoint, either via Thread::oops_do() (before
 // om_flush() is called) or via ObjectSynchronizer::oops_do() (after
 // om_flush() is called).
+//
+// With AsyncDeflateIdleMonitors, deflate_global_idle_monitors_using_JT()
+// and deflate_per_thread_idle_monitors_using_JT() (in another thread) can
+// run at the same time as om_flush() so we have to follow a careful
+// protocol to prevent list corruption.
 
 void ObjectSynchronizer::om_flush(Thread* self) {
   // Process the per-thread in-use list first to be consistent.
   int in_use_count = 0;
   ObjectMonitor* in_use_list = NULL;
   ObjectMonitor* in_use_tail = NULL;
   NoSafepointVerifier nsv;
 
-  // This function can race with a list walker thread so we lock the
-  // list head to prevent confusion.
+  // This function can race with a list walker or with an async
+  // deflater thread so we lock the list head to prevent confusion.
+  // An async deflater thread checks to see if the target thread
+  // is exiting, but if it has made it past that check before we
+  // started exiting, then it is racing to get to the in-use list.
   if ((in_use_list = get_list_head_locked(&self->om_in_use_list)) != NULL) {
     // At this point, we have locked the in-use list head so a racing
     // thread cannot come in after us. However, a racing thread could
     // be ahead of us; we'll detect that and delay to let it finish.
     //

@@ -1439,24 +1646,36 @@
     //
     // Account for the in-use list head before the loop since it is
     // already locked (by this thread):
     in_use_tail = in_use_list;
     in_use_count++;
-    for (ObjectMonitor* cur_om = unmarked_next(in_use_list); cur_om != NULL; cur_om = unmarked_next(cur_om)) {
+    for (ObjectMonitor* cur_om = unmarked_next(in_use_list); cur_om != NULL;) {
       if (is_locked(cur_om)) {
-        // cur_om is locked so there must be a racing walker thread ahead
-        // of us so we'll give it a chance to finish.
+        // cur_om is locked so there must be a racing walker or async
+        // deflater thread ahead of us so we'll give it a chance to finish.
         while (is_locked(cur_om)) {
           os::naked_short_sleep(1);
         }
+        // Refetch the possibly changed next field and try again.
+        cur_om = unmarked_next(in_use_tail);
+        continue;
+      }
+      if (cur_om->object() == NULL) {
+        // cur_om was deflated and the object ref was cleared while it
+        // was locked. We happened to see it just after it was unlocked
+        // (and added to the free list). Refetch the possibly changed
+        // next field and try again.
+        cur_om = unmarked_next(in_use_tail);
+        continue;
       }
       in_use_tail = cur_om;
       in_use_count++;
+      cur_om = unmarked_next(cur_om);
     }
     guarantee(in_use_tail != NULL, "invariant");
     int l_om_in_use_count = Atomic::load(&self->om_in_use_count);
-    assert(l_om_in_use_count == in_use_count, "in-use counts don't match: "
+    ADIM_guarantee(l_om_in_use_count == in_use_count, "in-use counts don't match: "
           "l_om_in_use_count=%d, in_use_count=%d", l_om_in_use_count, in_use_count);
     Atomic::store(&self->om_in_use_count, 0);
     // Clear the in-use list head (which also unlocks it):
     Atomic::store(&self->om_in_use_list, (ObjectMonitor*)NULL);
     om_unlock(in_use_list);

@@ -1495,11 +1714,11 @@
         fatal("must be !is_busy: %s", s->is_busy_to_string(&ss));
       }
     }
     guarantee(free_tail != NULL, "invariant");
     int l_om_free_count = Atomic::load(&self->om_free_count);
-    assert(l_om_free_count == free_count, "free counts don't match: "
+    ADIM_guarantee(l_om_free_count == free_count, "free counts don't match: "
            "l_om_free_count=%d, free_count=%d", l_om_free_count, free_count);
     Atomic::store(&self->om_free_count, 0);
     Atomic::store(&self->om_free_list, (ObjectMonitor*)NULL);
     om_unlock(free_list);
   }

@@ -1541,19 +1760,21 @@
 
 // Fast path code shared by multiple functions
 void ObjectSynchronizer::inflate_helper(oop obj) {
   markWord mark = obj->mark();
   if (mark.has_monitor()) {
-    assert(ObjectSynchronizer::verify_objmon_isinpool(mark.monitor()), "monitor is invalid");
-    assert(mark.monitor()->header().is_neutral(), "monitor must record a good object header");
+    ObjectMonitor* monitor = mark.monitor();
+    assert(ObjectSynchronizer::verify_objmon_isinpool(monitor), "monitor=" INTPTR_FORMAT " is invalid", p2i(monitor));
+    markWord dmw = monitor->header();
+    assert(dmw.is_neutral(), "sanity check: header=" INTPTR_FORMAT, dmw.value());
     return;
   }
-  inflate(Thread::current(), obj, inflate_cause_vm_internal);
+  (void)inflate(Thread::current(), obj, inflate_cause_vm_internal);
 }
 
-ObjectMonitor* ObjectSynchronizer::inflate(Thread* self,
-                                           oop object, const InflateCause cause) {
+ObjectMonitor* ObjectSynchronizer::inflate(Thread* self, oop object,
+                                           const InflateCause cause) {
   // Inflate mutates the heap ...
   // Relaxing assertion for bug 6320749.
   assert(Universe::verify_in_progress() ||
          !SafepointSynchronize::is_at_safepoint(), "invariant");
 

@@ -1573,11 +1794,11 @@
     // CASE: inflated
     if (mark.has_monitor()) {
       ObjectMonitor* inf = mark.monitor();
       markWord dmw = inf->header();
       assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value());
-      assert(inf->object() == object, "invariant");
+      assert(AsyncDeflateIdleMonitors || inf->object() == object, "invariant");
       assert(ObjectSynchronizer::verify_objmon_isinpool(inf), "monitor is invalid");
       return inf;
     }
 
     // CASE: inflation in progress - inflating over a stack-lock.

@@ -1621,10 +1842,11 @@
       m->_Responsible  = NULL;
       m->_SpinDuration = ObjectMonitor::Knob_SpinLimit;   // Consider: maintain by type/class
 
       markWord cmp = object->cas_set_mark(markWord::INFLATING(), mark);
       if (cmp != mark) {
+        // om_release() will reset the allocation state from New to Free.
         om_release(self, m, true);
         continue;       // Interference -- just retry
       }
 
       // We've successfully installed INFLATING (0) into the mark-word.

@@ -1658,29 +1880,38 @@
       // object is in the mark.  Furthermore the owner can't complete
       // an unlock on the object, either.
       markWord dmw = mark.displaced_mark_helper();
       // Catch if the object's header is not neutral (not locked and
       // not marked is what we care about here).
-      assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value());
+      ADIM_guarantee(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value());
 
       // Setup monitor fields to proper values -- prepare the monitor
       m->set_header(dmw);
 
       // Optimization: if the mark.locker stack address is associated
       // with this thread we could simply set m->_owner = self.
       // Note that a thread can inflate an object
       // that it has stack-locked -- as might happen in wait() -- directly
       // with CAS.  That is, we can avoid the xchg-NULL .... ST idiom.
+      if (AsyncDeflateIdleMonitors) {
+        m->set_owner_from(NULL, DEFLATER_MARKER, mark.locker());
+      } else {
       m->set_owner_from(NULL, mark.locker());
+      }
       m->set_object(object);
       // TODO-FIXME: assert BasicLock->dhw != 0.
 
       // Must preserve store ordering. The monitor state must
       // be stable at the time of publishing the monitor address.
       guarantee(object->mark() == markWord::INFLATING(), "invariant");
       object->release_set_mark(markWord::encode(m));
 
+      // Once the ObjectMonitor is configured and the object is associated
+      // with the ObjectMonitor, it is safe to allow async deflation:
+      assert(m->is_new(), "freshly allocated monitor must be new");
+      m->set_allocation_state(ObjectMonitor::Old);
+
       // Hopefully the performance counters are allocated on distinct cache lines
       // to avoid false sharing on MP systems ...
       OM_PERFDATA_OP(Inflations, inc());
       if (log_is_enabled(Trace, monitorinflation)) {
         ResourceMark rm(self);

@@ -1703,31 +1934,41 @@
     // to inflate and then CAS() again to try to swing _owner from NULL to self.
     // An inflateTry() method that we could call from enter() would be useful.
 
     // Catch if the object's header is not neutral (not locked and
     // not marked is what we care about here).
-    assert(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value());
+    ADIM_guarantee(mark.is_neutral(), "invariant: header=" INTPTR_FORMAT, mark.value());
     ObjectMonitor* m = om_alloc(self);
     // prepare m for installation - set monitor to initial state
     m->Recycle();
     m->set_header(mark);
+    if (AsyncDeflateIdleMonitors) {
+      // DEFLATER_MARKER is the only non-NULL value we should see here.
+      m->try_set_owner_from(DEFLATER_MARKER, NULL);
+    }
     m->set_object(object);
     m->_Responsible  = NULL;
     m->_SpinDuration = ObjectMonitor::Knob_SpinLimit;       // consider: keep metastats by type/class
 
     if (object->cas_set_mark(markWord::encode(m), mark) != mark) {
       m->set_header(markWord::zero());
       m->set_object(NULL);
       m->Recycle();
+      // om_release() will reset the allocation state from New to Free.
       om_release(self, m, true);
       m = NULL;
       continue;
       // interference - the markword changed - just retry.
       // The state-transitions are one-way, so there's no chance of
       // live-lock -- "Inflated" is an absorbing state.
     }
 
+    // Once the ObjectMonitor is configured and the object is associated
+    // with the ObjectMonitor, it is safe to allow async deflation:
+    assert(m->is_new(), "freshly allocated monitor must be new");
+    m->set_allocation_state(ObjectMonitor::Old);
+
     // Hopefully the performance counters are allocated on distinct
     // cache lines to avoid false sharing on MP systems ...
     OM_PERFDATA_OP(Inflations, inc());
     if (log_is_enabled(Trace, monitorinflation)) {
       ResourceMark rm(self);

@@ -1743,10 +1984,11 @@
 }
 
 
 // We maintain a list of in-use monitors for each thread.
 //
+// For safepoint based deflation:
 // deflate_thread_local_monitors() scans a single thread's in-use list, while
 // deflate_idle_monitors() scans only a global list of in-use monitors which
 // is populated only as a thread dies (see om_flush()).
 //
 // These operations are called at all safepoints, immediately after mutators

@@ -1761,10 +2003,44 @@
 //
 // Perversely, the heap size -- and thus the STW safepoint rate --
 // typically drives the scavenge rate.  Large heaps can mean infrequent GC,
 // which in turn can mean large(r) numbers of ObjectMonitors in circulation.
 // This is an unfortunate aspect of this design.
+//
+// For async deflation:
+// If a special deflation request is made, then the safepoint based
+// deflation mechanism is used. Otherwise, an async deflation request
+// is registered with the ServiceThread and it is notified.
+
+void ObjectSynchronizer::do_safepoint_work(DeflateMonitorCounters* counters) {
+  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+
+  // The per-thread in-use lists are handled in
+  // ParallelSPCleanupThreadClosure::do_thread().
+
+  if (!AsyncDeflateIdleMonitors || is_special_deflation_requested()) {
+    // Use the older mechanism for the global in-use list if async deflation
+    // is not enabled or a special deflation was requested before the safepoint.
+    ObjectSynchronizer::deflate_idle_monitors(counters);
+    return;
+  }
+
+  log_debug(monitorinflation)("requesting async deflation of idle monitors.");
+  // Request deflation of idle monitors by the ServiceThread:
+  set_is_async_deflation_requested(true);
+  MonitorLocker ml(Service_lock, Mutex::_no_safepoint_check_flag);
+  ml.notify_all();
+
+  if (log_is_enabled(Debug, monitorinflation)) {
+    // exit_globals()'s call to audit_and_print_stats() is done
+    // at the Info level and not at a safepoint.
+    // For safepoint based deflation, audit_and_print_stats() is called
+    // in ObjectSynchronizer::finish_deflate_idle_monitors() at the
+    // Debug level at a safepoint.
+    ObjectSynchronizer::audit_and_print_stats(false /* on_exit */);
+  }
+}
 
 // Deflate a single monitor if not in-use
 // Return true if deflated, false if in-use
 bool ObjectSynchronizer::deflate_monitor(ObjectMonitor* mid, oop obj,
                                          ObjectMonitor** free_head_p,

@@ -1796,14 +2072,20 @@
                                   mark.value(), obj->klass()->external_name());
     }
 
     // Restore the header back to obj
     obj->release_set_mark(dmw);
+    if (AsyncDeflateIdleMonitors) {
+      // clear() expects the owner field to be NULL.
+      // DEFLATER_MARKER is the only non-NULL value we should see here.
+      mid->try_set_owner_from(DEFLATER_MARKER, NULL);
+    }
     mid->clear();
 
     assert(mid->object() == NULL, "invariant: object=" INTPTR_FORMAT,
            p2i(mid->object()));
+    assert(mid->is_free(), "invariant");
 
     // Move the deflated ObjectMonitor to the working free list
     // defined by free_head_p and free_tail_p.
     if (*free_head_p == NULL) *free_head_p = mid;
     if (*free_tail_p != NULL) {

@@ -1828,10 +2110,132 @@
     deflated = true;
   }
   return deflated;
 }
 
+// Deflate the specified ObjectMonitor if not in-use using a JavaThread.
+// Returns true if it was deflated and false otherwise.
+//
+// The async deflation protocol sets owner to DEFLATER_MARKER and
+// makes contentions negative as signals to contending threads that
+// an async deflation is in progress. There are a number of checks
+// as part of the protocol to make sure that the calling thread has
+// not lost the race to a contending thread.
+//
+// The ObjectMonitor has been successfully async deflated when:
+//   (contentions < 0)
+// Contending threads that see that condition know to retry their operation.
+//
+bool ObjectSynchronizer::deflate_monitor_using_JT(ObjectMonitor* mid,
+                                                  ObjectMonitor** free_head_p,
+                                                  ObjectMonitor** free_tail_p) {
+  assert(AsyncDeflateIdleMonitors, "sanity check");
+  assert(Thread::current()->is_Java_thread(), "precondition");
+  // A newly allocated ObjectMonitor should not be seen here so we
+  // avoid an endless inflate/deflate cycle.
+  assert(mid->is_old(), "must be old: allocation_state=%d",
+         (int) mid->allocation_state());
+
+  if (mid->is_busy()) {
+    // Easy checks are first - the ObjectMonitor is busy so no deflation.
+    return false;
+  }
+
+  // Set a NULL owner to DEFLATER_MARKER to force any contending thread
+  // through the slow path. This is just the first part of the async
+  // deflation dance.
+  if (mid->try_set_owner_from(NULL, DEFLATER_MARKER) != NULL) {
+    // The owner field is no longer NULL so we lost the race since the
+    // ObjectMonitor is now busy.
+    return false;
+  }
+
+  if (mid->contentions() > 0 || mid->_waiters != 0) {
+    // Another thread has raced to enter the ObjectMonitor after
+    // mid->is_busy() above or has already entered and waited on
+    // it which makes it busy so no deflation. Restore owner to
+    // NULL if it is still DEFLATER_MARKER.
+    if (mid->try_set_owner_from(DEFLATER_MARKER, NULL) != DEFLATER_MARKER) {
+      // Deferred decrement for the JT EnterI() that cancelled the async deflation.
+      mid->add_to_contentions(-1);
+    }
+    return false;
+  }
+
+  // Make a zero contentions field negative to force any contending threads
+  // to retry. This is the second part of the async deflation dance.
+  if (Atomic::cmpxchg(&mid->_contentions, (jint)0, -max_jint) != 0) {
+    // Contentions was no longer 0 so we lost the race since the
+    // ObjectMonitor is now busy. Restore owner to NULL if it is
+    // still DEFLATER_MARKER:
+    if (mid->try_set_owner_from(DEFLATER_MARKER, NULL) != DEFLATER_MARKER) {
+      // Deferred decrement for the JT EnterI() that cancelled the async deflation.
+      mid->add_to_contentions(-1);
+    }
+    return false;
+  }
+
+  // Sanity checks for the races:
+  guarantee(mid->owner_is_DEFLATER_MARKER(), "must be deflater marker");
+  guarantee(mid->contentions() < 0, "must be negative: contentions=%d",
+            mid->contentions());
+  guarantee(mid->_waiters == 0, "must be 0: waiters=%d", mid->_waiters);
+  guarantee(mid->_cxq == NULL, "must be no contending threads: cxq="
+            INTPTR_FORMAT, p2i(mid->_cxq));
+  guarantee(mid->_EntryList == NULL,
+            "must be no entering threads: EntryList=" INTPTR_FORMAT,
+            p2i(mid->_EntryList));
+
+  const oop obj = (oop) mid->object();
+  if (log_is_enabled(Trace, monitorinflation)) {
+    ResourceMark rm;
+    log_trace(monitorinflation)("deflate_monitor_using_JT: "
+                                "object=" INTPTR_FORMAT ", mark="
+                                INTPTR_FORMAT ", type='%s'",
+                                p2i(obj), obj->mark().value(),
+                                obj->klass()->external_name());
+  }
+
+  // Install the old mark word if nobody else has already done it.
+  mid->install_displaced_markword_in_object(obj);
+  mid->clear_common();
+
+  assert(mid->object() == NULL, "must be NULL: object=" INTPTR_FORMAT,
+         p2i(mid->object()));
+  assert(mid->is_free(), "must be free: allocation_state=%d",
+         (int)mid->allocation_state());
+
+  // Move the deflated ObjectMonitor to the working free list
+  // defined by free_head_p and free_tail_p.
+  if (*free_head_p == NULL) {
+    // First one on the list.
+    *free_head_p = mid;
+  }
+  if (*free_tail_p != NULL) {
+    // We append to the list so the caller can use mid->_next_om
+    // to fix the linkages in its context.
+    ObjectMonitor* prevtail = *free_tail_p;
+    // prevtail should have been cleaned up by the caller:
+#ifdef ASSERT
+    ObjectMonitor* l_next_om = unmarked_next(prevtail);
+#endif
+    assert(l_next_om == NULL, "must be NULL: _next_om=" INTPTR_FORMAT, p2i(l_next_om));
+    om_lock(prevtail);
+    prevtail->set_next_om(mid);  // prevtail now points to mid (and is unlocked)
+  }
+  *free_tail_p = mid;
+
+  // At this point, mid->_next_om still refers to its current
+  // value and another ObjectMonitor's _next_om field still
+  // refers to this ObjectMonitor. Those linkages have to be
+  // cleaned up by the caller who has the complete context.
+
+  // We leave owner == DEFLATER_MARKER and contentions < 0
+  // to force any racing threads to retry.
+  return true;  // Success, ObjectMonitor has been deflated.
+}
+
 // Walk a given monitor list, and deflate idle monitors.
 // The given list could be a per-thread list or a global list.
 //
 // In the case of parallel processing of thread local monitor lists,
 // work is done by Threads::parallel_threads_do() which ensures that

@@ -1878,20 +2282,170 @@
     }
   }
   return deflated_count;
 }
 
+// Walk a given ObjectMonitor list and deflate idle ObjectMonitors using
+// a JavaThread. Returns the number of deflated ObjectMonitors. The given
+// list could be a per-thread in-use list or the global in-use list.
+// If a safepoint has started, then we save state via saved_mid_in_use_p
+// and return to the caller to honor the safepoint.
+//
+int ObjectSynchronizer::deflate_monitor_list_using_JT(ObjectMonitor** list_p,
+                                                      int* count_p,
+                                                      ObjectMonitor** free_head_p,
+                                                      ObjectMonitor** free_tail_p,
+                                                      ObjectMonitor** saved_mid_in_use_p) {
+  assert(AsyncDeflateIdleMonitors, "sanity check");
+  JavaThread* self = JavaThread::current();
+
+  ObjectMonitor* cur_mid_in_use = NULL;
+  ObjectMonitor* mid = NULL;
+  ObjectMonitor* next = NULL;
+  ObjectMonitor* next_next = NULL;
+  int deflated_count = 0;
+  NoSafepointVerifier nsv;
+
+  // We use the more complicated lock-cur_mid_in_use-and-mid-as-we-go
+  // protocol because om_release() can do list deletions in parallel;
+  // this also prevents races with a list walker thread. We also
+  // lock-next-next-as-we-go to prevent an om_flush() that is behind
+  // this thread from passing us.
+  if (*saved_mid_in_use_p == NULL) {
+    // No saved state so start at the beginning.
+    // Lock the list head so we can possibly deflate it:
+    if ((mid = get_list_head_locked(list_p)) == NULL) {
+      return 0;  // The list is empty so nothing to deflate.
+    }
+    next = unmarked_next(mid);
+  } else {
+    // We're restarting after a safepoint so restore the necessary state
+    // before we resume.
+    cur_mid_in_use = *saved_mid_in_use_p;
+    // Lock cur_mid_in_use so we can possibly update its
+    // next field to extract a deflated ObjectMonitor.
+    om_lock(cur_mid_in_use);
+    mid = unmarked_next(cur_mid_in_use);
+    if (mid == NULL) {
+      om_unlock(cur_mid_in_use);
+      *saved_mid_in_use_p = NULL;
+      return 0;  // The remainder is empty so nothing more to deflate.
+    }
+    // Lock mid so we can possibly deflate it:
+    om_lock(mid);
+    next = unmarked_next(mid);
+  }
+
+  while (true) {
+    // The current mid is locked at this point. If we have a
+    // cur_mid_in_use, then it is also locked at this point.
+
+    if (next != NULL) {
+      // We lock next so that an om_flush() thread that is behind us
+      // cannot pass us when we unlock the current mid.
+      om_lock(next);
+      next_next = unmarked_next(next);
+    }
+
+    // Only try to deflate if there is an associated Java object and if
+    // mid is old (is not newly allocated and is not newly freed).
+    if (mid->object() != NULL && mid->is_old() &&
+        deflate_monitor_using_JT(mid, free_head_p, free_tail_p)) {
+      // Deflation succeeded and already updated free_head_p and
+      // free_tail_p as needed. Finish the move to the local free list
+      // by unlinking mid from the global or per-thread in-use list.
+      if (cur_mid_in_use == NULL) {
+        // mid is the list head and it is locked. Switch the list head
+        // to next which is also locked (if not NULL) and also leave
+        // mid locked:
+        Atomic::store(list_p, next);
+      } else {
+        ObjectMonitor* locked_next = mark_om_ptr(next);
+        // mid and cur_mid_in_use are locked. Switch cur_mid_in_use's
+        // next field to locked_next and also leave mid locked:
+        cur_mid_in_use->set_next_om(locked_next);
+      }
+      // At this point mid is disconnected from the in-use list so
+      // its lock no longer has any effect on the in-use list.
+      deflated_count++;
+      Atomic::dec(count_p);
+      // mid is the current tail of the free_head_p list so NULL terminate it
+      // (which also unlocks it):
+      mid->set_next_om(NULL);
+
+      // All the list management is done so move on to the next one:
+      mid = next;  // mid keeps non-NULL next's locked state
+      next = next_next;
+    } else {
+      // mid is considered in-use if it does not have an associated
+      // Java object or mid is not old or deflation did not succeed.
+      // A mid->is_new() node can be seen here when it is freshly
+      // returned by om_alloc() (and skips the deflation code path).
+      // A mid->is_old() node can be seen here when deflation failed.
+      // A mid->is_free() node can be seen here when a fresh node from
+      // om_alloc() is released by om_release() due to losing the race
+      // in inflate().
+
+      // All the list management is done so move on to the next one:
+      if (cur_mid_in_use != NULL) {
+        om_unlock(cur_mid_in_use);
+      }
+      // The next cur_mid_in_use keeps mid's lock state so
+      // that it is stable for a possible next field change. It
+      // cannot be modified by om_release() while it is locked.
+      cur_mid_in_use = mid;
+      mid = next;  // mid keeps non-NULL next's locked state
+      next = next_next;
+
+      if (SafepointMechanism::should_block(self) &&
+          cur_mid_in_use != Atomic::load(list_p) && cur_mid_in_use->is_old()) {
+        // If a safepoint has started and cur_mid_in_use is not the list
+        // head and is old, then it is safe to use as saved state. Return
+        // to the caller before blocking.
+        *saved_mid_in_use_p = cur_mid_in_use;
+        om_unlock(cur_mid_in_use);
+        if (mid != NULL) {
+          om_unlock(mid);
+        }
+        return deflated_count;
+      }
+    }
+    if (mid == NULL) {
+      if (cur_mid_in_use != NULL) {
+        om_unlock(cur_mid_in_use);
+      }
+      break;  // Reached end of the list so nothing more to deflate.
+    }
+
+    // The current mid's next field is locked at this point. If we have
+    // a cur_mid_in_use, then it is also locked at this point.
+  }
+  // We finished the list without a safepoint starting so there's
+  // no need to save state.
+  *saved_mid_in_use_p = NULL;
+  return deflated_count;
+}
+
 void ObjectSynchronizer::prepare_deflate_idle_monitors(DeflateMonitorCounters* counters) {
   counters->n_in_use = 0;              // currently associated with objects
   counters->n_in_circulation = 0;      // extant
   counters->n_scavenged = 0;           // reclaimed (global and per-thread)
   counters->per_thread_scavenged = 0;  // per-thread scavenge total
   counters->per_thread_times = 0.0;    // per-thread scavenge times
 }
 
 void ObjectSynchronizer::deflate_idle_monitors(DeflateMonitorCounters* counters) {
   assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+
+  if (AsyncDeflateIdleMonitors) {
+    // Nothing to do when global idle ObjectMonitors are deflated using
+    // a JavaThread unless a special deflation has been requested.
+    if (!is_special_deflation_requested()) {
+      return;
+    }
+  }
+
   bool deflated = false;
 
   ObjectMonitor* free_head_p = NULL;  // Local SLL of scavenged monitors
   ObjectMonitor* free_tail_p = NULL;
   elapsedTimer timer;

@@ -1940,39 +2494,257 @@
   if (ls != NULL) {
     ls->print_cr("deflating global idle monitors, %3.7f secs, %d monitors", timer.seconds(), deflated_count);
   }
 }
 
+class HandshakeForDeflation : public HandshakeClosure {
+ public:
+  HandshakeForDeflation() : HandshakeClosure("HandshakeForDeflation") {}
+
+  void do_thread(Thread* thread) {
+    log_trace(monitorinflation)("HandshakeForDeflation::do_thread: thread="
+                                INTPTR_FORMAT, p2i(thread));
+  }
+};
+
+void ObjectSynchronizer::deflate_idle_monitors_using_JT() {
+  assert(AsyncDeflateIdleMonitors, "sanity check");
+
+  // Deflate any global idle monitors.
+  deflate_global_idle_monitors_using_JT();
+
+  int count = 0;
+  for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) {
+    if (Atomic::load(&jt->om_in_use_count) > 0 && !jt->is_exiting()) {
+      // This JavaThread is using ObjectMonitors so deflate any that
+      // are idle unless this JavaThread is exiting; do not race with
+      // ObjectSynchronizer::om_flush().
+      deflate_per_thread_idle_monitors_using_JT(jt);
+      count++;
+    }
+  }
+  if (count > 0) {
+    log_debug(monitorinflation)("did async deflation of idle monitors for %d thread(s).", count);
+  }
+
+  log_info(monitorinflation)("async global_population=%d, global_in_use_count=%d, "
+                             "global_free_count=%d, global_wait_count=%d",
+                             Atomic::load(&om_list_globals._population),
+                             Atomic::load(&om_list_globals._in_use_count),
+                             Atomic::load(&om_list_globals._free_count),
+                             Atomic::load(&om_list_globals._wait_count));
+
+  // The ServiceThread's async deflation request has been processed.
+  set_is_async_deflation_requested(false);
+
+  if (Atomic::load(&om_list_globals._wait_count) > 0) {
+    // There are deflated ObjectMonitors waiting for a handshake
+    // (or a safepoint) for safety.
+
+    ObjectMonitor* list = Atomic::load(&om_list_globals._wait_list);
+    ADIM_guarantee(list != NULL, "om_list_globals._wait_list must not be NULL");
+    int count = Atomic::load(&om_list_globals._wait_count);
+    Atomic::store(&om_list_globals._wait_count, 0);
+    Atomic::store(&om_list_globals._wait_list, (ObjectMonitor*)NULL);
+
+    // Find the tail for prepend_list_to_common(). No need to mark
+    // ObjectMonitors for this list walk since only the deflater
+    // thread manages the wait list.
+    int l_count = 0;
+    ObjectMonitor* tail = NULL;
+    for (ObjectMonitor* n = list; n != NULL; n = unmarked_next(n)) {
+      tail = n;
+      l_count++;
+    }
+    ADIM_guarantee(count == l_count, "count=%d != l_count=%d", count, l_count);
+
+    // Will execute a safepoint if !ThreadLocalHandshakes:
+    HandshakeForDeflation hfd_hc;
+    Handshake::execute(&hfd_hc);
+
+    prepend_list_to_common(list, tail, count, &om_list_globals._free_list,
+                           &om_list_globals._free_count);
+
+    log_info(monitorinflation)("moved %d idle monitors from global waiting list to global free list", count);
+  }
+}
+
+// Deflate global idle ObjectMonitors using a JavaThread.
+//
+void ObjectSynchronizer::deflate_global_idle_monitors_using_JT() {
+  assert(AsyncDeflateIdleMonitors, "sanity check");
+  assert(Thread::current()->is_Java_thread(), "precondition");
+  JavaThread* self = JavaThread::current();
+
+  deflate_common_idle_monitors_using_JT(true /* is_global */, self);
+}
+
+// Deflate the specified JavaThread's idle ObjectMonitors using a JavaThread.
+//
+void ObjectSynchronizer::deflate_per_thread_idle_monitors_using_JT(JavaThread* target) {
+  assert(AsyncDeflateIdleMonitors, "sanity check");
+  assert(Thread::current()->is_Java_thread(), "precondition");
+
+  deflate_common_idle_monitors_using_JT(false /* !is_global */, target);
+}
+
+// Deflate global or per-thread idle ObjectMonitors using a JavaThread.
+//
+void ObjectSynchronizer::deflate_common_idle_monitors_using_JT(bool is_global, JavaThread* target) {
+  JavaThread* self = JavaThread::current();
+
+  int deflated_count = 0;
+  ObjectMonitor* free_head_p = NULL;  // Local SLL of scavenged ObjectMonitors
+  ObjectMonitor* free_tail_p = NULL;
+  ObjectMonitor* saved_mid_in_use_p = NULL;
+  elapsedTimer timer;
+
+  if (log_is_enabled(Info, monitorinflation)) {
+    timer.start();
+  }
+
+  if (is_global) {
+    OM_PERFDATA_OP(MonExtant, set_value(Atomic::load(&om_list_globals._in_use_count)));
+  } else {
+    OM_PERFDATA_OP(MonExtant, inc(Atomic::load(&target->om_in_use_count)));
+  }
+
+  do {
+    if (saved_mid_in_use_p != NULL) {
+      // We looped around because deflate_monitor_list_using_JT()
+      // detected a pending safepoint. Honoring the safepoint is good,
+      // but as long as is_special_deflation_requested() is supported,
+      // we can't safely restart using saved_mid_in_use_p. That saved
+      // ObjectMonitor could have been deflated by safepoint based
+      // deflation and would no longer be on the in-use list where we
+      // originally found it.
+      saved_mid_in_use_p = NULL;
+    }
+    int local_deflated_count;
+    if (is_global) {
+      local_deflated_count =
+          deflate_monitor_list_using_JT(&om_list_globals._in_use_list,
+                                        &om_list_globals._in_use_count,
+                                        &free_head_p, &free_tail_p,
+                                        &saved_mid_in_use_p);
+    } else {
+      local_deflated_count =
+          deflate_monitor_list_using_JT(&target->om_in_use_list,
+                                        &target->om_in_use_count, &free_head_p,
+                                        &free_tail_p, &saved_mid_in_use_p);
+    }
+    deflated_count += local_deflated_count;
+
+    if (free_head_p != NULL) {
+      // Move the deflated ObjectMonitors to the global free list.
+      guarantee(free_tail_p != NULL && local_deflated_count > 0, "free_tail_p=" INTPTR_FORMAT ", local_deflated_count=%d", p2i(free_tail_p), local_deflated_count);
+      // Note: The target thread can be doing an om_alloc() that
+      // is trying to prepend an ObjectMonitor on its in-use list
+      // at the same time that we have deflated the current in-use
+      // list head and put it on the local free list. prepend_to_common()
+      // will detect the race and retry which avoids list corruption,
+      // but the next field in free_tail_p can flicker to marked
+      // and then unmarked while prepend_to_common() is sorting it
+      // all out.
+#ifdef ASSERT
+      ObjectMonitor* l_next_om = unmarked_next(free_tail_p);
+#endif
+      assert(l_next_om == NULL, "must be NULL: _next_om=" INTPTR_FORMAT, p2i(l_next_om));
+
+      prepend_list_to_global_wait_list(free_head_p, free_tail_p, local_deflated_count);
+
+      OM_PERFDATA_OP(Deflations, inc(local_deflated_count));
+    }
+
+    if (saved_mid_in_use_p != NULL) {
+      // deflate_monitor_list_using_JT() detected a safepoint starting.
+      timer.stop();
+      {
+        if (is_global) {
+          log_debug(monitorinflation)("pausing deflation of global idle monitors for a safepoint.");
+        } else {
+          log_debug(monitorinflation)("jt=" INTPTR_FORMAT ": pausing deflation of per-thread idle monitors for a safepoint.", p2i(target));
+        }
+        assert(SafepointMechanism::should_block(self), "sanity check");
+        ThreadBlockInVM blocker(self);
+      }
+      // Prepare for another loop after the safepoint.
+      free_head_p = NULL;
+      free_tail_p = NULL;
+      if (log_is_enabled(Info, monitorinflation)) {
+        timer.start();
+      }
+    }
+  } while (saved_mid_in_use_p != NULL);
+  timer.stop();
+
+  LogStreamHandle(Debug, monitorinflation) lsh_debug;
+  LogStreamHandle(Info, monitorinflation) lsh_info;
+  LogStream* ls = NULL;
+  if (log_is_enabled(Debug, monitorinflation)) {
+    ls = &lsh_debug;
+  } else if (deflated_count != 0 && log_is_enabled(Info, monitorinflation)) {
+    ls = &lsh_info;
+  }
+  if (ls != NULL) {
+    if (is_global) {
+      ls->print_cr("async-deflating global idle monitors, %3.7f secs, %d monitors", timer.seconds(), deflated_count);
+    } else {
+      ls->print_cr("jt=" INTPTR_FORMAT ": async-deflating per-thread idle monitors, %3.7f secs, %d monitors", p2i(target), timer.seconds(), deflated_count);
+    }
+  }
+}
+
 // Wrap-up step for deflating idle monitors. Always logs the cumulative
 // per-thread deflation time and scavenged count found in 'counters'.
 // Unless AsyncDeflateIdleMonitors is on and no special deflation has
 // been requested, it then also logs the global list state, folds the
 // totals from 'counters' into the perf data, refreshes the GVars
 // stw_random/stw_cycle values and clears a pending special deflation
 // request.
 //
 //   counters - totals accumulated while deflating; only read here.
 void ObjectSynchronizer::finish_deflate_idle_monitors(DeflateMonitorCounters* counters) {
   // Report the cumulative time for deflating each thread's idle
   // monitors. Note: if the work is split among more than one
   // worker thread, then the reported time will likely be more
   // than a beginning to end measurement of the phase.
   log_info(safepoint, cleanup)("deflating per-thread idle monitors, %3.7f secs, monitors=%d", counters->per_thread_times, counters->per_thread_scavenged);
 
+  bool needs_special_deflation = is_special_deflation_requested();  // read once; reused for the reset at the end
+  if (AsyncDeflateIdleMonitors && !needs_special_deflation) {
+    // Nothing to do when idle ObjectMonitors are deflated using
+    // a JavaThread unless a special deflation has been requested.
+    return;
+  }
+
   if (log_is_enabled(Debug, monitorinflation)) {
     // exit_globals()'s call to audit_and_print_stats() is done
     // at the Info level and not at a safepoint.
+    // For async deflation, audit_and_print_stats() is called in
+    // ObjectSynchronizer::do_safepoint_work() at the Debug level
+    // at a safepoint.
     ObjectSynchronizer::audit_and_print_stats(false /* on_exit */);
   } else if (log_is_enabled(Info, monitorinflation)) {
     log_info(monitorinflation)("global_population=%d, global_in_use_count=%d, "
-                               "global_free_count=%d",
+                               "global_free_count=%d, global_wait_count=%d",
                                Atomic::load(&om_list_globals._population),
                                Atomic::load(&om_list_globals._in_use_count),
-                               Atomic::load(&om_list_globals._free_count));
+                               Atomic::load(&om_list_globals._free_count),
+                               Atomic::load(&om_list_globals._wait_count));
   }
 
   // Fold the totals from 'counters' into the Deflations and MonExtant
   // perf data.
   OM_PERFDATA_OP(Deflations, inc(counters->n_scavenged));
   OM_PERFDATA_OP(MonExtant, set_value(counters->n_in_circulation));
 
   // Pick a fresh stw_random value and advance the stw_cycle counter.
   GVars.stw_random = os::random();
   GVars.stw_cycle++;
+
+  if (needs_special_deflation) {
+    set_is_special_deflation_requested(false);  // special deflation is done; clear the request
+  }
 }
 
 void ObjectSynchronizer::deflate_thread_local_monitors(Thread* thread, DeflateMonitorCounters* counters) {
   assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
 
+  if (AsyncDeflateIdleMonitors && !is_special_deflation_requested()) {
+    // Nothing to do if a special deflation has NOT been requested.
+    return;
+  }
+
   ObjectMonitor* free_head_p = NULL;  // Local SLL of scavenged monitors
   ObjectMonitor* free_tail_p = NULL;
   elapsedTimer timer;
 
   if (log_is_enabled(Info, safepoint, cleanup) ||

@@ -2142,10 +2914,13 @@
   chk_global_in_use_list_and_count(ls, &error_cnt);
 
   // Check om_list_globals._free_list and om_list_globals._free_count:
   chk_global_free_list_and_count(ls, &error_cnt);
 
+  // Check om_list_globals._wait_list and om_list_globals._wait_count:
+  chk_global_wait_list_and_count(ls, &error_cnt);
+
   ls->print_cr("Checking per-thread lists:");
 
   for (JavaThreadIteratorWithHandle jtiwh; JavaThread *jt = jtiwh.next(); ) {
     // Check om_in_use_list and om_in_use_count:
     chk_per_thread_in_use_list_and_count(jt, ls, &error_cnt);

@@ -2192,17 +2967,18 @@
     if (jt != NULL) {
       out->print_cr("ERROR: jt=" INTPTR_FORMAT ", monitor=" INTPTR_FORMAT
                     ": free per-thread monitor must have NULL _header "
                     "field: _header=" INTPTR_FORMAT, p2i(jt), p2i(n),
                     n->header().value());
-    } else {
+      *error_cnt_p = *error_cnt_p + 1;
+    } else if (!AsyncDeflateIdleMonitors) {
       out->print_cr("ERROR: monitor=" INTPTR_FORMAT ": free global monitor "
                     "must have NULL _header field: _header=" INTPTR_FORMAT,
                     p2i(n), n->header().value());
-    }
     *error_cnt_p = *error_cnt_p + 1;
   }
+  }
   if (n->object() != NULL) {
     if (jt != NULL) {
       out->print_cr("ERROR: jt=" INTPTR_FORMAT ", monitor=" INTPTR_FORMAT
                     ": free per-thread monitor must have NULL _object "
                     "field: _object=" INTPTR_FORMAT, p2i(jt), p2i(n),

@@ -2264,10 +3040,40 @@
     out->print_cr("WARNING: global_free_count=%d is not equal to "
                   "chk_om_free_count=%d", l_free_count, chk_om_free_count);
   }
 }
 
+// Check the global wait list and count; log the results of the checks.
+//
+// Walks om_list_globals._wait_list, runs chk_free_entry() on every
+// entry and counts the entries visited. The visited count is then
+// compared with om_list_globals._wait_count; a mismatch is logged as
+// an ERROR and increments the error count.
+//
+//   out         - stream that receives the results of the checks
+//   error_cnt_p - running error count; incremented here on a count
+//                 mismatch and also handed to chk_free_entry()
+void ObjectSynchronizer::chk_global_wait_list_and_count(outputStream * out,
+                                                        int *error_cnt_p) {
+  int chk_om_wait_count = 0;
+  ObjectMonitor* cur = NULL;
+  // A NULL head means there is nothing to walk.
+  if ((cur = get_list_head_locked(&om_list_globals._wait_list)) != NULL) {
+    // Marked the global wait list head so process the list.
+    while (true) {
+      // Rules for om_list_globals._wait_list are the same as for
+      // om_list_globals._free_list:
+      chk_free_entry(NULL /* jt */, cur, out, error_cnt_p);
+      chk_om_wait_count++;
+
+      cur = lock_next_for_traversal(cur);
+      if (cur == NULL) {
+        break;
+      }
+    }
+  }
+  // Load the counter exactly once so that the value that is logged is
+  // the same value that was compared; re-loading it for the message
+  // could report numbers that contradict the verdict. This mirrors the
+  // l_free_count local used by the global free list check.
+  int l_wait_count = Atomic::load(&om_list_globals._wait_count);
+  if (l_wait_count == chk_om_wait_count) {
+    out->print_cr("global_wait_count=%d equals chk_om_wait_count=%d",
+                  l_wait_count, chk_om_wait_count);
+  } else {
+    out->print_cr("ERROR: global_wait_count=%d is not equal to "
+                  "chk_om_wait_count=%d",
+                  l_wait_count, chk_om_wait_count);
+    *error_cnt_p = *error_cnt_p + 1;
+  }
+}
+
 // Check the global in-use list and count; log the results of the checks.
 void ObjectSynchronizer::chk_global_in_use_list_and_count(outputStream * out,
                                                           int *error_cnt_p) {
   int chk_om_in_use_count = 0;
   ObjectMonitor* cur = NULL;

@@ -2487,18 +3293,20 @@
 
 // Log counts for the global and per-thread monitor lists and return
 // the population count.
 int ObjectSynchronizer::log_monitor_list_counts(outputStream * out) {
   int pop_count = 0;
-  out->print_cr("%18s  %10s  %10s  %10s",
-                "Global Lists:", "InUse", "Free", "Total");
-  out->print_cr("==================  ==========  ==========  ==========");
+  out->print_cr("%18s  %10s  %10s  %10s  %10s",
+                "Global Lists:", "InUse", "Free", "Wait", "Total");
+  out->print_cr("==================  ==========  ==========  ==========  ==========");
   int l_in_use_count = Atomic::load(&om_list_globals._in_use_count);
   int l_free_count = Atomic::load(&om_list_globals._free_count);
-  out->print_cr("%18s  %10d  %10d  %10d", "", l_in_use_count,
-                l_free_count, Atomic::load(&om_list_globals._population));
-  pop_count += l_in_use_count + l_free_count;
+  int l_wait_count = Atomic::load(&om_list_globals._wait_count);
+  out->print_cr("%18s  %10d  %10d  %10d  %10d", "", l_in_use_count,
+                l_free_count, l_wait_count,
+                Atomic::load(&om_list_globals._population));
+  pop_count += l_in_use_count + l_free_count + l_wait_count;
 
   out->print_cr("%18s  %10s  %10s  %10s",
                 "Per-Thread Lists:", "InUse", "Free", "Provision");
   out->print_cr("==================  ==========  ==========  ==========");
 
< prev index next >