--- old/src/share/vm/runtime/objectMonitor.cpp  2016-10-25 10:40:10.127778156 +0200
+++ new/src/share/vm/runtime/objectMonitor.cpp  2016-10-25 10:40:10.078778124 +0200
@@ -39,6 +39,7 @@
 #include "services/threadService.hpp"
 #include "trace/tracing.hpp"
 #include "trace/traceMacros.hpp"
+#include "evtrace/traceEvents.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/preserveException.hpp"
@@ -315,7 +316,7 @@
   }
 }
 
-void ATTR ObjectMonitor::enter(TRAPS) {
+void ATTR ObjectMonitor::enter(int after_wait, TRAPS) {
   // The following code is ordered to check the most common cases first
   // and to reduce RTS->RTO cache line upgrades on SPARC and IA32 processors.
   Thread * const Self = THREAD ;
@@ -378,9 +379,14 @@
 
     EventJavaMonitorEnter event;
 
+    int trace_flags = 0;
     { // Change java thread status to indicate blocked on monitor enter.
       JavaThreadBlockedOnMonitorEnterState jtbmes(jt, this);
 
+      if (EnableEventTracing) {
+        TraceEvents::write_monitor_contended_enter(this, (TraceTypes::monitor_enter_wait) after_wait);
+      }
+
       DTRACE_MONITOR_PROBE(contended__enter, this, object(), jt);
       if (JvmtiExport::should_post_monitor_contended_enter()) {
         JvmtiExport::post_monitor_contended_enter(jt, this);
@@ -403,7 +409,7 @@
         // cleared by handle_special_suspend_equivalent_condition()
         // or java_suspend_self()
 
-        EnterI (THREAD) ;
+        trace_flags |= EnterI (THREAD) ;
 
         if (!ExitSuspendEquivalent(jt)) break ;
 
@@ -453,6 +459,10 @@
   // yet to acquire the lock.  While spinning that thread could
   // spinning we could increment JVMStat counters, etc.
 
+  if (EnableEventTracing) {
+    TraceEvents::write_monitor_contended_entered(this, (TraceTypes::monitor_entered_flags) trace_flags);
+  }
+
   DTRACE_MONITOR_PROBE(contended__entered, this, object(), jt);
   if (JvmtiExport::should_post_monitor_contended_entered()) {
     JvmtiExport::post_monitor_contended_entered(jt, this);
@@ -499,7 +509,9 @@
   }
 }
 
-void ATTR ObjectMonitor::EnterI (TRAPS) {
+int ATTR ObjectMonitor::EnterI (TRAPS) {
+    int trace_flags = 0;
+
     Thread * Self = THREAD ;
     assert (Self->is_Java_thread(), "invariant") ;
     assert (((JavaThread *) Self)->thread_state() == _thread_blocked, "invariant") ;
@@ -509,7 +521,7 @@
         assert (_succ != Self              , "invariant") ;
         assert (_owner == Self             , "invariant") ;
         assert (_Responsible != Self       , "invariant") ;
-        return ;
+        return trace_flags;
     }
 
     DeferredInitialize () ;
@@ -525,7 +537,7 @@
         assert (_owner == Self       , "invariant") ;
         assert (_succ != Self        , "invariant") ;
         assert (_Responsible != Self , "invariant") ;
-        return ;
+        return trace_flags;
     }
 
     // The Spin failed -- Enqueue and park the thread ...
@@ -563,10 +575,12 @@
             assert (_succ != Self        , "invariant") ;
             assert (_owner == Self       , "invariant") ;
             assert (_Responsible != Self , "invariant") ;
-            return ;
+            return trace_flags;
         }
     }
 
+    trace_flags |= TraceTypes::entered_queued;
+
     // Check for cxq|EntryList edge transition to non-null.  This indicates
     // the onset of contention.  While contention persists exiting threads
     // will use a ST:MEMBAR:LD 1-1 exit protocol.  When contention abates exit
@@ -632,6 +646,8 @@
            Self->_ParkEvent->park() ;
         }
 
+        trace_flags |= TraceTypes::entered_parked;
+
         if (TryLock(Self) > 0) break ;
 
         // The lock is still contested.
@@ -737,7 +753,7 @@
 
     if (SyncFlags & 8) {
        OrderAccess::fence() ;
     }
-    return ;
+    return trace_flags;
 }
 
 // ReenterI() is a specialized inline form of the latter half of the
@@ -953,7 +969,7 @@
 // Both impinge on OS scalability.  Given that, at most one thread parked on
 // a monitor will use a timer.
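[Editorial note, not part of the patch] The hunks above thread a small bitmask through the contended-enter path: EnterI() now returns how the thread had to wait (TraceTypes::entered_queued once it enqueues itself, TraceTypes::entered_parked once it actually parks), and enter() ORs the result of every EnterI() call into trace_flags before emitting the contended-entered event. The minimal, standalone C++ sketch below illustrates that accumulation pattern; the enum, simulated_EnterI() and write_entered() are invented stand-ins for the patch's TraceTypes/TraceEvents names, not real HotSpot code.

    // Standalone sketch of the flag-accumulation pattern used by enter()/EnterI().
    // Names are illustrative stand-ins only.
    #include <cstdio>

    enum MonitorEnteredFlags {
      entered_flags_none = 0,
      entered_queued     = 1 << 0,  // thread had to enqueue itself on cxq
      entered_parked     = 1 << 1   // thread actually parked at least once
    };

    static int simulated_EnterI(bool had_to_queue, bool had_to_park) {
      int flags = entered_flags_none;
      if (had_to_queue) flags |= entered_queued;
      if (had_to_park)  flags |= entered_parked;
      return flags;
    }

    // Stand-in for the patch's TraceEvents::write_monitor_contended_entered().
    static void write_entered(int flags) {
      std::printf("contended-entered: queued=%d parked=%d\n",
                  (flags & entered_queued) != 0,
                  (flags & entered_parked) != 0);
    }

    int main() {
      int trace_flags = 0;
      // enter() may re-run EnterI() when suspension intervenes;
      // every attempt contributes its flags to the same event.
      trace_flags |= simulated_EnterI(true, false);
      trace_flags |= simulated_EnterI(true, true);
      write_entered(trace_flags);
      return 0;
    }

Accumulating across calls matters because enter() loops over EnterI() while ExitSuspendEquivalent() keeps firing, and the single contended-entered event should describe the whole acquisition. [end of note]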
 
-void ATTR ObjectMonitor::exit(bool not_suspended, TRAPS) {
+void ATTR ObjectMonitor::exit(intptr_t *exit_stack_id_for_wait, bool not_suspended, TRAPS) {
    Thread * Self = THREAD ;
    if (THREAD != _owner) {
      if (THREAD->is_lock_owned((address) _owner)) {
@@ -998,191 +1014,109 @@
    }
 #endif
 
-   for (;;) {
-      assert (THREAD == _owner, "invariant") ;
-
+   TraceEventMonitorContendedExited event(this);
+   if (exit_stack_id_for_wait != NULL) {
+     // This is a temporary exit for Object.wait().
+     // We don't want to use the current stack trace as the lock site, so if we
+     // end up writing the event, we allocate a stack id that we resolve later
+     // when the monitor is really exited.  When there are multiple waits, we
+     // reuse the first preallocated stack id.
+     event.set_use_or_preallocate_stack_id_at((TraceTypes::stack_id *) exit_stack_id_for_wait);
+     event.set_resolve_stack(false);
+   } else {
+     // true exit
+     event.set_resolve_stack(true);
+     if (_trace_exit_stack != 0) {
+       event.set_use_stack_id(_trace_exit_stack);
+       event.enable(); // always write the exit event to resolve the stack
+     }
+   }
+   if ((intptr_t(_EntryList) | intptr_t(_cxq)) != 0) {
+     // there are queued threads -- we are definitely writing a trace event
+     event.enable();
+   }
 
-      if (Knob_ExitPolicy == 0) {
-         // release semantics: prior loads and stores from within the critical section
-         // must not float (reorder) past the following store that drops the lock.
-         // On SPARC that requires MEMBAR #loadstore|#storestore.
-         // But of course in TSO #loadstore|#storestore is not required.
-         // I'd like to write one of the following:
-         // A.  OrderAccess::release() ; _owner = NULL
-         // B.  OrderAccess::loadstore(); OrderAccess::storestore(); _owner = NULL;
-         // Unfortunately OrderAccess::release() and OrderAccess::loadstore() both
-         // store into a _dummy variable.  That store is not needed, but can result
-         // in massive wasteful coherency traffic on classic SMP systems.
-         // Instead, I use release_store(), which is implemented as just a simple
-         // ST on x64, x86 and SPARC.
-         OrderAccess::release_store_ptr (&_owner, NULL) ;   // drop the lock
-         OrderAccess::storeload() ;                         // See if we need to wake a successor
-         if ((intptr_t(_EntryList)|intptr_t(_cxq)) == 0 || _succ != NULL) {
-            TEVENT (Inflated exit - simple egress) ;
-            return ;
-         }
-         TEVENT (Inflated exit - complex egress) ;
+   _trace_exit_stack = 0;
 
-         // Normally the exiting thread is responsible for ensuring succession,
-         // but if other successors are ready or other entering threads are spinning
-         // then this thread can simply store NULL into _owner and exit without
-         // waking a successor.  The existence of spinners or ready successors
-         // guarantees proper succession (liveness).  Responsibility passes to the
-         // ready or running successors.  The exiting thread delegates the duty.
-         // More precisely, if a successor already exists this thread is absolved
-         // of the responsibility of waking (unparking) one.
-         //
-         // The _succ variable is critical to reducing futile wakeup frequency.
-         // _succ identifies the "heir presumptive" thread that has been made
-         // ready (unparked) but that has not yet run.  We need only one such
-         // successor thread to guarantee progress.
-         // See http://www.usenix.org/events/jvm01/full_papers/dice/dice.pdf
-         // section 3.3 "Futile Wakeup Throttling" for details.
-         //
-         // Note that spinners in Enter() also set _succ non-null.
-         // In the current implementation spinners opportunistically set
-         // _succ so that exiting threads might avoid waking a successor.
-         // Another less appealing alternative would be for the exiting thread
-         // to drop the lock and then spin briefly to see if a spinner managed
-         // to acquire the lock.  If so, the exiting thread could exit
-         // immediately without waking a successor, otherwise the exiting
-         // thread would need to dequeue and wake a successor.
-         // (Note that we'd need to make the post-drop spin short, but no
-         // shorter than the worst-case round-trip cache-line migration time.
-         // The dropped lock needs to become visible to the spinner, and then
-         // the acquisition of the lock by the spinner must become visible to
-         // the exiting thread).
-         //
+   for (;;) {
+      assert (THREAD == _owner, "invariant") ;
 
-         // It appears that an heir-presumptive (successor) must be made ready.
-         // Only the current lock owner can manipulate the EntryList or
-         // drain _cxq, so we need to reacquire the lock.   If we fail
-         // to reacquire the lock the responsibility for ensuring succession
-         // falls to the new owner.
-         //
-         if (Atomic::cmpxchg_ptr (THREAD, &_owner, NULL) != NULL) {
-            return ;
-         }
-         TEVENT (Exit - Reacquired) ;
-      } else {
-         if ((intptr_t(_EntryList)|intptr_t(_cxq)) == 0 || _succ != NULL) {
-            OrderAccess::release_store_ptr (&_owner, NULL) ;   // drop the lock
-            OrderAccess::storeload() ;
-            // Ratify the previously observed values.
-            if (_cxq == NULL || _succ != NULL) {
-               TEVENT (Inflated exit - simple egress) ;
-               return ;
-            }
+      //
+      // NOTE: we have removed all code paths for ExitPolicy != 0 and QMode != 0
+      // knob values for simplicity of event tracing.
+      //
 
-            // inopportune interleaving -- the exiting thread (this thread)
-            // in the fast-exit path raced an entering thread in the slow-enter
-            // path.
-            // We have two choices:
-            //   A.  Try to reacquire the lock.
-            //       If the CAS() fails return immediately, otherwise
-            //       we either restart/rerun the exit operation, or simply
-            //       fall-through into the code below which wakes a successor.
-            //   B.  If the elements forming the EntryList|cxq are TSM
-            //       we could simply unpark() the lead thread and return
-            //       without having set _succ.
-            if (Atomic::cmpxchg_ptr (THREAD, &_owner, NULL) != NULL) {
-               TEVENT (Inflated exit - reacquired succeeded) ;
-               return ;
-            }
-            TEVENT (Inflated exit - reacquired failed) ;
-         } else {
-            TEVENT (Inflated exit - complex egress) ;
-         }
+      // release semantics: prior loads and stores from within the critical section
+      // must not float (reorder) past the following store that drops the lock.
+      // On SPARC that requires MEMBAR #loadstore|#storestore.
+      // But of course in TSO #loadstore|#storestore is not required.
+      // I'd like to write one of the following:
+      // A.  OrderAccess::release() ; _owner = NULL
+      // B.  OrderAccess::loadstore(); OrderAccess::storestore(); _owner = NULL;
+      // Unfortunately OrderAccess::release() and OrderAccess::loadstore() both
+      // store into a _dummy variable.  That store is not needed, but can result
+      // in massive wasteful coherency traffic on classic SMP systems.
+      // Instead, I use release_store(), which is implemented as just a simple
+      // ST on x64, x86 and SPARC.
+      OrderAccess::release_store_ptr (&_owner, NULL) ;   // drop the lock
+      OrderAccess::storeload() ;                         // See if we need to wake a successor
+      bool queues_empty = ((intptr_t(_EntryList) | intptr_t(_cxq)) == 0);
+      bool have_succ = (_succ != NULL);
+      if (!queues_empty) {
+        // some thread might have entered itself on _cxq in the meantime
+        event.enable();
       }
-
-      guarantee (_owner == THREAD, "invariant") ;
-
-      ObjectWaiter * w = NULL ;
-      int QMode = Knob_QMode ;
-
-      if (QMode == 2 && _cxq != NULL) {
-          // QMode == 2 : cxq has precedence over EntryList.
-          // Try to directly wake a successor from the cxq.
-          // If successful, the successor will need to unlink itself from cxq.
-          w = _cxq ;
-          assert (w != NULL, "invariant") ;
-          assert (w->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
-          ExitEpilog (Self, w) ;
+      if (queues_empty || have_succ) {
+         TEVENT (Inflated exit - simple egress) ;
          return ;
      }
+      TEVENT (Inflated exit - complex egress) ;
 
-      if (QMode == 3 && _cxq != NULL) {
-          // Aggressively drain cxq into EntryList at the first opportunity.
-          // This policy ensure that recently-run threads live at the head of EntryList.
-          // Drain _cxq into EntryList - bulk transfer.
-          // First, detach _cxq.
-          // The following loop is tantamount to: w = swap (&cxq, NULL)
-          w = _cxq ;
-          for (;;) {
-             assert (w != NULL, "Invariant") ;
-             ObjectWaiter * u = (ObjectWaiter *) Atomic::cmpxchg_ptr (NULL, &_cxq, w) ;
-             if (u == w) break ;
-             w = u ;
-          }
-          assert (w != NULL              , "invariant") ;
-
-          ObjectWaiter * q = NULL ;
-          ObjectWaiter * p ;
-          for (p = w ; p != NULL ; p = p->_next) {
-              guarantee (p->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
-              p->TState = ObjectWaiter::TS_ENTER ;
-              p->_prev = q ;
-              q = p ;
-          }
-
-          // Append the RATs to the EntryList
-          // TODO: organize EntryList as a CDLL so we can locate the tail in constant-time.
-          ObjectWaiter * Tail ;
-          for (Tail = _EntryList ; Tail != NULL && Tail->_next != NULL ; Tail = Tail->_next) ;
-          if (Tail == NULL) {
-              _EntryList = w ;
-          } else {
-              Tail->_next = w ;
-              w->_prev = Tail ;
-          }
+      // Normally the exiting thread is responsible for ensuring succession,
+      // but if other successors are ready or other entering threads are spinning
+      // then this thread can simply store NULL into _owner and exit without
+      // waking a successor.  The existence of spinners or ready successors
+      // guarantees proper succession (liveness).  Responsibility passes to the
+      // ready or running successors.  The exiting thread delegates the duty.
+      // More precisely, if a successor already exists this thread is absolved
+      // of the responsibility of waking (unparking) one.
+      //
+      // The _succ variable is critical to reducing futile wakeup frequency.
+      // _succ identifies the "heir presumptive" thread that has been made
+      // ready (unparked) but that has not yet run.  We need only one such
+      // successor thread to guarantee progress.
+      // See http://www.usenix.org/events/jvm01/full_papers/dice/dice.pdf
+      // section 3.3 "Futile Wakeup Throttling" for details.
+      //
+      // Note that spinners in Enter() also set _succ non-null.
+      // In the current implementation spinners opportunistically set
+      // _succ so that exiting threads might avoid waking a successor.
+      // Another less appealing alternative would be for the exiting thread
+      // to drop the lock and then spin briefly to see if a spinner managed
+      // to acquire the lock.  If so, the exiting thread could exit
+      // immediately without waking a successor, otherwise the exiting
+      // thread would need to dequeue and wake a successor.
+      // (Note that we'd need to make the post-drop spin short, but no
+      // shorter than the worst-case round-trip cache-line migration time.
+      // The dropped lock needs to become visible to the spinner, and then
+      // the acquisition of the lock by the spinner must become visible to
+      // the exiting thread).
+      //
 
-          // Fall thru into code that tries to wake a successor from EntryList
+      // It appears that an heir-presumptive (successor) must be made ready.
+      // Only the current lock owner can manipulate the EntryList or
+      // drain _cxq, so we need to reacquire the lock.   If we fail
+      // to reacquire the lock the responsibility for ensuring succession
+      // falls to the new owner.
+      //
+      if (Atomic::cmpxchg_ptr (THREAD, &_owner, NULL) != NULL) {
+         return ;
      }
+      TEVENT (Exit - Reacquired) ;
 
-      if (QMode == 4 && _cxq != NULL) {
-          // Aggressively drain cxq into EntryList at the first opportunity.
-          // This policy ensure that recently-run threads live at the head of EntryList.
-
-          // Drain _cxq into EntryList - bulk transfer.
-          // First, detach _cxq.
-          // The following loop is tantamount to: w = swap (&cxq, NULL)
-          w = _cxq ;
-          for (;;) {
-             assert (w != NULL, "Invariant") ;
-             ObjectWaiter * u = (ObjectWaiter *) Atomic::cmpxchg_ptr (NULL, &_cxq, w) ;
-             if (u == w) break ;
-             w = u ;
-          }
-          assert (w != NULL              , "invariant") ;
-
-          ObjectWaiter * q = NULL ;
-          ObjectWaiter * p ;
-          for (p = w ; p != NULL ; p = p->_next) {
-              guarantee (p->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
-              p->TState = ObjectWaiter::TS_ENTER ;
-              p->_prev = q ;
-              q = p ;
-          }
-
-          // Prepend the RATs to the EntryList
-          if (_EntryList != NULL) {
-              q->_next = _EntryList ;
-              _EntryList->_prev = q ;
-          }
-          _EntryList = w ;
+      guarantee (_owner == THREAD, "invariant") ;
 
-          // Fall thru into code that tries to wake a successor from EntryList
-      }
+      ObjectWaiter * w = NULL ;
       w = _EntryList ;
       if (w != NULL) {
@@ -1230,34 +1164,14 @@
          // TODO-FIXME: consider changing EntryList from a DLL to a CDLL so
          // we have faster access to the tail.
 
-         if (QMode == 1) {
-            // QMode == 1 : drain cxq to EntryList, reversing order
-            // We also reverse the order of the list.
-            ObjectWaiter * s = NULL ;
-            ObjectWaiter * t = w ;
-            ObjectWaiter * u = NULL ;
-            while (t != NULL) {
-                guarantee (t->TState == ObjectWaiter::TS_CXQ, "invariant") ;
-                t->TState = ObjectWaiter::TS_ENTER ;
-                u = t->_next ;
-                t->_prev = u ;
-                t->_next = s ;
-                s = t;
-                t = u ;
-            }
-            _EntryList  = s ;
-            assert (s != NULL, "invariant") ;
-         } else {
-            // QMode == 0 or QMode == 2
-            _EntryList = w ;
-            ObjectWaiter * q = NULL ;
-            ObjectWaiter * p ;
-            for (p = w ; p != NULL ; p = p->_next) {
-                guarantee (p->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
-                p->TState = ObjectWaiter::TS_ENTER ;
-                p->_prev = q ;
-                q = p ;
-            }
+         _EntryList = w ;
+         ObjectWaiter * q = NULL ;
+         ObjectWaiter * p ;
+         for (p = w ; p != NULL ; p = p->_next) {
+             guarantee (p->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
+             p->TState = ObjectWaiter::TS_ENTER ;
+             p->_prev = q ;
+             q = p ;
         }
 
        // In 1-0 mode we need: ST EntryList; MEMBAR #storestore; ST _owner = NULL
@@ -1368,7 +1282,7 @@
 // The _owner field is not always the Thread addr even with an
 // inflated monitor, e.g. the monitor can be inflated by a non-owning
 // thread due to contention.
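[Editorial note, not part of the patch] The rewritten exit() above constructs a TraceEventMonitorContendedExited object up front, calls enable() only once it observes contention (queued threads on _EntryList/_cxq) or a pending _trace_exit_stack that must be resolved, and never calls an explicit write. The event is presumably emitted when the object goes out of scope; under that assumption, the standalone sketch below shows the RAII shape with an invented ScopedExitEvent class, not the patch's actual event type.

    // Standalone sketch of the RAII exit-event pattern; types are stand-ins.
    #include <cstdio>
    #include <cstdint>

    class ScopedExitEvent {
      const void* _monitor;
      intptr_t    _stack_id;
      bool        _enabled;
      bool        _resolve_stack;
    public:
      explicit ScopedExitEvent(const void* monitor)
        : _monitor(monitor), _stack_id(0), _enabled(false), _resolve_stack(true) {}
      void enable()                      { _enabled = true; }
      void set_use_stack_id(intptr_t id) { _stack_id = id; }
      void set_resolve_stack(bool b)     { _resolve_stack = b; }
      ~ScopedExitEvent() {
        // Written on scope exit, but only if some path decided the exit is worth reporting.
        if (_enabled) {
          std::printf("contended-exited: monitor=%p stack=%ld resolve=%d\n",
                      _monitor, (long)_stack_id, (int)_resolve_stack);
        }
      }
    };

    int main() {
      int dummy_monitor = 0;
      {
        ScopedExitEvent event(&dummy_monitor);
        bool queues_empty = false;          // pretend _EntryList/_cxq were non-empty
        if (!queues_empty) event.enable();  // only contended exits produce an event
      }                                     // event written here, on scope exit
      return 0;
    }

The benefit of the RAII form is that every early return kept in the rewritten exit() is covered without duplicating the reporting call. [end of note]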
 
-intptr_t ObjectMonitor::complete_exit(TRAPS) {
+void ObjectMonitor::complete_exit(intptr_t *saved_recursions, intptr_t *saved_trace_exit_stack, TRAPS) {
    Thread * const Self = THREAD;
    assert(Self->is_Java_thread(), "Must be Java thread!");
    JavaThread *jt = (JavaThread *)THREAD;
@@ -1385,16 +1299,17 @@
    }
 
    guarantee(Self == _owner, "complete_exit not owner");
-   intptr_t save = _recursions; // record the old recursion count
-   _recursions = 0;             // set the recursion level to be 0
-   exit (true, Self) ;          // exit the monitor
+   // record old recursion level and exit stack
+   if (saved_recursions != NULL) *saved_recursions = _recursions;
+   if (saved_trace_exit_stack != NULL) *saved_trace_exit_stack = _trace_exit_stack;
+   _recursions = 0;
+   exit(saved_trace_exit_stack, true, Self);
    guarantee (_owner != Self, "invariant");
-   return save;
 }
 
 // reenter() enters a lock and sets recursion count
 // complete_exit/reenter operate as a wait without waiting
-void ObjectMonitor::reenter(intptr_t recursions, TRAPS) {
+void ObjectMonitor::reenter(intptr_t saved_recursions, intptr_t saved_trace_exit_stack, TRAPS) {
    Thread * const Self = THREAD;
    assert(Self->is_Java_thread(), "Must be Java thread!");
    JavaThread *jt = (JavaThread *)THREAD;
@@ -1402,8 +1317,8 @@
    guarantee(_owner != Self, "reenter already owner");
    enter (THREAD);       // enter the monitor
    guarantee (_recursions == 0, "reenter recursion");
-   _recursions = recursions;
-   return;
+   _recursions = saved_recursions;
+   _trace_exit_stack = saved_trace_exit_stack;
 }
 
 
@@ -1524,10 +1439,11 @@
    if ((SyncFlags & 4) == 0) {
       _Responsible = NULL ;
    }
-   intptr_t save = _recursions; // record the old recursion count
+   intptr_t saved_recursions = _recursions; // record the old recursion count
+   intptr_t saved_trace_exit_stack = _trace_exit_stack;
    _waiters++;                  // increment the number of waiters
    _recursions = 0;             // set the recursion level to be 1
-   exit (true, Self) ;          // exit the monitor
+   exit(&saved_trace_exit_stack, true, Self); // exit, knows how to handle exit stack
    guarantee (_owner != Self, "invariant") ;
 
    // The thread is on the WaitSet list - now park() it.
@@ -1644,7 +1560,13 @@
      assert (_owner != Self, "invariant") ;
      ObjectWaiter::TStates v = node.TState ;
      if (v == ObjectWaiter::TS_RUN) {
-        enter (Self) ;
+        int after_wait = TraceTypes::enter_after_wait_other;
+        if (node._notified) {
+          after_wait = TraceTypes::enter_after_wait_notify;
+        } else if (ret == OS_TIMEOUT) {
+          after_wait = TraceTypes::enter_after_wait_timeout;
+        }
+        enter (after_wait, Self) ;
      } else {
         guarantee (v == ObjectWaiter::TS_ENTER || v == ObjectWaiter::TS_CXQ, "invariant") ;
         ReenterI (Self, &node) ;
@@ -1663,7 +1585,9 @@
    jt->set_current_waiting_monitor(NULL);
 
    guarantee (_recursions == 0, "invariant") ;
-   _recursions = save;     // restore the old recursion count
+   // restore the saved recursion count and exit stack
+   _recursions = saved_recursions;
+   _trace_exit_stack = saved_trace_exit_stack;
    _waiters--;             // decrement the number of waiters
 
    // Verify a few postconditions
@@ -2529,6 +2453,9 @@
      SETKNOB(FastHSSEC) ;
      #undef SETKNOB
 
+     guarantee(Knob_ExitPolicy == 0, "Sorry, event tracing does not support non-default ExitPolicy");
+     guarantee(Knob_QMode == 0, "Sorry, event tracing does not support non-default QMode");
+
      if (os::is_MP()) {
         BackOffMask = (1 << Knob_SpinBackOff) - 1 ;
         if (Knob_ReportSettings) ::printf ("BackOffMask=%X\n", BackOffMask) ;
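[Editorial note, not part of the patch] complete_exit(), reenter() and wait() above share one contract: the recursion count and the pending trace exit stack are saved into caller-owned locals across the temporary exit and restored verbatim once the monitor is reacquired. The toy sketch below (an invented ToyMonitor type, illustrative only) captures that save/restore round trip.

    // Standalone sketch of the save/restore contract used by wait()/complete_exit()/reenter().
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    struct ToyMonitor {
      intptr_t _recursions;
      intptr_t _trace_exit_stack;

      void complete_exit(intptr_t* saved_recursions, intptr_t* saved_trace_exit_stack) {
        if (saved_recursions != NULL)       *saved_recursions       = _recursions;
        if (saved_trace_exit_stack != NULL) *saved_trace_exit_stack = _trace_exit_stack;
        _recursions = 0;
        _trace_exit_stack = 0;   // a real exit consumes the pending exit stack
      }
      void reenter(intptr_t saved_recursions, intptr_t saved_trace_exit_stack) {
        assert(_recursions == 0);
        _recursions       = saved_recursions;        // restore recursion depth
        _trace_exit_stack = saved_trace_exit_stack;  // restore pending exit stack
      }
    };

    int main() {
      ToyMonitor m = { 3, 42 };
      intptr_t saved_recursions = 0, saved_stack = 0;
      m.complete_exit(&saved_recursions, &saved_stack);  // temporary exit, e.g. for Object.wait()
      assert(m._recursions == 0);
      m.reenter(saved_recursions, saved_stack);          // back to the pre-wait state
      assert(m._recursions == 3 && m._trace_exit_stack == 42);
      return 0;
    }

[end of note]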
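[Editorial note, not part of the patch] The wait() epilogue above also classifies why the waiter woke up before re-entering, so the contended-enter event can distinguish a notify, a timeout, and any other wakeup. A standalone sketch of that classification follows; the enum and its numeric values are stand-ins for the patch's TraceTypes constants.

    // Standalone sketch of the after-wait classification done before enter(after_wait, Self).
    #include <cstdio>

    enum MonitorEnterWait {
      enter_no_wait            = 0,
      enter_after_wait_notify  = 1,
      enter_after_wait_timeout = 2,
      enter_after_wait_other   = 3
    };

    static MonitorEnterWait classify_wakeup(bool notified, bool timed_out) {
      if (notified)  return enter_after_wait_notify;   // woken by notify()/notifyAll()
      if (timed_out) return enter_after_wait_timeout;  // wait(ms) elapsed
      return enter_after_wait_other;                   // e.g. spurious or interrupt path
    }

    int main() {
      std::printf("%d %d %d\n",
                  classify_wakeup(true, false),
                  classify_wakeup(false, true),
                  classify_wakeup(false, false));
      return 0;
    }

[end of note]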