--- old/src/share/vm/runtime/objectMonitor.cpp  2016-10-25 10:40:10.127778156 +0200
+++ new/src/share/vm/runtime/objectMonitor.cpp  2016-10-25 10:40:10.078778124 +0200
@@ -39,6 +39,7 @@
 #include "services/threadService.hpp"
 #include "trace/tracing.hpp"
 #include "trace/traceMacros.hpp"
+#include "evtrace/traceEvents.hpp"
 #include "utilities/dtrace.hpp"
 #include "utilities/macros.hpp"
 #include "utilities/preserveException.hpp"
@@ -315,7 +316,7 @@
   }
 }
 
-void ATTR ObjectMonitor::enter(TRAPS) {
+void ATTR ObjectMonitor::enter(int after_wait, TRAPS) {
   // The following code is ordered to check the most common cases first
   // and to reduce RTS->RTO cache line upgrades on SPARC and IA32 processors.
   Thread * const Self = THREAD ;
@@ -378,9 +379,14 @@
 
     EventJavaMonitorEnter event;
 
+    int trace_flags = 0;
     { // Change java thread status to indicate blocked on monitor enter.
       JavaThreadBlockedOnMonitorEnterState jtbmes(jt, this);
 
+      if (EnableEventTracing) {
+        TraceEvents::write_monitor_contended_enter(this, (TraceTypes::monitor_enter_wait) after_wait);
+      }
+
       DTRACE_MONITOR_PROBE(contended__enter, this, object(), jt);
       if (JvmtiExport::should_post_monitor_contended_enter()) {
         JvmtiExport::post_monitor_contended_enter(jt, this);
@@ -403,7 +409,7 @@
         // cleared by handle_special_suspend_equivalent_condition()
         // or java_suspend_self()
 
-        EnterI (THREAD) ;
+        trace_flags |= EnterI (THREAD) ;
 
         if (!ExitSuspendEquivalent(jt)) break ;
 
@@ -453,6 +459,10 @@
   // yet to acquire the lock.  While spinning that thread could
   // spinning we could increment JVMStat counters, etc.
 
+  if (EnableEventTracing) {
+    TraceEvents::write_monitor_contended_entered(this, (TraceTypes::monitor_entered_flags) trace_flags);
+  }
+
   DTRACE_MONITOR_PROBE(contended__entered, this, object(), jt);
   if (JvmtiExport::should_post_monitor_contended_entered()) {
     JvmtiExport::post_monitor_contended_entered(jt, this);
@@ -499,7 +509,9 @@
   }
 }
 
-void ATTR ObjectMonitor::EnterI (TRAPS) {
+int ATTR ObjectMonitor::EnterI (TRAPS) {
+    int trace_flags = 0;
+
     Thread * Self = THREAD ;
     assert (Self->is_Java_thread(), "invariant") ;
     assert (((JavaThread *) Self)->thread_state() == _thread_blocked, "invariant") ;
@@ -509,7 +521,7 @@
         assert (_succ != Self              , "invariant") ;
         assert (_owner == Self             , "invariant") ;
         assert (_Responsible != Self       , "invariant") ;
-        return ;
+        return trace_flags;
     }
 
     DeferredInitialize () ;
@@ -525,7 +537,7 @@
         assert (_owner == Self       , "invariant") ;
         assert (_succ != Self        , "invariant") ;
         assert (_Responsible != Self , "invariant") ;
-        return ;
+        return trace_flags;
     }
 
     // The Spin failed -- Enqueue and park the thread ...
@@ -563,10 +575,12 @@
             assert (_succ != Self        , "invariant") ;
             assert (_owner == Self       , "invariant") ;
             assert (_Responsible != Self , "invariant") ;
-            return ;
+            return trace_flags;
         }
     }
 
+    trace_flags |= TraceTypes::entered_queued;
+
     // Check for cxq|EntryList edge transition to non-null.  This indicates
     // the onset of contention.  While contention persists exiting threads
     // will use a ST:MEMBAR:LD 1-1 exit protocol.  When contention abates exit
@@ -632,6 +646,8 @@
            Self->_ParkEvent->park() ;
         }
 
+        trace_flags |= TraceTypes::entered_parked;
+
         if (TryLock(Self) > 0) break ;
 
         // The lock is still contested.
@@ -737,7 +753,7 @@
 
     if (SyncFlags & 8) {
        OrderAccess::fence() ;
     }
-    return ;
+    return trace_flags;
 }
 
 // ReenterI() is a specialized inline form of the latter half of the
@@ -953,7 +969,7 @@
 // Both impinge on OS scalability.  Given that, at most one thread parked on
 // a monitor will use a timer.
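[Editorial note, not part of the patch] The hunks above thread a small bitmask through the contended-enter path: EnterI() now returns how the thread had to wait (TraceTypes::entered_queued once it enqueues itself, TraceTypes::entered_parked once it actually parks), and enter() ORs the result of every EnterI() call into trace_flags before emitting the contended-entered event. The minimal, standalone C++ sketch below illustrates that accumulation pattern; the enum, simulated_EnterI() and write_entered() are invented stand-ins for the patch's TraceTypes/TraceEvents names, not real HotSpot code.

    // Standalone sketch of the flag-accumulation pattern used by enter()/EnterI().
    // Names are illustrative stand-ins only.
    #include <cstdio>

    enum MonitorEnteredFlags {
      entered_flags_none = 0,
      entered_queued     = 1 << 0,  // thread had to enqueue itself on cxq
      entered_parked     = 1 << 1   // thread actually parked at least once
    };

    static int simulated_EnterI(bool had_to_queue, bool had_to_park) {
      int flags = entered_flags_none;
      if (had_to_queue) flags |= entered_queued;
      if (had_to_park)  flags |= entered_parked;
      return flags;
    }

    // Stand-in for the patch's TraceEvents::write_monitor_contended_entered().
    static void write_entered(int flags) {
      std::printf("contended-entered: queued=%d parked=%d\n",
                  (flags & entered_queued) != 0,
                  (flags & entered_parked) != 0);
    }

    int main() {
      int trace_flags = 0;
      // enter() may re-run EnterI() when suspension intervenes;
      // every attempt contributes its flags to the same event.
      trace_flags |= simulated_EnterI(true, false);
      trace_flags |= simulated_EnterI(true, true);
      write_entered(trace_flags);
      return 0;
    }

Accumulating across calls matters because enter() loops over EnterI() while ExitSuspendEquivalent() keeps firing, and the single contended-entered event should describe the whole acquisition. [end of note]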
 
-void ATTR ObjectMonitor::exit(bool not_suspended, TRAPS) {
+void ATTR ObjectMonitor::exit(intptr_t *exit_stack_id_for_wait, bool not_suspended, TRAPS) {
    Thread * Self = THREAD ;
    if (THREAD != _owner) {
      if (THREAD->is_lock_owned((address) _owner)) {
@@ -998,191 +1014,109 @@
    }
 #endif
 
-   for (;;) {
-      assert (THREAD == _owner, "invariant") ;
-
+   TraceEventMonitorContendedExited event(this);
+   if (exit_stack_id_for_wait != NULL) {
+     // This is a temporary exit for Object.wait().
+     // We don't want to use the current stack trace as the lock site, so if we
+     // end up writing the event, we allocate a stack id that we resolve later
+     // when the monitor is really exited.  When there are multiple waits, we
+     // reuse the first preallocated stack id.
+     event.set_use_or_preallocate_stack_id_at((TraceTypes::stack_id *) exit_stack_id_for_wait);
+     event.set_resolve_stack(false);
+   } else {
+     // true exit
+     event.set_resolve_stack(true);
+     if (_trace_exit_stack != 0) {
+       event.set_use_stack_id(_trace_exit_stack);
+       event.enable(); // always write the exit event to resolve the stack
+     }
+   }
+   if ((intptr_t(_EntryList) | intptr_t(_cxq)) != 0) {
+     // there are queued threads -- we are definitely writing a trace event
+     event.enable();
+   }
 
-      if (Knob_ExitPolicy == 0) {
-         // release semantics: prior loads and stores from within the critical section
-         // must not float (reorder) past the following store that drops the lock.
-         // On SPARC that requires MEMBAR #loadstore|#storestore.
-         // But of course in TSO #loadstore|#storestore is not required.
-         // I'd like to write one of the following:
-         // A.  OrderAccess::release() ; _owner = NULL
-         // B.  OrderAccess::loadstore(); OrderAccess::storestore(); _owner = NULL;
-         // Unfortunately OrderAccess::release() and OrderAccess::loadstore() both
-         // store into a _dummy variable.  That store is not needed, but can result
-         // in massive wasteful coherency traffic on classic SMP systems.
-         // Instead, I use release_store(), which is implemented as just a simple
-         // ST on x64, x86 and SPARC.
-         OrderAccess::release_store_ptr (&_owner, NULL) ;   // drop the lock
-         OrderAccess::storeload() ;                         // See if we need to wake a successor
-         if ((intptr_t(_EntryList)|intptr_t(_cxq)) == 0 || _succ != NULL) {
-            TEVENT (Inflated exit - simple egress) ;
-            return ;
-         }
-         TEVENT (Inflated exit - complex egress) ;
+   _trace_exit_stack = 0;
 
-         // Normally the exiting thread is responsible for ensuring succession,
-         // but if other successors are ready or other entering threads are spinning
-         // then this thread can simply store NULL into _owner and exit without
-         // waking a successor.  The existence of spinners or ready successors
-         // guarantees proper succession (liveness).  Responsibility passes to the
-         // ready or running successors.  The exiting thread delegates the duty.
-         // More precisely, if a successor already exists this thread is absolved
-         // of the responsibility of waking (unparking) one.
-         //
-         // The _succ variable is critical to reducing futile wakeup frequency.
-         // _succ identifies the "heir presumptive" thread that has been made
-         // ready (unparked) but that has not yet run.  We need only one such
-         // successor thread to guarantee progress.
-         // See http://www.usenix.org/events/jvm01/full_papers/dice/dice.pdf
-         // section 3.3 "Futile Wakeup Throttling" for details.
-         //
-         // Note that spinners in Enter() also set _succ non-null.
-         // In the current implementation spinners opportunistically set
-         // _succ so that exiting threads might avoid waking a successor.
-         // Another less appealing alternative would be for the exiting thread
-         // to drop the lock and then spin briefly to see if a spinner managed
-         // to acquire the lock.  If so, the exiting thread could exit
-         // immediately without waking a successor, otherwise the exiting
-         // thread would need to dequeue and wake a successor.
-         // (Note that we'd need to make the post-drop spin short, but no
-         // shorter than the worst-case round-trip cache-line migration time.
-         // The dropped lock needs to become visible to the spinner, and then
-         // the acquisition of the lock by the spinner must become visible to
-         // the exiting thread).
-         //
+   for (;;) {
+      assert (THREAD == _owner, "invariant") ;
 
-         // It appears that an heir-presumptive (successor) must be made ready.
-         // Only the current lock owner can manipulate the EntryList or
-         // drain _cxq, so we need to reacquire the lock.   If we fail
-         // to reacquire the lock the responsibility for ensuring succession
-         // falls to the new owner.
-         //
-         if (Atomic::cmpxchg_ptr (THREAD, &_owner, NULL) != NULL) {
-            return ;
-         }
-         TEVENT (Exit - Reacquired) ;
-      } else {
-         if ((intptr_t(_EntryList)|intptr_t(_cxq)) == 0 || _succ != NULL) {
-            OrderAccess::release_store_ptr (&_owner, NULL) ;   // drop the lock
-            OrderAccess::storeload() ;
-            // Ratify the previously observed values.
-            if (_cxq == NULL || _succ != NULL) {
-               TEVENT (Inflated exit - simple egress) ;
-               return ;
-            }
+      //
+      // NOTE: we have removed all code paths for ExitPolicy != 0 and QMode != 0
+      // knob values for simplicity of event tracing.
+      //
 
-            // inopportune interleaving -- the exiting thread (this thread)
-            // in the fast-exit path raced an entering thread in the slow-enter
-            // path.
-            // We have two choices:
-            //   A.  Try to reacquire the lock.
-            //       If the CAS() fails return immediately, otherwise
-            //       we either restart/rerun the exit operation, or simply
-            //       fall-through into the code below which wakes a successor.
-            //   B.  If the elements forming the EntryList|cxq are TSM
-            //       we could simply unpark() the lead thread and return
-            //       without having set _succ.
-            if (Atomic::cmpxchg_ptr (THREAD, &_owner, NULL) != NULL) {
-               TEVENT (Inflated exit - reacquired succeeded) ;
-               return ;
-            }
-            TEVENT (Inflated exit - reacquired failed) ;
-         } else {
-            TEVENT (Inflated exit - complex egress) ;
-         }
+      // release semantics: prior loads and stores from within the critical section
+      // must not float (reorder) past the following store that drops the lock.
+      // On SPARC that requires MEMBAR #loadstore|#storestore.
+      // But of course in TSO #loadstore|#storestore is not required.
+      // I'd like to write one of the following:
+      // A.  OrderAccess::release() ; _owner = NULL
+      // B.  OrderAccess::loadstore(); OrderAccess::storestore(); _owner = NULL;
+      // Unfortunately OrderAccess::release() and OrderAccess::loadstore() both
+      // store into a _dummy variable.  That store is not needed, but can result
+      // in massive wasteful coherency traffic on classic SMP systems.
+      // Instead, I use release_store(), which is implemented as just a simple
+      // ST on x64, x86 and SPARC.
+      OrderAccess::release_store_ptr (&_owner, NULL) ;   // drop the lock
+      OrderAccess::storeload() ;                         // See if we need to wake a successor
+      bool queues_empty = ((intptr_t(_EntryList) | intptr_t(_cxq)) == 0);
+      bool have_succ = (_succ != NULL);
+      if (!queues_empty) {
+        // some thread might have entered itself on _cxq in the meantime
+        event.enable();
       }
-
-      guarantee (_owner == THREAD, "invariant") ;
-
-      ObjectWaiter * w = NULL ;
-      int QMode = Knob_QMode ;
-
-      if (QMode == 2 && _cxq != NULL) {
-          // QMode == 2 : cxq has precedence over EntryList.
-          // Try to directly wake a successor from the cxq.
-          // If successful, the successor will need to unlink itself from cxq.
-          w = _cxq ;
-          assert (w != NULL, "invariant") ;
-          assert (w->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
-          ExitEpilog (Self, w) ;
+      if (queues_empty || have_succ) {
+         TEVENT (Inflated exit - simple egress) ;
          return ;
      }
+      TEVENT (Inflated exit - complex egress) ;
 
-      if (QMode == 3 && _cxq != NULL) {
-          // Aggressively drain cxq into EntryList at the first opportunity.
-          // This policy ensure that recently-run threads live at the head of EntryList.
-          // Drain _cxq into EntryList - bulk transfer.
-          // First, detach _cxq.
-          // The following loop is tantamount to: w = swap (&cxq, NULL)
-          w = _cxq ;
-          for (;;) {
-             assert (w != NULL, "Invariant") ;
-             ObjectWaiter * u = (ObjectWaiter *) Atomic::cmpxchg_ptr (NULL, &_cxq, w) ;
-             if (u == w) break ;
-             w = u ;
-          }
-          assert (w != NULL              , "invariant") ;
-
-          ObjectWaiter * q = NULL ;
-          ObjectWaiter * p ;
-          for (p = w ; p != NULL ; p = p->_next) {
-              guarantee (p->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
-              p->TState = ObjectWaiter::TS_ENTER ;
-              p->_prev = q ;
-              q = p ;
-          }
-
-          // Append the RATs to the EntryList
-          // TODO: organize EntryList as a CDLL so we can locate the tail in constant-time.
-          ObjectWaiter * Tail ;
-          for (Tail = _EntryList ; Tail != NULL && Tail->_next != NULL ; Tail = Tail->_next) ;
-          if (Tail == NULL) {
-              _EntryList = w ;
-          } else {
-              Tail->_next = w ;
-              w->_prev = Tail ;
-          }
+      // Normally the exiting thread is responsible for ensuring succession,
+      // but if other successors are ready or other entering threads are spinning
+      // then this thread can simply store NULL into _owner and exit without
+      // waking a successor.  The existence of spinners or ready successors
+      // guarantees proper succession (liveness).  Responsibility passes to the
+      // ready or running successors.  The exiting thread delegates the duty.
+      // More precisely, if a successor already exists this thread is absolved
+      // of the responsibility of waking (unparking) one.
+      //
+      // The _succ variable is critical to reducing futile wakeup frequency.
+      // _succ identifies the "heir presumptive" thread that has been made
+      // ready (unparked) but that has not yet run.  We need only one such
+      // successor thread to guarantee progress.
+      // See http://www.usenix.org/events/jvm01/full_papers/dice/dice.pdf
+      // section 3.3 "Futile Wakeup Throttling" for details.
+      //
+      // Note that spinners in Enter() also set _succ non-null.
+      // In the current implementation spinners opportunistically set
+      // _succ so that exiting threads might avoid waking a successor.
+      // Another less appealing alternative would be for the exiting thread
+      // to drop the lock and then spin briefly to see if a spinner managed
+      // to acquire the lock.  If so, the exiting thread could exit
+      // immediately without waking a successor, otherwise the exiting
+      // thread would need to dequeue and wake a successor.
+      // (Note that we'd need to make the post-drop spin short, but no
+      // shorter than the worst-case round-trip cache-line migration time.
+      // The dropped lock needs to become visible to the spinner, and then
+      // the acquisition of the lock by the spinner must become visible to
+      // the exiting thread).
+      //
 
-          // Fall thru into code that tries to wake a successor from EntryList
+      // It appears that an heir-presumptive (successor) must be made ready.
+      // Only the current lock owner can manipulate the EntryList or
+      // drain _cxq, so we need to reacquire the lock.   If we fail
+      // to reacquire the lock the responsibility for ensuring succession
+      // falls to the new owner.
+      //
+      if (Atomic::cmpxchg_ptr (THREAD, &_owner, NULL) != NULL) {
+         return ;
      }
+      TEVENT (Exit - Reacquired) ;
 
-      if (QMode == 4 && _cxq != NULL) {
-          // Aggressively drain cxq into EntryList at the first opportunity.
-          // This policy ensure that recently-run threads live at the head of EntryList.
-
-          // Drain _cxq into EntryList - bulk transfer.
-          // First, detach _cxq.
-          // The following loop is tantamount to: w = swap (&cxq, NULL)
-          w = _cxq ;
-          for (;;) {
-             assert (w != NULL, "Invariant") ;
-             ObjectWaiter * u = (ObjectWaiter *) Atomic::cmpxchg_ptr (NULL, &_cxq, w) ;
-             if (u == w) break ;
-             w = u ;
-          }
-          assert (w != NULL              , "invariant") ;
-
-          ObjectWaiter * q = NULL ;
-          ObjectWaiter * p ;
-          for (p = w ; p != NULL ; p = p->_next) {
-              guarantee (p->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
-              p->TState = ObjectWaiter::TS_ENTER ;
-              p->_prev = q ;
-              q = p ;
-          }
-
-          // Prepend the RATs to the EntryList
-          if (_EntryList != NULL) {
-              q->_next = _EntryList ;
-              _EntryList->_prev = q ;
-          }
-          _EntryList = w ;
+      guarantee (_owner == THREAD, "invariant") ;
 
-          // Fall thru into code that tries to wake a successor from EntryList
-      }
+      ObjectWaiter * w = NULL ;
       w = _EntryList ;
       if (w != NULL) {
@@ -1230,34 +1164,14 @@
          // TODO-FIXME: consider changing EntryList from a DLL to a CDLL so
          // we have faster access to the tail.
 
-         if (QMode == 1) {
-            // QMode == 1 : drain cxq to EntryList, reversing order
-            // We also reverse the order of the list.
-            ObjectWaiter * s = NULL ;
-            ObjectWaiter * t = w ;
-            ObjectWaiter * u = NULL ;
-            while (t != NULL) {
-                guarantee (t->TState == ObjectWaiter::TS_CXQ, "invariant") ;
-                t->TState = ObjectWaiter::TS_ENTER ;
-                u = t->_next ;
-                t->_prev = u ;
-                t->_next = s ;
-                s = t;
-                t = u ;
-            }
-            _EntryList  = s ;
-            assert (s != NULL, "invariant") ;
-         } else {
-            // QMode == 0 or QMode == 2
-            _EntryList = w ;
-            ObjectWaiter * q = NULL ;
-            ObjectWaiter * p ;
-            for (p = w ; p != NULL ; p = p->_next) {
-                guarantee (p->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
-                p->TState = ObjectWaiter::TS_ENTER ;
-                p->_prev = q ;
-                q = p ;
-            }
+         _EntryList = w ;
+         ObjectWaiter * q = NULL ;
+         ObjectWaiter * p ;
+         for (p = w ; p != NULL ; p = p->_next) {
+             guarantee (p->TState == ObjectWaiter::TS_CXQ, "Invariant") ;
+             p->TState = ObjectWaiter::TS_ENTER ;
+             p->_prev = q ;
+             q = p ;
         }
 
        // In 1-0 mode we need: ST EntryList; MEMBAR #storestore; ST _owner = NULL
@@ -1368,7 +1282,7 @@
 // The _owner field is not always the Thread addr even with an
 // inflated monitor, e.g. the monitor can be inflated by a non-owning
 // thread due to contention.
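[Editorial note, not part of the patch] The rewritten exit() above constructs a TraceEventMonitorContendedExited object up front, calls enable() only once it observes contention (queued threads on _EntryList/_cxq) or a pending _trace_exit_stack that must be resolved, and never calls an explicit write. The event is presumably emitted when the object goes out of scope; under that assumption, the standalone sketch below shows the RAII shape with an invented ScopedExitEvent class, not the patch's actual event type.

    // Standalone sketch of the RAII exit-event pattern; types are stand-ins.
    #include <cstdio>
    #include <cstdint>

    class ScopedExitEvent {
      const void* _monitor;
      intptr_t    _stack_id;
      bool        _enabled;
      bool        _resolve_stack;
    public:
      explicit ScopedExitEvent(const void* monitor)
        : _monitor(monitor), _stack_id(0), _enabled(false), _resolve_stack(true) {}
      void enable()                      { _enabled = true; }
      void set_use_stack_id(intptr_t id) { _stack_id = id; }
      void set_resolve_stack(bool b)     { _resolve_stack = b; }
      ~ScopedExitEvent() {
        // Written on scope exit, but only if some path decided the exit is worth reporting.
        if (_enabled) {
          std::printf("contended-exited: monitor=%p stack=%ld resolve=%d\n",
                      _monitor, (long)_stack_id, (int)_resolve_stack);
        }
      }
    };

    int main() {
      int dummy_monitor = 0;
      {
        ScopedExitEvent event(&dummy_monitor);
        bool queues_empty = false;          // pretend _EntryList/_cxq were non-empty
        if (!queues_empty) event.enable();  // only contended exits produce an event
      }                                     // event written here, on scope exit
      return 0;
    }

The benefit of the RAII form is that every early return kept in the rewritten exit() is covered without duplicating the reporting call. [end of note]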
 
-intptr_t ObjectMonitor::complete_exit(TRAPS) {
+void ObjectMonitor::complete_exit(intptr_t *saved_recursions, intptr_t *saved_trace_exit_stack, TRAPS) {
    Thread * const Self = THREAD;
    assert(Self->is_Java_thread(), "Must be Java thread!");
    JavaThread *jt = (JavaThread *)THREAD;
@@ -1385,16 +1299,17 @@
    }
 
    guarantee(Self == _owner, "complete_exit not owner");
-   intptr_t save = _recursions; // record the old recursion count
-   _recursions = 0;             // set the recursion level to be 0
-   exit (true, Self) ;          // exit the monitor
+   // record old recursion level and exit stack
+   if (saved_recursions != NULL) *saved_recursions = _recursions;
+   if (saved_trace_exit_stack != NULL) *saved_trace_exit_stack = _trace_exit_stack;
+   _recursions = 0;
+   exit(saved_trace_exit_stack, true, Self);
    guarantee (_owner != Self, "invariant");
-   return save;
 }
 
 // reenter() enters a lock and sets recursion count
 // complete_exit/reenter operate as a wait without waiting
-void ObjectMonitor::reenter(intptr_t recursions, TRAPS) {
+void ObjectMonitor::reenter(intptr_t saved_recursions, intptr_t saved_trace_exit_stack, TRAPS) {
    Thread * const Self = THREAD;
    assert(Self->is_Java_thread(), "Must be Java thread!");
    JavaThread *jt = (JavaThread *)THREAD;
@@ -1402,8 +1317,8 @@
    guarantee(_owner != Self, "reenter already owner");
    enter (THREAD);       // enter the monitor
    guarantee (_recursions == 0, "reenter recursion");
-   _recursions = recursions;
-   return;
+   _recursions = saved_recursions;
+   _trace_exit_stack = saved_trace_exit_stack;
 }
 
 
@@ -1524,10 +1439,11 @@
    if ((SyncFlags & 4) == 0) {
       _Responsible = NULL ;
    }
-   intptr_t save = _recursions; // record the old recursion count
+   intptr_t saved_recursions = _recursions; // record the old recursion count
+   intptr_t saved_trace_exit_stack = _trace_exit_stack;
    _waiters++;                  // increment the number of waiters
    _recursions = 0;             // set the recursion level to be 1
-   exit (true, Self) ;          // exit the monitor
+   exit(&saved_trace_exit_stack, true, Self); // exit, knows how to handle exit stack
    guarantee (_owner != Self, "invariant") ;
 
    // The thread is on the WaitSet list - now park() it.
@@ -1644,7 +1560,13 @@
      assert (_owner != Self, "invariant") ;
      ObjectWaiter::TStates v = node.TState ;
      if (v == ObjectWaiter::TS_RUN) {
-        enter (Self) ;
+        int after_wait = TraceTypes::enter_after_wait_other;
+        if (node._notified) {
+          after_wait = TraceTypes::enter_after_wait_notify;
+        } else if (ret == OS_TIMEOUT) {
+          after_wait = TraceTypes::enter_after_wait_timeout;
+        }
+        enter (after_wait, Self) ;
      } else {
         guarantee (v == ObjectWaiter::TS_ENTER || v == ObjectWaiter::TS_CXQ, "invariant") ;
         ReenterI (Self, &node) ;
@@ -1663,7 +1585,9 @@
    jt->set_current_waiting_monitor(NULL);
 
    guarantee (_recursions == 0, "invariant") ;
-   _recursions = save;     // restore the old recursion count
+   // restore the saved recursion count and exit stack
+   _recursions = saved_recursions;
+   _trace_exit_stack = saved_trace_exit_stack;
    _waiters--;             // decrement the number of waiters
 
    // Verify a few postconditions
@@ -2529,6 +2453,9 @@
      SETKNOB(FastHSSEC) ;
      #undef SETKNOB
 
+     guarantee(Knob_ExitPolicy == 0, "Sorry, event tracing does not support non-default ExitPolicy");
+     guarantee(Knob_QMode == 0, "Sorry, event tracing does not support non-default QMode");
+
      if (os::is_MP()) {
         BackOffMask = (1 << Knob_SpinBackOff) - 1 ;
         if (Knob_ReportSettings) ::printf ("BackOffMask=%X\n", BackOffMask) ;
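[Editorial note, not part of the patch] complete_exit(), reenter() and wait() above share one contract: the recursion count and the pending trace exit stack are saved into caller-owned locals across the temporary exit and restored verbatim once the monitor is reacquired. The toy sketch below (an invented ToyMonitor type, illustrative only) captures that save/restore round trip.

    // Standalone sketch of the save/restore contract used by wait()/complete_exit()/reenter().
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    struct ToyMonitor {
      intptr_t _recursions;
      intptr_t _trace_exit_stack;

      void complete_exit(intptr_t* saved_recursions, intptr_t* saved_trace_exit_stack) {
        if (saved_recursions != NULL)       *saved_recursions       = _recursions;
        if (saved_trace_exit_stack != NULL) *saved_trace_exit_stack = _trace_exit_stack;
        _recursions = 0;
        _trace_exit_stack = 0;   // a real exit consumes the pending exit stack
      }
      void reenter(intptr_t saved_recursions, intptr_t saved_trace_exit_stack) {
        assert(_recursions == 0);
        _recursions       = saved_recursions;        // restore recursion depth
        _trace_exit_stack = saved_trace_exit_stack;  // restore pending exit stack
      }
    };

    int main() {
      ToyMonitor m = { 3, 42 };
      intptr_t saved_recursions = 0, saved_stack = 0;
      m.complete_exit(&saved_recursions, &saved_stack);  // temporary exit, e.g. for Object.wait()
      assert(m._recursions == 0);
      m.reenter(saved_recursions, saved_stack);          // back to the pre-wait state
      assert(m._recursions == 3 && m._trace_exit_stack == 42);
      return 0;
    }

[end of note]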
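[Editorial note, not part of the patch] The wait() epilogue above also classifies why the waiter woke up before re-entering, so the contended-enter event can distinguish a notify, a timeout, and any other wakeup. A standalone sketch of that classification follows; the enum and its numeric values are stand-ins for the patch's TraceTypes constants.

    // Standalone sketch of the after-wait classification done before enter(after_wait, Self).
    #include <cstdio>

    enum MonitorEnterWait {
      enter_no_wait            = 0,
      enter_after_wait_notify  = 1,
      enter_after_wait_timeout = 2,
      enter_after_wait_other   = 3
    };

    static MonitorEnterWait classify_wakeup(bool notified, bool timed_out) {
      if (notified)  return enter_after_wait_notify;   // woken by notify()/notifyAll()
      if (timed_out) return enter_after_wait_timeout;  // wait(ms) elapsed
      return enter_after_wait_other;                   // e.g. spurious or interrupt path
    }

    int main() {
      std::printf("%d %d %d\n",
                  classify_wakeup(true, false),
                  classify_wakeup(false, true),
                  classify_wakeup(false, false));
      return 0;
    }

[end of note]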