--- old/src/share/vm/classfile/vmSymbols.hpp	Fri May 15 15:30:21 2015
+++ new/src/share/vm/classfile/vmSymbols.hpp	Fri May 15 15:30:20 2015
@@ -639,6 +639,10 @@
   do_name(     getClass_name,      "getClass")                                                \
   do_intrinsic(_clone,             java_lang_Object, clone_name, void_object_signature, F_R)  \
   do_name(     clone_name,         "clone")                                                   \
+  do_intrinsic(_notify,            java_lang_Object, notify_name, void_method_signature, F_R) \
+  do_name(     notify_name,        "notify")                                                  \
+  do_intrinsic(_notifyAll,         java_lang_Object, notifyAll_name, void_method_signature, F_R) \
+  do_name(     notifyAll_name,     "notifyAll")                                               \
                                                                                               \
   /* Math & StrictMath intrinsics are defined in terms of just a few signatures: */           \
   do_class(java_lang_Math,         "java/lang/Math")                                          \
--- old/src/share/vm/opto/library_call.cpp	Fri May 15 15:30:23 2015
+++ new/src/share/vm/opto/library_call.cpp	Fri May 15 15:30:22 2015
@@ -225,6 +225,7 @@
   bool inline_pow();
   Node* finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName);
   bool inline_min_max(vmIntrinsics::ID id);
+  bool inline_notify(vmIntrinsics::ID id);
   Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
   // This returns Type::AnyPtr, RawPtr, or OopPtr.
   int classify_unsafe_addr(Node* &base, Node* &offset);
@@ -737,6 +738,13 @@
   case vmIntrinsics::_min:
   case vmIntrinsics::_max:                  return inline_min_max(intrinsic_id());
 
+  case vmIntrinsics::_notify:
+  case vmIntrinsics::_notifyAll:
+    if (InlineNotify) {
+      return inline_notify(intrinsic_id());
+    }
+    return false;
+
   case vmIntrinsics::_addExactI:            return inline_math_addExactI(false /* add */);
   case vmIntrinsics::_addExactL:            return inline_math_addExactL(false /* add */);
   case vmIntrinsics::_decrementExactI:      return inline_math_subtractExactI(true /* decrement */);
@@ -2017,6 +2025,21 @@
   );
 }
 
+//----------------------------inline_notify-----------------------------------
+bool LibraryCallKit::inline_notify(vmIntrinsics::ID id) {
+  const TypeFunc* ftype = OptoRuntime::monitor_notify_Type();
+  address func;
+  if (id == vmIntrinsics::_notify) {
+    func = OptoRuntime::monitor_notify_Java();
+  } else {
+    func = OptoRuntime::monitor_notifyAll_Java();
+  }
+  Node* call = make_runtime_call(RC_NO_LEAF, ftype, func, NULL, TypeRawPtr::BOTTOM, argument(0));
+  make_slow_call_ex(call, env()->Throwable_klass(), false);
+  return true;
+}
+
 //----------------------------inline_min_max-----------------------------------
 bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) {
   set_result(generate_min_max(id, argument(0), argument(1)));
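The intrinsic itself is deliberately thin: both notify and notifyAll lower to the same one-argument call shape (monitor_notify_Type below), and only the runtime entry point differs, per the if/else in inline_notify(); make_slow_call_ex() then wires up the exceptional return, since notify() may throw IllegalMonitorStateException. A standalone C++ sketch of that dispatch, with all names invented for illustration (a model, not HotSpot code):

    #include <cstdio>

    // One shared signature for both entry points, as with monitor_notify_Type().
    typedef void (*MonitorStub)(void* obj);

    static void monitor_notify_stub(void* obj)    { std::printf("notify %p\n", obj); }
    static void monitor_notifyAll_stub(void* obj) { std::printf("notifyAll %p\n", obj); }

    enum IntrinsicId { id_notify, id_notifyAll };

    // Mirrors the if/else in LibraryCallKit::inline_notify(): the intrinsic id
    // selects the target address; the call shape is identical either way.
    static MonitorStub select_entry(IntrinsicId id) {
      return (id == id_notify) ? monitor_notify_stub : monitor_notifyAll_stub;
    }

    int main() {
      int obj = 0;
      select_entry(id_notify)(&obj);      // a compiled Object.notify() call site
      select_entry(id_notifyAll)(&obj);   // a compiled Object.notifyAll() call site
      return 0;
    }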
--- old/src/share/vm/opto/runtime.cpp	Fri May 15 15:30:26 2015
+++ new/src/share/vm/opto/runtime.cpp	Fri May 15 15:30:25 2015
@@ -96,6 +96,8 @@
 address OptoRuntime::_g1_wb_post_Java                = NULL;
 address OptoRuntime::_vtable_must_compile_Java       = NULL;
 address OptoRuntime::_complete_monitor_locking_Java  = NULL;
+address OptoRuntime::_monitor_notify_Java            = NULL;
+address OptoRuntime::_monitor_notifyAll_Java         = NULL;
 address OptoRuntime::_rethrow_Java                   = NULL;
 
 address OptoRuntime::_slow_arraycopy_Java            = NULL;
@@ -144,6 +146,8 @@
   gen(env, _g1_wb_pre_Java               , g1_wb_pre_Type              , SharedRuntime::g1_wb_pre        ,    0 , false, false, false);
   gen(env, _g1_wb_post_Java              , g1_wb_post_Type             , SharedRuntime::g1_wb_post       ,    0 , false, false, false);
   gen(env, _complete_monitor_locking_Java, complete_monitor_enter_Type , SharedRuntime::complete_monitor_locking_C, 0, false, false, false);
+  gen(env, _monitor_notify_Java          , monitor_notify_Type         , monitor_notify_C                ,    0 , false, false, false);
+  gen(env, _monitor_notifyAll_Java       , monitor_notify_Type         , monitor_notifyAll_C             ,    0 , false, false, false);
   gen(env, _rethrow_Java                 , rethrow_Type                , rethrow_C                       ,    2 , true , false, true );
   gen(env, _slow_arraycopy_Java          , slow_arraycopy_Type         , SharedRuntime::slow_arraycopy_C ,    0 , false, false, false);
@@ -426,7 +430,50 @@
   thread->set_vm_result(obj);
 JRT_END
 
+// Note that hashCode() deserves the same treatment as notify/notifyAll:
+// the situation is slightly more complicated since hashCode() is virtual,
+// requiring guards, whereas notify/notifyAll are final.
+JRT_BLOCK_ENTRY(void, OptoRuntime::monitor_notify_C(oopDesc* obj, JavaThread* thread))
+
+  // Very few notify/notifyAll operations find any threads on the waitset, so
+  // the dominant fast-path is to simply return.
+  // Relatedly, it's critical that notify/notifyAll be fast in order to
+  // reduce lock hold times.
+  if (!SafepointSynchronize::is_synchronizing()) {
+    if (ObjectSynchronizer::quick_notify(obj, thread, false)) {
+      return;
+    }
+  }
+
+  // This is the case the fast-path above isn't provisioned to handle.
+  // The fast-path is designed to handle frequently arising cases in an efficient manner.
+  // (The fast-path is just a degenerate variant of the slow-path.)
+  // Perform the dreaded state transition and pass control into the slow-path.
+  JRT_BLOCK;
+  Handle h_obj(THREAD, obj);
+  ObjectSynchronizer::notify(h_obj, CHECK);
+  JRT_BLOCK_END;
+JRT_END
+
+JRT_BLOCK_ENTRY(void, OptoRuntime::monitor_notifyAll_C(oopDesc* obj, JavaThread* thread))
+
+  if (!SafepointSynchronize::is_synchronizing()) {
+    if (ObjectSynchronizer::quick_notify(obj, thread, true)) {
+      return;
+    }
+  }
+
+  // This is the case the fast-path above isn't provisioned to handle.
+  // The fast-path is designed to handle frequently arising cases in an efficient manner.
+  // (The fast-path is just a degenerate variant of the slow-path.)
+  // Perform the dreaded state transition and pass control into the slow-path.
+  JRT_BLOCK;
+  Handle h_obj(THREAD, obj);
+  ObjectSynchronizer::notifyall(h_obj, CHECK);
+  JRT_BLOCK_END;
+JRT_END
+
 const TypeFunc *OptoRuntime::new_instance_Type() {
   // create input type (domain)
   const Type **fields = TypeTuple::fields(1);
@@ -604,16 +651,28 @@
   fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL;  // Object to be Locked
   fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM;    // Address of stack location for lock - BasicLock
   fields[TypeFunc::Parms+2] = TypeRawPtr::BOTTOM;    // Thread pointer (Self)
-  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+3,fields);
+  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+3, fields);
 
   // create result type (range)
   fields = TypeTuple::fields(0);
-  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
 
-  return TypeFunc::make(domain,range);
+  return TypeFunc::make(domain, range);
 }
 
+const TypeFunc *OptoRuntime::monitor_notify_Type() {
+  // create input type (domain)
+  const Type **fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Object to be Locked
+  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+  // create result type (range)
+  fields = TypeTuple::fields(0);
+  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
+  return TypeFunc::make(domain, range);
+}
+
 const TypeFunc* OptoRuntime::flush_windows_Type() {
   // create input type (domain)
   const Type** fields = TypeTuple::fields(1);
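Both JRT entry points above follow the same two-tier shape: consult the safepoint state, try the non-blocking fast path, and only take the dreaded thread-state transition when the fast path declines. A self-contained sketch of that control flow, using invented stand-ins rather than the real HotSpot types:

    #include <atomic>
    #include <cstdio>

    static std::atomic<bool> safepoint_pending{false};  // stands in for is_synchronizing()

    // Stands in for ObjectSynchronizer::quick_notify(): succeeds in the common
    // case and declines when the slow path is mandatory.
    static bool quick_notify(void* obj) {
      return obj != nullptr;
    }

    // Stands in for ObjectSynchronizer::notify(): the blocking slow path.
    static void slow_notify(void* obj) {
      std::printf("slow path for %p\n", obj);
    }

    // Mirrors monitor_notify_C: attempt the non-blocking fast path first, and
    // skip it entirely while a safepoint is pending so this thread reaches the
    // safepoint sooner.
    static void monitor_notify(void* obj) {
      if (!safepoint_pending.load() && quick_notify(obj)) {
        return;  // dominant fast path: no state transition, no blocking
      }
      slow_notify(obj);
    }

    int main() {
      int obj = 0;
      monitor_notify(&obj);       // takes the fast path
      safepoint_pending = true;
      monitor_notify(&obj);       // safepoint pending: falls into the slow path
      return 0;
    }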
--- old/src/share/vm/opto/runtime.hpp	Fri May 15 15:30:28 2015
+++ new/src/share/vm/opto/runtime.hpp	Fri May 15 15:30:28 2015
@@ -146,6 +146,8 @@
   static address _vtable_must_compile_Java;
   static address _complete_monitor_locking_Java;
   static address _rethrow_Java;
+  static address _monitor_notify_Java;
+  static address _monitor_notifyAll_Java;
 
   static address _slow_arraycopy_Java;
   static address _register_finalizer_Java;
@@ -186,6 +188,9 @@
   static void complete_monitor_locking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread);
   static void complete_monitor_unlocking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread);
 
+  static void monitor_notify_C(oopDesc* obj, JavaThread* thread);
+  static void monitor_notifyAll_C(oopDesc* obj, JavaThread* thread);
+
  private:
 
   // Implicit exception support
@@ -244,7 +249,9 @@
   static address g1_wb_pre_Java()                  { return _g1_wb_pre_Java; }
   static address g1_wb_post_Java()                 { return _g1_wb_post_Java; }
   static address vtable_must_compile_stub()        { return _vtable_must_compile_Java; }
-  static address complete_monitor_locking_Java()   { return _complete_monitor_locking_Java; }
+  static address complete_monitor_locking_Java()   { return _complete_monitor_locking_Java; }
+  static address monitor_notify_Java()             { return _monitor_notify_Java; }
+  static address monitor_notifyAll_Java()          { return _monitor_notifyAll_Java; }
 
   static address slow_arraycopy_Java()             { return _slow_arraycopy_Java; }
   static address register_finalizer_Java()         { return _register_finalizer_Java; }
@@ -285,6 +292,7 @@
   static const TypeFunc* g1_wb_post_Type();
   static const TypeFunc* complete_monitor_enter_Type();
   static const TypeFunc* complete_monitor_exit_Type();
+  static const TypeFunc* monitor_notify_Type();
   static const TypeFunc* uncommon_trap_Type();
   static const TypeFunc* athrow_Type();
   static const TypeFunc* rethrow_Type();
--- old/src/share/vm/runtime/globals.hpp	Fri May 15 15:30:30 2015
+++ new/src/share/vm/runtime/globals.hpp	Fri May 15 15:30:30 2015
@@ -1178,6 +1178,8 @@
                                                                             \
   experimental(intx, SyncVerbose, 0, "(Unstable)")                          \
                                                                             \
+  product(bool, InlineNotify, true, "intrinsify subset of notify")          \
+                                                                            \
   experimental(intx, ClearFPUAtPark, 0, "(Unsafe, Unstable)")               \
                                                                             \
   experimental(intx, hashCode, 5,                                           \
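Because InlineNotify is declared as a product-level bool flag, the intrinsic can be toggled on a stock (non-debug) build, which is useful for isolating notify-related regressions. For example (workload name hypothetical):

    java -XX:-InlineNotify MyBenchmark

reverts all Object.notify()/notifyAll() call sites to the ordinary, non-intrinsified path; -XX:+InlineNotify restores the default.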
--- old/src/share/vm/runtime/objectMonitor.cpp	Fri May 15 15:30:33 2015
+++ new/src/share/vm/runtime/objectMonitor.cpp	Fri May 15 15:30:32 2015
@@ -1652,16 +1652,9 @@
 // then instead of transferring a thread from the WaitSet to the EntryList
 // we might just dequeue a thread from the WaitSet and directly unpark() it.
 
-void ObjectMonitor::notify(TRAPS) {
-  CHECK_OWNER();
-  if (_WaitSet == NULL) {
-    TEVENT(Empty-Notify);
-    return;
-  }
-  DTRACE_MONITOR_PROBE(notify, this, object(), THREAD);
+int ObjectMonitor::INotify(Thread * Self) {
+  const int policy = Knob_MoveNotifyee;
 
-  int Policy = Knob_MoveNotifyee;
-
   Thread::SpinAcquire(&_WaitSetLock, "WaitSet - notify");
   ObjectWaiter * iterator = DequeueWaiter();
   if (iterator != NULL) {
@@ -1668,32 +1661,35 @@
     TEVENT(Notify1 - Transfer);
     guarantee(iterator->TState == ObjectWaiter::TS_WAIT, "invariant");
     guarantee(iterator->_notified == 0, "invariant");
-    if (Policy != 4) {
+    // Disposition - what might we do with iterator?
+    // a. add it directly to the EntryList - either tail or head.
+    // b. push it onto the front of the _cxq.
+    // For now we use (a).
+    if (policy != 4) {
       iterator->TState = ObjectWaiter::TS_ENTER;
     }
     iterator->_notified = 1;
-    Thread * Self = THREAD;
     iterator->_notifier_tid = Self->osthread()->thread_id();
 
-    ObjectWaiter * List = _EntryList;
-    if (List != NULL) {
-      assert(List->_prev == NULL, "invariant");
-      assert(List->TState == ObjectWaiter::TS_ENTER, "invariant");
-      assert(List != iterator, "invariant");
+    ObjectWaiter * list = _EntryList;
+    if (list != NULL) {
+      assert(list->_prev == NULL, "invariant");
+      assert(list->TState == ObjectWaiter::TS_ENTER, "invariant");
+      assert(list != iterator, "invariant");
     }
 
-    if (Policy == 0) {       // prepend to EntryList
-      if (List == NULL) {
+    if (policy == 0) {       // prepend to EntryList
+      if (list == NULL) {
         iterator->_next = iterator->_prev = NULL;
         _EntryList = iterator;
       } else {
-        List->_prev = iterator;
-        iterator->_next = List;
+        list->_prev = iterator;
+        iterator->_next = list;
         iterator->_prev = NULL;
         _EntryList = iterator;
       }
-    } else if (Policy == 1) {      // append to EntryList
-      if (List == NULL) {
+    } else if (policy == 1) {      // append to EntryList
+      if (list == NULL) {
         iterator->_next = iterator->_prev = NULL;
         _EntryList = iterator;
       } else {
@@ -1700,42 +1696,41 @@
         // CONSIDER: finding the tail currently requires a linear-time walk of
         // the EntryList. We can make tail access constant-time by converting to
         // a CDLL instead of using our current DLL.
-        ObjectWaiter * Tail;
-        for (Tail = List; Tail->_next != NULL; Tail = Tail->_next) /* empty */;
-        assert(Tail != NULL && Tail->_next == NULL, "invariant");
-        Tail->_next = iterator;
-        iterator->_prev = Tail;
+        ObjectWaiter * tail;
+        for (tail = list; tail->_next != NULL; tail = tail->_next) /* empty */;
+        assert(tail != NULL && tail->_next == NULL, "invariant");
+        tail->_next = iterator;
+        iterator->_prev = tail;
         iterator->_next = NULL;
       }
-    } else if (Policy == 2) {      // prepend to cxq
+    } else if (policy == 2) {      // prepend to cxq
       // prepend to cxq
-      if (List == NULL) {
+      if (list == NULL) {
         iterator->_next = iterator->_prev = NULL;
         _EntryList = iterator;
       } else {
         iterator->TState = ObjectWaiter::TS_CXQ;
         for (;;) {
-          ObjectWaiter * Front = _cxq;
-          iterator->_next = Front;
-          if (Atomic::cmpxchg_ptr (iterator, &_cxq, Front) == Front) {
+          ObjectWaiter * front = _cxq;
+          iterator->_next = front;
+          if (Atomic::cmpxchg_ptr(iterator, &_cxq, front) == front) {
             break;
           }
         }
       }
-    } else if (Policy == 3) {      // append to cxq
+    } else if (policy == 3) {      // append to cxq
       iterator->TState = ObjectWaiter::TS_CXQ;
       for (;;) {
-        ObjectWaiter * Tail;
-        Tail = _cxq;
-        if (Tail == NULL) {
+        ObjectWaiter * tail = _cxq;
+        if (tail == NULL) {
           iterator->_next = NULL;
-          if (Atomic::cmpxchg_ptr (iterator, &_cxq, NULL) == NULL) {
+          if (Atomic::cmpxchg_ptr(iterator, &_cxq, NULL) == NULL) {
            break;
          }
        } else {
-          while (Tail->_next != NULL) Tail = Tail->_next;
-          Tail->_next = iterator;
-          iterator->_prev = Tail;
+          while (tail->_next != NULL) tail = tail->_next;
+          tail->_next = iterator;
+          iterator->_prev = tail;
           iterator->_next = NULL;
           break;
         }
       }
@@ -1747,10 +1742,6 @@
       ev->unpark();
     }
 
-    if (Policy < 4) {
-      iterator->wait_reenter_begin(this);
-    }
-
     // _WaitSetLock protects the wait queue, not the EntryList.  We could
     // move the add-to-EntryList operation, above, outside the critical section
     // protected by _WaitSetLock.  In practice that's not useful.  With the
@@ -1758,133 +1749,60 @@
     // is the only thread that grabs _WaitSetLock.  There's almost no contention
     // on _WaitSetLock so it's not profitable to reduce the length of the
     // critical section.
-  }
 
+    if (policy < 4) {
+      iterator->wait_reenter_begin(this);
+    }
+  }
   Thread::SpinRelease(&_WaitSetLock);
-
-  if (iterator != NULL && ObjectMonitor::_sync_Notifications != NULL) {
-    ObjectMonitor::_sync_Notifications->inc();
+  return 0;
+}
+
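The policy values in INotify() above choose a disposition for the dequeued waiter: 0 and 1 prepend or append to the doubly-linked _EntryList, 2 and 3 prepend or append to _cxq, and any other value unparks the waiter immediately. A standalone sketch of just the two EntryList dispositions, with invented types (the real code also manages TState, _notified, and _WaitSetLock):

    #include <cstdio>

    struct Waiter {
      const char* name;
      Waiter* next;
      Waiter* prev;
    };

    static Waiter* entry_list = nullptr;

    static void transfer(Waiter* w, int policy) {
      w->next = w->prev = nullptr;
      if (entry_list == nullptr) {
        entry_list = w;
      } else if (policy == 0) {      // prepend to EntryList
        w->next = entry_list;
        entry_list->prev = w;
        entry_list = w;
      } else {                       // policy == 1: append to EntryList
        Waiter* tail = entry_list;
        while (tail->next != nullptr) tail = tail->next;  // linear-time tail walk
        tail->next = w;
        w->prev = tail;
      }
    }

    int main() {
      Waiter a{"A", nullptr, nullptr}, b{"B", nullptr, nullptr};
      transfer(&a, 0);
      transfer(&b, 0);               // prepend mode: B now precedes A
      for (Waiter* w = entry_list; w != nullptr; w = w->next) std::printf("%s ", w->name);
      std::printf("\n");             // prints: B A
      return 0;
    }

The linear tail walk in the append case is the cost the CONSIDER comment proposes eliminating by switching to a circular doubly-linked list with constant-time tail access.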
+// Consider: a not-uncommon synchronization bug is to use notify() when
+// notifyAll() is more appropriate, potentially resulting in lost wakeups
+// and stranded threads. A useful diagnostic option is to force all notify()
+// operations to behave as notifyAll(). We can also detect many such problems
+// with MinimumWait. When MinimumWait is set to a small non-zero timeout value
+// and the program does not hang whereas it did absent MinimumWait, that
+// suggests a lost wakeup bug.
+
+void ObjectMonitor::notify(TRAPS) {
+  CHECK_OWNER();
+  if (_WaitSet == NULL) {
+    TEVENT(Empty-Notify);
+    return;
+  }
+  DTRACE_MONITOR_PROBE(notify, this, object(), THREAD);
+  INotify(THREAD);
+  if (ObjectMonitor::_sync_Notifications != NULL) {
+    ObjectMonitor::_sync_Notifications->inc(1);
+  }
+}
+
+// The current implementation of notifyAll() transfers the waiters one-at-a-time
+// from the waitset to the EntryList. This could be done more efficiently with a
+// single bulk transfer, but in practice it's not time-critical. Beware, too,
+// that in prepend-mode we invert the order of the waiters. Let's say that the
+// waitset is "ABCD" and the EntryList is "XYZ". After a notifyAll() in prepend
+// mode the waitset will be empty and the EntryList will be "DCBAXYZ".
+
 void ObjectMonitor::notifyAll(TRAPS) {
   CHECK_OWNER();
-  ObjectWaiter* iterator;
   if (_WaitSet == NULL) {
     TEVENT(Empty-NotifyAll);
     return;
   }
 
-  DTRACE_MONITOR_PROBE(notifyAll, this, object(), THREAD);
-  int Policy = Knob_MoveNotifyee;
-  int Tally = 0;
-  Thread::SpinAcquire(&_WaitSetLock, "WaitSet - notifyall");
-
-  for (;;) {
-    iterator = DequeueWaiter();
-    if (iterator == NULL) break;
-    TEVENT(NotifyAll - Transfer1);
-    ++Tally;
-
-    // Disposition - what might we do with iterator?
-    // a. add it directly to the EntryList - either tail or head.
-    // b. push it onto the front of the _cxq.
-    // For now we use (a).
-
-    guarantee(iterator->TState == ObjectWaiter::TS_WAIT, "invariant");
-    guarantee(iterator->_notified == 0, "invariant");
-    iterator->_notified = 1;
-    Thread * Self = THREAD;
-    iterator->_notifier_tid = Self->osthread()->thread_id();
-    if (Policy != 4) {
-      iterator->TState = ObjectWaiter::TS_ENTER;
-    }
-
-    ObjectWaiter * List = _EntryList;
-    if (List != NULL) {
-      assert(List->_prev == NULL, "invariant");
-      assert(List->TState == ObjectWaiter::TS_ENTER, "invariant");
-      assert(List != iterator, "invariant");
-    }
-
-    if (Policy == 0) {       // prepend to EntryList
-      if (List == NULL) {
-        iterator->_next = iterator->_prev = NULL;
-        _EntryList = iterator;
-      } else {
-        List->_prev = iterator;
-        iterator->_next = List;
-        iterator->_prev = NULL;
-        _EntryList = iterator;
-      }
-    } else if (Policy == 1) {      // append to EntryList
-      if (List == NULL) {
-        iterator->_next = iterator->_prev = NULL;
-        _EntryList = iterator;
-      } else {
-        // CONSIDER: finding the tail currently requires a linear-time walk of
-        // the EntryList. We can make tail access constant-time by converting to
-        // a CDLL instead of using our current DLL.
-        ObjectWaiter * Tail;
-        for (Tail = List; Tail->_next != NULL; Tail = Tail->_next) /* empty */;
-        assert(Tail != NULL && Tail->_next == NULL, "invariant");
-        Tail->_next = iterator;
-        iterator->_prev = Tail;
-        iterator->_next = NULL;
-      }
-    } else if (Policy == 2) {      // prepend to cxq
-      // prepend to cxq
-      iterator->TState = ObjectWaiter::TS_CXQ;
-      for (;;) {
-        ObjectWaiter * Front = _cxq;
-        iterator->_next = Front;
-        if (Atomic::cmpxchg_ptr (iterator, &_cxq, Front) == Front) {
-          break;
-        }
-      }
-    } else if (Policy == 3) {      // append to cxq
-      iterator->TState = ObjectWaiter::TS_CXQ;
-      for (;;) {
-        ObjectWaiter * Tail;
-        Tail = _cxq;
-        if (Tail == NULL) {
-          iterator->_next = NULL;
-          if (Atomic::cmpxchg_ptr (iterator, &_cxq, NULL) == NULL) {
-            break;
-          }
-        } else {
-          while (Tail->_next != NULL) Tail = Tail->_next;
-          Tail->_next = iterator;
-          iterator->_prev = Tail;
-          iterator->_next = NULL;
-          break;
-        }
-      }
-    } else {
-      ParkEvent * ev = iterator->_event;
-      iterator->TState = ObjectWaiter::TS_RUN;
-      OrderAccess::fence();
-      ev->unpark();
-    }
-
-    if (Policy < 4) {
-      iterator->wait_reenter_begin(this);
-    }
-
-    // _WaitSetLock protects the wait queue, not the EntryList.  We could
-    // move the add-to-EntryList operation, above, outside the critical section
-    // protected by _WaitSetLock.  In practice that's not useful.  With the
-    // exception of wait() timeouts and interrupts the monitor owner
-    // is the only thread that grabs _WaitSetLock.  There's almost no contention
-    // on _WaitSetLock so it's not profitable to reduce the length of the
-    // critical section.
+  DTRACE_MONITOR_PROBE(notifyAll, this, object(), THREAD);
+  int tally = 0;
+  while (_WaitSet != NULL) {
+    tally++;
+    INotify(THREAD);
   }
-  Thread::SpinRelease(&_WaitSetLock);
-
-  if (Tally != 0 && ObjectMonitor::_sync_Notifications != NULL) {
-    ObjectMonitor::_sync_Notifications->inc(Tally);
+
+  if (tally != 0 && ObjectMonitor::_sync_Notifications != NULL) {
+    ObjectMonitor::_sync_Notifications->inc(tally);
   }
 }
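The ordering claim in the comment above is easy to reproduce: draining a FIFO waitset one element at a time and prepending each element inverts the drained sequence in front of the existing EntryList. A standalone demonstration, with std:: containers standing in for the intrusive lists:

    #include <deque>
    #include <list>
    #include <iostream>

    int main() {
      std::deque<char> waitset    = {'A', 'B', 'C', 'D'};  // FIFO waitset
      std::list<char>  entry_list = {'X', 'Y', 'Z'};

      while (!waitset.empty()) {                 // one INotify() per iteration
        entry_list.push_front(waitset.front());  // prepend disposition
        waitset.pop_front();
      }
      for (char c : entry_list) std::cout << c;  // prints DCBAXYZ
      std::cout << '\n';
      return 0;
    }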
--- old/src/share/vm/runtime/objectMonitor.hpp	Fri May 15 15:30:35 2015
+++ new/src/share/vm/runtime/objectMonitor.hpp	Fri May 15 15:30:34 2015
@@ -331,7 +331,7 @@
  private:
   void AddWaiter(ObjectWaiter * waiter);
   static void DeferredInitialize();
-
+  int INotify(Thread * Self);
   ObjectWaiter * DequeueWaiter();
   void DequeueSpecificWaiter(ObjectWaiter * waiter);
   void EnterI(TRAPS);
--- old/src/share/vm/runtime/sharedRuntime.cpp	Fri May 15 15:30:37 2015
+++ new/src/share/vm/runtime/sharedRuntime.cpp	Fri May 15 15:30:36 2015
@@ -1793,9 +1793,7 @@
 
 // Handles the uncommon case in locking, i.e., contention or an inflated lock.
 JRT_BLOCK_ENTRY(void, SharedRuntime::complete_monitor_locking_C(oopDesc* _obj, BasicLock* lock, JavaThread* thread))
-  // Disable ObjectSynchronizer::quick_enter() in default config
-  // until JDK-8077392 is resolved.
-  if ((SyncFlags & 256) != 0 && !SafepointSynchronize::is_synchronizing()) {
+  if (!SafepointSynchronize::is_synchronizing()) {
     // Only try quick_enter() if we're not trying to reach a safepoint
     // so that the calling thread reaches the safepoint more quickly.
     if (ObjectSynchronizer::quick_enter(_obj, thread, lock)) return;
--- old/src/share/vm/runtime/synchronizer.cpp	Fri May 15 15:30:40 2015
+++ new/src/share/vm/runtime/synchronizer.cpp	Fri May 15 15:30:39 2015
@@ -84,6 +84,8 @@
   }                                                                        \
 }
 
+#define HOTSPOT_MONITOR_PROBE_notify HOTSPOT_MONITOR_NOTIFY
+#define HOTSPOT_MONITOR_PROBE_notifyAll HOTSPOT_MONITOR_NOTIFYALL
 #define HOTSPOT_MONITOR_PROBE_waited HOTSPOT_MONITOR_WAITED
 
 #define DTRACE_MONITOR_PROBE(probe, monitor, obj, thread)                  \
@@ -144,7 +146,62 @@
 // operators: safepoints or indefinite blocking (blocking that might span a
 // safepoint) are forbidden. Generally the thread_state() is _in_Java upon
 // entry.
+//
+// An interesting optimization is to have the JIT recognize the following
+// common idiom:
+//   synchronized (someobj) { .... ; notify(); }
+// That is, we find a notify() or notifyAll() call that immediately precedes
+// the monitorexit operation. In that case the JIT could fuse the operations
+// into a single notifyAndExit() runtime primitive.
 
+bool ObjectSynchronizer::quick_notify(oopDesc * obj, Thread * Self, bool All) {
+  assert(!SafepointSynchronize::is_at_safepoint(), "invariant");
+  assert(Self->is_Java_thread(), "invariant");
+  assert(((JavaThread *) Self)->thread_state() == _thread_in_Java, "invariant");
+  No_Safepoint_Verifier nsv;
+  if (obj == NULL) return false;
+  const markOop mark = obj->mark();
+
+  if (mark->has_locker() && Self->is_lock_owned((address)mark->locker())) {
+    // Degenerate notify:
+    // stack-locked by caller, so by definition the implied waitset is empty.
+    return true;
+  }
+
+  if (mark->has_monitor()) {
+    ObjectMonitor * const mon = mark->monitor();
+    assert(mon->object() == obj, "invariant");
+    if (mon->owner() != Self) return false;
+
+    // As long as the object is unbiased, doesn't require safepoint revocation,
+    // and is owned by the caller, we can transfer a thread or threads from
+    // the waitset to the entrylist here and now, avoiding the slow-path.
+    // That is, the only case where the slow-path is mandatory is
+    // when the object is biased or we need to throw IMSX exceptions.
+    if (mon->first_waiter() != NULL) {
+      if (All) {
+        DTRACE_MONITOR_PROBE(notifyAll, mon, obj, Self);
+      } else {
+        DTRACE_MONITOR_PROBE(notify, mon, obj, Self);
+      }
+      int tally = 0;
+      for (;;) {
+        if (mon->first_waiter() == NULL) break;
+        mon->INotify(Self);
+        ++tally;
+        if (!All) break;
+      }
+      if (ObjectMonitor::_sync_Notifications != NULL) {
+        ObjectMonitor::_sync_Notifications->inc(tally);
+      }
+    }
+    return true;
+  }
+
+  return false;  // revert to slow-path
+}
+
 // The LockNode emitted directly at the synchronization site would have
 // been too big if it were to have included support for the cases of inflated
 // recursive enter and exit, so they go here instead.
--- old/src/share/vm/runtime/synchronizer.hpp	Fri May 15 15:30:42 2015
+++ new/src/share/vm/runtime/synchronizer.hpp	Fri May 15 15:30:41 2015
@@ -72,6 +72,7 @@
   static void notify(Handle obj, TRAPS);
   static void notifyall(Handle obj, TRAPS);
 
+  static bool quick_notify(oopDesc* obj, Thread* Self, bool All);
   static bool quick_enter(oop obj, Thread* Self, BasicLock* Lock);
 
   // Special internal-use-only method for use by JVM infrastructure
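Taken together, quick_notify() triages into three cases: stack-locked by the caller (trivially done, since a stack-locked object necessarily has an empty waitset), inflated and owned by the caller (transfer waiters in place), and everything else (decline, letting the slow path revoke bias or throw IllegalMonitorStateException). A compact standalone model of that triage, with an invented lock-state/monitor representation rather than the real mark word:

    #include <cstdio>

    enum LockState { kStackLocked, kInflated, kOther };

    struct Monitor {
      void* owner;
      int   waiters;
    };

    // Returns true when the notify completed on the fast path.
    static bool quick_notify(LockState state, Monitor* mon, void* self, bool all) {
      if (state == kStackLocked) {
        return true;                           // stack-locked: waitset is empty by definition
      }
      if (state == kInflated) {
        if (mon->owner != self) return false;  // not owned: slow path throws IMSX
        while (mon->waiters > 0) {             // transfer waiters in place
          mon->waiters--;
          if (!all) break;                     // notify() moves at most one waiter
        }
        return true;
      }
      return false;                            // e.g. biased: revert to slow path
    }

    int main() {
      Monitor m{nullptr, 3};
      int self;
      m.owner = &self;
      std::printf("fast? %d, remaining %d\n",
                  quick_notify(kInflated, &m, &self, false), m.waiters);
      return 0;
    }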