// them together was facile -- a bit too facile. The current implementation badly
// conflates the two concerns.
//
// * TODO-FIXME:
//
//    -- Add DTRACE probes for contended acquire, contended acquired, contended unlock
//       We should also add DTRACE probes in the ParkEvent subsystem for
//       Park-entry, Park-exit, and Unpark.
//
//    -- We have an excess of mutex-like constructs in the JVM, namely:
//       1. objectMonitors for Java-level synchronization (synchronizer.cpp)
//       2. low-level muxAcquire and muxRelease
//       3. low-level spinAcquire and spinRelease
//       4. native Mutex:: and Monitor::
//       5. jvm_raw_lock() and _unlock()
//       6. JVMTI raw monitors -- distinct from (5) despite having a confusingly
//          similar name.
//
// o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o


// CASPTR() uses the canonical argument order that dominates in the literature.
// Our internal cmpxchg_ptr() uses a bastardized ordering to accommodate Sun .il templates.

#define CASPTR(a, c, s) \
  intptr_t(Atomic::cmpxchg_ptr((void *)(s), (void *)(a), (void *)(c)))
#define UNS(x) (uintptr_t(x))
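// TRACE(m) is rate-limited: it prints only when its call count reaches a power
// of two (the (x & (x - 1)) == 0 test), so a hot call site emits only
// logarithmically many lines of output.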
#define TRACE(m)                  \
  {                               \
    static volatile int ctr = 0;  \
    int x = ++ctr;                \
    if ((x & (x - 1)) == 0) {     \
      ::printf("%d:%s\n", x, #m); \
      ::fflush(stdout);           \
    }                             \
  }

// Simplistic low-quality Marsaglia SHIFT-XOR RNG.
// Bijective except for the trailing mask operation.
// Useful for spin loops as the compiler can't optimize it away.

static inline jint MarsagliaXORV(jint x) {
  if (x == 0) x = 1|os::random();   // avoid the fixed point at 0
  x ^= x << 6;
  x ^= ((unsigned)x) >> 21;
  x ^= x << 7;
  return x & 0x7FFFFFFF;            // strip the sign bit
}

static int Stall(int its) {
  static volatile jint rv = 1;
  volatile int OnFrame = 0;
  jint v = rv ^ UNS(OnFrame);
  while (--its >= 0) {
    v = MarsagliaXORV(v);
  }
  // Make this impossible for the compiler to optimize away,
  // but (mostly) avoid W coherency sharing on MP systems.
  if (v == 0x12345) rv = v;
  return v;
}

int Monitor::TryLock() {
  intptr_t v = _LockWord.FullWord;
  for (;;) {
    if ((v & _LBIT) != 0) return 0;
    const intptr_t u = CASPTR(&_LockWord, v, v|_LBIT);
    if (v == u) return 1;
    v = u;    // the CAS failed: u holds the freshly observed value
  }
}
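
// For readers unfamiliar with the idiom above: a minimal, self-contained
// sketch of the same CAS retry loop, expressed with std::atomic as a
// stand-in for CASPTR/Atomic::cmpxchg_ptr. LBIT and LockWord below are toy
// analogues of _LBIT and _LockWord, not HotSpot definitions.
#if 0
#include <atomic>
#include <cstdint>

static const intptr_t LBIT = 1;            // low-order "lock byte" bit
static std::atomic<intptr_t> LockWord(0);  // encodes (cxq head | LBIT)

int try_lock_sketch() {
  intptr_t v = LockWord.load(std::memory_order_relaxed);
  for (;;) {
    if ((v & LBIT) != 0) return 0;         // already held -- fail immediately
    // On failure compare_exchange_weak refreshes v with the observed value,
    // so the retry needs no separate reload.
    if (LockWord.compare_exchange_weak(v, v | LBIT,
                                       std::memory_order_acquire)) {
      return 1;
    }
  }
}
#endif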

int Monitor::TryFast() {
  // Optimistic fast-path form ...
  // Fast-path attempt for the common uncontended case.
  // Avoid RTS->RTO $ coherence upgrade on typical SMP systems.
  intptr_t v = CASPTR(&_LockWord, 0, _LBIT);  // agro ...
  if (v == 0) return 1;

  for (;;) {
    if ((v & _LBIT) != 0) return 0;
    const intptr_t u = CASPTR(&_LockWord, v, v|_LBIT);
    if (v == u) return 1;
    v = u;
  }
}

int Monitor::ILocked() {
  const intptr_t w = _LockWord.FullWord & 0xFF;
  assert(w == 0 || w == _LBIT, "invariant");
  return w == _LBIT;
}

// Polite TATAS spinlock with exponential backoff - bounded spin.
// Ideally we'd use processor cycles, time or vtime to control
// the loop, but we currently use iterations.
// All the constants within were derived empirically but work
// over the spectrum of J2SE reference platforms.
// On Niagara-class systems the back-off is unnecessary but
// is relatively harmless. (At worst it'll slightly retard
// acquisition times). The back-off is critical for older SMP systems
// where constant fetching of the LockWord would otherwise impair
// scalability.
//
// Clamp spinning at approximately 1/2 of a context-switch round-trip.
// See synchronizer.cpp for details and rationale.

int Monitor::TrySpin(Thread * const Self) {
  if (TryLock()) return 1;
  if (!os::is_MP()) return 0;

  int Probes = 0;
  int Delay = 0;
  int Steps = 0;
  int SpinMax = NativeMonitorSpinLimit;
  int flgs = NativeMonitorFlags;
  for (;;) {
    intptr_t v = _LockWord.FullWord;
    if ((v & _LBIT) == 0) {
      if (CASPTR(&_LockWord, v, v|_LBIT) == v) {
        return 1;
      }
      continue;
    }

    if ((flgs & 8) == 0) {
      SpinPause();
    }

    // Periodically increase Delay -- variable Delay form
    // conceptually: delay *= 1 + 1/Exponent
    ++Probes;
    if (Probes > SpinMax) return 0;

    if ((Probes & 0x7) == 0) {
      Delay = ((Delay << 1)|1) & 0x7FF;
      // CONSIDER: Delay += 1 + (Delay/4); Delay &= 0x7FF ;
    }

    if (flgs & 2) continue;
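    // ... (remainder of TrySpin elided) ...

// Since the tail of TrySpin is elided above, here is a rough, self-contained
// sketch of the bounded TATAS-with-backoff shape the preceding comment block
// describes -- a sketch with placeholder constants and a placeholder
// spin_pause(), not HotSpot's exact tuning:
#if 0
#include <atomic>

static std::atomic<int> lock_word(0);   // 0 = free, 1 = held (simplified)
inline void spin_pause() { /* e.g. the PAUSE instruction on x86 */ }

bool try_spin_sketch(int spin_max) {
  int delay = 0;
  for (int probes = 1; probes <= spin_max; ++probes) {
    int v = lock_word.load(std::memory_order_relaxed);   // test ...
    if (v == 0) {
      int expected = 0;                                  // ... then test-and-set
      if (lock_word.compare_exchange_strong(expected, 1,
                                            std::memory_order_acquire)) {
        return true;
      }
      continue;                   // lost the race: re-read and retry at once
    }
    if ((probes & 0x7) == 0) {    // periodically widen the backoff window
      delay = ((delay << 1) | 1) & 0x7FF;
    }
    for (int i = 0; i < delay; ++i) spin_pause();
  }
  return false;                   // spin budget exhausted -- caller should block
}
#endif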

static int ParkCommon(ParkEvent * ev, jlong timo) {
  // Diagnostic support - periodically unwedge blocked threads
  intx nmt = NativeMonitorTimeout;
  if (nmt > 0 && (nmt < timo || timo <= 0)) {
    timo = nmt;
  }
  int err = OS_OK;
  if (0 == timo) {
    ev->park();             // park indefinitely
  } else {
    err = ev->park(timo);   // timed park
  }
  return err;
}

inline int Monitor::AcquireOrPush(ParkEvent * ESelf) {
  intptr_t v = _LockWord.FullWord;
  for (;;) {
    if ((v & _LBIT) == 0) {
      const intptr_t u = CASPTR(&_LockWord, v, v|_LBIT);
      if (u == v) return 1;   // indicate acquired
      v = u;
    } else {
      // Anticipate success ...
      ESelf->ListNext = (ParkEvent *)(v & ~_LBIT);
      const intptr_t u = CASPTR(&_LockWord, v, intptr_t(ESelf)|_LBIT);
      if (u == v) return 0;   // indicate pushed onto cxq
      v = u;
    }
    // Interference - LockWord change - just retry
  }
}
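
// The push branch above is a classic Treiber-style prepend. A hedged,
// self-contained sketch with std::atomic follows; Waiter, LBIT and lock_word
// are toy stand-ins for ParkEvent, _LBIT and _LockWord, not HotSpot types.
#if 0
#include <atomic>
#include <cstdint>

struct Waiter { Waiter * next; };

static const intptr_t LBIT = 1;
static std::atomic<intptr_t> lock_word(0);   // encodes (cxq head | LBIT)

// Returns 1 when the lock bit was acquired, 0 when w was pushed onto the
// cxq (the caller must then park).
int acquire_or_push_sketch(Waiter * w) {
  intptr_t v = lock_word.load(std::memory_order_relaxed);
  for (;;) {
    if ((v & LBIT) == 0) {                   // lock appears free: try to take it
      if (lock_word.compare_exchange_weak(v, v | LBIT,
                                          std::memory_order_acquire)) return 1;
    } else {                                 // held: prepend w, preserving LBIT
      w->next = (Waiter *)(v & ~LBIT);       // anticipate success
      if (lock_word.compare_exchange_weak(v, intptr_t(w) | LBIT,
                                          std::memory_order_release)) return 0;
    }
    // a failed CAS refreshed v -- just retry
  }
}
#endif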

// ILock and IWait are the lowest level primitive internal blocking
// synchronization functions. The callers of IWait and ILock must have
// performed any needed state transitions beforehand.
// IWait and ILock may directly call park() without any concern for thread state.
// Note that ILock and IWait do *not* access _owner.
// _owner is a higher-level logical concept.

void Monitor::ILock(Thread * Self) {
  assert(_OnDeck != Self->_MutexEvent, "invariant");

  if (TryFast()) {
 Exeunt:
    assert(ILocked(), "invariant");
    return;
  }

  ParkEvent * const ESelf = Self->_MutexEvent;
  assert(_OnDeck != ESelf, "invariant");

  // As an optimization, spinners could conditionally try to set _OnDeck to _LBIT.
  // synchronizer.cpp uses a similar optimization.
  if (TrySpin(Self)) goto Exeunt;

  // Slow-path - the lock is contended.
  // Either Enqueue Self on cxq or acquire the outer lock.
  // LockWord encoding = (cxq,LOCKBYTE)
  ESelf->reset();
  OrderAccess::fence();

  // Optional optimization ... try barging on the inner lock
  if ((NativeMonitorFlags & 32) && CASPTR(&_OnDeck, NULL, UNS(ESelf)) == 0) {
    goto OnDeck_LOOP;
  }

  if (AcquireOrPush(ESelf)) goto Exeunt;

  // At any given time there is at most one ondeck thread.
  // ondeck implies not resident on cxq and not resident on EntryList.
  // Only the OnDeck thread can try to acquire -- contend for -- the lock.
  // CONSIDER: use Self->OnDeck instead of m->OnDeck.
  // Deschedule Self so that others may run.
  while (OrderAccess::load_ptr_acquire(&_OnDeck) != ESelf) {
    ParkCommon(ESelf, 0);
  }

  // Self is now in the OnDeck position and will remain so until it
  // manages to acquire the lock.
 OnDeck_LOOP:
  for (;;) {
    assert(_OnDeck == ESelf, "invariant");
    if (TrySpin(Self)) break;
    // It's probably wise to spin only if we *actually* blocked.
    // CONSIDER: check the lockbyte, if it remains set then
    // preemptively drain the cxq into the EntryList.
    // The best place and time to perform queue operations -- lock metadata --
    // is *before* having acquired the outer lock, while waiting for the lock to drop.
    ParkCommon(ESelf, 0);
  }

  assert(_OnDeck == ESelf, "invariant");
  _OnDeck = NULL;

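// ... (the tail of ILock and the head of Monitor::IUnlock elided; the
// fragment below continues inside IUnlock, with cxq holding a copy of
// _LockWord.FullWord) ...
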
  if (((cxq & ~_LBIT)|UNS(_EntryList)) == 0) {
    return;   // normal fast-path exit - cxq and EntryList both empty
  }
  if (cxq & _LBIT) {
    // Optional optimization ...
    // Some other thread acquired the lock in the window since this
    // thread released it. Succession is now that thread's responsibility.
    return;
  }

 Succession:
  // Slow-path exit - this thread must ensure succession and progress.
  // OnDeck serves as lock to protect cxq and EntryList.
  // Only the holder of OnDeck can manipulate EntryList or detach the RATs
  // (recently arrived threads) from cxq.
  // Avoid ABA - allow multiple concurrent producers (enqueue via push-CAS)
  // but only one concurrent consumer (detacher of RATs).
  // Consider protecting this critical section with schedctl on Solaris.
  // Unlike a normal lock, however, the exiting thread "locks" OnDeck,
  // picks a successor and marks that thread as OnDeck. That successor
  // thread will then clear OnDeck once it eventually acquires the outer lock.
  if (CASPTR(&_OnDeck, NULL, _LBIT) != UNS(NULL)) {
    return;
  }

  ParkEvent * List = _EntryList;
  if (List != NULL) {
    // Transfer the head of the EntryList to the OnDeck position.
    // Once OnDeck, a thread stays OnDeck until it acquires the lock.
    // For a given lock there is at most one OnDeck thread at any one instant.
   WakeOne:
    assert(List == _EntryList, "invariant");
    ParkEvent * const w = List;
    assert(RelaxAssert || w != Thread::current()->_MutexEvent, "invariant");
    _EntryList = w->ListNext;
    // as a diagnostic measure consider setting w->_ListNext = BAD
    assert(UNS(_OnDeck) == _LBIT, "invariant");

    // Pass OnDeck role to w, ensuring that _EntryList has been set first.
    // w will clear _OnDeck once it acquires the outer lock.
    // Note that once we set _OnDeck that thread can acquire the mutex, proceed
    // with its critical section and then enter this code to unlock the mutex. So
    // you can have multiple threads active in IUnlock at the same time.
    OrderAccess::release_store_ptr(&_OnDeck, w);

    // Another optional optimization ...
    // For heavily contended locks it's not uncommon that some other
    // thread acquired the lock while this thread was arranging succession.
    // Try to defer the unpark() operation - Delegate the responsibility
    // for unpark()ing the OnDeck thread to the current or subsequent owners.
    // That is, the new owner is responsible for unparking the OnDeck thread.
    OrderAccess::storeload();
    cxq = _LockWord.FullWord;
    if (cxq & _LBIT) return;

    w->unpark();
    return;
  }

  cxq = _LockWord.FullWord;
  if ((cxq & ~_LBIT) != 0) {
    // The EntryList is empty but the cxq is populated.
    // drain RATs from cxq into EntryList.
    // Detach the RATs segment with CAS and then merge it into the EntryList.
    for (;;) {
      // optional optimization - if locked, the owner is responsible for succession
      if (cxq & _LBIT) goto Punt;
      const intptr_t vfy = CASPTR(&_LockWord, cxq, cxq & _LBIT);
      if (vfy == cxq) break;
      cxq = vfy;
      // Interference - LockWord changed - Just retry.
      // We can see concurrent interference from contending threads
      // pushing themselves onto the cxq or from lock-unlock operations.
      // From the perspective of this thread, EntryList is stable and
      // the cxq is prepend-only -- the head is volatile but the interior
      // of the cxq is stable. In theory if we encounter interference from threads
      // pushing onto cxq we could simply break off the original cxq suffix and
      // move that segment to the EntryList, avoiding a 2nd or multiple CAS attempts
      // on the high-traffic LockWord variable. For instance, let's say the cxq is "ABCD"
      // when we first fetch cxq above. Between the fetch -- where we observed "A"
      // -- and the CAS -- where we attempt to CAS null over A -- "PQR" arrives,
      // yielding cxq = "PQRABCD". In this case we could simply set A.ListNext to
      // null, leaving cxq = "PQRA", and transfer the "BCD" segment to the EntryList.
      // Note too, that it's safe for this thread to traverse the cxq
      // without taking any special concurrency precautions.
    }
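
// Detaching the whole cxq with a single CAS, as in the loop above, can be
// sketched in isolation as follows. This reuses the toy Waiter/LBIT/lock_word
// definitions from the acquire_or_push_sketch earlier -- a sketch, not
// HotSpot code.
#if 0
// Returns the detached, prepend-ordered cxq segment, or NULL if the lock was
// re-acquired in the interim (in which case succession is the new owner's
// responsibility).
Waiter * detach_cxq_sketch() {
  intptr_t v = lock_word.load(std::memory_order_relaxed);
  for (;;) {
    if (v & LBIT) return NULL;        // re-acquired: punt succession
    // CAS the head to empty; producers that lose the race simply re-push.
    if (lock_word.compare_exchange_weak(v, 0, std::memory_order_acquire)) {
      return (Waiter *)(v & ~LBIT);
    }
  }
}
#endif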

    // We don't currently reorder the cxq segment as we move it onto
    // the EntryList.
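    // ... (remainder of IUnlock elided) ...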

bool Monitor::notify() {
  assert(_owner == Thread::current(), "invariant");
  assert(ILocked(), "invariant");
  if (_WaitSet == NULL) return true;
  NotifyCount++;

  // Transfer one thread from the WaitSet to the EntryList or cxq.
  // Currently we just unlink the head of the WaitSet and prepend it to the cxq.
  // And of course we could just unlink it and unpark it, too, but
  // in that case it'd likely impale itself on the reentry.
  Thread::muxAcquire(_WaitLock, "notify:WaitLock");
  ParkEvent * nfy = _WaitSet;
  if (nfy != NULL) {   // DCL (double-checked locking) idiom
    _WaitSet = nfy->ListNext;
    assert(nfy->Notified == 0, "invariant");
    // push nfy onto the cxq
    for (;;) {
      const intptr_t v = _LockWord.FullWord;
      assert((v & 0xFF) == _LBIT, "invariant");
      nfy->ListNext = (ParkEvent *)(v & ~_LBIT);
      if (CASPTR(&_LockWord, v, UNS(nfy)|_LBIT) == v) break;
      // interference - _LockWord changed -- just retry
    }
    // Note that setting Notified before pushing nfy onto the cxq is
    // also legal and safe, but the safety properties are much more
    // subtle, so for the sake of code stewardship ...
    OrderAccess::fence();
    nfy->Notified = 1;
  }
  Thread::muxRelease(_WaitLock);
  if (nfy != NULL && (NativeMonitorFlags & 16)) {
    // Experimental code ... light up the wakee in the hope that this thread (the owner)
    // will drop the lock just about the time the wakee comes ONPROC.
    nfy->unpark();
  }
  assert(ILocked(), "invariant");
  return true;
}

// Currently notifyAll() transfers the waiters one-at-a-time from the waitset
// to the cxq. This could be done more efficiently with a single bulk en masse
// transfer,
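// ... (notifyAll and the head of Monitor::IWait elided; the fragment below
// is the tail of IWait, beginning inside its timed-wait unlink path) ...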
      } else {                  // found in interior
        assert(q->ListNext == p, "invariant");
        q->ListNext = p->ListNext;
      }
      WasOnWaitSet = 1;         // We were *not* notified but instead encountered timeout
    }
    Thread::muxRelease(_WaitLock);
  }

  // Reentry phase - reacquire the lock
  if (WasOnWaitSet) {
    // ESelf was previously on the WaitSet but we just unlinked it above
    // because of a timeout. ESelf is not resident on any list and is not OnDeck.
    assert(_OnDeck != ESelf, "invariant");
    ILock(Self);
  } else {
    // A prior notify() operation moved ESelf from the WaitSet to the cxq.
    // ESelf is now on the cxq, EntryList or at the OnDeck position.
    // The following fragment is extracted from Monitor::ILock():
    for (;;) {
      if (OrderAccess::load_ptr_acquire(&_OnDeck) == ESelf && TrySpin(Self)) break;
      ParkCommon(ESelf, 0);
    }
    assert(_OnDeck == ESelf, "invariant");
    _OnDeck = NULL;
  }

  assert(ILocked(), "invariant");
  return WasOnWaitSet != 0;     // return true IFF timeout
}


// ON THE VMTHREAD SNEAKING PAST HELD LOCKS:
// In particular, there are certain types of global lock that may be held
// by a Java thread while it is blocked at a safepoint but before it has
// written the _owner field. These locks may be sneakily acquired by the
// VM thread during a safepoint to avoid deadlocks. Alternatively, one should
// identify all such locks, and ensure that Java threads never block at
// safepoints while holding them (_no_safepoint_check_flag). While it
// seems as though this could increase the time to reach a safepoint
// (or at least increase the mean, if not the variance), the latter
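// ... (elided; the fragment below is the contended slow path of
// Monitor::jvm_raw_lock) ...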

  // slow-path - apparent contention
  // Allocate a ParkEvent for transient use.
  // The ParkEvent remains associated with this thread until
  // the time the thread manages to acquire the lock.
  ParkEvent * const ESelf = ParkEvent::Allocate(NULL);
  ESelf->reset();
  OrderAccess::storeload();

  // Either Enqueue Self on cxq or acquire the outer lock.
  if (AcquireOrPush(ESelf)) {
    ParkEvent::Release(ESelf);   // surrender the ParkEvent
    goto Exeunt;
  }

  // At any given time there is at most one ondeck thread.
  // ondeck implies not resident on cxq and not resident on EntryList.
  // Only the OnDeck thread can try to acquire -- contend for -- the lock.
  // CONSIDER: use Self->OnDeck instead of m->OnDeck.
  for (;;) {
    if (OrderAccess::load_ptr_acquire(&_OnDeck) == ESelf && TrySpin(NULL)) break;
    ParkCommon(ESelf, 0);
  }

  assert(_OnDeck == ESelf, "invariant");
  _OnDeck = NULL;
  ParkEvent::Release(ESelf);     // surrender the ParkEvent
  goto Exeunt;
}

void Monitor::jvm_raw_unlock() {
  // Nearly the same as Monitor::unlock() ...
  // directly set _owner instead of using set_owner(null)
  _owner = NULL;
  if (_snuck) {   // only the VM thread "sneaks" past held locks at a safepoint (see above)
    assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sneak");
    _snuck = false;
    return;
  }
  IUnlock(false);
}