// them together was facile -- a bit too facile. The current implementation badly
// conflates the two concerns.
//
// * TODO-FIXME:
//
//   -- Add DTRACE probes for contended acquire, contended acquired, contended unlock
//      We should also add DTRACE probes in the ParkEvent subsystem for
//      Park-entry, Park-exit, and Unpark.
//
//   -- We have an excess of mutex-like constructs in the JVM, namely:
//      1. objectMonitors for Java-level synchronization (synchronizer.cpp)
//      2. low-level muxAcquire and muxRelease
//      3. low-level spinAcquire and spinRelease
//      4. native Mutex:: and Monitor::
//      5. jvm_raw_lock() and _unlock()
//      6. JVMTI raw monitors -- distinct from (5) despite having a confusingly
//         similar name.
//
// o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o

#define UNS(x) (uintptr_t(x))
#define TRACE(m)                   \
  {                                \
    static volatile int ctr = 0;   \
    int x = ++ctr;                 \
    if ((x & (x - 1)) == 0) {      \
      ::printf("%d:%s\n", x, #m);  \
      ::fflush(stdout);            \
    }                              \
  }
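// Note: (x & (x - 1)) == 0 holds exactly when x is a power of two, so TRACE(m)
// reports on the 1st, 2nd, 4th, 8th, ... executions of a probe point --
// exponentially decaying output that remains affordable on hot paths.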

const intptr_t _LBIT = 1;

// Endian-ness ... index of least-significant byte in SplitWord.Bytes[]
#ifdef VM_LITTLE_ENDIAN
 #define _LSBINDEX 0
#else
 #define _LSBINDEX (sizeof(intptr_t)-1)
#endif
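
// For orientation: _LSBINDEX selects the lock byte within the SplitWord union
// declared in mutex.hpp. A sketch from memory (an assumption, not authoritative):
//
//   union SplitWord {                 // full word with separately addressable LSB
//     volatile intptr_t FullWord;
//     volatile void * Address;
//     volatile jbyte  Bytes[sizeof(intptr_t)];
//   };
//
// _LockWord.Bytes[_LSBINDEX] thus aliases the low-order (lock) byte of FullWord.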

// Simplistic low-quality Marsaglia SHIFT-XOR RNG.
// Bijective except for the trailing mask operation.
// Useful for spin loops as the compiler can't optimize it away.

static inline jint MarsagliaXORV(jint x) {
  if (x == 0) x = 1|os::random();
  x ^= x << 6;
  x ^= ((unsigned)x) >> 21;
  x ^= x << 7;
  return x & 0x7FFFFFFF;
}

static int Stall(int its) {
  static volatile jint rv = 1;
  volatile int OnFrame = 0;
  jint v = rv ^ UNS(OnFrame);
  while (--its >= 0) {
    v = MarsagliaXORV(v);
  }
  // Make this impossible for the compiler to optimize away,
  // but (mostly) avoid W coherency sharing on MP systems.
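  // (The conditional store below is taken only when v happens to equal 0x12345,
  // roughly once per 2^31 calls, so the line holding rv stays read-shared.)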
  if (v == 0x12345) rv = v;
  return v;
}

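// TryLock: a single bounded attempt to set the low-order lock bit with CAS.
// Returns 1 if this thread acquired the lock and 0 if the lock byte was already
// set. The loop re-issues the CAS only when it fails due to interference on the
// cxq bits of the LockWord, never when the lock is genuinely held.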
int Monitor::TryLock() {
  intptr_t v = _LockWord.FullWord;
  for (;;) {
    if ((v & _LBIT) != 0) return 0;
    const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v);
    if (v == u) return 1;
    v = u;
  }
}

int Monitor::TryFast() {
  // Optimistic fast-path form ...
  // Fast-path attempt for the common uncontended case.
  // Avoid RTS->RTO $ coherence upgrade on typical SMP systems.
  intptr_t v = Atomic::cmpxchg(_LBIT, &_LockWord.FullWord, (intptr_t)0);  // agro ...
  if (v == 0) return 1;

  for (;;) {
    if ((v & _LBIT) != 0) return 0;
    const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v);
    if (v == u) return 1;
    v = u;
  }
}

int Monitor::ILocked() {
  const intptr_t w = _LockWord.FullWord & 0xFF;
  assert(w == 0 || w == _LBIT, "invariant");
  return w == _LBIT;
}

// Polite TATAS spinlock with exponential backoff - bounded spin.
// Ideally we'd use processor cycles, time or vtime to control
// the loop, but we currently use iterations.
// All the constants within were derived empirically but work over
// the spectrum of J2SE reference platforms.
// On Niagara-class systems the back-off is unnecessary but
// is relatively harmless. (At worst it'll slightly retard
// acquisition times). The back-off is critical for older SMP systems
// where constant fetching of the LockWord would otherwise impair
// scalability.
//
// Clamp spinning at approximately 1/2 of a context-switch round-trip.
// See synchronizer.cpp for details and rationale.

int Monitor::TrySpin(Thread * const Self) {
  if (TryLock()) return 1;
  if (!os::is_MP()) return 0;

  int Probes = 0;
  int Delay = 0;
  int Steps = 0;
  int SpinMax = NativeMonitorSpinLimit;
  int flgs = NativeMonitorFlags;
  for (;;) {
    intptr_t v = _LockWord.FullWord;
    if ((v & _LBIT) == 0) {
      if (Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v) == v) {
        return 1;
      }
      continue;
    }

    if ((flgs & 8) == 0) {
      SpinPause();
    }

    // Periodically increase Delay -- variable Delay form
    // conceptually: delay *= 1 + 1/Exponent
    ++Probes;
    if (Probes > SpinMax) return 0;

    if ((Probes & 0x7) == 0) {
      Delay = ((Delay << 1)|1) & 0x7FF;
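      // Doubling-plus-one: Delay takes the values 1, 3, 7, 15, ...,
      // saturating at 0x7FF (2047) iterations.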
      // CONSIDER: Delay += 1 + (Delay/4); Delay &= 0x7FF;
    }

    if (flgs & 2) continue;

    // ... [remainder of TrySpin elided] ...

static int ParkCommon(ParkEvent * ev, jlong timo) {
  // Diagnostic support - periodically unwedge blocked threads.
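  // When NativeMonitorTimeout is set it caps every park below, turning an
  // indefinite park (timo == 0) into a bounded one; the blocking callers in
  // this file invoke ParkCommon in retry loops, so a capped park is reissued.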
  intx nmt = NativeMonitorTimeout;
  if (nmt > 0 && (nmt < timo || timo <= 0)) {
    timo = nmt;
  }
  int err = OS_OK;
  if (0 == timo) {
    ev->park();
  } else {
    err = ev->park(timo);
  }
  return err;
}

inline int Monitor::AcquireOrPush(ParkEvent * ESelf) {
  intptr_t v = _LockWord.FullWord;
  for (;;) {
    if ((v & _LBIT) == 0) {
      const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v);
      if (u == v) return 1;  // indicate acquired
      v = u;
    } else {
      // Anticipate success ...
      ESelf->ListNext = (ParkEvent *)(v & ~_LBIT);
      const intptr_t u = Atomic::cmpxchg(intptr_t(ESelf)|_LBIT, &_LockWord.FullWord, v);
      if (u == v) return 0;  // indicate pushed onto cxq
      v = u;
    }
    // Interference - LockWord changed - just retry.
  }
}
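
// Caller contract for AcquireOrPush, illustrated: a return of 1 means the outer
// lock is held and the caller proceeds directly; a return of 0 means ESelf is
// linked onto the cxq and the caller must park until granted the OnDeck
// position (see ILock below).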

// ILock and IWait are the lowest level primitive internal blocking
// synchronization functions. The callers of IWait and ILock must have
// performed any needed state transitions beforehand.
// IWait and ILock may directly call park() without any concern for thread state.
// Note that ILock and IWait do *not* access _owner.
// _owner is a higher-level logical concept.

void Monitor::ILock(Thread * Self) {
  assert(_OnDeck != Self->_MutexEvent, "invariant");

  if (TryFast()) {
 Exeunt:
    assert(ILocked(), "invariant");
    return;
  }

  ParkEvent * const ESelf = Self->_MutexEvent;
  assert(_OnDeck != ESelf, "invariant");

  // As an optimization, spinners could conditionally try to set _OnDeck to _LBIT.
  // Synchronizer.cpp uses a similar optimization.
  if (TrySpin(Self)) goto Exeunt;

  // Slow-path - the lock is contended.
  // Either enqueue Self on cxq or acquire the outer lock.
  // LockWord encoding = (cxq,LOCKBYTE)
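  // That is, the full word carries the cxq list head (a ParkEvent*) in its
  // upper bits and the lock bit in the low-order byte, so a single CAS can
  // atomically update both the queue and the lock state.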
  ESelf->reset();
  OrderAccess::fence();

  // Optional optimization ... try barging on the inner lock.
  if ((NativeMonitorFlags & 32) && Atomic::cmpxchg(ESelf, &_OnDeck, (ParkEvent*)NULL) == NULL) {
    goto OnDeck_LOOP;
  }

  if (AcquireOrPush(ESelf)) goto Exeunt;

  // At any given time there is at most one OnDeck thread.
  // OnDeck implies not resident on cxq and not resident on EntryList.
  // Only the OnDeck thread can try to acquire -- contend for -- the lock.
  // CONSIDER: use Self->OnDeck instead of m->OnDeck.
  // Deschedule Self so that others may run.
  while (OrderAccess::load_acquire(&_OnDeck) != ESelf) {
    ParkCommon(ESelf, 0);
  }

  // Self is now in the OnDeck position and will remain so until it
  // manages to acquire the lock.
 OnDeck_LOOP:
  for (;;) {
    assert(_OnDeck == ESelf, "invariant");
    if (TrySpin(Self)) break;
    // It's probably wise to spin only if we *actually* blocked.
    // CONSIDER: check the lockbyte; if it remains set then
    // preemptively drain the cxq into the EntryList.
    // The best place and time to perform queue operations -- lock metadata --
    // is _before having acquired the outer lock, while waiting for the lock to drop.
    ParkCommon(ESelf, 0);
  }

  assert(_OnDeck == ESelf, "invariant");
  _OnDeck = NULL;

  // ... [remainder of ILock and the head of Monitor::IUnlock elided] ...

  if (((cxq & ~_LBIT)|UNS(_EntryList)) == 0) {
    return;  // normal fast-path exit - cxq and EntryList both empty
  }
  if (cxq & _LBIT) {
    // Optional optimization ...
    // Some other thread acquired the lock in the window since this
    // thread released it. Succession is now that thread's responsibility.
    return;
  }

 Succession:
  // Slow-path exit - this thread must ensure succession and progress.
  // OnDeck serves as lock to protect cxq and EntryList.
  // Only the holder of OnDeck can manipulate EntryList or detach the RATs
  // (recently-arrived threads) from cxq.
  // Avoid ABA - allow multiple concurrent producers (enqueue via push-CAS)
  // but only one concurrent consumer (detacher of RATs).
  // Consider protecting this critical section with schedctl on Solaris.
  // Unlike a normal lock, however, the exiting thread "locks" OnDeck,
  // picks a successor and marks that thread as OnDeck. That successor
  // thread will then clear OnDeck once it eventually acquires the outer lock.
  if (Atomic::cmpxchg((ParkEvent*)_LBIT, &_OnDeck, (ParkEvent*)NULL) != NULL) {
    return;
  }

  ParkEvent * List = _EntryList;
  if (List != NULL) {
    // Transfer the head of the EntryList to the OnDeck position.
    // Once OnDeck, a thread stays OnDeck until it acquires the lock.
    // For a given lock there is at most one OnDeck thread at any one instant.
   WakeOne:
    assert(List == _EntryList, "invariant");
    ParkEvent * const w = List;
    assert(RelaxAssert || w != Thread::current()->_MutexEvent, "invariant");
    _EntryList = w->ListNext;
    // As a diagnostic measure consider setting w->_ListNext = BAD.
    assert(intptr_t(_OnDeck) == _LBIT, "invariant");

    // Pass OnDeck role to w, ensuring that _EntryList has been set first.
    // w will clear _OnDeck once it acquires the outer lock.
    // Note that once we set _OnDeck that thread can acquire the mutex, proceed
    // with its critical section and then enter this code to unlock the mutex. So
    // you can have multiple threads active in IUnlock at the same time.
    OrderAccess::release_store(&_OnDeck, w);

    // Another optional optimization ...
    // For heavily contended locks it's not uncommon that some other
    // thread acquired the lock while this thread was arranging succession.
    // Try to defer the unpark() operation -- delegate the responsibility
    // for unpark()ing the OnDeck thread to the current or subsequent owners.
    // That is, the new owner is responsible for unparking the OnDeck thread.
    OrderAccess::storeload();
    cxq = _LockWord.FullWord;
    if (cxq & _LBIT) return;

    w->unpark();
    return;
  }

  cxq = _LockWord.FullWord;
  if ((cxq & ~_LBIT) != 0) {
    // The EntryList is empty but the cxq is populated.
    // Drain the RATs from the cxq into the EntryList:
    // detach the RAT segment with a CAS and then merge it into the EntryList.
    for (;;) {
      // Optional optimization - if locked, the owner is responsible for succession.
      if (cxq & _LBIT) goto Punt;
      const intptr_t vfy = Atomic::cmpxchg(cxq & _LBIT, &_LockWord.FullWord, cxq);
      if (vfy == cxq) break;
      cxq = vfy;
      // Interference - LockWord changed - just retry.
      // We can see concurrent interference from contending threads
      // pushing themselves onto the cxq or from lock-unlock operations.
      // From the perspective of this thread, EntryList is stable and
      // the cxq is prepend-only -- the head is volatile but the interior
      // of the cxq is stable. In theory if we encounter interference from threads
      // pushing onto cxq we could simply break off the original cxq suffix and
      // move that segment to the EntryList, avoiding a 2nd or multiple CAS attempts
      // on the high-traffic LockWord variable. For instance, let's say the cxq is "ABCD"
      // when we first fetch cxq above. Between the fetch -- where we observed "A"
      // -- and the CAS -- where we attempt to CAS null over A -- "PQR" arrive,
      // yielding cxq = "PQRABCD". In this case we could simply set A.ListNext to
      // null, leaving cxq = "PQRA", and transfer the "BCD" segment to the EntryList.
      // (A sketch of this suffix-detach idea appears after IUnlock, below.)
      // Note too, that it's safe for this thread to traverse the cxq
      // without taking any special concurrency precautions.
    }

    // We don't currently reorder the cxq segment as we move it onto
    // the EntryList, but it might make sense to reverse the order
    // or perhaps sort by thread priority. See the comments in
    // synchronizer.cpp objectMonitor::exit().
    assert(_EntryList == NULL, "invariant");
    _EntryList = List = (ParkEvent *)(cxq & ~_LBIT);
    assert(List != NULL, "invariant");
    goto WakeOne;
  }

  // cxq|EntryList is empty.
  // w == NULL implies that cxq|EntryList == NULL in the past.
  // Possible race - rare inopportune interleaving.
  // A thread could have added itself to cxq since this thread previously checked.
  // Detect and recover by refetching cxq.
 Punt:
  assert(intptr_t(_OnDeck) == _LBIT, "invariant");
  _OnDeck = NULL;            // Release inner lock.
  OrderAccess::storeload();  // Dekker duality - pivot point

  // Resample LockWord/cxq to recover from possible race.
  // For instance, while this thread T1 held OnDeck, some other thread T2 might
  // acquire the outer lock. Another thread T3 might try to acquire the outer
  // lock, but encounter contention and enqueue itself on cxq. T2 then drops the
  // outer lock, but skips succession as this thread T1 still holds OnDeck.
  // T1 is and remains responsible for ensuring succession of T3.
  //
  // Note that we don't need to recheck EntryList, just cxq.
  // If threads moved onto EntryList since we dropped OnDeck
  // that implies some other thread forced succession.
  cxq = _LockWord.FullWord;
  if ((cxq & ~_LBIT) != 0 && (cxq & _LBIT) == 0) {
    goto Succession;  // potential race -- re-run succession
  }
  return;
}

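// A hedged sketch of the suffix-detach idea described inside IUnlock's drain
// loop above. It is NOT used by this file; the helper name and the caller
// protocol are hypothetical. Given the head observed before a failed detach
// CAS (OldHead) and the head refetched afterward (NewHead), clip off the
// segment following OldHead -- safe because the cxq interior is stable and
// prepend-only -- and return it for transfer to the EntryList.
static ParkEvent * DetachSuffixSketch(ParkEvent * NewHead, ParkEvent * OldHead) {
  ParkEvent * p = NewHead;
  while (p != NULL && p != OldHead) p = p->ListNext;  // walk "PQR..." down to "A"
  if (p == NULL) return NULL;              // OldHead gone - caller falls back to re-CAS
  ParkEvent * const Suffix = p->ListNext;  // "BCD" when cxq went "ABCD" -> "PQRABCD"
  p->ListNext = NULL;                      // cxq retains "PQRA"
  return Suffix;                           // caller merges this segment into EntryList
}
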
bool Monitor::notify() {
  assert(_owner == Thread::current(), "invariant");
  assert(ILocked(), "invariant");
  if (_WaitSet == NULL) return true;
  NotifyCount++;

  // Transfer one thread from the WaitSet to the EntryList or cxq.
  // Currently we just unlink the head of the WaitSet and prepend it to the cxq.
  // And of course we could just unlink it and unpark it, too, but
  // in that case it'd likely impale itself on the reentry.
  Thread::muxAcquire(_WaitLock, "notify:WaitLock");
  ParkEvent * nfy = _WaitSet;
  if (nfy != NULL) {  // DCL idiom
    _WaitSet = nfy->ListNext;
    assert(nfy->Notified == 0, "invariant");
    // Push nfy onto the cxq.
    for (;;) {
      const intptr_t v = _LockWord.FullWord;
      assert((v & 0xFF) == _LBIT, "invariant");
      nfy->ListNext = (ParkEvent *)(v & ~_LBIT);
      if (Atomic::cmpxchg(intptr_t(nfy)|_LBIT, &_LockWord.FullWord, v) == v) break;
      // Interference - _LockWord changed -- just retry.
    }
    // Note that setting Notified before pushing nfy onto the cxq is
    // also legal and safe, but the safety properties are much more
    // subtle, so for the sake of code stewardship ...
    OrderAccess::fence();
    nfy->Notified = 1;
  }
  Thread::muxRelease(_WaitLock);
  if (nfy != NULL && (NativeMonitorFlags & 16)) {
    // Experimental code ... light up the wakee in the hope that this thread (the owner)
    // will drop the lock just about the time the wakee comes ONPROC.
    nfy->unpark();
  }
  assert(ILocked(), "invariant");
  return true;
}

// Currently notifyAll() transfers the waiters one-at-a-time from the WaitSet
// to the cxq. This could be done more efficiently with a single bulk en masse
// transfer, ...
// ... [remainder of notifyAll() and most of Monitor::IWait() elided] ...
      } else {  // found in interior
        assert(q->ListNext == p, "invariant");
        q->ListNext = p->ListNext;
      }
      WasOnWaitSet = 1;  // We were *not* notified but instead encountered timeout
    }
    Thread::muxRelease(_WaitLock);
  }

  // Reentry phase - reacquire the lock.
  if (WasOnWaitSet) {
    // ESelf was previously on the WaitSet but we just unlinked it above
    // because of a timeout. ESelf is not resident on any list and is not OnDeck.
    assert(_OnDeck != ESelf, "invariant");
    ILock(Self);
  } else {
    // A prior notify() operation moved ESelf from the WaitSet to the cxq.
    // ESelf is now on the cxq, EntryList or at the OnDeck position.
    // The following fragment is extracted from Monitor::ILock().
    for (;;) {
      if (OrderAccess::load_acquire(&_OnDeck) == ESelf && TrySpin(Self)) break;
      ParkCommon(ESelf, 0);
    }
    assert(_OnDeck == ESelf, "invariant");
    _OnDeck = NULL;
  }

  assert(ILocked(), "invariant");
  return WasOnWaitSet != 0;  // return true IFF timeout
}


// ON THE VMTHREAD SNEAKING PAST HELD LOCKS:
// In particular, there are certain types of global lock that may be held
// by a Java thread while it is blocked at a safepoint but before it has
// written the _owner field. These locks may be sneakily acquired by the
// VM thread during a safepoint to avoid deadlocks. Alternatively, one should
// identify all such locks, and ensure that Java threads never block at
// safepoints while holding them (_no_safepoint_check_flag). While it
// seems as though this could increase the time to reach a safepoint
// (or at least increase the mean, if not the variance), the latter
// ... [remainder of this commentary and the head of Monitor::jvm_raw_lock() elided] ...

  // Slow-path - apparent contention.
  // Allocate a ParkEvent for transient use.
  // The ParkEvent remains associated with this thread until
  // the time the thread manages to acquire the lock.
  ParkEvent * const ESelf = ParkEvent::Allocate(NULL);
  ESelf->reset();
  OrderAccess::storeload();

  // Either enqueue Self on cxq or acquire the outer lock.
  if (AcquireOrPush(ESelf)) {
    ParkEvent::Release(ESelf);  // surrender the ParkEvent
    goto Exeunt;
  }

  // At any given time there is at most one OnDeck thread.
  // OnDeck implies not resident on cxq and not resident on EntryList.
  // Only the OnDeck thread can try to acquire -- contend for -- the lock.
  // CONSIDER: use Self->OnDeck instead of m->OnDeck.
  for (;;) {
    if (OrderAccess::load_acquire(&_OnDeck) == ESelf && TrySpin(NULL)) break;
    ParkCommon(ESelf, 0);
  }

  assert(_OnDeck == ESelf, "invariant");
  _OnDeck = NULL;
  ParkEvent::Release(ESelf);  // surrender the ParkEvent
  goto Exeunt;
}

void Monitor::jvm_raw_unlock() {
  // Nearly the same as Monitor::unlock() ...
  // directly set _owner instead of using set_owner(null).
  _owner = NULL;
  if (_snuck) {  // ???
    assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sneak");
    _snuck = false;
    return;
  }
  IUnlock(false);
}