// Polite TATAS spinlock with exponential backoff - bounded spin.
// Ideally we'd use processor cycles, time or vtime to control
// the loop, but we currently use iterations.
// All the constants within were derived empirically but work over
// the spectrum of J2SE reference platforms.
// On Niagara-class systems the back-off is unnecessary but
// is relatively harmless. (At worst it'll slightly retard
// acquisition times). The back-off is critical for older SMP systems
// where constant fetching of the LockWord would otherwise impair
// scalability.
//
// Clamp spinning at approximately 1/2 of a context-switch round-trip.
// See synchronizer.cpp for details and rationale.
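//
// For orientation, the test-and-test-and-set (TATAS) with bounded exponential
// backoff idiom that TrySpin follows can be sketched roughly as below. This is
// a minimal illustration assuming C++11 std::atomic, not the Monitor code
// itself; tatas_try_lock and its constants are hypothetical stand-ins.
//
//   #include <atomic>
//   #include <cstdint>
//
//   static bool tatas_try_lock(std::atomic<intptr_t>& word, int spin_limit) {
//     int delay = 0;
//     for (int probes = 0; probes < spin_limit; ++probes) {
//       intptr_t v = word.load(std::memory_order_relaxed);            // test: read-only probe
//       if ((v & 1) == 0 &&
//           word.compare_exchange_weak(v, v | 1,
//                                      std::memory_order_acquire)) {
//         return true;                                                // test-and-set succeeded
//       }
//       if ((probes & 0x7) == 0) delay = ((delay << 1) | 1) & 0x7FF;  // grow backoff, clamped
//       for (int k = delay; --k >= 0;) { /* stall quietly */ }        // back off without stores
//     }
//     return false;                                                   // bounded spin exhausted
//   }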

int Monitor::TrySpin(Thread * const Self) {
  if (TryLock()) return 1;
  if (!os::is_MP()) return 0;

  int Probes  = 0;
  int Delay   = 0;
  int Steps   = 0;
  int SpinMax = NativeMonitorSpinLimit;
  int flgs    = NativeMonitorFlags;
  for (;;) {
    intptr_t v = _LockWord.FullWord;
    if ((v & _LBIT) == 0) {
      if (Atomic::cmpxchg (v|_LBIT, &_LockWord.FullWord, v) == v) {
        return 1;
      }
      continue;
    }

    if ((flgs & 8) == 0) {
      SpinPause();
    }

    // Periodically increase Delay -- variable Delay form
    // conceptually: delay *= 1 + 1/Exponent
    ++Probes;
    if (Probes > SpinMax) return 0;

    if ((Probes & 0x7) == 0) {
      Delay = ((Delay << 1)|1) & 0x7FF;
      // CONSIDER: Delay += 1 + (Delay/4); Delay &= 0x7FF;
    }
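    // (With the update above Delay steps through 1, 3, 7, 15, ..., and the
    //  & 0x7FF mask caps it at 2047, so each individual stall below stays bounded.)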

    if (flgs & 2) continue;

    // Consider checking _owner's schedctl state; if OFFPROC, abort the spin.
    // If the owner is OFFPROC then it's unlikely that the lock will be dropped
    // in a timely fashion, which suggests that spinning would not be fruitful
    // or profitable.

    // Stall for "Delay" time units - iterations in the current implementation.
    // Avoid generating coherency traffic while stalled.
    // Possible ways to delay:
    //   PAUSE, SLEEP, MEMBAR #sync, MEMBAR #halt,
    //   wr %g0,%asi, gethrtime, rdstick, rdtick, rdtsc, etc. ...
    // Note that on Niagara-class systems we want to minimize STs in the
    // spin loop. N1 and brethren write-around the L1$ over the xbar into the L2$.
    // Furthermore, they don't have a W$ like traditional SPARC processors.
    // We currently use a Marsaglia Shift-Xor RNG loop.
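    // (For reference, a Marsaglia shift-xor step has the general form
    //    x ^= x << a;  x ^= (unsigned)x >> b;  x ^= x << c;
    //  the shift constants here are illustrative -- see MarsagliaXORV for the
    //  ones actually used.)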
    Steps += Delay;
    if (Self != NULL) {
      jint rv = Self->rng[0];
      for (int k = Delay; --k >= 0;) {
        rv = MarsagliaXORV(rv);
        if ((flgs & 4) == 0 && SafepointMechanism::poll(Self)) return 0;
      }
      Self->rng[0] = rv;
    } else {
      Stall(Delay);
    }
  }
}

static int ParkCommon(ParkEvent * ev, jlong timo) {
  // Diagnostic support - periodically unwedge blocked threads
  intx nmt = NativeMonitorTimeout;
  if (nmt > 0 && (nmt < timo || timo <= 0)) {
    timo = nmt;
  }
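  // (With NativeMonitorTimeout set, an untimed park() below becomes a periodic
  //  timed park, so a blocked thread periodically returns from park() and
  //  re-examines its state -- or reports a spurious wakeup -- instead of
  //  hanging indefinitely.)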
  int err = OS_OK;
  if (0 == timo) {
    ev->park();
  } else {
    err = ev->park(timo);
  }
  return err;
}

inline int Monitor::AcquireOrPush(ParkEvent * ESelf) {
  intptr_t v = _LockWord.FullWord;
  for (;;) {
    if ((v & _LBIT) == 0) {
      const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v);
      if (u == v) return 1;        // indicate acquired
      v = u;
    } else {
      // Anticipate success ...
      ESelf->ListNext = (ParkEvent *)(v & ~_LBIT);
      const intptr_t u = Atomic::cmpxchg(intptr_t(ESelf)|_LBIT, &_LockWord.FullWord, v);

  if (TryFast()) {
 Exeunt:
    assert(ILocked(), "invariant");
    return;
  }

  ParkEvent * const ESelf = Self->_MutexEvent;
  assert(_OnDeck != ESelf, "invariant");

  // As an optimization, spinners could conditionally try to set _OnDeck to _LBIT
  // Synchronizer.cpp uses a similar optimization.
  if (TrySpin(Self)) goto Exeunt;

  // Slow-path - the lock is contended.
  // Either Enqueue Self on cxq or acquire the outer lock.
  // LockWord encoding = (cxq,LOCKBYTE)
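  // (Concretely, assuming the encoding used throughout this file: _LBIT lives in
  //  the low-order byte of _LockWord and the remaining bits hold the head of the
  //  cxq as a ParkEvent*, so
  //    bool       locked = (v & _LBIT) != 0;
  //    ParkEvent* head   = (ParkEvent *)(v & ~_LBIT);
  //  ParkEvent alignment keeps those low bits free for the lock byte.)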
  ESelf->reset();
  OrderAccess::fence();

  // Optional optimization ... try barging on the inner lock
  if ((NativeMonitorFlags & 32) && Atomic::replace_if_null(ESelf, &_OnDeck)) {
    goto OnDeck_LOOP;
  }

  if (AcquireOrPush(ESelf)) goto Exeunt;

  // At any given time there is at most one ondeck thread.
  // ondeck implies not resident on cxq and not resident on EntryList
  // Only the OnDeck thread can try to acquire -- contend for -- the lock.
  // CONSIDER: use Self->OnDeck instead of m->OnDeck.
  // Deschedule Self so that others may run.
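  // (In other words: park until an exiting thread selects ESelf as successor and
  //  installs it in _OnDeck via IUnlock(); only then does Self contend for the
  //  outer lock in the loop below.)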
  while (OrderAccess::load_acquire(&_OnDeck) != ESelf) {
    ParkCommon(ESelf, 0);
  }

  // Self is now in the OnDeck position and will remain so until it
  // manages to acquire the lock.
 OnDeck_LOOP:
  for (;;) {
    assert(_OnDeck == ESelf, "invariant");
    if (TrySpin(Self)) break;
    // It's probably wise to spin only if we *actually* blocked
    // CONSIDER: check the lockbyte; if it remains set then
    // preemptively drain the cxq into the EntryList.
    // The best place and time to perform queue operations -- lock metadata --
    // is _before having acquired the outer lock, while waiting for the lock to drop.
    ParkCommon(ESelf, 0);
  }

  assert(_OnDeck == ESelf, "invariant");
  _OnDeck = NULL;

  // Note that we currently drop the inner lock (clear OnDeck) in the slow-path
  // epilogue immediately after having acquired the outer lock.
  // But instead we could consider the following optimizations:
  // A. Shift or defer dropping the inner lock until the subsequent IUnlock() operation.
  //    This might avoid potential reacquisition of the inner lock in IUnlock().
  // B. While still holding the inner lock, attempt to opportunistically select
  Thread::muxAcquire(_WaitLock, "notify:WaitLock");
  ParkEvent * nfy = _WaitSet;
  if (nfy != NULL) {                  // DCL idiom
    _WaitSet = nfy->ListNext;
    assert(nfy->Notified == 0, "invariant");
    // push nfy onto the cxq
    for (;;) {
      const intptr_t v = _LockWord.FullWord;
      assert((v & 0xFF) == _LBIT, "invariant");
      nfy->ListNext = (ParkEvent *)(v & ~_LBIT);
      if (Atomic::cmpxchg(intptr_t(nfy)|_LBIT, &_LockWord.FullWord, v) == v) break;
      // interference - _LockWord changed -- just retry
    }
    // Note that setting Notified before pushing nfy onto the cxq is
    // also legal and safe, but the safety properties are much more
    // subtle, so for the sake of code stewardship ...
    OrderAccess::fence();
    nfy->Notified = 1;
  }
  Thread::muxRelease(_WaitLock);
  if (nfy != NULL && (NativeMonitorFlags & 16)) {
    // Experimental code ... light up the wakee in the hope that this thread (the owner)
    // will drop the lock just about the time the wakee comes ONPROC.
    nfy->unpark();
  }
  assert(ILocked(), "invariant");
  return true;
}

// Currently notifyAll() transfers the waiters one-at-a-time from the waitset
// to the cxq. This could be done more efficiently with a single bulk en masse transfer,
// but in practice notifyAll() for large #s of threads is rare and not time-critical.
// Beware, too, that we invert the order of the waiters. Let's say that the
// waitset is "ABCD" and the cxq is "XYZ". After a notifyAll() the waitset
// will be empty and the cxq will be "DCBAXYZ". This is benign, of course.

bool Monitor::notify_all() {
  assert(_owner == Thread::current(), "invariant");
  assert(ILocked(), "invariant");
  while (_WaitSet != NULL) notify();
  return true;
}

int Monitor::IWait(Thread * Self, jlong timo) {
  assert(ILocked(), "invariant");

  // Release the outer lock
  // We call IUnlock (RelaxAssert=true) as a thread T1 might
  // enqueue itself on the WaitSet, call IUnlock(), drop the lock,
  // and then stall before it can attempt to wake a successor.
  // Some other thread T2 acquires the lock, and calls notify(), moving
  // T1 from the WaitSet to the cxq. T2 then drops the lock. T1 resumes,
  // and then finds *itself* on the cxq. During the course of a normal
  // IUnlock() call a thread should _never find itself on the EntryList
  // or cxq, but in the case of wait() it's possible.
  // See synchronizer.cpp objectMonitor::wait().
  IUnlock(true);

  // Wait for either notification or timeout
  // Beware that in some circumstances we might propagate
  // spurious wakeups back to the caller.

  for (;;) {
    if (ESelf->Notified) break;
    int err = ParkCommon(ESelf, timo);
    if (err == OS_TIMEOUT || (NativeMonitorFlags & 1)) break;
  }

  // Prepare for reentry - if necessary, remove ESelf from WaitSet
  // ESelf can be:
  // 1. Still on the WaitSet. This can happen if we exited the loop by timeout.
  // 2. On the cxq or EntryList
  // 3. Not resident on cxq, EntryList or WaitSet, but in the OnDeck position.

  OrderAccess::fence();
  int WasOnWaitSet = 0;
  if (ESelf->Notified == 0) {
    Thread::muxAcquire(_WaitLock, "wait:WaitLock:remove");
    if (ESelf->Notified == 0) {     // DCL idiom
      assert(_OnDeck != ESelf, "invariant");   // can't be both OnDeck and on WaitSet
      // ESelf is resident on the WaitSet -- unlink it.
      // A doubly-linked list would be better here so we can unlink in constant-time.
      // We have to unlink before we potentially recontend as ESelf might otherwise
      // end up on the cxq|EntryList -- it can't be on two lists at once.
      ParkEvent * p = _WaitSet;
      ParkEvent * q = NULL;           // classic q chases p
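      // (For reference, the classic "q chases p" unlink over a singly-linked list
      //  has the shape sketched below; this is the general idiom, not necessarily
      //  the exact code that follows:
      //    for (; p != NULL; q = p, p = p->ListNext) {
      //      if (p == ESelf) {
      //        if (q == NULL) _WaitSet = p->ListNext;      // unlink the head
      //        else           q->ListNext = p->ListNext;   // unlink an interior node
      //        break;
      //      }
      //    } )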