
src/hotspot/share/runtime/mutex.cpp





 331 // Polite TATAS spinlock with exponential backoff - bounded spin.
 332 // Ideally we'd use processor cycles, time or vtime to control
 333 // the loop, but we currently use iterations.
 334 // All the constants within were derived empirically but work over
 335 // the spectrum of J2SE reference platforms.
 336 // On Niagara-class systems the back-off is unnecessary but
 337 // is relatively harmless.  (At worst it'll slightly retard
 338 // acquisition times).  The back-off is critical for older SMP systems
 339 // where constant fetching of the LockWord would otherwise impair
 340 // scalability.
 341 //
 342 // Clamp spinning at approximately 1/2 of a context-switch round-trip.
 343 // See synchronizer.cpp for details and rationale.
 344 
 345 int Monitor::TrySpin(Thread * const Self) {
 346   if (TryLock())    return 1;
 347   if (!os::is_MP()) return 0;
 348 
 349   int Probes  = 0;
 350   int Delay   = 0;
 351   int Steps   = 0;
 352   int SpinMax = NativeMonitorSpinLimit;
 353   int flgs    = NativeMonitorFlags;
 354   for (;;) {
 355     intptr_t v = _LockWord.FullWord;
 356     if ((v & _LBIT) == 0) {
 357       if (Atomic::cmpxchg (v|_LBIT, &_LockWord.FullWord, v) == v) {
 358         return 1;
 359       }
 360       continue;
 361     }
 362 
 363     if ((flgs & 8) == 0) {
 364       SpinPause();
 365     }
 366 
 367     // Periodically increase Delay -- variable Delay form
 368     // conceptually: delay *= 1 + 1/Exponent
 369     ++Probes;
 370     if (Probes > SpinMax) return 0;
 371 
 372     if ((Probes & 0x7) == 0) {
 373       Delay = ((Delay << 1)|1) & 0x7FF;
 374       // CONSIDER: Delay += 1 + (Delay/4); Delay &= 0x7FF ;
 375     }
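    // (With the update above, Delay steps through 1, 3, 7, 15, ... and is
    // clamped by the 0x7FF mask at 2047 -- i.e. it roughly doubles on every
    // 8th probe, which is the geometric growth the header comment describes.)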
 376 
 377     if (flgs & 2) continue;
 378 
 379     // Consider checking _owner's schedctl state, if OFFPROC abort spin.
 380     // If the owner is OFFPROC then it's unlikely that the lock will be dropped
 381     // in a timely fashion, which suggests that spinning would not be fruitful
 382     // or profitable.
 383 
 384     // Stall for "Delay" time units - iterations in the current implementation.
 385     // Avoid generating coherency traffic while stalled.
 386     // Possible ways to delay:
 387     //   PAUSE, SLEEP, MEMBAR #sync, MEMBAR #halt,
 388     //   wr %g0,%asi, gethrtime, rdstick, rdtick, rdtsc, etc. ...
 389     // Note that on Niagara-class systems we want to minimize STs in the
 390     // spin loop.  N1 and brethren write-around the L1$ over the xbar into the L2$.
 391     // Furthermore, they don't have a W$ like traditional SPARC processors.
 392     // We currently use a Marsaglia Shift-Xor RNG loop.
 393     Steps += Delay;
 394     if (Self != NULL) {
 395       jint rv = Self->rng[0];
 396       for (int k = Delay; --k >= 0;) {
 397         rv = MarsagliaXORV(rv);
 398         if ((flgs & 4) == 0 && SafepointMechanism::poll(Self)) return 0;
 399       }
 400       Self->rng[0] = rv;
 401     } else {
 402       Stall(Delay);
 403     }
 404   }
 405 }
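// Illustration only (not part of mutex.cpp): a minimal, self-contained sketch
// of the polite TATAS-with-exponential-backoff loop described in the header
// comment above.  It uses std::atomic instead of the _LockWord/_LBIT encoding,
// the spin bound and shift constants are examples rather than the
// NativeMonitorSpinLimit defaults, and the shift-xor step only approximates
// MarsagliaXORV.
#include <atomic>
#include <cstdint>

class TatasBackoffLock {
  std::atomic<intptr_t> _lockword{0};              // 0 == free, 1 == held
 public:
  // Returns true if acquired within the spin budget, false if the caller
  // should fall back to blocking.
  bool try_spin(uint32_t& rng, int spin_max = 4096) {
    if (rng == 0) rng = 0x9E3779B9u;               // xorshift must not start at 0
    int delay = 0;
    for (int probes = 1; probes <= spin_max; ++probes) {
      // "Test" before test-and-set: read the line shared, no write traffic.
      if (_lockword.load(std::memory_order_relaxed) == 0) {
        intptr_t expected = 0;
        if (_lockword.compare_exchange_weak(expected, 1,
                                            std::memory_order_acquire)) {
          return true;                             // acquired
        }
        continue;                                  // lost the race -- retry
      }
      // Exponential backoff: roughly double the delay every 8th probe.
      if ((probes & 0x7) == 0) delay = ((delay << 1) | 1) & 0x7FF;
      // Stall without generating stores: run a Marsaglia shift-xor RNG.
      for (int k = delay; --k >= 0;) {
        rng ^= rng << 6;  rng ^= rng >> 21;  rng ^= rng << 7;
      }
    }
    return false;
  }
  void unlock() { _lockword.store(0, std::memory_order_release); }
};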
 406 
 407 static int ParkCommon(ParkEvent * ev, jlong timo) {
 408   // Diagnostic support - periodically unwedge blocked threads
 409   intx nmt = NativeMonitorTimeout;
 410   if (nmt > 0 && (nmt < timo || timo <= 0)) {
 411     timo = nmt;
 412   }
 413   int err = OS_OK;
 414   if (0 == timo) {
 415     ev->park();
 416   } else {
 417     err = ev->park(timo);
 418   }
 419   return err;
 420 }
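// Illustration only (hypothetical helper, not part of mutex.cpp): a standalone
// restatement of the timeout clamp above.  It assumes the same convention that
// timo == 0 means "park indefinitely"; the values in the comment are examples,
// not the NativeMonitorTimeout default.
//
// With nmt == 500:
//   timo == 0    -> 500   (an "infinite" park becomes repeated 500 ms parks,
//                          so a wedged thread periodically wakes and re-checks)
//   timo == 2000 -> 500   (long waits are capped the same way)
//   timo == 100  -> 100   (waits shorter than the clamp are unaffected)
static jlong clamp_park_timeout(jlong timo, intx nmt /* NativeMonitorTimeout */) {
  if (nmt > 0 && (nmt < timo || timo <= 0)) {
    return nmt;
  }
  return timo;
}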
 421 
 422 inline int Monitor::AcquireOrPush(ParkEvent * ESelf) {
 423   intptr_t v = _LockWord.FullWord;
 424   for (;;) {
 425     if ((v & _LBIT) == 0) {
 426       const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v);
 427       if (u == v) return 1;        // indicate acquired
 428       v = u;
 429     } else {
 430       // Anticipate success ...
 431       ESelf->ListNext = (ParkEvent *)(v & ~_LBIT);
 432       const intptr_t u = Atomic::cmpxchg(intptr_t(ESelf)|_LBIT, &_LockWord.FullWord, v);


 449 
 450   if (TryFast()) {
 451  Exeunt:
 452     assert(ILocked(), "invariant");
 453     return;
 454   }
 455 
 456   ParkEvent * const ESelf = Self->_MutexEvent;
 457   assert(_OnDeck != ESelf, "invariant");
 458 
 459   // As an optimization, spinners could conditionally try to set _OnDeck to _LBIT
 460   // Synchronizer.cpp uses a similar optimization.
 461   if (TrySpin(Self)) goto Exeunt;
 462 
 463   // Slow-path - the lock is contended.
 464   // Either Enqueue Self on cxq or acquire the outer lock.
 465   // LockWord encoding = (cxq,LOCKBYTE)
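  // An illustrative picture of that encoding (assumption: the authoritative
  // definition lives with the Monitor/_LockWord declarations, not here):
  //
  //   _LockWord.FullWord  =  (intptr_t)(cxq head ParkEvent*)  |  LOCKBYTE
  //
  //   (v & _LBIT) != 0          -> the outer lock is held
  //   (ParkEvent*)(v & ~_LBIT)  -> head of the cxq push-stack of waiters
  //
  // AcquireOrPush(), used just below, therefore either CASes the lock byte in
  // (acquire) or CASes itself in as the new cxq head with the byte still set (push).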
 466   ESelf->reset();
 467   OrderAccess::fence();
 468 
 469   // Optional optimization ... try barging on the inner lock
 470   if ((NativeMonitorFlags & 32) && Atomic::replace_if_null(ESelf, &_OnDeck)) {
 471     goto OnDeck_LOOP;
 472   }
 473 
 474   if (AcquireOrPush(ESelf)) goto Exeunt;
 475 
 476   // At any given time there is at most one ondeck thread.
 477   // ondeck implies not resident on cxq and not resident on EntryList
 478   // Only the OnDeck thread can try to acquire -- contend for -- the lock.
 479   // CONSIDER: use Self->OnDeck instead of m->OnDeck.
 480   // Deschedule Self so that others may run.
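  // (In other words the contended-enter protocol is two-staged: first park
  //  until the unlocker nominates this ParkEvent as _OnDeck, then -- holding
  //  that inner "on deck" privilege exclusively -- spin/park in the loop below
  //  until the outer lock byte is actually acquired via TrySpin().)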
 481   while (OrderAccess::load_acquire(&_OnDeck) != ESelf) {
 482     ParkCommon(ESelf, 0);
 483   }
 484 
 485   // Self is now in the OnDeck position and will remain so until it
 486   // manages to acquire the lock.
 487  OnDeck_LOOP:
 488   for (;;) {
 489     assert(_OnDeck == ESelf, "invariant");
 490     if (TrySpin(Self)) break;
 491     // It's probably wise to spin only if we *actually* blocked
 492     // CONSIDER: check the lockbyte, if it remains set then
 493     // preemptively drain the cxq into the EntryList.
 494     // The best place and time to perform queue operations -- lock metadata --
 495     // is before having acquired the outer lock, while waiting for the lock to drop.
 496     ParkCommon(ESelf, 0);
 497   }
 498 
 499   assert(_OnDeck == ESelf, "invariant");
 500   _OnDeck = NULL;
 501 
 502   // Note that we currently drop the inner lock (clear OnDeck) in the slow-path
 503   // epilogue immediately after having acquired the outer lock.
 504   // But instead we could consider the following optimizations:
 505   // A. Shift or defer dropping the inner lock until the subsequent IUnlock() operation.
 506   //    This might avoid potential reacquisition of the inner lock in IUnlock().
 507   // B. While still holding the inner lock, attempt to opportunistically select


 689   Thread::muxAcquire(_WaitLock, "notify:WaitLock");
 690   ParkEvent * nfy = _WaitSet;
 691   if (nfy != NULL) {                  // DCL idiom
 692     _WaitSet = nfy->ListNext;
 693     assert(nfy->Notified == 0, "invariant");
 694     // push nfy onto the cxq
 695     for (;;) {
 696       const intptr_t v = _LockWord.FullWord;
 697       assert((v & 0xFF) == _LBIT, "invariant");
 698       nfy->ListNext = (ParkEvent *)(v & ~_LBIT);
 699       if (Atomic::cmpxchg(intptr_t(nfy)|_LBIT, &_LockWord.FullWord, v) == v) break;
 700       // interference - _LockWord changed -- just retry
 701     }
 702     // Note that setting Notified before pushing nfy onto the cxq is
 703     // also legal and safe, but the safety properties are much more
 704     // subtle, so for the sake of code stewardship ...
 705     OrderAccess::fence();
 706     nfy->Notified = 1;
 707   }
 708   Thread::muxRelease(_WaitLock);
 709   if (nfy != NULL && (NativeMonitorFlags & 16)) {
 710     // Experimental code ... light up the wakee in the hope that this thread (the owner)
 711     // will drop the lock just about the time the wakee comes ONPROC.
 712     nfy->unpark();
 713   }
 714   assert(ILocked(), "invariant");
 715   return true;
 716 }
 717 
 718 // Currently notifyAll() transfers the waiters one-at-a-time from the waitset
 719 // to the cxq.  This could be done more efficiently with a single en masse transfer,
 720 // but in practice notifyAll() for large #s of threads is rare and not time-critical.
 721 // Beware, too, that we invert the order of the waiters.  Let's say that the
 722 // waitset is "ABCD" and the cxq is "XYZ".  After a notifyAll() the waitset
 723 // will be empty and the cxq will be "DCBAXYZ".  This is benign, of course.
 724 
 725 bool Monitor::notify_all() {
 726   assert(_owner == Thread::current(), "invariant");
 727   assert(ILocked(), "invariant");
 728   while (_WaitSet != NULL) notify();
 729   return true;
 730 }
 731 
 732 int Monitor::IWait(Thread * Self, jlong timo) {
 733   assert(ILocked(), "invariant");


 777 
 778   // Release the outer lock
 779   // We call IUnlock (RelaxAssert=true) as a thread T1 might
 780   // enqueue itself on the WaitSet, call IUnlock(), drop the lock,
 781   // and then stall before it can attempt to wake a successor.
 782   // Some other thread T2 acquires the lock, and calls notify(), moving
 783   // T1 from the WaitSet to the cxq.  T2 then drops the lock.  T1 resumes,
 784   // and then finds *itself* on the cxq.  During the course of a normal
 785   // IUnlock() call a thread should never find itself on the EntryList
 786   // or cxq, but in the case of wait() it's possible.
 787   // See synchronizer.cpp objectMonitor::wait().
 788   IUnlock(true);
 789 
 790   // Wait for either notification or timeout
 791   // Beware that in some circumstances we might propagate
 792   // spurious wakeups back to the caller.
 793 
 794   for (;;) {
 795     if (ESelf->Notified) break;
 796     int err = ParkCommon(ESelf, timo);
 797     if (err == OS_TIMEOUT || (NativeMonitorFlags & 1)) break;
 798   }
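  // Note for callers: a return from wait() is a hint, not a guarantee that the
  // awaited condition holds -- re-check the guarded predicate in a loop.
  // Sketch only (not the real Monitor API):
  //
  //   mu->lock();
  //   while (!condition) {
  //     mu->wait(...);        // may return on timeout or spuriously
  //   }
  //   mu->unlock();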
 799 
 800   // Prepare for reentry - if necessary, remove ESelf from WaitSet
 801   // ESelf can be:
 802   // 1. Still on the WaitSet.  This can happen if we exited the loop by timeout.
 803   // 2. On the cxq or EntryList
 804   // 3. Not resident on cxq, EntryList or WaitSet, but in the OnDeck position.
 805 
 806   OrderAccess::fence();
 807   int WasOnWaitSet = 0;
 808   if (ESelf->Notified == 0) {
 809     Thread::muxAcquire(_WaitLock, "wait:WaitLock:remove");
 810     if (ESelf->Notified == 0) {     // DCL idiom
 811       assert(_OnDeck != ESelf, "invariant");   // can't be both OnDeck and on WaitSet
 812       // ESelf is resident on the WaitSet -- unlink it.
 813       // A doubly-linked list would be better here so we can unlink in constant-time.
 814       // We have to unlink before we potentially recontend as ESelf might otherwise
 815       // end up on the cxq|EntryList -- it can't be on two lists at once.
 816       ParkEvent * p = _WaitSet;
 817       ParkEvent * q = NULL;            // classic q chases p




 331 // Polite TATAS spinlock with exponential backoff - bounded spin.
 332 // Ideally we'd use processor cycles, time or vtime to control
 333 // the loop, but we currently use iterations.
 334 // All the constants within were derived empirically but work over
 335 // the spectrum of J2SE reference platforms.
 336 // On Niagara-class systems the back-off is unnecessary but
 337 // is relatively harmless.  (At worst it'll slightly retard
 338 // acquisition times).  The back-off is critical for older SMP systems
 339 // where constant fetching of the LockWord would otherwise impair
 340 // scalability.
 341 //
 342 // Clamp spinning at approximately 1/2 of a context-switch round-trip.
 343 // See synchronizer.cpp for details and rationale.
 344 
 345 int Monitor::TrySpin(Thread * const Self) {
 346   if (TryLock())    return 1;
 347   if (!os::is_MP()) return 0;
 348 
 349   int Probes  = 0;
 350   int Delay   = 0;
 351   int SpinMax = 20;


 352   for (;;) {
 353     intptr_t v = _LockWord.FullWord;
 354     if ((v & _LBIT) == 0) {
 355       if (Atomic::cmpxchg (v|_LBIT, &_LockWord.FullWord, v) == v) {
 356         return 1;
 357       }
 358       continue;
 359     }
 360 

 361     SpinPause();

 362 
 363     // Periodically increase Delay -- variable Delay form
 364     // conceptually: delay *= 1 + 1/Exponent
 365     ++Probes;
 366     if (Probes > SpinMax) return 0;
 367 
 368     if ((Probes & 0x7) == 0) {
 369       Delay = ((Delay << 1)|1) & 0x7FF;
 370       // CONSIDER: Delay += 1 + (Delay/4); Delay &= 0x7FF ;
 371     }
 372 


 373     // Consider checking _owner's schedctl state, if OFFPROC abort spin.
 374     // If the owner is OFFPROC then it's unlikely that the lock will be dropped
 375     // in a timely fashion, which suggests that spinning would not be fruitful
 376     // or profitable.
 377 
 378     // Stall for "Delay" time units - iterations in the current implementation.
 379     // Avoid generating coherency traffic while stalled.
 380     // Possible ways to delay:
 381     //   PAUSE, SLEEP, MEMBAR #sync, MEMBAR #halt,
 382     //   wr %g0,%asi, gethrtime, rdstick, rdtick, rdtsc, etc. ...
 383     // Note that on Niagara-class systems we want to minimize STs in the
 384     // spin loop.  N1 and brethren write-around the L1$ over the xbar into the L2$.
 385     // Furthermore, they don't have a W$ like traditional SPARC processors.
 386     // We currently use a Marsaglia Shift-Xor RNG loop.

 387     if (Self != NULL) {
 388       jint rv = Self->rng[0];
 389       for (int k = Delay; --k >= 0;) {
 390         rv = MarsagliaXORV(rv);
 391         if (SafepointMechanism::poll(Self)) return 0;
 392       }
 393       Self->rng[0] = rv;
 394     } else {
 395       Stall(Delay);
 396     }
 397   }
 398 }
 399 
 400 static int ParkCommon(ParkEvent * ev, jlong timo) {
 401   // Diagnostic support - periodically unwedge blocked threads




 402   int err = OS_OK;
 403   if (0 == timo) {
 404     ev->park();
 405   } else {
 406     err = ev->park(timo);
 407   }
 408   return err;
 409 }
 410 
 411 inline int Monitor::AcquireOrPush(ParkEvent * ESelf) {
 412   intptr_t v = _LockWord.FullWord;
 413   for (;;) {
 414     if ((v & _LBIT) == 0) {
 415       const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v);
 416       if (u == v) return 1;        // indicate acquired
 417       v = u;
 418     } else {
 419       // Anticipate success ...
 420       ESelf->ListNext = (ParkEvent *)(v & ~_LBIT);
 421       const intptr_t u = Atomic::cmpxchg(intptr_t(ESelf)|_LBIT, &_LockWord.FullWord, v);


 438 
 439   if (TryFast()) {
 440  Exeunt:
 441     assert(ILocked(), "invariant");
 442     return;
 443   }
 444 
 445   ParkEvent * const ESelf = Self->_MutexEvent;
 446   assert(_OnDeck != ESelf, "invariant");
 447 
 448   // As an optimization, spinners could conditionally try to set _OnDeck to _LBIT
 449   // Synchronizer.cpp uses a similar optimization.
 450   if (TrySpin(Self)) goto Exeunt;
 451 
 452   // Slow-path - the lock is contended.
 453   // Either Enqueue Self on cxq or acquire the outer lock.
 454   // LockWord encoding = (cxq,LOCKBYTE)
 455   ESelf->reset();
 456   OrderAccess::fence();
 457 





 458   if (AcquireOrPush(ESelf)) goto Exeunt;
 459 
 460   // At any given time there is at most one ondeck thread.
 461   // ondeck implies not resident on cxq and not resident on EntryList
 462   // Only the OnDeck thread can try to acquire -- contend for -- the lock.
 463   // CONSIDER: use Self->OnDeck instead of m->OnDeck.
 464   // Deschedule Self so that others may run.
 465   while (OrderAccess::load_acquire(&_OnDeck) != ESelf) {
 466     ParkCommon(ESelf, 0);
 467   }
 468 
 469   // Self is now in the OnDeck position and will remain so until it
 470   // manages to acquire the lock.

 471   for (;;) {
 472     assert(_OnDeck == ESelf, "invariant");
 473     if (TrySpin(Self)) break;
 474     // It's probably wise to spin only if we *actually* blocked
 475     // CONSIDER: check the lockbyte, if it remains set then
 476     // preemptively drain the cxq into the EntryList.
 477     // The best place and time to perform queue operations -- lock metadata --
 478     // is before having acquired the outer lock, while waiting for the lock to drop.
 479     ParkCommon(ESelf, 0);
 480   }
 481 
 482   assert(_OnDeck == ESelf, "invariant");
 483   _OnDeck = NULL;
 484 
 485   // Note that we currently drop the inner lock (clear OnDeck) in the slow-path
 486   // epilogue immediately after having acquired the outer lock.
 487   // But instead we could consider the following optimizations:
 488   // A. Shift or defer dropping the inner lock until the subsequent IUnlock() operation.
 489   //    This might avoid potential reacquisition of the inner lock in IUnlock().
 490   // B. While still holding the inner lock, attempt to opportunistically select


 672   Thread::muxAcquire(_WaitLock, "notify:WaitLock");
 673   ParkEvent * nfy = _WaitSet;
 674   if (nfy != NULL) {                  // DCL idiom
 675     _WaitSet = nfy->ListNext;
 676     assert(nfy->Notified == 0, "invariant");
 677     // push nfy onto the cxq
 678     for (;;) {
 679       const intptr_t v = _LockWord.FullWord;
 680       assert((v & 0xFF) == _LBIT, "invariant");
 681       nfy->ListNext = (ParkEvent *)(v & ~_LBIT);
 682       if (Atomic::cmpxchg(intptr_t(nfy)|_LBIT, &_LockWord.FullWord, v) == v) break;
 683       // interference - _LockWord changed -- just retry
 684     }
 685     // Note that setting Notified before pushing nfy onto the cxq is
 686     // also legal and safe, but the safety properties are much more
 687     // subtle, so for the sake of code stewardship ...
 688     OrderAccess::fence();
 689     nfy->Notified = 1;
 690   }
 691   Thread::muxRelease(_WaitLock);





 692   assert(ILocked(), "invariant");
 693   return true;
 694 }
 695 
 696 // Currently notifyAll() transfers the waiters one-at-a-time from the waitset
 697 // to the cxq.  This could be done more efficiently with a single en masse transfer,
 698 // but in practice notifyAll() for large #s of threads is rare and not time-critical.
 699 // Beware, too, that we invert the order of the waiters.  Let's say that the
 700 // waitset is "ABCD" and the cxq is "XYZ".  After a notifyAll() the waitset
 701 // will be empty and the cxq will be "DCBAXYZ".  This is benign, of course.
 702 
 703 bool Monitor::notify_all() {
 704   assert(_owner == Thread::current(), "invariant");
 705   assert(ILocked(), "invariant");
 706   while (_WaitSet != NULL) notify();
 707   return true;
 708 }
 709 
 710 int Monitor::IWait(Thread * Self, jlong timo) {
 711   assert(ILocked(), "invariant");


 755 
 756   // Release the outer lock
 757   // We call IUnlock (RelaxAssert=true) as a thread T1 might
 758   // enqueue itself on the WaitSet, call IUnlock(), drop the lock,
 759   // and then stall before it can attempt to wake a successor.
 760   // Some other thread T2 acquires the lock, and calls notify(), moving
 761   // T1 from the WaitSet to the cxq.  T2 then drops the lock.  T1 resumes,
 762   // and then finds *itself* on the cxq.  During the course of a normal
 763   // IUnlock() call a thread should never find itself on the EntryList
 764   // or cxq, but in the case of wait() it's possible.
 765   // See synchronizer.cpp objectMonitor::wait().
 766   IUnlock(true);
 767 
 768   // Wait for either notification or timeout
 769   // Beware that in some circumstances we might propagate
 770   // spurious wakeups back to the caller.
 771 
 772   for (;;) {
 773     if (ESelf->Notified) break;
 774     int err = ParkCommon(ESelf, timo);
 775     if (err == OS_TIMEOUT) break;
 776   }
 777 
 778   // Prepare for reentry - if necessary, remove ESelf from WaitSet
 779   // ESelf can be:
 780   // 1. Still on the WaitSet.  This can happen if we exited the loop by timeout.
 781   // 2. On the cxq or EntryList
 782   // 3. Not resident on cxq, EntryList or WaitSet, but in the OnDeck position.
 783 
 784   OrderAccess::fence();
 785   int WasOnWaitSet = 0;
 786   if (ESelf->Notified == 0) {
 787     Thread::muxAcquire(_WaitLock, "wait:WaitLock:remove");
 788     if (ESelf->Notified == 0) {     // DCL idiom
 789       assert(_OnDeck != ESelf, "invariant");   // can't be both OnDeck and on WaitSet
 790       // ESelf is resident on the WaitSet -- unlink it.
 791       // A doubly-linked list would be better here so we can unlink in constant-time.
 792       // We have to unlink before we potentially recontend as ESelf might otherwise
 793       // end up on the cxq|EntryList -- it can't be on two lists at once.
 794       ParkEvent * p = _WaitSet;
 795       ParkEvent * q = NULL;            // classic q chases p

