// them together was facile -- a bit too facile. The current implementation badly
// conflates the two concerns.
//
// * TODO-FIXME:
//
//    -- Add DTRACE probes for contended acquire, contended acquired, contended unlock
//       We should also add DTRACE probes in the ParkEvent subsystem for
//       Park-entry, Park-exit, and Unpark.
//
//    -- We have an excess of mutex-like constructs in the JVM, namely:
//       1. objectMonitors for Java-level synchronization (synchronizer.cpp)
//       2. low-level muxAcquire and muxRelease
//       3. low-level spinAcquire and spinRelease
//       4. native Mutex:: and Monitor::
//       5. jvm_raw_lock() and _unlock()
//       6. JVMTI raw monitors -- distinct from (5) despite having a confusingly
//          similar name.
//
// o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o


// CASPTR() uses the canonical argument order that dominates in the literature.
// Our internal cmpxchg_ptr() uses a bastardized ordering to accommodate Sun .il templates.

#define CASPTR(a, c, s) \
  intptr_t(Atomic::cmpxchg_ptr((void *)(s), (void *)(a), (void *)(c)))
#define UNS(x) (uintptr_t(x))
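// TRACE(m) is rate-limited: it prints only when its call count reaches a power
// of two (the (x & (x - 1)) == 0 test), so a hot call site emits only
// logarithmically many lines of output.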
#define TRACE(m)                  \
  {                               \
    static volatile int ctr = 0;  \
    int x = ++ctr;                \
    if ((x & (x - 1)) == 0) {     \
      ::printf("%d:%s\n", x, #m); \
      ::fflush(stdout);           \
    }                             \
  }

// Simplistic low-quality Marsaglia SHIFT-XOR RNG.
// Bijective except for the trailing mask operation.
// Useful for spin loops as the compiler can't optimize it away.

static inline jint MarsagliaXORV(jint x) {
  if (x == 0) x = 1|os::random();   // avoid the fixed point at 0
  x ^= x << 6;
  x ^= ((unsigned)x) >> 21;
  x ^= x << 7;
  return x & 0x7FFFFFFF;            // strip the sign bit
}

static int Stall(int its) {
  static volatile jint rv = 1;
  volatile int OnFrame = 0;
  jint v = rv ^ UNS(OnFrame);
  while (--its >= 0) {
    v = MarsagliaXORV(v);
  }
  // Make this impossible for the compiler to optimize away,
  // but (mostly) avoid W coherency sharing on MP systems.
  if (v == 0x12345) rv = v;
  return v;
}

int Monitor::TryLock() {
  intptr_t v = _LockWord.FullWord;
  for (;;) {
    if ((v & _LBIT) != 0) return 0;
    const intptr_t u = CASPTR(&_LockWord, v, v|_LBIT);
    if (v == u) return 1;
    v = u;    // the CAS failed: u holds the freshly observed value
  }
}
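
// For readers unfamiliar with the idiom above: a minimal, self-contained
// sketch of the same CAS retry loop, expressed with std::atomic as a
// stand-in for CASPTR/Atomic::cmpxchg_ptr. LBIT and LockWord below are toy
// analogues of _LBIT and _LockWord, not HotSpot definitions.
#if 0
#include <atomic>
#include <cstdint>

static const intptr_t LBIT = 1;            // low-order "lock byte" bit
static std::atomic<intptr_t> LockWord(0);  // encodes (cxq head | LBIT)

int try_lock_sketch() {
  intptr_t v = LockWord.load(std::memory_order_relaxed);
  for (;;) {
    if ((v & LBIT) != 0) return 0;         // already held -- fail immediately
    // On failure compare_exchange_weak refreshes v with the observed value,
    // so the retry needs no separate reload.
    if (LockWord.compare_exchange_weak(v, v | LBIT,
                                       std::memory_order_acquire)) {
      return 1;
    }
  }
}
#endif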

int Monitor::TryFast() {
  // Optimistic fast-path form ...
  // Fast-path attempt for the common uncontended case.
  // Avoid RTS->RTO $ coherence upgrade on typical SMP systems.
  intptr_t v = CASPTR(&_LockWord, 0, _LBIT);  // agro ...
  if (v == 0) return 1;

  for (;;) {
    if ((v & _LBIT) != 0) return 0;
    const intptr_t u = CASPTR(&_LockWord, v, v|_LBIT);
    if (v == u) return 1;
    v = u;
  }
}

int Monitor::ILocked() {
  const intptr_t w = _LockWord.FullWord & 0xFF;
  assert(w == 0 || w == _LBIT, "invariant");
  return w == _LBIT;
}

// Polite TATAS spinlock with exponential backoff - bounded spin.
// Ideally we'd use processor cycles, time or vtime to control
// the loop, but we currently use iterations.
// All the constants within were derived empirically but work
// over the spectrum of J2SE reference platforms.
// On Niagara-class systems the back-off is unnecessary but
// is relatively harmless. (At worst it'll slightly retard
// acquisition times). The back-off is critical for older SMP systems
// where constant fetching of the LockWord would otherwise impair
// scalability.
//
// Clamp spinning at approximately 1/2 of a context-switch round-trip.
// See synchronizer.cpp for details and rationale.

int Monitor::TrySpin(Thread * const Self) {
  if (TryLock()) return 1;
  if (!os::is_MP()) return 0;

  int Probes = 0;
  int Delay = 0;
  int Steps = 0;
  int SpinMax = NativeMonitorSpinLimit;
  int flgs = NativeMonitorFlags;
  for (;;) {
    intptr_t v = _LockWord.FullWord;
    if ((v & _LBIT) == 0) {
      if (CASPTR(&_LockWord, v, v|_LBIT) == v) {
        return 1;
      }
      continue;
    }

    if ((flgs & 8) == 0) {
      SpinPause();
    }

    // Periodically increase Delay -- variable Delay form
    // conceptually: delay *= 1 + 1/Exponent
    ++Probes;
    if (Probes > SpinMax) return 0;

    if ((Probes & 0x7) == 0) {
      Delay = ((Delay << 1)|1) & 0x7FF;
      // CONSIDER: Delay += 1 + (Delay/4); Delay &= 0x7FF ;
    }

    if (flgs & 2) continue;
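    // ... (remainder of TrySpin elided) ...

// Since the tail of TrySpin is elided above, here is a rough, self-contained
// sketch of the bounded TATAS-with-backoff shape the preceding comment block
// describes -- a sketch with placeholder constants and a placeholder
// spin_pause(), not HotSpot's exact tuning:
#if 0
#include <atomic>

static std::atomic<int> lock_word(0);   // 0 = free, 1 = held (simplified)
inline void spin_pause() { /* e.g. the PAUSE instruction on x86 */ }

bool try_spin_sketch(int spin_max) {
  int delay = 0;
  for (int probes = 1; probes <= spin_max; ++probes) {
    int v = lock_word.load(std::memory_order_relaxed);   // test ...
    if (v == 0) {
      int expected = 0;                                  // ... then test-and-set
      if (lock_word.compare_exchange_strong(expected, 1,
                                            std::memory_order_acquire)) {
        return true;
      }
      continue;                   // lost the race: re-read and retry at once
    }
    if ((probes & 0x7) == 0) {    // periodically widen the backoff window
      delay = ((delay << 1) | 1) & 0x7FF;
    }
    for (int i = 0; i < delay; ++i) spin_pause();
  }
  return false;                   // spin budget exhausted -- caller should block
}
#endif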

static int ParkCommon(ParkEvent * ev, jlong timo) {
  // Diagnostic support - periodically unwedge blocked threads
  intx nmt = NativeMonitorTimeout;
  if (nmt > 0 && (nmt < timo || timo <= 0)) {
    timo = nmt;
  }
  int err = OS_OK;
  if (0 == timo) {
    ev->park();             // park indefinitely
  } else {
    err = ev->park(timo);   // timed park
  }
  return err;
}

inline int Monitor::AcquireOrPush(ParkEvent * ESelf) {
  intptr_t v = _LockWord.FullWord;
  for (;;) {
    if ((v & _LBIT) == 0) {
      const intptr_t u = CASPTR(&_LockWord, v, v|_LBIT);
      if (u == v) return 1;   // indicate acquired
      v = u;
    } else {
      // Anticipate success ...
      ESelf->ListNext = (ParkEvent *)(v & ~_LBIT);
      const intptr_t u = CASPTR(&_LockWord, v, intptr_t(ESelf)|_LBIT);
      if (u == v) return 0;   // indicate pushed onto cxq
      v = u;
    }
    // Interference - LockWord change - just retry
  }
}
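
// The push branch above is a classic Treiber-style prepend. A hedged,
// self-contained sketch with std::atomic follows; Waiter, LBIT and lock_word
// are toy stand-ins for ParkEvent, _LBIT and _LockWord, not HotSpot types.
#if 0
#include <atomic>
#include <cstdint>

struct Waiter { Waiter * next; };

static const intptr_t LBIT = 1;
static std::atomic<intptr_t> lock_word(0);   // encodes (cxq head | LBIT)

// Returns 1 when the lock bit was acquired, 0 when w was pushed onto the
// cxq (the caller must then park).
int acquire_or_push_sketch(Waiter * w) {
  intptr_t v = lock_word.load(std::memory_order_relaxed);
  for (;;) {
    if ((v & LBIT) == 0) {                   // lock appears free: try to take it
      if (lock_word.compare_exchange_weak(v, v | LBIT,
                                          std::memory_order_acquire)) return 1;
    } else {                                 // held: prepend w, preserving LBIT
      w->next = (Waiter *)(v & ~LBIT);       // anticipate success
      if (lock_word.compare_exchange_weak(v, intptr_t(w) | LBIT,
                                          std::memory_order_release)) return 0;
    }
    // a failed CAS refreshed v -- just retry
  }
}
#endif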

// ILock and IWait are the lowest level primitive internal blocking
// synchronization functions. The callers of IWait and ILock must have
// performed any needed state transitions beforehand.
// IWait and ILock may directly call park() without any concern for thread state.
// Note that ILock and IWait do *not* access _owner.
// _owner is a higher-level logical concept.

void Monitor::ILock(Thread * Self) {
  assert(_OnDeck != Self->_MutexEvent, "invariant");

  if (TryFast()) {
 Exeunt:
    assert(ILocked(), "invariant");
    return;
  }

  ParkEvent * const ESelf = Self->_MutexEvent;
  assert(_OnDeck != ESelf, "invariant");

  // As an optimization, spinners could conditionally try to set _OnDeck to _LBIT.
  // synchronizer.cpp uses a similar optimization.
  if (TrySpin(Self)) goto Exeunt;

  // Slow-path - the lock is contended.
  // Either Enqueue Self on cxq or acquire the outer lock.
  // LockWord encoding = (cxq,LOCKBYTE)
  ESelf->reset();
  OrderAccess::fence();

  // Optional optimization ... try barging on the inner lock
  if ((NativeMonitorFlags & 32) && CASPTR(&_OnDeck, NULL, UNS(ESelf)) == 0) {
    goto OnDeck_LOOP;
  }

  if (AcquireOrPush(ESelf)) goto Exeunt;

  // At any given time there is at most one ondeck thread.
  // ondeck implies not resident on cxq and not resident on EntryList.
  // Only the OnDeck thread can try to acquire -- contend for -- the lock.
  // CONSIDER: use Self->OnDeck instead of m->OnDeck.
  // Deschedule Self so that others may run.
  while (OrderAccess::load_ptr_acquire(&_OnDeck) != ESelf) {
    ParkCommon(ESelf, 0);
  }

  // Self is now in the OnDeck position and will remain so until it
  // manages to acquire the lock.
 OnDeck_LOOP:
  for (;;) {
    assert(_OnDeck == ESelf, "invariant");
    if (TrySpin(Self)) break;
    // It's probably wise to spin only if we *actually* blocked.
    // CONSIDER: check the lockbyte, if it remains set then
    // preemptively drain the cxq into the EntryList.
    // The best place and time to perform queue operations -- lock metadata --
    // is *before* having acquired the outer lock, while waiting for the lock to drop.
    ParkCommon(ESelf, 0);
  }

  assert(_OnDeck == ESelf, "invariant");
  _OnDeck = NULL;

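// ... (the tail of ILock and the head of Monitor::IUnlock elided; the
// fragment below continues inside IUnlock, with cxq holding a copy of
// _LockWord.FullWord) ...
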
  if (((cxq & ~_LBIT)|UNS(_EntryList)) == 0) {
    return;   // normal fast-path exit - cxq and EntryList both empty
  }
  if (cxq & _LBIT) {
    // Optional optimization ...
    // Some other thread acquired the lock in the window since this
    // thread released it. Succession is now that thread's responsibility.
    return;
  }

 Succession:
  // Slow-path exit - this thread must ensure succession and progress.
  // OnDeck serves as lock to protect cxq and EntryList.
  // Only the holder of OnDeck can manipulate EntryList or detach the RATs
  // (recently arrived threads) from cxq.
  // Avoid ABA - allow multiple concurrent producers (enqueue via push-CAS)
  // but only one concurrent consumer (detacher of RATs).
  // Consider protecting this critical section with schedctl on Solaris.
  // Unlike a normal lock, however, the exiting thread "locks" OnDeck,
  // picks a successor and marks that thread as OnDeck. That successor
  // thread will then clear OnDeck once it eventually acquires the outer lock.
  if (CASPTR(&_OnDeck, NULL, _LBIT) != UNS(NULL)) {
    return;
  }

  ParkEvent * List = _EntryList;
  if (List != NULL) {
    // Transfer the head of the EntryList to the OnDeck position.
    // Once OnDeck, a thread stays OnDeck until it acquires the lock.
    // For a given lock there is at most one OnDeck thread at any one instant.
   WakeOne:
    assert(List == _EntryList, "invariant");
    ParkEvent * const w = List;
    assert(RelaxAssert || w != Thread::current()->_MutexEvent, "invariant");
    _EntryList = w->ListNext;
    // as a diagnostic measure consider setting w->_ListNext = BAD
    assert(UNS(_OnDeck) == _LBIT, "invariant");

    // Pass OnDeck role to w, ensuring that _EntryList has been set first.
    // w will clear _OnDeck once it acquires the outer lock.
    // Note that once we set _OnDeck that thread can acquire the mutex, proceed
    // with its critical section and then enter this code to unlock the mutex. So
    // you can have multiple threads active in IUnlock at the same time.
    OrderAccess::release_store_ptr(&_OnDeck, w);

    // Another optional optimization ...
    // For heavily contended locks it's not uncommon that some other
    // thread acquired the lock while this thread was arranging succession.
    // Try to defer the unpark() operation - Delegate the responsibility
    // for unpark()ing the OnDeck thread to the current or subsequent owners.
    // That is, the new owner is responsible for unparking the OnDeck thread.
    OrderAccess::storeload();
    cxq = _LockWord.FullWord;
    if (cxq & _LBIT) return;

    w->unpark();
    return;
  }

  cxq = _LockWord.FullWord;
  if ((cxq & ~_LBIT) != 0) {
    // The EntryList is empty but the cxq is populated.
    // drain RATs from cxq into EntryList.
    // Detach the RATs segment with CAS and then merge it into the EntryList.
    for (;;) {
      // optional optimization - if locked, the owner is responsible for succession
      if (cxq & _LBIT) goto Punt;
      const intptr_t vfy = CASPTR(&_LockWord, cxq, cxq & _LBIT);
      if (vfy == cxq) break;
      cxq = vfy;
      // Interference - LockWord changed - Just retry.
      // We can see concurrent interference from contending threads
      // pushing themselves onto the cxq or from lock-unlock operations.
      // From the perspective of this thread, EntryList is stable and
      // the cxq is prepend-only -- the head is volatile but the interior
      // of the cxq is stable. In theory if we encounter interference from threads
      // pushing onto cxq we could simply break off the original cxq suffix and
      // move that segment to the EntryList, avoiding a 2nd or multiple CAS attempts
      // on the high-traffic LockWord variable. For instance, let's say the cxq is "ABCD"
      // when we first fetch cxq above. Between the fetch -- where we observed "A"
      // -- and the CAS -- where we attempt to CAS null over A -- "PQR" arrives,
      // yielding cxq = "PQRABCD". In this case we could simply set A.ListNext to
      // null, leaving cxq = "PQRA", and transfer the "BCD" segment to the EntryList.
      // Note too, that it's safe for this thread to traverse the cxq
      // without taking any special concurrency precautions.
    }
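
// Detaching the whole cxq with a single CAS, as in the loop above, can be
// sketched in isolation as follows. This reuses the toy Waiter/LBIT/lock_word
// definitions from the acquire_or_push_sketch earlier -- a sketch, not
// HotSpot code.
#if 0
// Returns the detached, prepend-ordered cxq segment, or NULL if the lock was
// re-acquired in the interim (in which case succession is the new owner's
// responsibility).
Waiter * detach_cxq_sketch() {
  intptr_t v = lock_word.load(std::memory_order_relaxed);
  for (;;) {
    if (v & LBIT) return NULL;        // re-acquired: punt succession
    // CAS the head to empty; producers that lose the race simply re-push.
    if (lock_word.compare_exchange_weak(v, 0, std::memory_order_acquire)) {
      return (Waiter *)(v & ~LBIT);
    }
  }
}
#endif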

    // We don't currently reorder the cxq segment as we move it onto
    // the EntryList.
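    // ... (remainder of IUnlock elided) ...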

bool Monitor::notify() {
  assert(_owner == Thread::current(), "invariant");
  assert(ILocked(), "invariant");
  if (_WaitSet == NULL) return true;
  NotifyCount++;

  // Transfer one thread from the WaitSet to the EntryList or cxq.
  // Currently we just unlink the head of the WaitSet and prepend it to the cxq.
  // And of course we could just unlink it and unpark it, too, but
  // in that case it'd likely impale itself on the reentry.
  Thread::muxAcquire(_WaitLock, "notify:WaitLock");
  ParkEvent * nfy = _WaitSet;
  if (nfy != NULL) {   // DCL (double-checked locking) idiom
    _WaitSet = nfy->ListNext;
    assert(nfy->Notified == 0, "invariant");
    // push nfy onto the cxq
    for (;;) {
      const intptr_t v = _LockWord.FullWord;
      assert((v & 0xFF) == _LBIT, "invariant");
      nfy->ListNext = (ParkEvent *)(v & ~_LBIT);
      if (CASPTR(&_LockWord, v, UNS(nfy)|_LBIT) == v) break;
      // interference - _LockWord changed -- just retry
    }
    // Note that setting Notified before pushing nfy onto the cxq is
    // also legal and safe, but the safety properties are much more
    // subtle, so for the sake of code stewardship ...
    OrderAccess::fence();
    nfy->Notified = 1;
  }
  Thread::muxRelease(_WaitLock);
  if (nfy != NULL && (NativeMonitorFlags & 16)) {
    // Experimental code ... light up the wakee in the hope that this thread (the owner)
    // will drop the lock just about the time the wakee comes ONPROC.
    nfy->unpark();
  }
  assert(ILocked(), "invariant");
  return true;
}

// Currently notifyAll() transfers the waiters one-at-a-time from the waitset
// to the cxq. This could be done more efficiently with a single bulk en masse
// transfer,
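// ... (notifyAll and the head of Monitor::IWait elided; the fragment below
// is the tail of IWait, beginning inside its timed-wait unlink path) ...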
      } else {                  // found in interior
        assert(q->ListNext == p, "invariant");
        q->ListNext = p->ListNext;
      }
      WasOnWaitSet = 1;         // We were *not* notified but instead encountered timeout
    }
    Thread::muxRelease(_WaitLock);
  }

  // Reentry phase - reacquire the lock
  if (WasOnWaitSet) {
    // ESelf was previously on the WaitSet but we just unlinked it above
    // because of a timeout. ESelf is not resident on any list and is not OnDeck.
    assert(_OnDeck != ESelf, "invariant");
    ILock(Self);
  } else {
    // A prior notify() operation moved ESelf from the WaitSet to the cxq.
    // ESelf is now on the cxq, EntryList or at the OnDeck position.
    // The following fragment is extracted from Monitor::ILock():
    for (;;) {
      if (OrderAccess::load_ptr_acquire(&_OnDeck) == ESelf && TrySpin(Self)) break;
      ParkCommon(ESelf, 0);
    }
    assert(_OnDeck == ESelf, "invariant");
    _OnDeck = NULL;
  }

  assert(ILocked(), "invariant");
  return WasOnWaitSet != 0;     // return true IFF timeout
}


// ON THE VMTHREAD SNEAKING PAST HELD LOCKS:
// In particular, there are certain types of global lock that may be held
// by a Java thread while it is blocked at a safepoint but before it has
// written the _owner field. These locks may be sneakily acquired by the
// VM thread during a safepoint to avoid deadlocks. Alternatively, one should
// identify all such locks, and ensure that Java threads never block at
// safepoints while holding them (_no_safepoint_check_flag). While it
// seems as though this could increase the time to reach a safepoint
// (or at least increase the mean, if not the variance), the latter
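// ... (elided; the fragment below is the contended slow path of
// Monitor::jvm_raw_lock) ...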

  // slow-path - apparent contention
  // Allocate a ParkEvent for transient use.
  // The ParkEvent remains associated with this thread until
  // the time the thread manages to acquire the lock.
  ParkEvent * const ESelf = ParkEvent::Allocate(NULL);
  ESelf->reset();
  OrderAccess::storeload();

  // Either Enqueue Self on cxq or acquire the outer lock.
  if (AcquireOrPush(ESelf)) {
    ParkEvent::Release(ESelf);   // surrender the ParkEvent
    goto Exeunt;
  }

  // At any given time there is at most one ondeck thread.
  // ondeck implies not resident on cxq and not resident on EntryList.
  // Only the OnDeck thread can try to acquire -- contend for -- the lock.
  // CONSIDER: use Self->OnDeck instead of m->OnDeck.
  for (;;) {
    if (OrderAccess::load_ptr_acquire(&_OnDeck) == ESelf && TrySpin(NULL)) break;
    ParkCommon(ESelf, 0);
  }

  assert(_OnDeck == ESelf, "invariant");
  _OnDeck = NULL;
  ParkEvent::Release(ESelf);     // surrender the ParkEvent
  goto Exeunt;
}

void Monitor::jvm_raw_unlock() {
  // Nearly the same as Monitor::unlock() ...
  // directly set _owner instead of using set_owner(null)
  _owner = NULL;
  if (_snuck) {   // only the VM thread "sneaks" past held locks at a safepoint (see above)
    assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sneak");
    _snuck = false;
    return;
  }
  IUnlock(false);
}