// them together was facile -- a bit too facile. The current implementation badly
// conflates the two concerns.
//
// * TODO-FIXME:
//
//   -- Add DTRACE probes for contended acquire, contended acquired, contended unlock
//      We should also add DTRACE probes in the ParkEvent subsystem for
//      Park-entry, Park-exit, and Unpark.
//
//   -- We have an excess of mutex-like constructs in the JVM, namely:
//      1. objectMonitors for Java-level synchronization (synchronizer.cpp)
//      2. low-level muxAcquire and muxRelease
//      3. low-level spinAcquire and spinRelease
//      4. native Mutex:: and Monitor::
//      5. jvm_raw_lock() and _unlock()
//      6. JVMTI raw monitors -- distinct from (5) despite having a confusingly
//         similar name.
//
// o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o

#define UNS(x) (uintptr_t(x))
#define TRACE(m)                   \
  {                                \
    static volatile int ctr = 0;   \
    int x = ++ctr;                 \
    if ((x & (x - 1)) == 0) {      \
      ::printf("%d:%s\n", x, #m);  \
      ::fflush(stdout);            \
    }                              \
  }
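// Note: (x & (x - 1)) == 0 holds exactly when x is a power of two, so TRACE(m)
// reports on the 1st, 2nd, 4th, 8th, ... executions of a probe point --
// exponentially decaying output that remains affordable on hot paths.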

const intptr_t _LBIT = 1;

// Endian-ness ... index of least-significant byte in SplitWord.Bytes[]
#ifdef VM_LITTLE_ENDIAN
 #define _LSBINDEX 0
#else
 #define _LSBINDEX (sizeof(intptr_t)-1)
#endif
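
// For orientation: _LSBINDEX selects the lock byte within the SplitWord union
// declared in mutex.hpp. A sketch from memory (an assumption, not authoritative):
//
//   union SplitWord {                 // full word with separately addressable LSB
//     volatile intptr_t FullWord;
//     volatile void * Address;
//     volatile jbyte  Bytes[sizeof(intptr_t)];
//   };
//
// _LockWord.Bytes[_LSBINDEX] thus aliases the low-order (lock) byte of FullWord.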

// Simplistic low-quality Marsaglia SHIFT-XOR RNG.
// Bijective except for the trailing mask operation.
// Useful for spin loops as the compiler can't optimize it away.

static inline jint MarsagliaXORV(jint x) {
  if (x == 0) x = 1|os::random();
  x ^= x << 6;
  x ^= ((unsigned)x) >> 21;
  x ^= x << 7;
  return x & 0x7FFFFFFF;
}

static int Stall(int its) {
  static volatile jint rv = 1;
  volatile int OnFrame = 0;
  jint v = rv ^ UNS(OnFrame);
  while (--its >= 0) {
    v = MarsagliaXORV(v);
  }
  // Make this impossible for the compiler to optimize away,
  // but (mostly) avoid W coherency sharing on MP systems.
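  // (The conditional store below is taken only when v happens to equal 0x12345,
  // roughly once per 2^31 calls, so the line holding rv stays read-shared.)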
  if (v == 0x12345) rv = v;
  return v;
}

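// TryLock: a single bounded attempt to set the low-order lock bit with CAS.
// Returns 1 if this thread acquired the lock and 0 if the lock byte was already
// set. The loop re-issues the CAS only when it fails due to interference on the
// cxq bits of the LockWord, never when the lock is genuinely held.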
int Monitor::TryLock() {
  intptr_t v = _LockWord.FullWord;
  for (;;) {
    if ((v & _LBIT) != 0) return 0;
    const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v);
    if (v == u) return 1;
    v = u;
  }
}

int Monitor::TryFast() {
  // Optimistic fast-path form ...
  // Fast-path attempt for the common uncontended case.
  // Avoid RTS->RTO $ coherence upgrade on typical SMP systems.
  intptr_t v = Atomic::cmpxchg(_LBIT, &_LockWord.FullWord, (intptr_t)0);  // agro ...
  if (v == 0) return 1;

  for (;;) {
    if ((v & _LBIT) != 0) return 0;
    const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v);
    if (v == u) return 1;
    v = u;
  }
}

int Monitor::ILocked() {
  const intptr_t w = _LockWord.FullWord & 0xFF;
  assert(w == 0 || w == _LBIT, "invariant");
  return w == _LBIT;
}

// Polite TATAS spinlock with exponential backoff - bounded spin.
// Ideally we'd use processor cycles, time or vtime to control
// the loop, but we currently use iterations.
// All the constants within were derived empirically but work over
// the spectrum of J2SE reference platforms.
// On Niagara-class systems the back-off is unnecessary but
// is relatively harmless. (At worst it'll slightly retard
// acquisition times). The back-off is critical for older SMP systems
// where constant fetching of the LockWord would otherwise impair
// scalability.
//
// Clamp spinning at approximately 1/2 of a context-switch round-trip.
// See synchronizer.cpp for details and rationale.

int Monitor::TrySpin(Thread * const Self) {
  if (TryLock()) return 1;
  if (!os::is_MP()) return 0;

  int Probes = 0;
  int Delay = 0;
  int Steps = 0;
  int SpinMax = NativeMonitorSpinLimit;
  int flgs = NativeMonitorFlags;
  for (;;) {
    intptr_t v = _LockWord.FullWord;
    if ((v & _LBIT) == 0) {
      if (Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v) == v) {
        return 1;
      }
      continue;
    }

    if ((flgs & 8) == 0) {
      SpinPause();
    }

    // Periodically increase Delay -- variable Delay form
    // conceptually: delay *= 1 + 1/Exponent
    ++Probes;
    if (Probes > SpinMax) return 0;

    if ((Probes & 0x7) == 0) {
      Delay = ((Delay << 1)|1) & 0x7FF;
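      // Doubling-plus-one: Delay takes the values 1, 3, 7, 15, ...,
      // saturating at 0x7FF (2047) iterations.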
      // CONSIDER: Delay += 1 + (Delay/4); Delay &= 0x7FF;
    }

    if (flgs & 2) continue;

    // ... [remainder of TrySpin elided] ...

static int ParkCommon(ParkEvent * ev, jlong timo) {
  // Diagnostic support - periodically unwedge blocked threads.
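  // When NativeMonitorTimeout is set it caps every park below, turning an
  // indefinite park (timo == 0) into a bounded one; the blocking callers in
  // this file invoke ParkCommon in retry loops, so a capped park is reissued.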
  intx nmt = NativeMonitorTimeout;
  if (nmt > 0 && (nmt < timo || timo <= 0)) {
    timo = nmt;
  }
  int err = OS_OK;
  if (0 == timo) {
    ev->park();
  } else {
    err = ev->park(timo);
  }
  return err;
}

inline int Monitor::AcquireOrPush(ParkEvent * ESelf) {
  intptr_t v = _LockWord.FullWord;
  for (;;) {
    if ((v & _LBIT) == 0) {
      const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v);
      if (u == v) return 1;  // indicate acquired
      v = u;
    } else {
      // Anticipate success ...
      ESelf->ListNext = (ParkEvent *)(v & ~_LBIT);
      const intptr_t u = Atomic::cmpxchg(intptr_t(ESelf)|_LBIT, &_LockWord.FullWord, v);
      if (u == v) return 0;  // indicate pushed onto cxq
      v = u;
    }
    // Interference - LockWord changed - just retry.
  }
}
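
// Caller contract for AcquireOrPush, illustrated: a return of 1 means the outer
// lock is held and the caller proceeds directly; a return of 0 means ESelf is
// linked onto the cxq and the caller must park until granted the OnDeck
// position (see ILock below).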

// ILock and IWait are the lowest level primitive internal blocking
// synchronization functions. The callers of IWait and ILock must have
// performed any needed state transitions beforehand.
// IWait and ILock may directly call park() without any concern for thread state.
// Note that ILock and IWait do *not* access _owner.
// _owner is a higher-level logical concept.

void Monitor::ILock(Thread * Self) {
  assert(_OnDeck != Self->_MutexEvent, "invariant");

  if (TryFast()) {
 Exeunt:
    assert(ILocked(), "invariant");
    return;
  }

  ParkEvent * const ESelf = Self->_MutexEvent;
  assert(_OnDeck != ESelf, "invariant");

  // As an optimization, spinners could conditionally try to set _OnDeck to _LBIT.
  // Synchronizer.cpp uses a similar optimization.
  if (TrySpin(Self)) goto Exeunt;

  // Slow-path - the lock is contended.
  // Either enqueue Self on cxq or acquire the outer lock.
  // LockWord encoding = (cxq,LOCKBYTE)
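  // That is, the full word carries the cxq list head (a ParkEvent*) in its
  // upper bits and the lock bit in the low-order byte, so a single CAS can
  // atomically update both the queue and the lock state.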
  ESelf->reset();
  OrderAccess::fence();

  // Optional optimization ... try barging on the inner lock.
  if ((NativeMonitorFlags & 32) && Atomic::cmpxchg(ESelf, &_OnDeck, (ParkEvent*)NULL) == NULL) {
    goto OnDeck_LOOP;
  }

  if (AcquireOrPush(ESelf)) goto Exeunt;

  // At any given time there is at most one OnDeck thread.
  // OnDeck implies not resident on cxq and not resident on EntryList.
  // Only the OnDeck thread can try to acquire -- contend for -- the lock.
  // CONSIDER: use Self->OnDeck instead of m->OnDeck.
  // Deschedule Self so that others may run.
  while (OrderAccess::load_acquire(&_OnDeck) != ESelf) {
    ParkCommon(ESelf, 0);
  }

  // Self is now in the OnDeck position and will remain so until it
  // manages to acquire the lock.
 OnDeck_LOOP:
  for (;;) {
    assert(_OnDeck == ESelf, "invariant");
    if (TrySpin(Self)) break;
    // It's probably wise to spin only if we *actually* blocked.
    // CONSIDER: check the lockbyte; if it remains set then
    // preemptively drain the cxq into the EntryList.
    // The best place and time to perform queue operations -- lock metadata --
    // is _before having acquired the outer lock, while waiting for the lock to drop.
    ParkCommon(ESelf, 0);
  }

  assert(_OnDeck == ESelf, "invariant");
  _OnDeck = NULL;

  // ... [remainder of ILock and the head of Monitor::IUnlock elided] ...

  if (((cxq & ~_LBIT)|UNS(_EntryList)) == 0) {
    return;  // normal fast-path exit - cxq and EntryList both empty
  }
  if (cxq & _LBIT) {
    // Optional optimization ...
    // Some other thread acquired the lock in the window since this
    // thread released it. Succession is now that thread's responsibility.
    return;
  }

 Succession:
  // Slow-path exit - this thread must ensure succession and progress.
  // OnDeck serves as lock to protect cxq and EntryList.
  // Only the holder of OnDeck can manipulate EntryList or detach the RATs
  // (recently-arrived threads) from cxq.
  // Avoid ABA - allow multiple concurrent producers (enqueue via push-CAS)
  // but only one concurrent consumer (detacher of RATs).
  // Consider protecting this critical section with schedctl on Solaris.
  // Unlike a normal lock, however, the exiting thread "locks" OnDeck,
  // picks a successor and marks that thread as OnDeck. That successor
  // thread will then clear OnDeck once it eventually acquires the outer lock.
  if (Atomic::cmpxchg((ParkEvent*)_LBIT, &_OnDeck, (ParkEvent*)NULL) != NULL) {
    return;
  }

  ParkEvent * List = _EntryList;
  if (List != NULL) {
    // Transfer the head of the EntryList to the OnDeck position.
    // Once OnDeck, a thread stays OnDeck until it acquires the lock.
    // For a given lock there is at most one OnDeck thread at any one instant.
   WakeOne:
    assert(List == _EntryList, "invariant");
    ParkEvent * const w = List;
    assert(RelaxAssert || w != Thread::current()->_MutexEvent, "invariant");
    _EntryList = w->ListNext;
    // As a diagnostic measure consider setting w->_ListNext = BAD.
    assert(intptr_t(_OnDeck) == _LBIT, "invariant");

    // Pass OnDeck role to w, ensuring that _EntryList has been set first.
    // w will clear _OnDeck once it acquires the outer lock.
    // Note that once we set _OnDeck that thread can acquire the mutex, proceed
    // with its critical section and then enter this code to unlock the mutex. So
    // you can have multiple threads active in IUnlock at the same time.
    OrderAccess::release_store(&_OnDeck, w);

    // Another optional optimization ...
    // For heavily contended locks it's not uncommon that some other
    // thread acquired the lock while this thread was arranging succession.
    // Try to defer the unpark() operation -- delegate the responsibility
    // for unpark()ing the OnDeck thread to the current or subsequent owners.
    // That is, the new owner is responsible for unparking the OnDeck thread.
    OrderAccess::storeload();
    cxq = _LockWord.FullWord;
    if (cxq & _LBIT) return;

    w->unpark();
    return;
  }

  cxq = _LockWord.FullWord;
  if ((cxq & ~_LBIT) != 0) {
    // The EntryList is empty but the cxq is populated.
    // Drain the RATs from the cxq into the EntryList:
    // detach the RAT segment with a CAS and then merge it into the EntryList.
    for (;;) {
      // Optional optimization - if locked, the owner is responsible for succession.
      if (cxq & _LBIT) goto Punt;
      const intptr_t vfy = Atomic::cmpxchg(cxq & _LBIT, &_LockWord.FullWord, cxq);
      if (vfy == cxq) break;
      cxq = vfy;
      // Interference - LockWord changed - just retry.
      // We can see concurrent interference from contending threads
      // pushing themselves onto the cxq or from lock-unlock operations.
      // From the perspective of this thread, EntryList is stable and
      // the cxq is prepend-only -- the head is volatile but the interior
      // of the cxq is stable. In theory if we encounter interference from threads
      // pushing onto cxq we could simply break off the original cxq suffix and
      // move that segment to the EntryList, avoiding a 2nd or multiple CAS attempts
      // on the high-traffic LockWord variable. For instance, let's say the cxq is "ABCD"
      // when we first fetch cxq above. Between the fetch -- where we observed "A"
      // -- and the CAS -- where we attempt to CAS null over A -- "PQR" arrive,
      // yielding cxq = "PQRABCD". In this case we could simply set A.ListNext to
      // null, leaving cxq = "PQRA", and transfer the "BCD" segment to the EntryList.
      // (A sketch of this suffix-detach idea appears after IUnlock, below.)
      // Note too, that it's safe for this thread to traverse the cxq
      // without taking any special concurrency precautions.
    }

    // We don't currently reorder the cxq segment as we move it onto
    // the EntryList, but it might make sense to reverse the order
    // or perhaps sort by thread priority. See the comments in
    // synchronizer.cpp objectMonitor::exit().
    assert(_EntryList == NULL, "invariant");
    _EntryList = List = (ParkEvent *)(cxq & ~_LBIT);
    assert(List != NULL, "invariant");
    goto WakeOne;
  }

  // cxq|EntryList is empty.
  // w == NULL implies that cxq|EntryList == NULL in the past.
  // Possible race - rare inopportune interleaving.
  // A thread could have added itself to cxq since this thread previously checked.
  // Detect and recover by refetching cxq.
 Punt:
  assert(intptr_t(_OnDeck) == _LBIT, "invariant");
  _OnDeck = NULL;            // Release inner lock.
  OrderAccess::storeload();  // Dekker duality - pivot point

  // Resample LockWord/cxq to recover from possible race.
  // For instance, while this thread T1 held OnDeck, some other thread T2 might
  // acquire the outer lock. Another thread T3 might try to acquire the outer
  // lock, but encounter contention and enqueue itself on cxq. T2 then drops the
  // outer lock, but skips succession as this thread T1 still holds OnDeck.
  // T1 is and remains responsible for ensuring succession of T3.
  //
  // Note that we don't need to recheck EntryList, just cxq.
  // If threads moved onto EntryList since we dropped OnDeck
  // that implies some other thread forced succession.
  cxq = _LockWord.FullWord;
  if ((cxq & ~_LBIT) != 0 && (cxq & _LBIT) == 0) {
    goto Succession;  // potential race -- re-run succession
  }
  return;
}

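// A hedged sketch of the suffix-detach idea described inside IUnlock's drain
// loop above. It is NOT used by this file; the helper name and the caller
// protocol are hypothetical. Given the head observed before a failed detach
// CAS (OldHead) and the head refetched afterward (NewHead), clip off the
// segment following OldHead -- safe because the cxq interior is stable and
// prepend-only -- and return it for transfer to the EntryList.
static ParkEvent * DetachSuffixSketch(ParkEvent * NewHead, ParkEvent * OldHead) {
  ParkEvent * p = NewHead;
  while (p != NULL && p != OldHead) p = p->ListNext;  // walk "PQR..." down to "A"
  if (p == NULL) return NULL;              // OldHead gone - caller falls back to re-CAS
  ParkEvent * const Suffix = p->ListNext;  // "BCD" when cxq went "ABCD" -> "PQRABCD"
  p->ListNext = NULL;                      // cxq retains "PQRA"
  return Suffix;                           // caller merges this segment into EntryList
}
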
bool Monitor::notify() {
  assert(_owner == Thread::current(), "invariant");
  assert(ILocked(), "invariant");
  if (_WaitSet == NULL) return true;
  NotifyCount++;

  // Transfer one thread from the WaitSet to the EntryList or cxq.
  // Currently we just unlink the head of the WaitSet and prepend it to the cxq.
  // And of course we could just unlink it and unpark it, too, but
  // in that case it'd likely impale itself on the reentry.
  Thread::muxAcquire(_WaitLock, "notify:WaitLock");
  ParkEvent * nfy = _WaitSet;
  if (nfy != NULL) {  // DCL idiom
    _WaitSet = nfy->ListNext;
    assert(nfy->Notified == 0, "invariant");
    // Push nfy onto the cxq.
    for (;;) {
      const intptr_t v = _LockWord.FullWord;
      assert((v & 0xFF) == _LBIT, "invariant");
      nfy->ListNext = (ParkEvent *)(v & ~_LBIT);
      if (Atomic::cmpxchg(intptr_t(nfy)|_LBIT, &_LockWord.FullWord, v) == v) break;
      // Interference - _LockWord changed -- just retry.
    }
    // Note that setting Notified before pushing nfy onto the cxq is
    // also legal and safe, but the safety properties are much more
    // subtle, so for the sake of code stewardship ...
    OrderAccess::fence();
    nfy->Notified = 1;
  }
  Thread::muxRelease(_WaitLock);
  if (nfy != NULL && (NativeMonitorFlags & 16)) {
    // Experimental code ... light up the wakee in the hope that this thread (the owner)
    // will drop the lock just about the time the wakee comes ONPROC.
    nfy->unpark();
  }
  assert(ILocked(), "invariant");
  return true;
}

// Currently notifyAll() transfers the waiters one-at-a-time from the WaitSet
// to the cxq. This could be done more efficiently with a single bulk en masse
// transfer, ...
// ... [remainder of notifyAll() and most of Monitor::IWait() elided] ...
      } else {  // found in interior
        assert(q->ListNext == p, "invariant");
        q->ListNext = p->ListNext;
      }
      WasOnWaitSet = 1;  // We were *not* notified but instead encountered timeout
    }
    Thread::muxRelease(_WaitLock);
  }

  // Reentry phase - reacquire the lock.
  if (WasOnWaitSet) {
    // ESelf was previously on the WaitSet but we just unlinked it above
    // because of a timeout. ESelf is not resident on any list and is not OnDeck.
    assert(_OnDeck != ESelf, "invariant");
    ILock(Self);
  } else {
    // A prior notify() operation moved ESelf from the WaitSet to the cxq.
    // ESelf is now on the cxq, EntryList or at the OnDeck position.
    // The following fragment is extracted from Monitor::ILock().
    for (;;) {
      if (OrderAccess::load_acquire(&_OnDeck) == ESelf && TrySpin(Self)) break;
      ParkCommon(ESelf, 0);
    }
    assert(_OnDeck == ESelf, "invariant");
    _OnDeck = NULL;
  }

  assert(ILocked(), "invariant");
  return WasOnWaitSet != 0;  // return true IFF timeout
}


// ON THE VMTHREAD SNEAKING PAST HELD LOCKS:
// In particular, there are certain types of global lock that may be held
// by a Java thread while it is blocked at a safepoint but before it has
// written the _owner field. These locks may be sneakily acquired by the
// VM thread during a safepoint to avoid deadlocks. Alternatively, one should
// identify all such locks, and ensure that Java threads never block at
// safepoints while holding them (_no_safepoint_check_flag). While it
// seems as though this could increase the time to reach a safepoint
// (or at least increase the mean, if not the variance), the latter
// ... [remainder of this commentary and the head of Monitor::jvm_raw_lock() elided] ...

  // Slow-path - apparent contention.
  // Allocate a ParkEvent for transient use.
  // The ParkEvent remains associated with this thread until
  // the time the thread manages to acquire the lock.
  ParkEvent * const ESelf = ParkEvent::Allocate(NULL);
  ESelf->reset();
  OrderAccess::storeload();

  // Either enqueue Self on cxq or acquire the outer lock.
  if (AcquireOrPush(ESelf)) {
    ParkEvent::Release(ESelf);  // surrender the ParkEvent
    goto Exeunt;
  }

  // At any given time there is at most one OnDeck thread.
  // OnDeck implies not resident on cxq and not resident on EntryList.
  // Only the OnDeck thread can try to acquire -- contend for -- the lock.
  // CONSIDER: use Self->OnDeck instead of m->OnDeck.
  for (;;) {
    if (OrderAccess::load_acquire(&_OnDeck) == ESelf && TrySpin(NULL)) break;
    ParkCommon(ESelf, 0);
  }

  assert(_OnDeck == ESelf, "invariant");
  _OnDeck = NULL;
  ParkEvent::Release(ESelf);  // surrender the ParkEvent
  goto Exeunt;
}

void Monitor::jvm_raw_unlock() {
  // Nearly the same as Monitor::unlock() ...
  // directly set _owner instead of using set_owner(null).
  _owner = NULL;
  if (_snuck) {  // ???
    assert(SafepointSynchronize::is_at_safepoint() && Thread::current()->is_VM_thread(), "sneak");
    _snuck = false;
    return;
  }
  IUnlock(false);
}