245 // 2. low-level muxAcquire and muxRelease
246 // 3. low-level spinAcquire and spinRelease
247 // 4. native Mutex:: and Monitor::
248 // 5. jvm_raw_lock() and _unlock()
249 // 6. JVMTI raw monitors -- distinct from (5) despite having a confusingly
250 // similar name.
251 //
252 // o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o
253
// UNS(x): view x (pointer or integral) as an unsigned machine word so it
// can be compared/bit-tested against lock-word constants such as _LBIT.
254 #define UNS(x) (uintptr_t(x))
// TRACE(m): rate-limited diagnostic trace.  Each expansion carries its own
// static call counter and prints only when the count reaches a power of two
// (the (x & (x - 1)) == 0 test), so output volume grows logarithmically
// with call frequency instead of flooding stdout.
255 #define TRACE(m) \
256 { \
257 static volatile int ctr = 0; \
258 int x = ++ctr; \
259 if ((x & (x - 1)) == 0) { \
260 ::printf("%d:%s\n", x, #m); \
261 ::fflush(stdout); \
262 } \
263 }
264
265 // Simplistic low-quality Marsaglia SHIFT-XOR RNG.
266 // Bijective except for the trailing mask operation.
267 // Useful for spin loops as the compiler can't optimize it away.
268
269 static inline jint MarsagliaXORV(jint x) {
270 if (x == 0) x = 1|os::random();
271 x ^= x << 6;
272 x ^= ((unsigned)x) >> 21;
273 x ^= x << 7;
274 return x & 0x7FFFFFFF;
275 }
276
277 static int Stall(int its) {
278 static volatile jint rv = 1;
279 volatile int OnFrame = 0;
280 jint v = rv ^ UNS(OnFrame);
281 while (--its >= 0) {
282 v = MarsagliaXORV(v);
283 }
284 // Make this impossible for the compiler to optimize away,
285 // but (mostly) avoid W coherency sharing on MP systems.
286 if (v == 0x12345) rv = v;
287 return v;
288 }
289
290 int Monitor::TryLock() {
291 intptr_t v = _LockWord.FullWord;
292 for (;;) {
293 if ((v & _LBIT) != 0) return 0;
294 const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v);
295 if (v == u) return 1;
296 v = u;
297 }
298 }
299
300 int Monitor::TryFast() {
301 // Optimistic fast-path form ...
302 // Fast-path attempt for the common uncontended case.
303 // Avoid RTS->RTO $ coherence upgrade on typical SMP systems.
304 intptr_t v = Atomic::cmpxchg((intptr_t)_LBIT, &_LockWord.FullWord, (intptr_t)0); // agro ...
305 if (v == 0) return 1;
306
307 for (;;) {
308 if ((v & _LBIT) != 0) return 0;
309 const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v);
310 if (v == u) return 1;
311 v = u;
312 }
313 }
314
315 int Monitor::ILocked() {
316 const intptr_t w = _LockWord.FullWord & 0xFF;
317 assert(w == 0 || w == _LBIT, "invariant");
318 return w == _LBIT;
319 }
320
321 // Polite TATAS spinlock with exponential backoff - bounded spin.
322 // Ideally we'd use processor cycles, time or vtime to control
323 // the loop, but we currently use iterations.
324 // All the constants within were derived empirically but work over
562 // but only one concurrent consumer (detacher of RATs).
563 // Consider protecting this critical section with schedctl on Solaris.
564 // Unlike a normal lock, however, the exiting thread "locks" OnDeck,
565 // picks a successor and marks that thread as OnDeck. That successor
566 // thread will then clear OnDeck once it eventually acquires the outer lock.
567 if (Atomic::cmpxchg((ParkEvent*)_LBIT, &_OnDeck, (ParkEvent*)NULL) != NULL) {
568 return;
569 }
570
571 ParkEvent * List = _EntryList;
572 if (List != NULL) {
573 // Transfer the head of the EntryList to the OnDeck position.
574 // Once OnDeck, a thread stays OnDeck until it acquires the lock.
575 // For a given lock there is at most one OnDeck thread at any one instant.
576 WakeOne:
577 assert(List == _EntryList, "invariant");
578 ParkEvent * const w = List;
579 assert(RelaxAssert || w != Thread::current()->_MutexEvent, "invariant");
580 _EntryList = w->ListNext;
581 // as a diagnostic measure consider setting w->_ListNext = BAD
582 assert(UNS(_OnDeck) == _LBIT, "invariant");
583
584 // Pass OnDeck role to w, ensuring that _EntryList has been set first.
585 // w will clear _OnDeck once it acquires the outer lock.
586 // Note that once we set _OnDeck that thread can acquire the mutex, proceed
587 // with its critical section and then enter this code to unlock the mutex. So
588 // you can have multiple threads active in IUnlock at the same time.
589 OrderAccess::release_store(&_OnDeck, w);
590
591 // Another optional optimization ...
592 // For heavily contended locks it's not uncommon that some other
593 // thread acquired the lock while this thread was arranging succession.
594 // Try to defer the unpark() operation - Delegate the responsibility
595 // for unpark()ing the OnDeck thread to the current or subsequent owners
596 // That is, the new owner is responsible for unparking the OnDeck thread.
597 OrderAccess::storeload();
598 cxq = _LockWord.FullWord;
599 if (cxq & _LBIT) return;
600
601 w->unpark();
602 return;
629 // Note too, that it's safe for this thread to traverse the cxq
630 // without taking any special concurrency precautions.
631 }
632
633 // We don't currently reorder the cxq segment as we move it onto
634 // the EntryList, but it might make sense to reverse the order
635 // or perhaps sort by thread priority. See the comments in
636 // synchronizer.cpp objectMonitor::exit().
637 assert(_EntryList == NULL, "invariant");
638 _EntryList = List = (ParkEvent *)(cxq & ~_LBIT);
639 assert(List != NULL, "invariant");
640 goto WakeOne;
641 }
642
643 // cxq|EntryList is empty.
644 // w == NULL implies that cxq|EntryList == NULL in the past.
645 // Possible race - rare inopportune interleaving.
646 // A thread could have added itself to cxq since this thread previously checked.
647 // Detect and recover by refetching cxq.
648 Punt:
649 assert(UNS(_OnDeck) == _LBIT, "invariant");
650 _OnDeck = NULL; // Release inner lock.
651 OrderAccess::storeload(); // Dekker duality - pivot point
652
653 // Resample LockWord/cxq to recover from possible race.
654 // For instance, while this thread T1 held OnDeck, some other thread T2 might
655 // acquire the outer lock. Another thread T3 might try to acquire the outer
656 // lock, but encounter contention and enqueue itself on cxq. T2 then drops the
657 // outer lock, but skips succession as this thread T1 still holds OnDeck.
658 // T1 is and remains responsible for ensuring succession of T3.
659 //
660 // Note that we don't need to recheck EntryList, just cxq.
661 // If threads moved onto EntryList since we dropped OnDeck
662 // that implies some other thread forced succession.
663 cxq = _LockWord.FullWord;
664 if ((cxq & ~_LBIT) != 0 && (cxq & _LBIT) == 0) {
665 goto Succession; // potential race -- re-run succession
666 }
667 return;
668 }
669
|
245 // 2. low-level muxAcquire and muxRelease
246 // 3. low-level spinAcquire and spinRelease
247 // 4. native Mutex:: and Monitor::
248 // 5. jvm_raw_lock() and _unlock()
249 // 6. JVMTI raw monitors -- distinct from (5) despite having a confusingly
250 // similar name.
251 //
252 // o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o-o
253
// UNS(x): view x (pointer or integral) as an unsigned machine word so it
// can be compared/bit-tested against lock-word constants such as _LBIT.
254 #define UNS(x) (uintptr_t(x))
// TRACE(m): rate-limited diagnostic trace.  Each expansion carries its own
// static call counter and prints only when the count reaches a power of two
// (the (x & (x - 1)) == 0 test), so output volume grows logarithmically
// with call frequency instead of flooding stdout.
255 #define TRACE(m) \
256 { \
257 static volatile int ctr = 0; \
258 int x = ++ctr; \
259 if ((x & (x - 1)) == 0) { \
260 ::printf("%d:%s\n", x, #m); \
261 ::fflush(stdout); \
262 } \
263 }
264
// Low-order bit of the lock word; set iff the outer lock is held.
// The remaining bits of _LockWord.FullWord carry the cxq list pointer.
265 const intptr_t _LBIT = 1;
266
267 // Endian-ness ... index of least-significant byte in SplitWord.Bytes[]
268 #ifdef VM_LITTLE_ENDIAN
269 #define _LSBINDEX 0
270 #else
271 #define _LSBINDEX (sizeof(intptr_t)-1)
272 #endif
273
274 // Simplistic low-quality Marsaglia SHIFT-XOR RNG.
275 // Bijective except for the trailing mask operation.
276 // Useful for spin loops as the compiler can't optimize it away.
277
278 static inline jint MarsagliaXORV(jint x) {
279 if (x == 0) x = 1|os::random();
280 x ^= x << 6;
281 x ^= ((unsigned)x) >> 21;
282 x ^= x << 7;
283 return x & 0x7FFFFFFF;
284 }
285
286 static int Stall(int its) {
287 static volatile jint rv = 1;
288 volatile int OnFrame = 0;
289 jint v = rv ^ UNS(OnFrame);
290 while (--its >= 0) {
291 v = MarsagliaXORV(v);
292 }
293 // Make this impossible for the compiler to optimize away,
294 // but (mostly) avoid W coherency sharing on MP systems.
295 if (v == 0x12345) rv = v;
296 return v;
297 }
298
299 int Monitor::TryLock() {
300 intptr_t v = _LockWord.FullWord;
301 for (;;) {
302 if ((v & _LBIT) != 0) return 0;
303 const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v);
304 if (v == u) return 1;
305 v = u;
306 }
307 }
308
309 int Monitor::TryFast() {
310 // Optimistic fast-path form ...
311 // Fast-path attempt for the common uncontended case.
312 // Avoid RTS->RTO $ coherence upgrade on typical SMP systems.
313 intptr_t v = Atomic::cmpxchg(_LBIT, &_LockWord.FullWord, (intptr_t)0); // agro ...
314 if (v == 0) return 1;
315
316 for (;;) {
317 if ((v & _LBIT) != 0) return 0;
318 const intptr_t u = Atomic::cmpxchg(v|_LBIT, &_LockWord.FullWord, v);
319 if (v == u) return 1;
320 v = u;
321 }
322 }
323
324 int Monitor::ILocked() {
325 const intptr_t w = _LockWord.FullWord & 0xFF;
326 assert(w == 0 || w == _LBIT, "invariant");
327 return w == _LBIT;
328 }
329
330 // Polite TATAS spinlock with exponential backoff - bounded spin.
331 // Ideally we'd use processor cycles, time or vtime to control
332 // the loop, but we currently use iterations.
333 // All the constants within were derived empirically but work over
571 // but only one concurrent consumer (detacher of RATs).
572 // Consider protecting this critical section with schedctl on Solaris.
573 // Unlike a normal lock, however, the exiting thread "locks" OnDeck,
574 // picks a successor and marks that thread as OnDeck. That successor
575 // thread will then clear OnDeck once it eventually acquires the outer lock.
576 if (Atomic::cmpxchg((ParkEvent*)_LBIT, &_OnDeck, (ParkEvent*)NULL) != NULL) {
577 return;
578 }
579
580 ParkEvent * List = _EntryList;
581 if (List != NULL) {
582 // Transfer the head of the EntryList to the OnDeck position.
583 // Once OnDeck, a thread stays OnDeck until it acquires the lock.
584 // For a given lock there is at most one OnDeck thread at any one instant.
585 WakeOne:
586 assert(List == _EntryList, "invariant");
587 ParkEvent * const w = List;
588 assert(RelaxAssert || w != Thread::current()->_MutexEvent, "invariant");
589 _EntryList = w->ListNext;
590 // as a diagnostic measure consider setting w->_ListNext = BAD
591 assert(intptr_t(_OnDeck) == _LBIT, "invariant");
592
593 // Pass OnDeck role to w, ensuring that _EntryList has been set first.
594 // w will clear _OnDeck once it acquires the outer lock.
595 // Note that once we set _OnDeck that thread can acquire the mutex, proceed
596 // with its critical section and then enter this code to unlock the mutex. So
597 // you can have multiple threads active in IUnlock at the same time.
598 OrderAccess::release_store(&_OnDeck, w);
599
600 // Another optional optimization ...
601 // For heavily contended locks it's not uncommon that some other
602 // thread acquired the lock while this thread was arranging succession.
603 // Try to defer the unpark() operation - Delegate the responsibility
604 // for unpark()ing the OnDeck thread to the current or subsequent owners
605 // That is, the new owner is responsible for unparking the OnDeck thread.
606 OrderAccess::storeload();
607 cxq = _LockWord.FullWord;
608 if (cxq & _LBIT) return;
609
610 w->unpark();
611 return;
638 // Note too, that it's safe for this thread to traverse the cxq
639 // without taking any special concurrency precautions.
640 }
641
642 // We don't currently reorder the cxq segment as we move it onto
643 // the EntryList, but it might make sense to reverse the order
644 // or perhaps sort by thread priority. See the comments in
645 // synchronizer.cpp objectMonitor::exit().
646 assert(_EntryList == NULL, "invariant");
647 _EntryList = List = (ParkEvent *)(cxq & ~_LBIT);
648 assert(List != NULL, "invariant");
649 goto WakeOne;
650 }
651
652 // cxq|EntryList is empty.
653 // w == NULL implies that cxq|EntryList == NULL in the past.
654 // Possible race - rare inopportune interleaving.
655 // A thread could have added itself to cxq since this thread previously checked.
656 // Detect and recover by refetching cxq.
657 Punt:
658 assert(intptr_t(_OnDeck) == _LBIT, "invariant");
659 _OnDeck = NULL; // Release inner lock.
660 OrderAccess::storeload(); // Dekker duality - pivot point
661
662 // Resample LockWord/cxq to recover from possible race.
663 // For instance, while this thread T1 held OnDeck, some other thread T2 might
664 // acquire the outer lock. Another thread T3 might try to acquire the outer
665 // lock, but encounter contention and enqueue itself on cxq. T2 then drops the
666 // outer lock, but skips succession as this thread T1 still holds OnDeck.
667 // T1 is and remains responsible for ensuring succession of T3.
668 //
669 // Note that we don't need to recheck EntryList, just cxq.
670 // If threads moved onto EntryList since we dropped OnDeck
671 // that implies some other thread forced succession.
672 cxq = _LockWord.FullWord;
673 if ((cxq & ~_LBIT) != 0 && (cxq & _LBIT) == 0) {
674 goto Succession; // potential race -- re-run succession
675 }
676 return;
677 }
678
|