4684 // * It's possible to construct a 1-0 lock if we encode the lockword as 4685 // (List,LockByte). Acquire will CAS the full lockword while Release 4686 // will STB 0 into the LockByte. The 1-0 scheme admits stranding, so 4687 // acquiring threads use timers (ParkTimed) to detect and recover from 4688 // the stranding window. Thread/Node structures must be aligned on 256-byte 4689 // boundaries by using placement-new. 4690 // * Augment MCS with advisory back-link fields maintained with CAS(). 4691 // Pictorially: LockWord -> T1 <-> T2 <-> T3 <-> ... <-> Tn <-> Owner. 4692 // The validity of the backlinks must be ratified before we trust the value. 4693 // If the backlinks are invalid the exiting thread must back-track through the 4694 // the forward links, which are always trustworthy. 4695 // * Add a successor indication. The LockWord is currently encoded as 4696 // (List, LOCKBIT:1). We could also add a SUCCBIT or an explicit _succ variable 4697 // to provide the usual futile-wakeup optimization. 4698 // See RTStt for details. 4699 // * Consider schedctl.sc_nopreempt to cover the critical section. 4700 // 4701 4702 4703 typedef volatile intptr_t MutexT; // Mux Lock-word 4704 enum MuxBits { LOCKBIT = 1 }; 4705 4706 void Thread::muxAcquire(volatile intptr_t * Lock, const char * LockName) { 4707 intptr_t w = Atomic::cmpxchg_ptr(LOCKBIT, Lock, 0); 4708 if (w == 0) return; 4709 if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) { 4710 return; 4711 } 4712 4713 TEVENT(muxAcquire - Contention); 4714 ParkEvent * const Self = Thread::current()->_MuxEvent; 4715 assert((intptr_t(Self) & LOCKBIT) == 0, "invariant"); 4716 for (;;) { 4717 int its = (os::is_MP() ? 100 : 0) + 1; 4718 4719 // Optional spin phase: spin-then-park strategy 4720 while (--its >= 0) { 4721 w = *Lock; 4722 if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) { 4723 return; 4724 } 4725 } 4726 4727 Self->reset(); 4728 Self->OnList = intptr_t(Lock); 4729 // The following fence() isn't _strictly necessary as the subsequent 4730 // CAS() both serializes execution and ratifies the fetched *Lock value. 4731 OrderAccess::fence(); 4732 for (;;) { 4733 w = *Lock; 4734 if ((w & LOCKBIT) == 0) { 4735 if (Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) { 4736 Self->OnList = 0; // hygiene - allows stronger asserts 4737 return; 4738 } 4739 continue; // Interference -- *Lock changed -- Just retry 4740 } 4741 assert(w & LOCKBIT, "invariant"); 4742 Self->ListNext = (ParkEvent *) (w & ~LOCKBIT); 4743 if (Atomic::cmpxchg_ptr(intptr_t(Self)|LOCKBIT, Lock, w) == w) break; 4744 } 4745 4746 while (Self->OnList != 0) { 4747 Self->park(); 4748 } 4749 } 4750 } 4751 4752 void Thread::muxAcquireW(volatile intptr_t * Lock, ParkEvent * ev) { 4753 intptr_t w = Atomic::cmpxchg_ptr(LOCKBIT, Lock, 0); 4754 if (w == 0) return; 4755 if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) { 4756 return; 4757 } 4758 4759 TEVENT(muxAcquire - Contention); 4760 ParkEvent * ReleaseAfter = NULL; 4761 if (ev == NULL) { 4762 ev = ReleaseAfter = ParkEvent::Allocate(NULL); 4763 } 4764 assert((intptr_t(ev) & LOCKBIT) == 0, "invariant"); 4765 for (;;) { 4766 guarantee(ev->OnList == 0, "invariant"); 4767 int its = (os::is_MP() ? 100 : 0) + 1; 4768 4769 // Optional spin phase: spin-then-park strategy 4770 while (--its >= 0) { 4771 w = *Lock; 4772 if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) { 4773 if (ReleaseAfter != NULL) { 4774 ParkEvent::Release(ReleaseAfter); 4775 } 4776 return; 4777 } 4778 } 4779 4780 ev->reset(); 4781 ev->OnList = intptr_t(Lock); 4782 // The following fence() isn't _strictly necessary as the subsequent 4783 // CAS() both serializes execution and ratifies the fetched *Lock value. 4784 OrderAccess::fence(); 4785 for (;;) { 4786 w = *Lock; 4787 if ((w & LOCKBIT) == 0) { 4788 if (Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) { 4789 ev->OnList = 0; 4790 // We call ::Release while holding the outer lock, thus 4791 // artificially lengthening the critical section. 4792 // Consider deferring the ::Release() until the subsequent unlock(), 4793 // after we've dropped the outer lock. 4794 if (ReleaseAfter != NULL) { 4795 ParkEvent::Release(ReleaseAfter); 4796 } 4797 return; 4798 } 4799 continue; // Interference -- *Lock changed -- Just retry 4800 } 4801 assert(w & LOCKBIT, "invariant"); 4802 ev->ListNext = (ParkEvent *) (w & ~LOCKBIT); 4803 if (Atomic::cmpxchg_ptr(intptr_t(ev)|LOCKBIT, Lock, w) == w) break; 4804 } 4805 4806 while (ev->OnList != 0) { 4807 ev->park(); 4808 } 4809 } 4810 } 4811 4812 // Release() must extract a successor from the list and then wake that thread. 4813 // It can "pop" the front of the list or use a detach-modify-reattach (DMR) scheme 4814 // similar to that used by ParkEvent::Allocate() and ::Release(). DMR-based 4815 // Release() would : 4816 // (A) CAS() or swap() null to *Lock, releasing the lock and detaching the list. 4817 // (B) Extract a successor from the private list "in-hand" 4818 // (C) attempt to CAS() the residual back into *Lock over null. 4819 // If there were any newly arrived threads and the CAS() would fail. 4820 // In that case Release() would detach the RATs, re-merge the list in-hand 4821 // with the RATs and repeat as needed. Alternately, Release() might 4822 // detach and extract a successor, but then pass the residual list to the wakee. 4823 // The wakee would be responsible for reattaching and remerging before it 4824 // competed for the lock. 4825 // 4826 // Both "pop" and DMR are immune from ABA corruption -- there can be 4827 // multiple concurrent pushers, but only one popper or detacher. 4828 // This implementation pops from the head of the list. This is unfair, 4829 // but tends to provide excellent throughput as hot threads remain hot. 4830 // (We wake recently run threads first). 4831 // 4832 // All paths through muxRelease() will execute a CAS. 4833 // Release consistency -- We depend on the CAS in muxRelease() to provide full 4834 // bidirectional fence/MEMBAR semantics, ensuring that all prior memory operations 4835 // executed within the critical section are complete and globally visible before the 4836 // store (CAS) to the lock-word that releases the lock becomes globally visible. 4837 void Thread::muxRelease(volatile intptr_t * Lock) { 4838 for (;;) { 4839 const intptr_t w = Atomic::cmpxchg_ptr(0, Lock, LOCKBIT); 4840 assert(w & LOCKBIT, "invariant"); 4841 if (w == LOCKBIT) return; 4842 ParkEvent * const List = (ParkEvent *) (w & ~LOCKBIT); 4843 assert(List != NULL, "invariant"); 4844 assert(List->OnList == intptr_t(Lock), "invariant"); 4845 ParkEvent * const nxt = List->ListNext; 4846 guarantee((intptr_t(nxt) & LOCKBIT) == 0, "invariant"); 4847 4848 // The following CAS() releases the lock and pops the head element. 4849 // The CAS() also ratifies the previously fetched lock-word value. 4850 if (Atomic::cmpxchg_ptr (intptr_t(nxt), Lock, w) != w) { 4851 continue; 4852 } 4853 List->OnList = 0; 4854 OrderAccess::fence(); 4855 List->unpark(); 4856 return; 4857 } 4858 } 4859 4860 4861 void Threads::verify() { 4862 ALL_JAVA_THREADS(p) { 4863 p->verify(); 4864 } 4865 VMThread* thread = VMThread::vm_thread(); 4866 if (thread != NULL) thread->verify(); 4867 } | 4684 // * It's possible to construct a 1-0 lock if we encode the lockword as 4685 // (List,LockByte). Acquire will CAS the full lockword while Release 4686 // will STB 0 into the LockByte. The 1-0 scheme admits stranding, so 4687 // acquiring threads use timers (ParkTimed) to detect and recover from 4688 // the stranding window. Thread/Node structures must be aligned on 256-byte 4689 // boundaries by using placement-new. 4690 // * Augment MCS with advisory back-link fields maintained with CAS(). 4691 // Pictorially: LockWord -> T1 <-> T2 <-> T3 <-> ... <-> Tn <-> Owner. 4692 // The validity of the backlinks must be ratified before we trust the value. 4693 // If the backlinks are invalid the exiting thread must back-track through the 4694 // the forward links, which are always trustworthy. 4695 // * Add a successor indication. The LockWord is currently encoded as 4696 // (List, LOCKBIT:1). We could also add a SUCCBIT or an explicit _succ variable 4697 // to provide the usual futile-wakeup optimization. 4698 // See RTStt for details. 4699 // * Consider schedctl.sc_nopreempt to cover the critical section. 4700 // 4701 4702 4703 typedef volatile intptr_t MutexT; // Mux Lock-word 4704 const intptr_t LOCKBIT = 1; 4705 4706 void Thread::muxAcquire(volatile intptr_t * Lock, const char * LockName) { 4707 intptr_t w = Atomic::cmpxchg(LOCKBIT, Lock, (intptr_t)0); 4708 if (w == 0) return; 4709 if ((w & LOCKBIT) == 0 && Atomic::cmpxchg(w|LOCKBIT, Lock, w) == w) { 4710 return; 4711 } 4712 4713 TEVENT(muxAcquire - Contention); 4714 ParkEvent * const Self = Thread::current()->_MuxEvent; 4715 assert((intptr_t(Self) & LOCKBIT) == 0, "invariant"); 4716 for (;;) { 4717 int its = (os::is_MP() ? 100 : 0) + 1; 4718 4719 // Optional spin phase: spin-then-park strategy 4720 while (--its >= 0) { 4721 w = *Lock; 4722 if ((w & LOCKBIT) == 0 && Atomic::cmpxchg(w|LOCKBIT, Lock, w) == w) { 4723 return; 4724 } 4725 } 4726 4727 Self->reset(); 4728 Self->OnList = intptr_t(Lock); 4729 // The following fence() isn't _strictly necessary as the subsequent 4730 // CAS() both serializes execution and ratifies the fetched *Lock value. 4731 OrderAccess::fence(); 4732 for (;;) { 4733 w = *Lock; 4734 if ((w & LOCKBIT) == 0) { 4735 if (Atomic::cmpxchg(w|LOCKBIT, Lock, w) == w) { 4736 Self->OnList = 0; // hygiene - allows stronger asserts 4737 return; 4738 } 4739 continue; // Interference -- *Lock changed -- Just retry 4740 } 4741 assert(w & LOCKBIT, "invariant"); 4742 Self->ListNext = (ParkEvent *) (w & ~LOCKBIT); 4743 if (Atomic::cmpxchg(intptr_t(Self)|LOCKBIT, Lock, w) == w) break; 4744 } 4745 4746 while (Self->OnList != 0) { 4747 Self->park(); 4748 } 4749 } 4750 } 4751 4752 void Thread::muxAcquireW(volatile intptr_t * Lock, ParkEvent * ev) { 4753 intptr_t w = Atomic::cmpxchg(LOCKBIT, Lock, (intptr_t)0); 4754 if (w == 0) return; 4755 if ((w & LOCKBIT) == 0 && Atomic::cmpxchg(w|LOCKBIT, Lock, w) == w) { 4756 return; 4757 } 4758 4759 TEVENT(muxAcquire - Contention); 4760 ParkEvent * ReleaseAfter = NULL; 4761 if (ev == NULL) { 4762 ev = ReleaseAfter = ParkEvent::Allocate(NULL); 4763 } 4764 assert((intptr_t(ev) & LOCKBIT) == 0, "invariant"); 4765 for (;;) { 4766 guarantee(ev->OnList == 0, "invariant"); 4767 int its = (os::is_MP() ? 100 : 0) + 1; 4768 4769 // Optional spin phase: spin-then-park strategy 4770 while (--its >= 0) { 4771 w = *Lock; 4772 if ((w & LOCKBIT) == 0 && Atomic::cmpxchg(w|LOCKBIT, Lock, w) == w) { 4773 if (ReleaseAfter != NULL) { 4774 ParkEvent::Release(ReleaseAfter); 4775 } 4776 return; 4777 } 4778 } 4779 4780 ev->reset(); 4781 ev->OnList = intptr_t(Lock); 4782 // The following fence() isn't _strictly necessary as the subsequent 4783 // CAS() both serializes execution and ratifies the fetched *Lock value. 4784 OrderAccess::fence(); 4785 for (;;) { 4786 w = *Lock; 4787 if ((w & LOCKBIT) == 0) { 4788 if (Atomic::cmpxchg(w|LOCKBIT, Lock, w) == w) { 4789 ev->OnList = 0; 4790 // We call ::Release while holding the outer lock, thus 4791 // artificially lengthening the critical section. 4792 // Consider deferring the ::Release() until the subsequent unlock(), 4793 // after we've dropped the outer lock. 4794 if (ReleaseAfter != NULL) { 4795 ParkEvent::Release(ReleaseAfter); 4796 } 4797 return; 4798 } 4799 continue; // Interference -- *Lock changed -- Just retry 4800 } 4801 assert(w & LOCKBIT, "invariant"); 4802 ev->ListNext = (ParkEvent *) (w & ~LOCKBIT); 4803 if (Atomic::cmpxchg(intptr_t(ev)|LOCKBIT, Lock, w) == w) break; 4804 } 4805 4806 while (ev->OnList != 0) { 4807 ev->park(); 4808 } 4809 } 4810 } 4811 4812 // Release() must extract a successor from the list and then wake that thread. 4813 // It can "pop" the front of the list or use a detach-modify-reattach (DMR) scheme 4814 // similar to that used by ParkEvent::Allocate() and ::Release(). DMR-based 4815 // Release() would : 4816 // (A) CAS() or swap() null to *Lock, releasing the lock and detaching the list. 4817 // (B) Extract a successor from the private list "in-hand" 4818 // (C) attempt to CAS() the residual back into *Lock over null. 4819 // If there were any newly arrived threads and the CAS() would fail. 4820 // In that case Release() would detach the RATs, re-merge the list in-hand 4821 // with the RATs and repeat as needed. Alternately, Release() might 4822 // detach and extract a successor, but then pass the residual list to the wakee. 4823 // The wakee would be responsible for reattaching and remerging before it 4824 // competed for the lock. 4825 // 4826 // Both "pop" and DMR are immune from ABA corruption -- there can be 4827 // multiple concurrent pushers, but only one popper or detacher. 4828 // This implementation pops from the head of the list. This is unfair, 4829 // but tends to provide excellent throughput as hot threads remain hot. 4830 // (We wake recently run threads first). 4831 // 4832 // All paths through muxRelease() will execute a CAS. 4833 // Release consistency -- We depend on the CAS in muxRelease() to provide full 4834 // bidirectional fence/MEMBAR semantics, ensuring that all prior memory operations 4835 // executed within the critical section are complete and globally visible before the 4836 // store (CAS) to the lock-word that releases the lock becomes globally visible. 4837 void Thread::muxRelease(volatile intptr_t * Lock) { 4838 for (;;) { 4839 const intptr_t w = Atomic::cmpxchg((intptr_t)0, Lock, LOCKBIT); 4840 assert(w & LOCKBIT, "invariant"); 4841 if (w == LOCKBIT) return; 4842 ParkEvent * const List = (ParkEvent *) (w & ~LOCKBIT); 4843 assert(List != NULL, "invariant"); 4844 assert(List->OnList == intptr_t(Lock), "invariant"); 4845 ParkEvent * const nxt = List->ListNext; 4846 guarantee((intptr_t(nxt) & LOCKBIT) == 0, "invariant"); 4847 4848 // The following CAS() releases the lock and pops the head element. 4849 // The CAS() also ratifies the previously fetched lock-word value. 4850 if (Atomic::cmpxchg(intptr_t(nxt), Lock, w) != w) { 4851 continue; 4852 } 4853 List->OnList = 0; 4854 OrderAccess::fence(); 4855 List->unpark(); 4856 return; 4857 } 4858 } 4859 4860 4861 void Threads::verify() { 4862 ALL_JAVA_THREADS(p) { 4863 p->verify(); 4864 } 4865 VMThread* thread = VMThread::vm_thread(); 4866 if (thread != NULL) thread->verify(); 4867 } |