1764 #ifndef _LP64 1765 // The object is inflated. 1766 1767 // boxReg refers to the on-stack BasicLock in the current frame. 1768 // We'd like to write: 1769 // set box->_displaced_header = markOopDesc::unused_mark(). Any non-0 value suffices. 1770 // This is convenient but results a ST-before-CAS penalty. The following CAS suffers 1771 // additional latency as we have another ST in the store buffer that must drain. 1772 1773 if (EmitSync & 8192) { 1774 movptr(Address(boxReg, 0), 3); // results in ST-before-CAS penalty 1775 get_thread (scrReg); 1776 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] 1777 movptr(tmpReg, NULL_WORD); // consider: xor vs mov 1778 if (os::is_MP()) { 1779 lock(); 1780 } 1781 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 1782 } else 1783 if ((EmitSync & 128) == 0) { // avoid ST-before-CAS 1784 movptr(scrReg, boxReg); 1785 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] 1786 1787 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes 1788 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { 1789 // prefetchw [eax + Offset(_owner)-2] 1790 prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 1791 } 1792 1793 if ((EmitSync & 64) == 0) { 1794 // Optimistic form: consider XORL tmpReg,tmpReg 1795 movptr(tmpReg, NULL_WORD); 1796 } else { 1797 // Can suffer RTS->RTO upgrades on shared or cold $ lines 1798 // Test-And-CAS instead of CAS 1799 movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); // rax, = m->_owner 1800 testptr(tmpReg, tmpReg); // Locked ? 1801 jccb (Assembler::notZero, DONE_LABEL); 1802 } 1803 1804 // Appears unlocked - try to swing _owner from null to non-null. 1805 // Ideally, I'd manifest "Self" with get_thread and then attempt 1806 // to CAS the register containing Self into m->Owner. 1807 // But we don't have enough registers, so instead we can either try to CAS 1808 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds 1809 // we later store "Self" into m->Owner. Transiently storing a stack address 1810 // (rsp or the address of the box) into m->owner is harmless. 1811 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. 1812 if (os::is_MP()) { 1813 lock(); 1814 } 1815 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 1816 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3 1817 jccb (Assembler::notZero, DONE_LABEL); 1818 get_thread (scrReg); // beware: clobbers ICCs 1819 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg); 1820 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success 1821 1822 // If the CAS fails we can either retry or pass control to the slow-path. 1823 // We use the latter tactic. 1824 // Pass the CAS result in the icc.ZFlag into DONE_LABEL 1825 // If the CAS was successful ... 1826 // Self has acquired the lock 1827 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. 1828 // Intentional fall-through into DONE_LABEL ... 1829 } else { 1830 movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())); // results in ST-before-CAS penalty 1831 movptr(boxReg, tmpReg); 1832 1833 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes 1834 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { 1835 // prefetchw [eax + Offset(_owner)-2] 1836 prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 1837 } 2066 // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0. 2067 // The integer condition codes will tell us if succ was 0. 2068 // Since _succ and _owner should reside in the same $line and 2069 // we just stored into _owner, it's likely that the $line 2070 // remains in M-state for the lock:orl. 2071 // 2072 // We currently use (3), although it's likely that switching to (2) 2073 // is correct for the future. 2074 2075 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); 2076 if (os::is_MP()) { 2077 lock(); addptr(Address(rsp, 0), 0); 2078 } 2079 // Ratify _succ remains non-null 2080 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0); 2081 jccb (Assembler::notZero, LSuccess); 2082 2083 xorptr(boxReg, boxReg); // box is really EAX 2084 if (os::is_MP()) { lock(); } 2085 cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 2086 jccb (Assembler::notEqual, LSuccess); 2087 // Since we're low on registers we installed rsp as a placeholding in _owner. 2088 // Now install Self over rsp. This is safe as we're transitioning from 2089 // non-null to non=null 2090 get_thread (boxReg); 2091 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg); 2092 // Intentional fall-through into LGoSlowPath ... 2093 2094 bind (LGoSlowPath); 2095 orptr(boxReg, 1); // set ICC.ZF=0 to indicate failure 2096 jmpb (DONE_LABEL); 2097 2098 bind (LSuccess); 2099 xorptr(boxReg, boxReg); // set ICC.ZF=1 to indicate success 2100 jmpb (DONE_LABEL); 2101 } 2102 2103 bind (Stacked); 2104 // It's not inflated and it's not recursively stack-locked and it's not biased. 2105 // It must be stack-locked. 2173 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); 2174 jccb (Assembler::notZero, LSuccess); 2175 2176 // Rare inopportune interleaving - race. 2177 // The successor vanished in the small window above. 2178 // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor. 2179 // We need to ensure progress and succession. 2180 // Try to reacquire the lock. 2181 // If that fails then the new owner is responsible for succession and this 2182 // thread needs to take no further action and can exit via the fast path (success). 2183 // If the re-acquire succeeds then pass control into the slow path. 2184 // As implemented, this latter mode is horrible because we generated more 2185 // coherence traffic on the lock *and* artifically extended the critical section 2186 // length while by virtue of passing control into the slow path. 2187 2188 // box is really RAX -- the following CMPXCHG depends on that binding 2189 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R) 2190 movptr(boxReg, (int32_t)NULL_WORD); 2191 if (os::is_MP()) { lock(); } 2192 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 2193 jccb (Assembler::notEqual, LSuccess); 2194 // Intentional fall-through into slow-path 2195 2196 bind (LGoSlowPath); 2197 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure 2198 jmpb (DONE_LABEL); 2199 2200 bind (LSuccess); 2201 testl (boxReg, 0); // set ICC.ZF=1 to indicate success 2202 jmpb (DONE_LABEL); 2203 } 2204 2205 bind (Stacked); 2206 movptr(tmpReg, Address (boxReg, 0)); // re-fetch 2207 if (os::is_MP()) { lock(); } 2208 cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box 2209 2210 if (EmitSync & 65536) { 2211 bind (CheckSucc); 2212 } | 1764 #ifndef _LP64 1765 // The object is inflated. 1766 1767 // boxReg refers to the on-stack BasicLock in the current frame. 1768 // We'd like to write: 1769 // set box->_displaced_header = markOopDesc::unused_mark(). Any non-0 value suffices. 1770 // This is convenient but results a ST-before-CAS penalty. The following CAS suffers 1771 // additional latency as we have another ST in the store buffer that must drain. 1772 1773 if (EmitSync & 8192) { 1774 movptr(Address(boxReg, 0), 3); // results in ST-before-CAS penalty 1775 get_thread (scrReg); 1776 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] 1777 movptr(tmpReg, NULL_WORD); // consider: xor vs mov 1778 if (os::is_MP()) { 1779 lock(); 1780 } 1781 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 1782 } else 1783 if ((EmitSync & 128) == 0) { // avoid ST-before-CAS 1784 // register juggle because we need tmpReg for cmpxchgptr below 1785 movptr(scrReg, boxReg); 1786 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2] 1787 1788 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes 1789 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { 1790 // prefetchw [eax + Offset(_owner)-2] 1791 prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 1792 } 1793 1794 if ((EmitSync & 64) == 0) { 1795 // Optimistic form: consider XORL tmpReg,tmpReg 1796 movptr(tmpReg, NULL_WORD); 1797 } else { 1798 // Can suffer RTS->RTO upgrades on shared or cold $ lines 1799 // Test-And-CAS instead of CAS 1800 movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); // rax, = m->_owner 1801 testptr(tmpReg, tmpReg); // Locked ? 1802 jccb (Assembler::notZero, DONE_LABEL); 1803 } 1804 1805 // Appears unlocked - try to swing _owner from null to non-null. 1806 // Ideally, I'd manifest "Self" with get_thread and then attempt 1807 // to CAS the register containing Self into m->Owner. 1808 // But we don't have enough registers, so instead we can either try to CAS 1809 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds 1810 // we later store "Self" into m->Owner. Transiently storing a stack address 1811 // (rsp or the address of the box) into m->owner is harmless. 1812 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand. 1813 if (os::is_MP()) { 1814 lock(); 1815 } 1816 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 1817 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3 1818 // If we weren't able to swing _owner from NULL to the BasicLock 1819 // then take the slow path. 1820 jccb (Assembler::notZero, DONE_LABEL); 1821 // update _owner from from BasicLock to thread 1822 get_thread (scrReg); // beware: clobbers ICCs 1823 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg); 1824 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success 1825 1826 // If the CAS fails we can either retry or pass control to the slow-path. 1827 // We use the latter tactic. 1828 // Pass the CAS result in the icc.ZFlag into DONE_LABEL 1829 // If the CAS was successful ... 1830 // Self has acquired the lock 1831 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it. 1832 // Intentional fall-through into DONE_LABEL ... 1833 } else { 1834 movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())); // results in ST-before-CAS penalty 1835 movptr(boxReg, tmpReg); 1836 1837 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes 1838 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) { 1839 // prefetchw [eax + Offset(_owner)-2] 1840 prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 1841 } 2070 // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0. 2071 // The integer condition codes will tell us if succ was 0. 2072 // Since _succ and _owner should reside in the same $line and 2073 // we just stored into _owner, it's likely that the $line 2074 // remains in M-state for the lock:orl. 2075 // 2076 // We currently use (3), although it's likely that switching to (2) 2077 // is correct for the future. 2078 2079 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD); 2080 if (os::is_MP()) { 2081 lock(); addptr(Address(rsp, 0), 0); 2082 } 2083 // Ratify _succ remains non-null 2084 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0); 2085 jccb (Assembler::notZero, LSuccess); 2086 2087 xorptr(boxReg, boxReg); // box is really EAX 2088 if (os::is_MP()) { lock(); } 2089 cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 2090 // There's no successor so we tried to regrab the lock with 2091 // the placeholder value. If that didn't work, then another 2092 // grabbed the lock so we're done (and exit was a success). 2093 jccb (Assembler::notEqual, LSuccess); 2094 // Since we're low on registers we installed rsp as a placeholding in _owner. 2095 // Now install Self over rsp. This is safe as we're transitioning from 2096 // non-null to non=null 2097 get_thread (boxReg); 2098 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg); 2099 // Intentional fall-through into LGoSlowPath ... 2100 2101 bind (LGoSlowPath); 2102 orptr(boxReg, 1); // set ICC.ZF=0 to indicate failure 2103 jmpb (DONE_LABEL); 2104 2105 bind (LSuccess); 2106 xorptr(boxReg, boxReg); // set ICC.ZF=1 to indicate success 2107 jmpb (DONE_LABEL); 2108 } 2109 2110 bind (Stacked); 2111 // It's not inflated and it's not recursively stack-locked and it's not biased. 2112 // It must be stack-locked. 2180 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD); 2181 jccb (Assembler::notZero, LSuccess); 2182 2183 // Rare inopportune interleaving - race. 2184 // The successor vanished in the small window above. 2185 // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor. 2186 // We need to ensure progress and succession. 2187 // Try to reacquire the lock. 2188 // If that fails then the new owner is responsible for succession and this 2189 // thread needs to take no further action and can exit via the fast path (success). 2190 // If the re-acquire succeeds then pass control into the slow path. 2191 // As implemented, this latter mode is horrible because we generated more 2192 // coherence traffic on the lock *and* artifically extended the critical section 2193 // length while by virtue of passing control into the slow path. 2194 2195 // box is really RAX -- the following CMPXCHG depends on that binding 2196 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R) 2197 movptr(boxReg, (int32_t)NULL_WORD); 2198 if (os::is_MP()) { lock(); } 2199 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); 2200 // There's no successor so we tried to regrab the lock. 2201 // If that didn't work, then another grabbed the lock 2202 // so we're done (and exit was a success). 2203 jccb (Assembler::notEqual, LSuccess); 2204 // Intentional fall-through into slow-path 2205 2206 bind (LGoSlowPath); 2207 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure 2208 jmpb (DONE_LABEL); 2209 2210 bind (LSuccess); 2211 testl (boxReg, 0); // set ICC.ZF=1 to indicate success 2212 jmpb (DONE_LABEL); 2213 } 2214 2215 bind (Stacked); 2216 movptr(tmpReg, Address (boxReg, 0)); // re-fetch 2217 if (os::is_MP()) { lock(); } 2218 cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box 2219 2220 if (EmitSync & 65536) { 2221 bind (CheckSucc); 2222 } |