src/cpu/x86/vm/macroAssembler_x86.cpp

Print this page




1764 #ifndef _LP64
1765     // The object is inflated.
1766 
1767     // boxReg refers to the on-stack BasicLock in the current frame.
1768     // We'd like to write:
1769     //   set box->_displaced_header = markOopDesc::unused_mark().  Any non-0 value suffices.
1770     // This is convenient but results in a ST-before-CAS penalty.  The following CAS suffers
1771     // additional latency as we have another ST in the store buffer that must drain.
1772 
1773     if (EmitSync & 8192) {
1774        movptr(Address(boxReg, 0), 3);            // results in ST-before-CAS penalty
1775        get_thread (scrReg);
1776        movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
1777        movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
1778        if (os::is_MP()) {
1779          lock();
1780        }
1781        cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1782     } else
1783     if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS

1784        movptr(scrReg, boxReg);
1785        movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
1786 
1787        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1788        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1789           // prefetchw [eax + Offset(_owner)-2]
1790           prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1791        }
1792 
1793        if ((EmitSync & 64) == 0) {
1794          // Optimistic form: consider XORL tmpReg,tmpReg
1795          movptr(tmpReg, NULL_WORD);
1796        } else {
1797          // Can suffer RTS->RTO upgrades on shared or cold $ lines
1798          // Test-And-CAS instead of CAS
1799          movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));   // rax, = m->_owner
1800          testptr(tmpReg, tmpReg);                   // Locked ?
1801          jccb  (Assembler::notZero, DONE_LABEL);
1802        }
1803 
1804        // Appears unlocked - try to swing _owner from null to non-null.
1805        // Ideally, I'd manifest "Self" with get_thread and then attempt
1806        // to CAS the register containing Self into m->Owner.
1807        // But we don't have enough registers, so instead we can either try to CAS
1808        // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
1809        // we later store "Self" into m->Owner.  Transiently storing a stack address
1810        // (rsp or the address of the box) into  m->owner is harmless.
1811        // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1812        if (os::is_MP()) {
1813          lock();
1814        }
1815        cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1816        movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3


1817        jccb  (Assembler::notZero, DONE_LABEL);

1818        get_thread (scrReg);                    // beware: clobbers ICCs
1819        movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1820        xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
1821 
1822        // If the CAS fails we can either retry or pass control to the slow-path.
1823        // We use the latter tactic.
1824        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1825        // If the CAS was successful ...
1826        //   Self has acquired the lock
1827        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1828        // Intentional fall-through into DONE_LABEL ...
1829     } else {
1830        movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark()));  // results in ST-before-CAS penalty
1831        movptr(boxReg, tmpReg);
1832 
1833        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1834        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1835           // prefetchw [eax + Offset(_owner)-2]
1836           prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1837        }


2066        // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
2067        //     The integer condition codes will tell us if succ was 0.
2068        //     Since _succ and _owner should reside in the same $line and
2069        //     we just stored into _owner, it's likely that the $line
2070        //     remains in M-state for the lock:orl.
2071        //
2072        // We currently use (3), although it's likely that switching to (2)
2073        // is correct for the future.
2074 
2075        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
2076        if (os::is_MP()) {
2077          lock(); addptr(Address(rsp, 0), 0);
2078        }
2079        // Ratify _succ remains non-null
2080        cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);
2081        jccb  (Assembler::notZero, LSuccess);
2082 
2083        xorptr(boxReg, boxReg);                  // box is really EAX
2084        if (os::is_MP()) { lock(); }
2085        cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));



2086        jccb  (Assembler::notEqual, LSuccess);
2087        // Since we're low on registers we installed rsp as a placeholder in _owner.
2088        // Now install Self over rsp.  This is safe as we're transitioning from
2089        // non-null to non-null
2090        get_thread (boxReg);
2091        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg);
2092        // Intentional fall-through into LGoSlowPath ...
2093 
2094        bind  (LGoSlowPath);
2095        orptr(boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2096        jmpb  (DONE_LABEL);
2097 
2098        bind  (LSuccess);
2099        xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
2100        jmpb  (DONE_LABEL);
2101     }
2102 
2103     bind (Stacked);
2104     // It's not inflated and it's not recursively stack-locked and it's not biased.
2105     // It must be stack-locked.


2173       cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2174       jccb  (Assembler::notZero, LSuccess);
2175 
2176       // Rare inopportune interleaving - race.
2177       // The successor vanished in the small window above.
2178       // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
2179       // We need to ensure progress and succession.
2180       // Try to reacquire the lock.
2181       // If that fails then the new owner is responsible for succession and this
2182       // thread needs to take no further action and can exit via the fast path (success).
2183       // If the re-acquire succeeds then pass control into the slow path.
2184       // As implemented, this latter mode is horrible because we generated more
2185       // coherence traffic on the lock *and* artificially extended the critical section
2186       // length by virtue of passing control into the slow path.
2187 
2188       // box is really RAX -- the following CMPXCHG depends on that binding
2189       // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
2190       movptr(boxReg, (int32_t)NULL_WORD);
2191       if (os::is_MP()) { lock(); }
2192       cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));



2193       jccb  (Assembler::notEqual, LSuccess);
2194       // Intentional fall-through into slow-path
2195 
2196       bind  (LGoSlowPath);
2197       orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2198       jmpb  (DONE_LABEL);
2199 
2200       bind  (LSuccess);
2201       testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
2202       jmpb  (DONE_LABEL);
2203     }
2204 
2205     bind  (Stacked);
2206     movptr(tmpReg, Address (boxReg, 0));      // re-fetch
2207     if (os::is_MP()) { lock(); }
2208     cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2209 
2210     if (EmitSync & 65536) {
2211        bind (CheckSucc);
2212     }




1764 #ifndef _LP64
1765     // The object is inflated.
1766 
1767     // boxReg refers to the on-stack BasicLock in the current frame.
1768     // We'd like to write:
1769     //   set box->_displaced_header = markOopDesc::unused_mark().  Any non-0 value suffices.
1770     // This is convenient but results in a ST-before-CAS penalty.  The following CAS suffers
1771     // additional latency as we have another ST in the store buffer that must drain.
1772 
1773     if (EmitSync & 8192) {
1774        movptr(Address(boxReg, 0), 3);            // results in ST-before-CAS penalty
1775        get_thread (scrReg);
1776        movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
1777        movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
1778        if (os::is_MP()) {
1779          lock();
1780        }
1781        cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1782     } else
1783     if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
1784        // register juggle because we need tmpReg for cmpxchgptr below
1785        movptr(scrReg, boxReg);
1786        movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
1787 
1788        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1789        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1790           // prefetchw [eax + Offset(_owner)-2]
1791           prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1792        }
1793 
1794        if ((EmitSync & 64) == 0) {
1795          // Optimistic form: consider XORL tmpReg,tmpReg
1796          movptr(tmpReg, NULL_WORD);
1797        } else {
1798          // Can suffer RTS->RTO upgrades on shared or cold $ lines
1799          // Test-And-CAS instead of CAS
1800          movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));   // rax, = m->_owner
1801          testptr(tmpReg, tmpReg);                   // Locked ?
1802          jccb  (Assembler::notZero, DONE_LABEL);
1803        }
1804 
1805        // Appears unlocked - try to swing _owner from null to non-null.
1806        // Ideally, I'd manifest "Self" with get_thread and then attempt
1807        // to CAS the register containing Self into m->Owner.
1808        // But we don't have enough registers, so instead we can either try to CAS
1809        // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
1810        // we later store "Self" into m->Owner.  Transiently storing a stack address
1811        // (rsp or the address of the box) into  m->owner is harmless.
1812        // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1813        if (os::is_MP()) {
1814          lock();
1815        }
1816        cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1817        movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
1818        // If we weren't able to swing _owner from NULL to the BasicLock
1819        // then take the slow path.
1820        jccb  (Assembler::notZero, DONE_LABEL);
1821        // update _owner from BasicLock to thread
1822        get_thread (scrReg);                    // beware: clobbers ICCs
1823        movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1824        xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
1825 
1826        // If the CAS fails we can either retry or pass control to the slow-path.
1827        // We use the latter tactic.
1828        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1829        // If the CAS was successful ...
1830        //   Self has acquired the lock
1831        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1832        // Intentional fall-through into DONE_LABEL ...
1833     } else {
1834        movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark()));  // results in ST-before-CAS penalty
1835        movptr(boxReg, tmpReg);
1836 
1837        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1838        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1839           // prefetchw [eax + Offset(_owner)-2]
1840           prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1841        }


2070        // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
2071        //     The integer condition codes will tell us if succ was 0.
2072        //     Since _succ and _owner should reside in the same $line and
2073        //     we just stored into _owner, it's likely that the $line
2074        //     remains in M-state for the lock:orl.
2075        //
2076        // We currently use (3), although it's likely that switching to (2)
2077        // is correct for the future.
2078 
2079        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
2080        if (os::is_MP()) {
2081          lock(); addptr(Address(rsp, 0), 0);
2082        }
2083        // Ratify _succ remains non-null
2084        cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);
2085        jccb  (Assembler::notZero, LSuccess);
2086 
2087        xorptr(boxReg, boxReg);                  // box is really EAX
2088        if (os::is_MP()) { lock(); }
2089        cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2090        // There's no successor so we tried to regrab the lock with
2091        // the placeholder value. If that didn't work, then another thread
2092        // grabbed the lock so we're done (and exit was a success).
2093        jccb  (Assembler::notEqual, LSuccess);
2094        // Since we're low on registers we installed rsp as a placeholder in _owner.
2095        // Now install Self over rsp.  This is safe as we're transitioning from
2096        // non-null to non-null
2097        get_thread (boxReg);
2098        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg);
2099        // Intentional fall-through into LGoSlowPath ...
2100 
2101        bind  (LGoSlowPath);
2102        orptr(boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2103        jmpb  (DONE_LABEL);
2104 
2105        bind  (LSuccess);
2106        xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
2107        jmpb  (DONE_LABEL);
2108     }
2109 
2110     bind (Stacked);
2111     // It's not inflated and it's not recursively stack-locked and it's not biased.
2112     // It must be stack-locked.


2180       cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2181       jccb  (Assembler::notZero, LSuccess);
2182 
2183       // Rare inopportune interleaving - race.
2184       // The successor vanished in the small window above.
2185       // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
2186       // We need to ensure progress and succession.
2187       // Try to reacquire the lock.
2188       // If that fails then the new owner is responsible for succession and this
2189       // thread needs to take no further action and can exit via the fast path (success).
2190       // If the re-acquire succeeds then pass control into the slow path.
2191       // As implemented, this latter mode is horrible because we generated more
2192       // coherence traffic on the lock *and* artificially extended the critical section
2193       // length by virtue of passing control into the slow path.
2194 
2195       // box is really RAX -- the following CMPXCHG depends on that binding
2196       // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
2197       movptr(boxReg, (int32_t)NULL_WORD);
2198       if (os::is_MP()) { lock(); }
2199       cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2200       // There's no successor so we tried to regrab the lock.
2201       // If that didn't work, then another thread grabbed the lock
2202       // so we're done (and exit was a success).
2203       jccb  (Assembler::notEqual, LSuccess);
2204       // Intentional fall-through into slow-path
2205 
2206       bind  (LGoSlowPath);
2207       orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2208       jmpb  (DONE_LABEL);
2209 
2210       bind  (LSuccess);
2211       testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
2212       jmpb  (DONE_LABEL);
2213     }
2214 
2215     bind  (Stacked);
2216     movptr(tmpReg, Address (boxReg, 0));      // re-fetch
2217     if (os::is_MP()) { lock(); }
2218     cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2219 
2220     if (EmitSync & 65536) {
2221        bind (CheckSucc);
2222     }