  return null_check_offset;
}

void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
  andptr(temp_reg, markWord::biased_lock_mask_in_place);
  cmpptr(temp_reg, markWord::biased_lock_pattern);
  jcc(Assembler::equal, done);
}
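
// For reference, a pseudo-C++ sketch of the test emitted above (the
// helper name is hypothetical; the real check runs in generated code):
//
//   bool unlock_is_biased_no_op(markWord mark) {
//     // The exit is a no-op iff the low lock bits still hold the bias pattern.
//     return (mark.value() & markWord::biased_lock_mask_in_place) ==
//            markWord::biased_lock_pattern;
//   }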

#ifdef COMPILER2

// Increment the ObjectMonitor's ref_count for safety, or force a
// branch to 'done' with ICC.ZF=0 to indicate failure and take the
// slow path.
void MacroAssembler::inc_om_ref_count(Register obj_reg, Register om_reg, Register tmp_reg, Label& done) {
  atomic_incl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));

  Label LGoSlowPath;
  if (AsyncDeflateIdleMonitors) {
    // Race here if the monitor is not owned! The ref_count bump above
    // causes subsequent async deflation to skip this monitor; however,
    // a previous or concurrent async deflation can still race with us.

    // First check: if the owner field == DEFLATER_MARKER:
    movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    // DEFLATER_MARKER == reinterpret_cast<void*>(-1), so the symbolic
    // constant cannot be used directly here; compare against the literal:
    cmpptr(tmp_reg, -1);
    // If marked for async deflation, then take the slow path. This is a
    // simpler check than what ObjectMonitorHandle::save_om_ptr() does
    // so ObjectMonitor::install_displaced_markword_in_object() doesn't
    // have to be implemented in macro assembler.
    jccb(Assembler::equal, LGoSlowPath);

    // Second check: if ref_count field <= 0:
    movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
    cmpptr(tmp_reg, 0);
    // If async deflation is in the process of bailing out, but has not
    // yet restored the ref_count field, then we take the slow path. We
    // want a stable ref_count value for the fast path.
    jccb(Assembler::lessEqual, LGoSlowPath);

    // Final check: if object field == obj_reg:
    cmpptr(obj_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(object)));
    // If the ObjectMonitor has been deflated and recycled, then take
    // the slow path.
    jccb(Assembler::notEqual, LGoSlowPath);
  }

  Label LRetToCaller;
  // We leave the ref_count incremented to protect the caller's code
  // paths against async deflation.
  jmpb(LRetToCaller);

  bind(LGoSlowPath);
  lock();
  decrementl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
  // Jump to 'done' with ICC.ZF=0 to indicate failure/take the slow path.
  orl(tmp_reg, 1);
  jmp(done);

  bind(LRetToCaller);
}
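
// A pseudo-C++ sketch of the protocol above (hypothetical helper and
// field accessors, for illustration only; the real logic is emitted as
// machine code): bump the ref_count first, then verify the monitor was
// not deflated underneath us.
//
//   bool try_ref_om(oop obj, ObjectMonitor* om) {
//     Atomic::inc(&om->_ref_count);                // bump first
//     if (om->_owner == DEFLATER_MARKER ||         // async deflation in progress
//         om->_ref_count <= 0 ||                   // deflation bailing out
//         om->_object != obj) {                    // deflated and recycled
//       Atomic::dec(&om->_ref_count);              // undo the bump and go slow
//       return false;                              // caller takes the slow path
//     }
//     return true;                                 // ref_count now protects the caller
//   }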

#if INCLUDE_RTM_OPT

// Update rtm_counters based on abort status
// input: abort_status
//        rtm_counters (RTMLockingCounters*)
// flags are killed
void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {

  atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
  if (PrintPreciseRTMLockingStatistics) {
    for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
      Label check_abort;
      testl(abort_status, (1<<i));
      jccb(Assembler::equal, check_abort);
      atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
      bind(check_abort);
    }
  }
}
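
// Equivalent pseudo-C++ for the update above (the counter field names
// are assumed from the offset accessors used):
//
//   void counters_update(uintx abort_status, RTMLockingCounters* c) {
//     Atomic::inc(&c->_abort_count);
//     if (PrintPreciseRTMLockingStatistics) {
//       for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
//         if (abort_status & (1 << i)) {
//           Atomic::inc(&c->_abortX_count[i]);     // one counter per abort-status bit
//         }
//       }
//     }
//   }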

// ...

  bind(L_decrement_retry);
  if (RTMRetryCount > 0) {
    // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
  }
}

// Use RTM for inflating locks
// inputs: objReg (object to lock)
//         boxReg (on-stack box address (displaced header location) - KILLED)
//         tmpReg (ObjectMonitor address + markWord::monitor_value)
void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
                                          Register scrReg, Register retry_on_busy_count_Reg,
                                          Register retry_on_abort_count_Reg,
                                          RTMLockingCounters* rtm_counters,
                                          Metadata* method_data, bool profile_rtm,
                                          Label& DONE_LABEL) {
  assert(UseRTMLocking, "why call this otherwise?");
  assert(tmpReg == rax, "");
  assert(scrReg == rdx, "");
  Label L_rtm_retry, L_decrement_retry, L_on_abort, L_local_done;
  int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);

  // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
  movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));

  if (!HandshakeAfterDeflateIdleMonitors) {
    // Increment the ObjectMonitor's ref_count for safety or force the
    // enter slow path via DONE_LABEL.
    // In rtm_inflated_locking(), initially tmpReg contains the object's
    // mark word which, in this case, is the (ObjectMonitor* | monitor_value).
    // Also this code uses scrReg as its temporary register.
    inc_om_ref_count(objReg, tmpReg /* om_reg */, scrReg /* tmp_reg */, DONE_LABEL);
  }

  movptr(boxReg, tmpReg); // Save ObjectMonitor address

  if (RTMRetryCount > 0) {
    movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
    movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
    bind(L_rtm_retry);
  }
  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
    Label L_noincrement;
    if (RTMTotalCountIncrRate > 1) {
      // tmpReg, scrReg and flags are killed
      branch_on_random_using_rdtsc(tmpReg, scrReg, RTMTotalCountIncrRate, L_noincrement);
    }
    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
    atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
    bind(L_noincrement);
  }
  xbegin(L_on_abort);
  movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
  movptr(tmpReg, Address(tmpReg, owner_offset));
  testptr(tmpReg, tmpReg);
  jcc(Assembler::zero, L_local_done);
  if (UseRTMXendForLockBusy) {
    xend();
    jmp(L_decrement_retry);
  }
  else {
    xabort(0);
  }
  bind(L_on_abort);
  Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
    rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
  }
  if (RTMRetryCount > 0) {
    // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
  }

  movptr(tmpReg, Address(boxReg, owner_offset));
  testptr(tmpReg, tmpReg);
  jccb(Assembler::notZero, L_decrement_retry);

  // Appears unlocked - try to swing _owner from null to non-null.
  // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
#ifdef _LP64
  Register threadReg = r15_thread;
#else
  get_thread(scrReg);
  Register threadReg = scrReg;
#endif
  lock();
  cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg

  if (RTMRetryCount > 0) {
    // If the CAS succeeded we are done; otherwise retry.
    jccb(Assembler::equal, L_local_done);
    bind(L_decrement_retry);
    // Spin and retry if lock is busy.
    rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
  }
  else {
    bind(L_decrement_retry);
  }

  // rtm_inflated_locking() exit paths come here except for a failed
  // inc_om_ref_count() which goes directly to DONE_LABEL.
  bind(L_local_done);
  if (!HandshakeAfterDeflateIdleMonitors) {
    pushf(); // Preserve flags.
    // Decrement the ObjectMonitor's ref_count.
    lock();
    decrementl(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
    popf(); // Restore flags so we have the proper ICC.ZF value.
  }

  jmp(DONE_LABEL);
}
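
// Condensed pseudo-C++ of the flow above (illustrative only; xbegin/
// xend/xabort stand in for the RTM instructions actually emitted, and
// ref_om/unref_om for the inc_om_ref_count() protocol):
//
//   ref_om(m);                                 // unless HandshakeAfterDeflateIdleMonitors
//   for (int i = 0; i < RTMRetryCount; i++) {
//     if (xbegin()) {                          // transaction started
//       if (m->_owner == NULL) goto locked;    // lock elided inside the transaction
//       xend_or_xabort();                      // lock busy: leave the transaction
//     }
//     // on abort: profile, then retry on 'can retry' (0x2) or 'memory conflict' (0x4)
//   }
//   CAS(&m->_owner, NULL, thread);             // fall back to a real acquire
//  locked:
//   unref_om(m);                               // flags preserved; ICC.ZF reports success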

#endif // INCLUDE_RTM_OPT

// fast_lock and fast_unlock used by C2

// Because the transitions from emitted code to the runtime
// monitorenter/exit helper stubs are so slow it's critical that
// we inline both the stack-locking fast path and the inflated fast path.
//
// See also: cmpFastLock and cmpFastUnlock.
//
// What follows is a specialized inline transliteration of the code
// in enter() and exit(). If we're concerned about I$ bloat another
// option would be to emit TrySlowEnter and TrySlowExit methods
// at startup-time. These methods would accept arguments as
// (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
// indications in the icc.ZFlag. fast_lock and fast_unlock would simply
// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
// In practice, however, the # of lock sites is bounded and is usually small.

// ...

  cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
  // If we weren't able to swing _owner from NULL to the BasicLock
  // then take the slow path.
  jccb(Assembler::notZero, DONE_LABEL);
  // update _owner from BasicLock to thread
  get_thread(scrReg);                     // beware: clobbers ICCs
  movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
  xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success

  // If the CAS fails we can either retry or pass control to the slow path.
  // We use the latter tactic.
  // Pass the CAS result in the icc.ZFlag into DONE_LABEL
  // If the CAS was successful ...
  //   Self has acquired the lock
  //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
  // Intentional fall-through into DONE_LABEL ...
#else // _LP64
  // It's inflated and we use scrReg for ObjectMonitor* in this section.
  movq(scrReg, tmpReg);

  // Unconditionally set box->_displaced_header = markWord::unused_mark().
  // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
  movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));

  if (!HandshakeAfterDeflateIdleMonitors) {
    // Increment the ObjectMonitor's ref_count for safety or force the
    // enter slow path via DONE_LABEL.
    // In fast_lock(), scrReg contains the object's mark word which,
    // in this case, is the (ObjectMonitor* | monitor_value). Also this
    // code uses tmpReg as its temporary register.
    inc_om_ref_count(objReg, scrReg /* om_reg */, tmpReg /* tmp_reg */, DONE_LABEL);
  }

  xorq(tmpReg, tmpReg);
  lock();
  cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  // Intentional fall-through into DONE_LABEL ...
  // Propagate ICC.ZF from CAS above into DONE_LABEL.

  if (!HandshakeAfterDeflateIdleMonitors) {
    pushf(); // Preserve flags.
    // Decrement the ObjectMonitor's ref_count.
    lock();
    decrementl(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
    popf(); // Restore flags so we have the proper ICC.ZF value.
  }
#endif // _LP64
#if INCLUDE_RTM_OPT
  } // use_rtm()
#endif
  // DONE_LABEL is a hot target - we'd really like to place it at the
  // start of cache line by padding with NOPs.
  // See the AMD and Intel software optimization manuals for the
  // most efficient "long" NOP encodings.
  // Unfortunately none of our alignment mechanisms suffice.
  bind(DONE_LABEL);

  // At DONE_LABEL the icc ZFlag is set as follows ...
  // fast_unlock uses the same protocol.
  // ZFlag == 1 -> Success
  // ZFlag == 0 -> Failure - force control through the slow path
}
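
// The inflated-enter fast path reduces to this pseudo-C++ sketch
// ('m' is the ObjectMonitor; the field name is assumed from the offset
// macro used above):
//
//   if (CAS(&m->_owner, NULL, self) == NULL) {
//     // ZF == 1: we own the monitor; _recursions is already 0.
//   } else {
//     // ZF == 0: contended; the caller branches to the runtime slow path.
//   }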

// obj: object to unlock
// box: box address (displaced header location), killed. Must be EAX.
// tmp: killed, cannot be obj nor box.

// ...

  }

#if INCLUDE_RTM_OPT
  if (UseRTMForStackLocks && use_rtm) {
    assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
    Label L_regular_unlock;
    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
    andptr(tmpReg, markWord::biased_lock_mask_in_place);              // look at 3 lock bits
    cmpptr(tmpReg, markWord::unlocked_value);                         // bits = 001 unlocked
    jccb(Assembler::notEqual, L_regular_unlock);                      // if !HLE RegularLock
    xend();                                                           // otherwise end...
    jmp(DONE_LABEL);                                                  // ... and we're done
    bind(L_regular_unlock);
  }
#endif
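
  // Pseudo-C++ for the test above: if the mark word still reads
  // 'unlocked' (lock bits == 001) while this thread appears to hold the
  // lock, the lock was elided by RTM, so ending the transaction is the
  // entire unlock.
  //
  //   if ((mark & markWord::biased_lock_mask_in_place) == markWord::unlocked_value) {
  //     xend();   // commit the elided critical section
  //   }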

  cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                   // Examine the displaced header
  jcc(Assembler::zero, DONE_LABEL);                                 // 0 indicates recursive stack-lock
  movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
  testptr(tmpReg, markWord::monitor_value);                         // Inflated?
  jcc(Assembler::zero, Stacked);

  // It's inflated.
#if INCLUDE_RTM_OPT
  if (use_rtm) {
    Label L_regular_inflated_unlock;
    int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
    movptr(boxReg, Address(tmpReg, owner_offset));
    testptr(boxReg, boxReg);
    jccb(Assembler::notZero, L_regular_inflated_unlock);
    xend();
    jmp(DONE_LABEL);
    bind(L_regular_inflated_unlock);
  }
#endif

  // Despite our balanced locking property we still check that m->_owner == Self
  // as java routines or native JNI code called by this thread might
  // have released the lock.
  // Refer to the comments in synchronizer.cpp for how we might encode extra
  // state in _succ so we can avoid fetching EntryList|cxq.
  //
  // I'd like to add more cases in fast_lock() and fast_unlock() --
  // such as recursive enter and exit -- but we have to be wary of
  // I$ bloat, T$ effects and BP$ effects.
  //
  // If there's no contention try a 1-0 exit. That is, exit without
  // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
  // we detect and recover from the race that the 1-0 exit admits.
  //
  // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
  // before it STs null into _owner, releasing the lock. Updates
  // ...

  bind(Stacked);
  // It's not inflated and it's not recursively stack-locked and it's not biased.
  // It must be stack-locked.
  // Try to reset the header to displaced header.
  // The "box" value on the stack is stable, so we can reload
  // and be assured we observe the same value as above.
  movptr(tmpReg, Address(boxReg, 0));
  lock();
  cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
  // Intentional fall-through into DONE_LABEL

  // DONE_LABEL is a hot target - we'd really like to place it at the
  // start of cache line by padding with NOPs.
  // See the AMD and Intel software optimization manuals for the
  // most efficient "long" NOP encodings.
  // Unfortunately none of our alignment mechanisms suffice.
  bind(CheckSucc);
#else // _LP64
  // It's inflated

  if (!HandshakeAfterDeflateIdleMonitors) {
    // Increment the ObjectMonitor's ref_count for safety or force the
    // exit slow path via DONE_LABEL.
    // In fast_unlock(), tmpReg contains the object's mark word which,
    // in this case, is the (ObjectMonitor* | monitor_value). Also this
    // code uses boxReg as its temporary register.
    inc_om_ref_count(objReg, tmpReg /* om_reg */, boxReg /* tmp_reg */, DONE_LABEL);
  }

  // Try to avoid passing control into the slow path ...
  Label LSuccess, LGoSlowPath;
  xorptr(boxReg, boxReg);
  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  jccb(Assembler::notZero, LGoSlowPath);
  movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
  jccb(Assembler::notZero, CheckSucc);
  // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  jmpb(LSuccess);

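  // The 1-0 exit above in pseudo-C++ (field names follow the offsets
  // used; note: no MEMBAR or CAS on this path):
  //
  //   if (m->_recursions != 0) goto slow_path;
  //   if (m->_cxq == NULL && m->_EntryList == NULL) {
  //     m->_owner = NULL;        // uncontended: a plain store releases the lock
  //     goto success;
  //   }
  //   // otherwise inspect _succ before committing to the slow path
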
  bind(CheckSucc);

  // The following optional optimization can be elided if necessary.
  // Effectively: if (succ == null) goto slow path
  // Eliding the check is safe; keeping it reduces the window for a
  // race and thus benefits performance.
  cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
  jccb(Assembler::zero, LGoSlowPath);

  xorptr(boxReg, boxReg);
  // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);

  // Memory barrier/fence
  // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
  // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
  // This is faster on Nehalem and AMD Shanghai/Barcelona.
  // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
  // We might also restructure (ST Owner=0;barrier;LD _Succ) to
  // (mov box,0; xchgq box, &m->Owner; LD _succ).
  // ...
  // We need to ensure progress and succession.
  // Try to reacquire the lock.
  // If that fails then the new owner is responsible for succession and this
  // thread needs to take no further action and can exit via the fast path (success).
  // If the re-acquire succeeds then pass control into the slow path.
  // As implemented, this latter mode is horrible because we generate more
  // coherence traffic on the lock *and* artificially extend the critical
  // section length by passing control into the slow path.

  // box is really RAX -- the following CMPXCHG depends on that binding
  // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
  lock();
  cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
  // There's no successor so we tried to regrab the lock.
  // If that didn't work, then another thread grabbed the
  // lock so we're done (and exit was a success).
  jccb(Assembler::notEqual, LSuccess);
  // Intentional fall-through into slow path

  bind(LGoSlowPath);
  if (!HandshakeAfterDeflateIdleMonitors) {
    lock();
    decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
  }
  orl(boxReg, 1);   // set ICC.ZF=0 to indicate failure
  jmpb(DONE_LABEL);

  bind(LSuccess);
  if (!HandshakeAfterDeflateIdleMonitors) {
    lock();
    decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
  }
  testl(boxReg, 0); // set ICC.ZF=1 to indicate success
  jmpb(DONE_LABEL);

  bind(Stacked);
  movptr(tmpReg, Address(boxReg, 0)); // re-fetch
  lock();
  cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box

#endif
  bind(DONE_LABEL);
}
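
// Hedged pseudo-C++ recap of the exit outcome protocol above: both
// final paths balance the ref_count taken by inc_om_ref_count() and
// then encode the result in ICC.ZF for C2's cmpFastUnlock node
// (unref_om is a hypothetical stand-in for the locked decrement):
//
//   unref_om(m);                  // unless HandshakeAfterDeflateIdleMonitors
//   if (success) { /* testl(box, 0) -> ZF == 1: fast path done      */ }
//   else         { /* orl(box, 1)   -> ZF == 0: call runtime exit() */ }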
#endif // COMPILER2

void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  // since C-style booleans are stored in one byte
  // only! (was bug)
  andl(x, 0xFF);
  setb(Assembler::notZero, x);
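  // Equivalent C++ (one-line sketch): x = ((x & 0xFF) != 0) ? 1 : 0;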