1279 return null_check_offset;
1280 }
1281
// Emit the biased-locking fast unlock check.
//
// obj_reg  - object being unlocked; only its mark word is read.
// temp_reg - scratch register; receives the mark word and is clobbered.
// done     - branch target taken when the lock is biased, in which case
//            the unlock is a no-op and nothing else needs to be emitted.
//
// Falls through when the object is not biased so the caller can continue
// with the regular unlock sequence. Condition flags are clobbered by the
// andptr/cmpptr below.
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); // load mark word
  andptr(temp_reg, markWord::biased_lock_mask_in_place);               // isolate the lock bits
  cmpptr(temp_reg, markWord::biased_lock_pattern);                     // biased lock pattern?
  jcc(Assembler::equal, done);                                         // biased -> unlock is a no-op
}
1296
1297 #ifdef COMPILER2
1298
1299 #if INCLUDE_RTM_OPT
1300
1301 // Update rtm_counters based on abort status
1302 // input: abort_status
1303 // rtm_counters (RTMLockingCounters*)
1304 // flags are killed
1305 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
1306
1307 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
1308 if (PrintPreciseRTMLockingStatistics) {
1309 for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
1310 Label check_abort;
1311 testl(abort_status, (1<<i));
1312 jccb(Assembler::equal, check_abort);
1313 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
1314 bind(check_abort);
1315 }
1316 }
1317 }
1318
1512 bind(L_decrement_retry);
1513 if (RTMRetryCount > 0) {
1514 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1515 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1516 }
1517 }
1518
// Use RTM for inflating locks
// inputs: objReg (object to lock)
//         boxReg (on-stack box address (displaced header location) - KILLED)
//         tmpReg (ObjectMonitor address + markWord::monitor_value)
//
// tmpReg must be rax: it is the implicit cmpxchg comparand below and also
// receives the RTM abort status at L_on_abort. scrReg must be rdx.
// Branches to DONE_LABEL with ZF == 1 on successful acquisition; the
// caller's contract at DONE_LABEL is ZF == 1 -> success, ZF == 0 -> slow
// path (see the fast_lock comments).
void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
                                          Register scrReg, Register retry_on_busy_count_Reg,
                                          Register retry_on_abort_count_Reg,
                                          RTMLockingCounters* rtm_counters,
                                          Metadata* method_data, bool profile_rtm,
                                          Label& DONE_LABEL) {
  assert(UseRTMLocking, "why call this otherwise?");
  assert(tmpReg == rax, "");
  assert(scrReg == rdx, "");
  Label L_rtm_retry, L_decrement_retry, L_on_abort;
  // Offset of ObjectMonitor::_owner relative to the TAGGED monitor pointer;
  // the macro folds the markWord::monitor_value tag into the displacement.
  int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);

  // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
  movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
  movptr(boxReg, tmpReg); // Save ObjectMonitor address

  if (RTMRetryCount > 0) {
    movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy
    movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
    bind(L_rtm_retry);
  }
  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
    Label L_noincrement;
    if (RTMTotalCountIncrRate > 1) {
      // Sample the total count: skip the increment for most executions.
      // tmpReg, scrReg and flags are killed
      branch_on_random_using_rdtsc(tmpReg, scrReg, RTMTotalCountIncrRate, L_noincrement);
    }
    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
    atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
    bind(L_noincrement);
  }
  // Begin the transactional region; on abort, control resumes at L_on_abort
  // with the abort status in rax.
  xbegin(L_on_abort);
  movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword (tagged ObjectMonitor*)
  movptr(tmpReg, Address(tmpReg, owner_offset));                    // fetch the monitor's _owner
  testptr(tmpReg, tmpReg);
  jcc(Assembler::zero, DONE_LABEL);                                 // unowned: success, ZF == 1
  // Monitor is owned by someone: the transactional acquire cannot succeed.
  if (UseRTMXendForLockBusy) {
    xend();                 // commit the (empty) transaction ...
    jmp(L_decrement_retry); // ... and go spin/retry on the busy lock
  }
  else {
    xabort(0);              // abort; execution resumes at L_on_abort
  }
  bind(L_on_abort);
  Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
    rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
  }
  if (RTMRetryCount > 0) {
    // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
  }

  // RTM gave up: fall back to a CAS-based acquire. Re-check the owner first.
  movptr(tmpReg, Address(boxReg, owner_offset)) ;
  testptr(tmpReg, tmpReg) ;
  jccb(Assembler::notZero, L_decrement_retry) ;

  // Appears unlocked - try to swing _owner from null to non-null.
  // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
#ifdef _LP64
  Register threadReg = r15_thread;
#else
  get_thread(scrReg);
  Register threadReg = scrReg;
#endif
  lock();
  cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg

  if (RTMRetryCount > 0) {
    // success done else retry
    jccb(Assembler::equal, DONE_LABEL) ;  // CAS succeeded: ZF == 1 from cmpxchg
    bind(L_decrement_retry);
    // Spin and retry if lock is busy.
    rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
  }
  else {
    bind(L_decrement_retry);
  }
}
1602
1603 #endif // INCLUDE_RTM_OPT
1604
1605 // fast_lock and fast_unlock used by C2
1606
1607 // Because the transitions from emitted code to the runtime
1608 // monitorenter/exit helper stubs are so slow it's critical that
1609 // we inline both the stack-locking fast path and the inflated fast path.
1610 //
1611 // See also: cmpFastLock and cmpFastUnlock.
1612 //
1613 // What follows is a specialized inline transliteration of the code
1614 // in enter() and exit(). If we're concerned about I$ bloat another
1615 // option would be to emit TrySlowEnter and TrySlowExit methods
1616 // at startup-time. These methods would accept arguments as
1617 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
1618 // indications in the icc.ZFlag. fast_lock and fast_unlock would simply
1619 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
1620 // In practice, however, the # of lock sites is bounded and is usually small.
1806 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1807 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
1808 // If we weren't able to swing _owner from NULL to the BasicLock
1809 // then take the slow path.
1810 jccb (Assembler::notZero, DONE_LABEL);
1811 // update _owner from BasicLock to thread
1812 get_thread (scrReg); // beware: clobbers ICCs
1813 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1814 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
1815
1816 // If the CAS fails we can either retry or pass control to the slow path.
1817 // We use the latter tactic.
1818 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1819 // If the CAS was successful ...
1820 // Self has acquired the lock
1821 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1822 // Intentional fall-through into DONE_LABEL ...
1823 #else // _LP64
1824 // It's inflated and we use scrReg for ObjectMonitor* in this section.
1825 movq(scrReg, tmpReg);
1826 xorq(tmpReg, tmpReg);
1827 lock();
1828 cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1829 // Unconditionally set box->_displaced_header = markWord::unused_mark().
1830 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1831 movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
1832 // Intentional fall-through into DONE_LABEL ...
1833 // Propagate ICC.ZF from CAS above into DONE_LABEL.
1834 #endif // _LP64
1835 #if INCLUDE_RTM_OPT
1836 } // use_rtm()
1837 #endif
1838 // DONE_LABEL is a hot target - we'd really like to place it at the
1839 // start of cache line by padding with NOPs.
1840 // See the AMD and Intel software optimization manuals for the
1841 // most efficient "long" NOP encodings.
1842 // Unfortunately none of our alignment mechanisms suffice.
1843 bind(DONE_LABEL);
1844
1845 // At DONE_LABEL the icc ZFlag is set as follows ...
1846 // fast_unlock uses the same protocol.
1847 // ZFlag == 1 -> Success
1848 // ZFlag == 0 -> Failure - force control through the slow path
1849 }
1850
1851 // obj: object to unlock
1852 // box: box address (displaced header location), killed. Must be EAX.
1853 // tmp: killed, cannot be obj nor box.
1893 }
1894
1895 #if INCLUDE_RTM_OPT
1896 if (UseRTMForStackLocks && use_rtm) {
1897 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1898 Label L_regular_unlock;
1899 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
1900 andptr(tmpReg, markWord::biased_lock_mask_in_place); // look at 3 lock bits
1901 cmpptr(tmpReg, markWord::unlocked_value); // bits = 001 unlocked
1902 jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
1903 xend(); // otherwise end...
1904 jmp(DONE_LABEL); // ... and we're done
1905 bind(L_regular_unlock);
1906 }
1907 #endif
1908
1909 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
1910 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
1911 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
1912 testptr(tmpReg, markWord::monitor_value); // Inflated?
1913 jccb (Assembler::zero, Stacked);
1914
1915 // It's inflated.
1916 #if INCLUDE_RTM_OPT
1917 if (use_rtm) {
1918 Label L_regular_inflated_unlock;
1919 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
1920 movptr(boxReg, Address(tmpReg, owner_offset));
1921 testptr(boxReg, boxReg);
1922 jccb(Assembler::notZero, L_regular_inflated_unlock);
1923 xend();
1924 jmpb(DONE_LABEL);
1925 bind(L_regular_inflated_unlock);
1926 }
1927 #endif
1928
1929 // Despite our balanced locking property we still check that m->_owner == Self
1930 // as java routines or native JNI code called by this thread might
1931 // have released the lock.
1932 // Refer to the comments in synchronizer.cpp for how we might encode extra
1933 // state in _succ so we can avoid fetching EntryList|cxq.
1934 //
1935 // I'd like to add more cases in fast_lock() and fast_unlock() --
1936 // such as recursive enter and exit -- but we have to be wary of
1937 // I$ bloat, T$ effects and BP$ effects.
1938 //
1939 // If there's no contention try a 1-0 exit. That is, exit without
1940 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
1941 // we detect and recover from the race that the 1-0 exit admits.
1942 //
1943 // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
1944 // before it STs null into _owner, releasing the lock. Updates
1966
1967 bind (Stacked);
1968 // It's not inflated and it's not recursively stack-locked and it's not biased.
1969 // It must be stack-locked.
1970 // Try to reset the header to displaced header.
1971 // The "box" value on the stack is stable, so we can reload
1972 // and be assured we observe the same value as above.
1973 movptr(tmpReg, Address(boxReg, 0));
1974 lock();
1975 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
1976 // Intention fall-thru into DONE_LABEL
1977
1978 // DONE_LABEL is a hot target - we'd really like to place it at the
1979 // start of cache line by padding with NOPs.
1980 // See the AMD and Intel software optimization manuals for the
1981 // most efficient "long" NOP encodings.
1982 // Unfortunately none of our alignment mechanisms suffice.
1983 bind (CheckSucc);
1984 #else // _LP64
1985 // It's inflated
1986 xorptr(boxReg, boxReg);
1987 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
1988 jccb (Assembler::notZero, DONE_LABEL);
1989 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
1990 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
1991 jccb (Assembler::notZero, CheckSucc);
1992 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1993 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
1994 jmpb (DONE_LABEL);
1995
1996 // Try to avoid passing control into the slow_path ...
1997 Label LSuccess, LGoSlowPath ;
1998 bind (CheckSucc);
1999
2000 // The following optional optimization can be elided if necessary
2001 // Effectively: if (succ == null) goto slow path
2002 // The code reduces the window for a race, however,
2003 // and thus benefits performance.
2004 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2005 jccb (Assembler::zero, LGoSlowPath);
2006
2007 xorptr(boxReg, boxReg);
2008 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
2009 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2010
2011 // Memory barrier/fence
2012 // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
2013 // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
2014 // This is faster on Nehalem and AMD Shanghai/Barcelona.
2015 // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
2016 // We might also restructure (ST Owner=0;barrier;LD _Succ) to
2017 // (mov box,0; xchgq box, &m->Owner; LD _succ) .
2026 // We need to ensure progress and succession.
2027 // Try to reacquire the lock.
2028 // If that fails then the new owner is responsible for succession and this
2029 // thread needs to take no further action and can exit via the fast path (success).
2030 // If the re-acquire succeeds then pass control into the slow path.
2031 // As implemented, this latter mode is horrible because we generated more
2032 // coherence traffic on the lock *and* artifically extended the critical section
2033 // length while by virtue of passing control into the slow path.
2034
2035 // box is really RAX -- the following CMPXCHG depends on that binding
2036 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
2037 lock();
2038 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2039 // There's no successor so we tried to regrab the lock.
2040 // If that didn't work, then another thread grabbed the
2041 // lock so we're done (and exit was a success).
2042 jccb (Assembler::notEqual, LSuccess);
2043 // Intentional fall-through into slow path
2044
2045 bind (LGoSlowPath);
2046 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
2047 jmpb (DONE_LABEL);
2048
2049 bind (LSuccess);
2050 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
2051 jmpb (DONE_LABEL);
2052
2053 bind (Stacked);
2054 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
2055 lock();
2056 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
2057
2058 #endif
2059 bind(DONE_LABEL);
2060 }
2061 #endif // COMPILER2
2062
2063 void MacroAssembler::c2bool(Register x) {
2064 // implements x == 0 ? 0 : 1
2065 // note: must only look at least-significant byte of x
2066 // since C-style booleans are stored in one byte
2067 // only! (was bug)
2068 andl(x, 0xFF);
2069 setb(Assembler::notZero, x);
|
1279 return null_check_offset;
1280 }
1281
// Emit the biased-locking fast unlock check.
//
// obj_reg  - object being unlocked; only its mark word is read.
// temp_reg - scratch register; receives the mark word and is clobbered.
// done     - branch target taken when the lock is biased, in which case
//            the unlock is a no-op and nothing else needs to be emitted.
//
// Falls through when the object is not biased so the caller can continue
// with the regular unlock sequence. Condition flags are clobbered by the
// andptr/cmpptr below.
void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
  assert(UseBiasedLocking, "why call this otherwise?");

  // Check for biased locking unlock case, which is a no-op
  // Note: we do not have to check the thread ID for two reasons.
  // First, the interpreter checks for IllegalMonitorStateException at
  // a higher level. Second, if the bias was revoked while we held the
  // lock, the object could not be rebiased toward another thread, so
  // the bias bit would be clear.
  movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); // load mark word
  andptr(temp_reg, markWord::biased_lock_mask_in_place);               // isolate the lock bits
  cmpptr(temp_reg, markWord::biased_lock_pattern);                     // biased lock pattern?
  jcc(Assembler::equal, done);                                         // biased -> unlock is a no-op
}
1296
1297 #ifdef COMPILER2
1298
// Increment the ObjectMonitor's ref_count for safety or force a branch
// to 'done' with ICC.ZF=0 to indicate failure/take the slow path.
//
// obj_reg - the object whose monitor is being operated on; read-only here
//           (only compared against the monitor's object field).
// om_reg  - tagged ObjectMonitor* (the OM_OFFSET_NO_MONITOR_VALUE_TAG macro
//           folds away the markWord::monitor_value tag in each access).
// tmp_reg - scratch; clobbered by the field loads and by the failure orl.
// done    - caller's exit label, reached with ZF == 0 on failure.
//
// On success, control falls through to the caller with ref_count still
// incremented; the caller is responsible for decrementing it on its own
// exit paths.
void MacroAssembler::inc_om_ref_count(Register obj_reg, Register om_reg, Register tmp_reg, Label& done) {
  // Optimistically bump ref_count first; the validity checks below decide
  // whether we keep it or roll it back and fail.
  atomic_incl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));

  Label LGoSlowPath;
  if (AsyncDeflateIdleMonitors) {
    // Race here if monitor is not owned! The above ref_count bump
    // will cause subsequent async deflation to skip it. However,
    // previous or concurrent async deflation is a race.

    // First check: if the owner field == DEFLATER_MARKER:
    movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    // DEFLATER_MARKER == reinterpret_cast<void*>(-1) so the compiler
    // doesn't like to use the define here:
    cmpptr(tmp_reg, -1);
    // If marked for async deflation, then take the slow path. This is a
    // simpler check than what ObjectMonitorHandle::save_om_ptr() does
    // so ObjectMonitor::install_displaced_markword_in_object() doesn't
    // have to be implemented in macro assembler.
    jccb(Assembler::equal, LGoSlowPath);

    // Second check: if ref_count field <= 0:
    movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
    cmpptr(tmp_reg, 0);
    // If async deflation is in the process of bailing out, but has not
    // yet restored the ref_count field, then we take the slow path. We
    // want a stable ref_count value for the fast path.
    jccb(Assembler::lessEqual, LGoSlowPath);

    // Final check: if object field == obj_reg:
    cmpptr(obj_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(object)));
    // If the ObjectMonitor has been deflated and recycled, then take
    // the slow path.
    jccb(Assembler::notEqual, LGoSlowPath);
  }

  Label LRetToCaller;
  // We leave the ref_count incremented to protect the caller's code
  // paths against async deflation.
  // NOTE(review): when !AsyncDeflateIdleMonitors nothing branches to
  // LGoSlowPath, so the slow-path bytes below are emitted but skipped
  // over by this jmpb — dead code kept for a single emission shape.
  jmpb(LRetToCaller);

  bind(LGoSlowPath);
  // Failure: undo the optimistic ref_count bump ...
  lock();
  decrementl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
  // Jump to 'done' with ICC.ZF=0 to indicate failure/take the slow path.
  orl(tmp_reg, 1);
  jmp(done);

  bind(LRetToCaller);
}
1350
1351 #if INCLUDE_RTM_OPT
1352
1353 // Update rtm_counters based on abort status
1354 // input: abort_status
1355 // rtm_counters (RTMLockingCounters*)
1356 // flags are killed
1357 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
1358
1359 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
1360 if (PrintPreciseRTMLockingStatistics) {
1361 for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
1362 Label check_abort;
1363 testl(abort_status, (1<<i));
1364 jccb(Assembler::equal, check_abort);
1365 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
1366 bind(check_abort);
1367 }
1368 }
1369 }
1370
1564 bind(L_decrement_retry);
1565 if (RTMRetryCount > 0) {
1566 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1567 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1568 }
1569 }
1570
// Use RTM for inflating locks
// inputs: objReg (object to lock)
//         boxReg (on-stack box address (displaced header location) - KILLED)
//         tmpReg (ObjectMonitor address + markWord::monitor_value)
//
// tmpReg must be rax: it is the implicit cmpxchg comparand below and also
// receives the RTM abort status at L_on_abort. scrReg must be rdx.
// All of this routine's own exit paths funnel through L_local_done (so the
// ObjectMonitor ref_count can be decremented once) and then branch to
// DONE_LABEL; the caller's contract at DONE_LABEL is ZF == 1 -> success,
// ZF == 0 -> slow path (see the fast_lock comments).
void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
                                          Register scrReg, Register retry_on_busy_count_Reg,
                                          Register retry_on_abort_count_Reg,
                                          RTMLockingCounters* rtm_counters,
                                          Metadata* method_data, bool profile_rtm,
                                          Label& DONE_LABEL) {
  assert(UseRTMLocking, "why call this otherwise?");
  assert(tmpReg == rax, "");
  assert(scrReg == rdx, "");
  Label L_rtm_retry, L_decrement_retry, L_on_abort, L_local_done;
  // Offset of ObjectMonitor::_owner relative to the TAGGED monitor pointer;
  // the macro folds the markWord::monitor_value tag into the displacement.
  int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);

  // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
  movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));

  if (!HandshakeAfterDeflateIdleMonitors) {
    // Increment the ObjectMonitor's ref_count for safety or force the
    // enter slow path via DONE_LABEL.
    // In rtm_inflated_locking(), initially tmpReg contains the object's
    // mark word which, in this case, is the (ObjectMonitor* | monitor_value).
    // Also this code uses scrReg as its temporary register.
    inc_om_ref_count(objReg, tmpReg /* om_reg */, scrReg /* tmp_reg */, DONE_LABEL);
  }

  movptr(boxReg, tmpReg); // Save ObjectMonitor address

  if (RTMRetryCount > 0) {
    movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy
    movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
    bind(L_rtm_retry);
  }
  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
    Label L_noincrement;
    if (RTMTotalCountIncrRate > 1) {
      // Sample the total count: skip the increment for most executions.
      // tmpReg, scrReg and flags are killed
      branch_on_random_using_rdtsc(tmpReg, scrReg, RTMTotalCountIncrRate, L_noincrement);
    }
    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
    atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
    bind(L_noincrement);
  }
  // Begin the transactional region; on abort, control resumes at L_on_abort
  // with the abort status in rax.
  xbegin(L_on_abort);
  movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword (tagged ObjectMonitor*)
  movptr(tmpReg, Address(tmpReg, owner_offset));                    // fetch the monitor's _owner
  testptr(tmpReg, tmpReg);
  jcc(Assembler::zero, L_local_done);                               // unowned: success, ZF == 1
  // Monitor is owned by someone: the transactional acquire cannot succeed.
  if (UseRTMXendForLockBusy) {
    xend();                 // commit the (empty) transaction ...
    jmp(L_decrement_retry); // ... and go spin/retry on the busy lock
  }
  else {
    xabort(0);              // abort; execution resumes at L_on_abort
  }
  bind(L_on_abort);
  Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
    rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
  }
  if (RTMRetryCount > 0) {
    // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
  }

  // RTM gave up: fall back to a CAS-based acquire. Re-check the owner first.
  movptr(tmpReg, Address(boxReg, owner_offset)) ;
  testptr(tmpReg, tmpReg) ;
  jccb(Assembler::notZero, L_decrement_retry) ;

  // Appears unlocked - try to swing _owner from null to non-null.
  // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
#ifdef _LP64
  Register threadReg = r15_thread;
#else
  get_thread(scrReg);
  Register threadReg = scrReg;
#endif
  lock();
  cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg

  if (RTMRetryCount > 0) {
    // success done else retry
    jccb(Assembler::equal, L_local_done); // CAS succeeded: ZF == 1 from cmpxchg
    bind(L_decrement_retry);
    // Spin and retry if lock is busy.
    rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
  }
  else {
    bind(L_decrement_retry);
  }

  // rtm_inflated_locking() exit paths come here except for a failed
  // inc_om_ref_count() which goes directly to DONE_LABEL.
  bind(L_local_done);
  if (!HandshakeAfterDeflateIdleMonitors) {
    pushf(); // Preserve flags.
    // Decrement the ObjectMonitor's ref_count. The locked decrement
    // clobbers ICC.ZF, which carries the lock result, hence pushf/popf.
    lock();
    decrementl(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
    popf(); // Restore flags so we have the proper ICC.ZF value.
  }

  jmp(DONE_LABEL) ;
}
1677
1678 #endif // INCLUDE_RTM_OPT
1679
1680 // fast_lock and fast_unlock used by C2
1681
1682 // Because the transitions from emitted code to the runtime
1683 // monitorenter/exit helper stubs are so slow it's critical that
1684 // we inline both the stack-locking fast path and the inflated fast path.
1685 //
1686 // See also: cmpFastLock and cmpFastUnlock.
1687 //
1688 // What follows is a specialized inline transliteration of the code
1689 // in enter() and exit(). If we're concerned about I$ bloat another
1690 // option would be to emit TrySlowEnter and TrySlowExit methods
1691 // at startup-time. These methods would accept arguments as
1692 // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
1693 // indications in the icc.ZFlag. fast_lock and fast_unlock would simply
1694 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
1695 // In practice, however, the # of lock sites is bounded and is usually small.
1881 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1882 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
1883 // If we weren't able to swing _owner from NULL to the BasicLock
1884 // then take the slow path.
1885 jccb (Assembler::notZero, DONE_LABEL);
1886 // update _owner from BasicLock to thread
1887 get_thread (scrReg); // beware: clobbers ICCs
1888 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1889 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
1890
1891 // If the CAS fails we can either retry or pass control to the slow path.
1892 // We use the latter tactic.
1893 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1894 // If the CAS was successful ...
1895 // Self has acquired the lock
1896 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1897 // Intentional fall-through into DONE_LABEL ...
1898 #else // _LP64
1899 // It's inflated and we use scrReg for ObjectMonitor* in this section.
1900 movq(scrReg, tmpReg);
1901
1902 // Unconditionally set box->_displaced_header = markWord::unused_mark().
1903 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1904 movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
1905
1906 if (!HandshakeAfterDeflateIdleMonitors) {
1907 // Increment the ObjectMonitor's ref_count for safety or force the
1908 // enter slow path via DONE_LABEL.
1909 // In fast_lock(), scrReg contains the object's mark word which,
1910 // in this case, is the (ObjectMonitor* | monitor_value). Also this
1911 // code uses tmpReg as its temporary register.
1912 inc_om_ref_count(objReg, scrReg /* om_reg */, tmpReg /* tmp_reg */, DONE_LABEL);
1913 }
1914
1915 xorq(tmpReg, tmpReg);
1916 lock();
1917 cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1918 // Intentional fall-through into DONE_LABEL ...
1919 // Propagate ICC.ZF from CAS above into DONE_LABEL.
1920
1921 if (!HandshakeAfterDeflateIdleMonitors) {
1922 pushf(); // Preserve flags.
1923 // Decrement the ObjectMonitor's ref_count.
1924 lock();
1925 decrementl(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
1926 popf(); // Restore flags so we have the proper ICC.ZF value.
1927 }
1928 #endif // _LP64
1929 #if INCLUDE_RTM_OPT
1930 } // use_rtm()
1931 #endif
1932 // DONE_LABEL is a hot target - we'd really like to place it at the
1933 // start of cache line by padding with NOPs.
1934 // See the AMD and Intel software optimization manuals for the
1935 // most efficient "long" NOP encodings.
1936 // Unfortunately none of our alignment mechanisms suffice.
1937 bind(DONE_LABEL);
1938
1939 // At DONE_LABEL the icc ZFlag is set as follows ...
1940 // fast_unlock uses the same protocol.
1941 // ZFlag == 1 -> Success
1942 // ZFlag == 0 -> Failure - force control through the slow path
1943 }
1944
1945 // obj: object to unlock
1946 // box: box address (displaced header location), killed. Must be EAX.
1947 // tmp: killed, cannot be obj nor box.
1987 }
1988
1989 #if INCLUDE_RTM_OPT
1990 if (UseRTMForStackLocks && use_rtm) {
1991 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1992 Label L_regular_unlock;
1993 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
1994 andptr(tmpReg, markWord::biased_lock_mask_in_place); // look at 3 lock bits
1995 cmpptr(tmpReg, markWord::unlocked_value); // bits = 001 unlocked
1996 jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
1997 xend(); // otherwise end...
1998 jmp(DONE_LABEL); // ... and we're done
1999 bind(L_regular_unlock);
2000 }
2001 #endif
2002
2003 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
2004 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
2005 movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
2006 testptr(tmpReg, markWord::monitor_value); // Inflated?
2007 jcc (Assembler::zero, Stacked);
2008
2009 // It's inflated.
2010 #if INCLUDE_RTM_OPT
2011 if (use_rtm) {
2012 Label L_regular_inflated_unlock;
2013 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
2014 movptr(boxReg, Address(tmpReg, owner_offset));
2015 testptr(boxReg, boxReg);
2016 jccb(Assembler::notZero, L_regular_inflated_unlock);
2017 xend();
2018 jmp(DONE_LABEL);
2019 bind(L_regular_inflated_unlock);
2020 }
2021 #endif
2022
2023 // Despite our balanced locking property we still check that m->_owner == Self
2024 // as java routines or native JNI code called by this thread might
2025 // have released the lock.
2026 // Refer to the comments in synchronizer.cpp for how we might encode extra
2027 // state in _succ so we can avoid fetching EntryList|cxq.
2028 //
2029 // I'd like to add more cases in fast_lock() and fast_unlock() --
2030 // such as recursive enter and exit -- but we have to be wary of
2031 // I$ bloat, T$ effects and BP$ effects.
2032 //
2033 // If there's no contention try a 1-0 exit. That is, exit without
2034 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
2035 // we detect and recover from the race that the 1-0 exit admits.
2036 //
2037 // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
2038 // before it STs null into _owner, releasing the lock. Updates
2060
2061 bind (Stacked);
2062 // It's not inflated and it's not recursively stack-locked and it's not biased.
2063 // It must be stack-locked.
2064 // Try to reset the header to displaced header.
2065 // The "box" value on the stack is stable, so we can reload
2066 // and be assured we observe the same value as above.
2067 movptr(tmpReg, Address(boxReg, 0));
2068 lock();
2069 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
2070 // Intention fall-thru into DONE_LABEL
2071
2072 // DONE_LABEL is a hot target - we'd really like to place it at the
2073 // start of cache line by padding with NOPs.
2074 // See the AMD and Intel software optimization manuals for the
2075 // most efficient "long" NOP encodings.
2076 // Unfortunately none of our alignment mechanisms suffice.
2077 bind (CheckSucc);
2078 #else // _LP64
2079 // It's inflated
2080
2081 if (!HandshakeAfterDeflateIdleMonitors) {
2082 // Increment the ObjectMonitor's ref_count for safety or force the
2083 // exit slow path via DONE_LABEL.
2084 // In fast_unlock(), tmpReg contains the object's mark word which,
2085 // in this case, is the (ObjectMonitor* | monitor_value). Also this
2086 // code uses boxReg as its temporary register.
2087 inc_om_ref_count(objReg, tmpReg /* om_reg */, boxReg /* tmp_reg */, DONE_LABEL);
2088 }
2089
2090 // Try to avoid passing control into the slow path ...
2091 Label LSuccess, LGoSlowPath;
2092 xorptr(boxReg, boxReg);
2093 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2094 jccb(Assembler::notZero, LGoSlowPath);
2095 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2096 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2097 jccb (Assembler::notZero, CheckSucc);
2098 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
2099 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2100 jmpb(LSuccess);
2101
2102 bind (CheckSucc);
2103
2104 // The following optional optimization can be elided if necessary
2105 // Effectively: if (succ == null) goto slow path
2106 // The code reduces the window for a race, however,
2107 // and thus benefits performance.
2108 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2109 jccb (Assembler::zero, LGoSlowPath);
2110
2111 xorptr(boxReg, boxReg);
2112 // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
2113 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2114
2115 // Memory barrier/fence
2116 // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
2117 // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
2118 // This is faster on Nehalem and AMD Shanghai/Barcelona.
2119 // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
2120 // We might also restructure (ST Owner=0;barrier;LD _Succ) to
2121 // (mov box,0; xchgq box, &m->Owner; LD _succ) .
2130 // We need to ensure progress and succession.
2131 // Try to reacquire the lock.
2132 // If that fails then the new owner is responsible for succession and this
2133 // thread needs to take no further action and can exit via the fast path (success).
2134 // If the re-acquire succeeds then pass control into the slow path.
2135 // As implemented, this latter mode is horrible because we generated more
2136 // coherence traffic on the lock *and* artifically extended the critical section
2137 // length while by virtue of passing control into the slow path.
2138
2139 // box is really RAX -- the following CMPXCHG depends on that binding
2140 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
2141 lock();
2142 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2143 // There's no successor so we tried to regrab the lock.
2144 // If that didn't work, then another thread grabbed the
2145 // lock so we're done (and exit was a success).
2146 jccb (Assembler::notEqual, LSuccess);
2147 // Intentional fall-through into slow path
2148
2149 bind (LGoSlowPath);
2150 if (!HandshakeAfterDeflateIdleMonitors) {
2151 lock();
2152 decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
2153 }
2154 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
2155 jmpb (DONE_LABEL);
2156
2157 bind (LSuccess);
2158 if (!HandshakeAfterDeflateIdleMonitors) {
2159 lock();
2160 decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
2161 }
2162 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
2163 jmpb (DONE_LABEL);
2164
2165 bind (Stacked);
2166 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
2167 lock();
2168 cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
2169
2170 #endif
2171 bind(DONE_LABEL);
2172 }
2173 #endif // COMPILER2
2174
2175 void MacroAssembler::c2bool(Register x) {
2176 // implements x == 0 ? 0 : 1
2177 // note: must only look at least-significant byte of x
2178 // since C-style booleans are stored in one byte
2179 // only! (was bug)
2180 andl(x, 0xFF);
2181 setb(Assembler::notZero, x);
|