28 #include "compiler/disassembler.hpp"
29 #include "gc/shared/cardTableModRefBS.hpp"
30 #include "gc/shared/collectedHeap.inline.hpp"
31 #include "interpreter/interpreter.hpp"
32 #include "memory/resourceArea.hpp"
33 #include "memory/universe.hpp"
34 #include "oops/klass.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/biasedLocking.hpp"
37 #include "runtime/interfaceSupport.hpp"
38 #include "runtime/objectMonitor.hpp"
39 #include "runtime/os.hpp"
40 #include "runtime/sharedRuntime.hpp"
41 #include "runtime/stubRoutines.hpp"
42 #include "runtime/thread.hpp"
43 #include "utilities/macros.hpp"
44 #if INCLUDE_ALL_GCS
45 #include "gc/g1/g1CollectedHeap.inline.hpp"
46 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
47 #include "gc/g1/heapRegion.hpp"
48 #endif // INCLUDE_ALL_GCS
49 #include "crc32c.h"
50 #ifdef COMPILER2
51 #include "opto/intrinsicnode.hpp"
52 #endif
53
54 #ifdef PRODUCT
55 #define BLOCK_COMMENT(str) /* nothing */
56 #define STOP(error) stop(error)
57 #else
58 #define BLOCK_COMMENT(str) block_comment(str)
59 #define STOP(error) block_comment(error); stop(error)
60 #endif
61
62 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
63
64 #ifdef ASSERT
65 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
66 #endif
67
1089
1090 bind(no_reserved_zone_enabling);
1091 }
1092
1093 int MacroAssembler::biased_locking_enter(Register lock_reg,
1094 Register obj_reg,
1095 Register swap_reg,
1096 Register tmp_reg,
1097 bool swap_reg_contains_mark,
1098 Label& done,
1099 Label* slow_case,
1100 BiasedLockingCounters* counters) {
1101 assert(UseBiasedLocking, "why call this otherwise?");
1102 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
1103 assert(tmp_reg != noreg, "tmp_reg must be supplied");
1104 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
1105 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1106 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
1107 NOT_LP64( Address saved_mark_addr(lock_reg, 0); )
1108
1109 if (PrintBiasedLockingStatistics && counters == NULL) {
1110 counters = BiasedLocking::counters();
1111 }
1112 // Biased locking
1113 // See whether the lock is currently biased toward our thread and
1114 // whether the epoch is still valid
1115 // Note that the runtime guarantees sufficient alignment of JavaThread
1116 // pointers to allow age to be placed into low bits
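// For reference, a sketch of the mark word layout assumed here (taken from the
// markOop.hpp comments; the widths shown are those of the 64-bit VM):
//   normal object:  unused:25 hash:31 cms_free:1 age:4 biased_lock:1 lock:2
//   biased object:  JavaThread*:54 epoch:2 cms_free:1 age:4 biased_lock:1 lock:2
// Because the thread pointer occupies the high bits, the alignment guarantee
// above keeps it clear of the epoch, age and lock bits below it.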
1117 // First check to see whether biasing is even enabled for this object
1118 Label cas_label;
1119 int null_check_offset = -1;
1120 if (!swap_reg_contains_mark) {
1121 null_check_offset = offset();
1122 movptr(swap_reg, mark_addr);
1123 }
1124 movptr(tmp_reg, swap_reg);
1125 andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place);
1126 cmpptr(tmp_reg, markOopDesc::biased_lock_pattern);
1127 jcc(Assembler::notEqual, cas_label);
1128 // The bias pattern is present in the object's header. Need to check
1152 andptr(header_reg, ~((int) markOopDesc::age_mask_in_place));
1153 if (counters != NULL) {
1154 cond_inc32(Assembler::zero,
1155 ExternalAddress((address) counters->biased_lock_entry_count_addr()));
1156 }
1157 jcc(Assembler::equal, done);
1158
1159 Label try_revoke_bias;
1160 Label try_rebias;
1161
1162 // At this point we know that the header has the bias pattern and
1163 // that we are not the bias owner in the current epoch. We need to
1164 // figure out more details about the state of the header in order to
1165 // know what operations can be legally performed on the object's
1166 // header.
1167
1168 // If the low three bits in the xor result aren't clear, that means
1169 // the prototype header is no longer biased and we have to revoke
1170 // the bias on this object.
1171 testptr(header_reg, markOopDesc::biased_lock_mask_in_place);
1172 jccb(Assembler::notZero, try_revoke_bias);
1173
1174 // Biasing is still enabled for this data type. See whether the
1175 // epoch of the current bias is still valid, meaning that the epoch
1176 // bits of the mark word are equal to the epoch bits of the
1177 // prototype header. (Note that the prototype header's epoch bits
1178 // only change at a safepoint.) If not, attempt to rebias the object
1179 // toward the current thread. Note that we must be absolutely sure
1180 // that the current epoch is invalid in order to do this because
1181 // otherwise the manipulations it performs on the mark word are
1182 // illegal.
1183 testptr(header_reg, markOopDesc::epoch_mask_in_place);
1184 jccb(Assembler::notZero, try_rebias);
1185
1186 // The epoch of the current bias is still valid but we know nothing
1187 // about the owner; it might be set or it might be clear. Try to
1188 // acquire the bias of the object using an atomic operation. If this
1189 // fails we will go into the runtime to revoke the object's bias.
1190 // Note that we first construct the presumed unbiased header so we
1191 // don't accidentally blow away another thread's valid bias.
1192 NOT_LP64( movptr(swap_reg, saved_mark_addr); )
1193 andptr(swap_reg,
1194 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
1195 #ifdef _LP64
1196 movptr(tmp_reg, swap_reg);
1197 orptr(tmp_reg, r15_thread);
1198 #else
1199 get_thread(tmp_reg);
1200 orptr(tmp_reg, swap_reg);
1201 #endif
1202 if (os::is_MP()) {
1203 lock();
1204 }
1273 // removing the bias bit from the object's header.
1274 if (counters != NULL) {
1275 cond_inc32(Assembler::zero,
1276 ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
1277 }
1278
1279 bind(cas_label);
1280
1281 return null_check_offset;
1282 }
1283
1284 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1285 assert(UseBiasedLocking, "why call this otherwise?");
1286
1287 // Check for biased locking unlock case, which is a no-op
1288 // Note: we do not have to check the thread ID for two reasons.
1289 // First, the interpreter checks for IllegalMonitorStateException at
1290 // a higher level. Second, if the bias was revoked while we held the
1291 // lock, the object could not be rebiased toward another thread, so
1292 // the bias bit would be clear.
1293 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1294 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
1295 cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
1296 jcc(Assembler::equal, done);
1297 }
1298
1299 #ifdef COMPILER2
1300
1301 #if INCLUDE_RTM_OPT
1302
1303 // Update rtm_counters based on abort status
1304 // input: abort_status
1305 // rtm_counters (RTMLockingCounters*)
1306 // flags are killed
1307 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
1308
1309 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
1310 if (PrintPreciseRTMLockingStatistics) {
1311 for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
1312 Label check_abort;
1465 incrementl(retry_count_Reg); // clear z flag
1466 }
1467
1468 // Use RTM for normal stack locks
1469 // Input: objReg (object to lock)
1470 void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg,
1471 Register retry_on_abort_count_Reg,
1472 RTMLockingCounters* stack_rtm_counters,
1473 Metadata* method_data, bool profile_rtm,
1474 Label& DONE_LABEL, Label& IsInflated) {
1475 assert(UseRTMForStackLocks, "why call this otherwise?");
1476 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1477 assert(tmpReg == rax, "");
1478 assert(scrReg == rdx, "");
1479 Label L_rtm_retry, L_decrement_retry, L_on_abort;
1480
1481 if (RTMRetryCount > 0) {
1482 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1483 bind(L_rtm_retry);
1484 }
1485 movptr(tmpReg, Address(objReg, 0));
1486 testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
1487 jcc(Assembler::notZero, IsInflated);
1488
1489 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1490 Label L_noincrement;
1491 if (RTMTotalCountIncrRate > 1) {
1492 // tmpReg, scrReg and flags are killed
1493 branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
1494 }
1495 assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
1496 atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg);
1497 bind(L_noincrement);
1498 }
1499 xbegin(L_on_abort);
1500 movptr(tmpReg, Address(objReg, 0)); // fetch markword
1501 andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
1502 cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked
1503 jcc(Assembler::equal, DONE_LABEL); // all done if unlocked
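      // Note: reaching DONE_LABEL via the branch above leaves us inside the
      // transaction started by xbegin(): the stack lock has been elided, and
      // the critical section runs transactionally until the matching
      // xend()/xabort() on the unlock path.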
1504
1541   // Without the cast to int32_t, a movptr will destroy r10, which typically holds obj
1542 movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1543 movptr(boxReg, tmpReg); // Save ObjectMonitor address
1544
1545 if (RTMRetryCount > 0) {
1546 movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy
1547 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1548 bind(L_rtm_retry);
1549 }
1550 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1551 Label L_noincrement;
1552 if (RTMTotalCountIncrRate > 1) {
1553 // tmpReg, scrReg and flags are killed
1554 branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
1555 }
1556 assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1557 atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
1558 bind(L_noincrement);
1559 }
1560 xbegin(L_on_abort);
1561 movptr(tmpReg, Address(objReg, 0));
1562 movptr(tmpReg, Address(tmpReg, owner_offset));
1563 testptr(tmpReg, tmpReg);
1564 jcc(Assembler::zero, DONE_LABEL);
1565 if (UseRTMXendForLockBusy) {
1566 xend();
1567 jmp(L_decrement_retry);
1568 }
1569 else {
1570 xabort(0);
1571 }
1572 bind(L_on_abort);
1573 Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
1574 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1575 rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
1576 }
1577 if (RTMRetryCount > 0) {
1578 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1579 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1580 }
1687 // rax,: tmp -- KILLED
1688 // scr: tmp -- KILLED
1689 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
1690 Register scrReg, Register cx1Reg, Register cx2Reg,
1691 BiasedLockingCounters* counters,
1692 RTMLockingCounters* rtm_counters,
1693 RTMLockingCounters* stack_rtm_counters,
1694 Metadata* method_data,
1695 bool use_rtm, bool profile_rtm) {
1696 // Ensure the register assignments are disjoint
1697 assert(tmpReg == rax, "");
1698
1699 if (use_rtm) {
1700 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
1701 } else {
1702 assert(cx1Reg == noreg, "");
1703 assert(cx2Reg == noreg, "");
1704 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
1705 }
1706
1707 if (counters != NULL) {
1708 atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
1709 }
1710 if (EmitSync & 1) {
1711 // set box->dhw = markOopDesc::unused_mark()
1712 // Force all sync thru slow-path: slow_enter() and slow_exit()
1713 movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1714 cmpptr (rsp, (int32_t)NULL_WORD);
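    // rsp is never zero, so this comparison always leaves ZF=0; the caller
    // treats ZF=0 as "fast path failed" and falls into slow_enter().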
1715 } else {
1716 // Possible cases that we'll encounter in fast_lock
1717 // ------------------------------------------------
1718 // * Inflated
1719 // -- unlocked
1720 //    -- locked
1721 // = by self
1722 // = by other
1723 // * biased
1724 //    -- by self
1725 // -- by other
1726 // * neutral
1736 // it's stack-locked, biased or neutral
1737 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
1738 // order to reduce the number of conditional branches in the most common cases.
1739 // Beware -- there's a subtle invariant that fetch of the markword
1740 // at [FETCH], below, will never observe a biased encoding (*101b).
1741 // If this invariant is not held we risk exclusion (safety) failure.
1742 if (UseBiasedLocking && !UseOptoBiasInlining) {
1743 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
1744 }
1745
1746 #if INCLUDE_RTM_OPT
1747 if (UseRTMForStackLocks && use_rtm) {
1748 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
1749 stack_rtm_counters, method_data, profile_rtm,
1750 DONE_LABEL, IsInflated);
1751 }
1752 #endif // INCLUDE_RTM_OPT
1753
1754 movptr(tmpReg, Address(objReg, 0)); // [FETCH]
1755 testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
1756 jccb(Assembler::notZero, IsInflated);
1757
1758 // Attempt stack-locking ...
1759 orptr (tmpReg, markOopDesc::unlocked_value);
1760 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
1761 if (os::is_MP()) {
1762 lock();
1763 }
1764 cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
1765 if (counters != NULL) {
1766 cond_inc32(Assembler::equal,
1767 ExternalAddress((address)counters->fast_path_entry_count_addr()));
1768 }
1769 jcc(Assembler::equal, DONE_LABEL); // Success
1770
1771 // Recursive locking.
1772 // The object is stack-locked: markword contains stack pointer to BasicLock.
1773 // Locked by current thread if difference with current SP is less than one page.
1774 subptr(tmpReg, rsp);
1775     // The next instruction sets ZFlag == 1 (success) if the difference is less than one page.
1776 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
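    // Effectively: ZF = (((mark - rsp) & (~(page_size - 1) | lock_bits)) == 0),
    // i.e. the displaced mark points into our own stack, within one page of
    // rsp and with the low tag bits clear -- a recursive stack-lock.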
1813 } else
1814 if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
1815 // register juggle because we need tmpReg for cmpxchgptr below
1816 movptr(scrReg, boxReg);
1817 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
1818
1819 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1820 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1821 // prefetchw [eax + Offset(_owner)-2]
1822 prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1823 }
1824
1825 if ((EmitSync & 64) == 0) {
1826 // Optimistic form: consider XORL tmpReg,tmpReg
1827 movptr(tmpReg, NULL_WORD);
1828 } else {
1829 // Can suffer RTS->RTO upgrades on shared or cold $ lines
1830 // Test-And-CAS instead of CAS
1831 movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); // rax, = m->_owner
1832 testptr(tmpReg, tmpReg); // Locked ?
1833 jccb (Assembler::notZero, DONE_LABEL);
1834 }
1835
1836 // Appears unlocked - try to swing _owner from null to non-null.
1837 // Ideally, I'd manifest "Self" with get_thread and then attempt
1838 // to CAS the register containing Self into m->Owner.
1839 // But we don't have enough registers, so instead we can either try to CAS
1840 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
1841 // we later store "Self" into m->Owner. Transiently storing a stack address
1842 // (rsp or the address of the box) into m->owner is harmless.
1843 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
1844 if (os::is_MP()) {
1845 lock();
1846 }
1847 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1848 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
1849 // If we weren't able to swing _owner from NULL to the BasicLock
1850 // then take the slow path.
1851 jccb (Assembler::notZero, DONE_LABEL);
1852 // update _owner from BasicLock to thread
1853 get_thread (scrReg); // beware: clobbers ICCs
1854 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1855 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
1856
1857 // If the CAS fails we can either retry or pass control to the slow-path.
1858 // We use the latter tactic.
1859 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1860 // If the CAS was successful ...
1861 // Self has acquired the lock
1862 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1863 // Intentional fall-through into DONE_LABEL ...
1864 } else {
1865 movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())); // results in ST-before-CAS penalty
1866 movptr(boxReg, tmpReg);
1867
1868 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1869 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1870 // prefetchw [eax + Offset(_owner)-2]
1871 prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1872 }
1873
1874 if ((EmitSync & 64) == 0) {
1875 // Optimistic form
1876 xorptr (tmpReg, tmpReg);
1877 } else {
1878 // Can suffer RTS->RTO upgrades on shared or cold $ lines
1879 movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); // rax, = m->_owner
1880 testptr(tmpReg, tmpReg); // Locked ?
1881 jccb (Assembler::notZero, DONE_LABEL);
1882 }
1883
1884 // Appears unlocked - try to swing _owner from null to non-null.
1885 // Use either "Self" (in scr) or rsp as thread identity in _owner.
1886 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
1887 get_thread (scrReg);
1888 if (os::is_MP()) {
1889 lock();
1890 }
1891 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1892
1893 // If the CAS fails we can either retry or pass control to the slow-path.
1894 // We use the latter tactic.
1895 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1896 // If the CAS was successful ...
1897 // Self has acquired the lock
1898 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1899 // Intentional fall-through into DONE_LABEL ...
1900 }
1901 #else // _LP64
1949 //
1950 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
1951 // B() doesn't have provably balanced locking so it runs in the interpreter.
1952 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
1953 // is still locked by A().
1954 //
1955 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
1956 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
1957 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
1958 // doesn't say what will occur if a program engages in such mixed-mode locking, however.
1959 // Arguably, given that the spec leaves the JNI case undefined, our implementation
1960 // could reasonably *avoid* checking owner in Fast_Unlock().
1961 // In the interest of performance we elide the m->Owner==Self check in unlock.
1962 // A perfectly viable alternative is to elide the owner check except when
1963 // Xcheck:jni is enabled.
1964
1965 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
1966 assert(boxReg == rax, "");
1967 assert_different_registers(objReg, boxReg, tmpReg);
1968
1969 if (EmitSync & 4) {
1970 // Disable - inhibit all inlining. Force control through the slow-path
1971 cmpptr (rsp, 0);
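    // rsp is never zero, so ZF=0 here and the caller always takes the
    // slow path (slow_exit()).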
1972 } else {
1973 Label DONE_LABEL, Stacked, CheckSucc;
1974
1975 // Critically, the biased locking test must have precedence over
1976 // and appear before the (box->dhw == 0) recursive stack-lock test.
1977 if (UseBiasedLocking && !UseOptoBiasInlining) {
1978 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
1979 }
1980
1981 #if INCLUDE_RTM_OPT
1982 if (UseRTMForStackLocks && use_rtm) {
1983 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1984 Label L_regular_unlock;
1985 movptr(tmpReg, Address(objReg, 0)); // fetch markword
1986 andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
1987 cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked
1988       jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
1989       xend();                                      // otherwise end...
1990       jmp(DONE_LABEL);                             // ... and we're done
1991 bind(L_regular_unlock);
1992 }
1993 #endif
1994
1995 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
1996 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
1997 movptr(tmpReg, Address(objReg, 0)); // Examine the object's markword
1998 testptr(tmpReg, markOopDesc::monitor_value); // Inflated?
1999 jccb (Assembler::zero, Stacked);
2000
2001 // It's inflated.
2002 #if INCLUDE_RTM_OPT
2003 if (use_rtm) {
2004 Label L_regular_inflated_unlock;
2005 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
2006 movptr(boxReg, Address(tmpReg, owner_offset));
2007 testptr(boxReg, boxReg);
2008 jccb(Assembler::notZero, L_regular_inflated_unlock);
2009 xend();
2010 jmpb(DONE_LABEL);
2011 bind(L_regular_inflated_unlock);
2012 }
2013 #endif
2014
2015 // Despite our balanced locking property we still check that m->_owner == Self
2016 // as java routines or native JNI code called by this thread might
2017 // have released the lock.
2018 // Refer to the comments in synchronizer.cpp for how we might encode extra
2019 // state in _succ so we can avoid fetching EntryList|cxq.
2020 //
2021 // I'd like to add more cases in fast_lock() and fast_unlock() --
2022 // such as recursive enter and exit -- but we have to be wary of
2023 // I$ bloat, T$ effects and BP$ effects.
2024 //
2025 // If there's no contention try a 1-0 exit. That is, exit without
2026 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
2027 // we detect and recover from the race that the 1-0 exit admits.
2028 //
2029 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
2030 // before it STs null into _owner, releasing the lock. Updates
2034     // IA32's memory model is TSO (stores retire in program order), so STs are ordered with respect to
2035 // each other and there's no need for an explicit barrier (fence).
2036 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
2037 #ifndef _LP64
2038 get_thread (boxReg);
2039 if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
2040 // prefetchw [ebx + Offset(_owner)-2]
2041 prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2042 }
2043
2044 // Note that we could employ various encoding schemes to reduce
2045 // the number of loads below (currently 4) to just 2 or 3.
2046 // Refer to the comments in synchronizer.cpp.
2047 // In practice the chain of fetches doesn't seem to impact performance, however.
2048 xorptr(boxReg, boxReg);
2049 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
2050 // Attempt to reduce branch density - AMD's branch predictor.
2051 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2052 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2053 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2054 jccb (Assembler::notZero, DONE_LABEL);
2055 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
2056 jmpb (DONE_LABEL);
2057 } else {
2058 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2059 jccb (Assembler::notZero, DONE_LABEL);
2060 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2061 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2062 jccb (Assembler::notZero, CheckSucc);
2063 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
2064 jmpb (DONE_LABEL);
2065 }
2066
2067     // The following code fragment (EmitSync & 65536) improves the performance of
2068 // contended applications and contended synchronization microbenchmarks.
2069 // Unfortunately the emission of the code - even though not executed - causes regressions
2070 // in scimark and jetstream, evidently because of $ effects. Replacing the code
2071 // with an equal number of never-executed NOPs results in the same regression.
2072 // We leave it off by default.
2073
2074 if ((EmitSync & 65536) != 0) {
2075 Label LSuccess, LGoSlowPath ;
2076
2077 bind (CheckSucc);
2078
2079 // Optional pre-test ... it's safe to elide this
2080 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2081 jccb(Assembler::zero, LGoSlowPath);
2082
2083 // We have a classic Dekker-style idiom:
2084 // ST m->_owner = 0 ; MEMBAR; LD m->_succ
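      // In pseudo-code, the recovery protocol below is roughly:
      //   if (m->_succ == NULL) goto SlowPath;     // optional pre-test
      //   m->_owner = NULL; MEMBAR;
      //   if (m->_succ != NULL) return success;    // successor still present
      //   if (CAS(&m->_owner, NULL, placeholder) != NULL) return success;  // someone else owns it
      //   goto SlowPath;                           // we re-own the lock; must provide succession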
2114 // Ratify _succ remains non-null
2115 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);
2116 jccb (Assembler::notZero, LSuccess);
2117
2118 xorptr(boxReg, boxReg); // box is really EAX
2119 if (os::is_MP()) { lock(); }
2120 cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2121 // There's no successor so we tried to regrab the lock with the
2122 // placeholder value. If that didn't work, then another thread
2123 // grabbed the lock so we're done (and exit was a success).
2124 jccb (Assembler::notEqual, LSuccess);
2125       // Since we're low on registers we installed rsp as a placeholder in _owner.
2126       // Now install Self over rsp. This is safe as we're transitioning from
2127       // non-null to non-null.
2128 get_thread (boxReg);
2129 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg);
2130 // Intentional fall-through into LGoSlowPath ...
2131
2132 bind (LGoSlowPath);
2133 orptr(boxReg, 1); // set ICC.ZF=0 to indicate failure
2134 jmpb (DONE_LABEL);
2135
2136 bind (LSuccess);
2137 xorptr(boxReg, boxReg); // set ICC.ZF=1 to indicate success
2138 jmpb (DONE_LABEL);
2139 }
2140
2141 bind (Stacked);
2142 // It's not inflated and it's not recursively stack-locked and it's not biased.
2143 // It must be stack-locked.
2144 // Try to reset the header to displaced header.
2145 // The "box" value on the stack is stable, so we can reload
2146 // and be assured we observe the same value as above.
2147 movptr(tmpReg, Address(boxReg, 0));
2148 if (os::is_MP()) {
2149 lock();
2150 }
2151 cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2152     // Intentional fall-thru into DONE_LABEL
2153
2154 // DONE_LABEL is a hot target - we'd really like to place it at the
2155 // start of cache line by padding with NOPs.
2156 // See the AMD and Intel software optimization manuals for the
2157 // most efficient "long" NOP encodings.
2158 // Unfortunately none of our alignment mechanisms suffice.
2159 if ((EmitSync & 65536) == 0) {
2160 bind (CheckSucc);
2161 }
2162 #else // _LP64
2163 // It's inflated
2164 if (EmitSync & 1024) {
2165 // Emit code to check that _owner == Self
2166 // We could fold the _owner test into subsequent code more efficiently
2167 // than using a stand-alone check, but since _owner checking is off by
2168 // default we don't bother. We also might consider predicating the
2169 // _owner==Self check on Xcheck:jni or running on a debug build.
2170 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2171 xorptr(boxReg, r15_thread);
2172 } else {
2173 xorptr(boxReg, boxReg);
2174 }
2175 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2176 jccb (Assembler::notZero, DONE_LABEL);
2177 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2178 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2179 jccb (Assembler::notZero, CheckSucc);
2180 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2181 jmpb (DONE_LABEL);
2182
2183 if ((EmitSync & 65536) == 0) {
2184 // Try to avoid passing control into the slow_path ...
2185 Label LSuccess, LGoSlowPath ;
2186 bind (CheckSucc);
2187
2188 // The following optional optimization can be elided if necessary
2189 // Effectively: if (succ == null) goto SlowPath
2190 // The code reduces the window for a race, however,
2191 // and thus benefits performance.
2192 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2193 jccb (Assembler::zero, LGoSlowPath);
2194
2195 xorptr(boxReg, boxReg);
2196 if ((EmitSync & 16) && os::is_MP()) {
2197 xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2198 } else {
2199 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2200 if (os::is_MP()) {
2201 // Memory barrier/fence
2218 // Try to reacquire the lock.
2219 // If that fails then the new owner is responsible for succession and this
2220 // thread needs to take no further action and can exit via the fast path (success).
2221 // If the re-acquire succeeds then pass control into the slow path.
2222     // As implemented, this latter mode is horrible because we generate more
2223     // coherence traffic on the lock *and* artificially extend the critical section
2224     // length by virtue of passing control into the slow path.
2225
2226 // box is really RAX -- the following CMPXCHG depends on that binding
2227 // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
2228 if (os::is_MP()) { lock(); }
2229 cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2230 // There's no successor so we tried to regrab the lock.
2231 // If that didn't work, then another thread grabbed the
2232 // lock so we're done (and exit was a success).
2233 jccb (Assembler::notEqual, LSuccess);
2234 // Intentional fall-through into slow-path
2235
2236 bind (LGoSlowPath);
2237 orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
2238 jmpb (DONE_LABEL);
2239
2240 bind (LSuccess);
2241 testl (boxReg, 0); // set ICC.ZF=1 to indicate success
2242 jmpb (DONE_LABEL);
2243 }
2244
2245 bind (Stacked);
2246 movptr(tmpReg, Address (boxReg, 0)); // re-fetch
2247 if (os::is_MP()) { lock(); }
2248 cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2249
2250 if (EmitSync & 65536) {
2251 bind (CheckSucc);
2252 }
2253 #endif
2254 bind(DONE_LABEL);
2255 }
2256 }
2257 #endif // COMPILER2
2258
2259 void MacroAssembler::c2bool(Register x) {
2260 // implements x == 0 ? 0 : 1
2261 // note: must only look at least-significant byte of x
2262 // since C-style booleans are stored in one byte
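// A sketch of the usual implementation of this idiom (assuming the standard
// HotSpot x86 code, shown here for illustration):
//   andl(x, 0xFF);                 // isolate the low byte; sets ZF
//   setb(Assembler::notZero, x);   // x = (low byte != 0) ? 1 : 0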
6060 b = code_string(ss.as_string());
6061 }
6062 BLOCK_COMMENT("verify_oop {");
6063 #ifdef _LP64
6064 push(rscratch1); // save r10, trashed by movptr()
6065 #endif
6066 push(rax); // save rax,
6067 push(reg); // pass register argument
6068 ExternalAddress buffer((address) b);
6069 // avoid using pushptr, as it modifies scratch registers
6070 // and our contract is not to modify anything
6071 movptr(rax, buffer.addr());
6072 push(rax);
6073 // call indirectly to solve generation ordering problem
6074 movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
6075 call(rax);
6076 // Caller pops the arguments (oop, message) and restores rax, r10
6077 BLOCK_COMMENT("} verify_oop");
6078 }
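// Illustrative (hypothetical) call site: with -XX:+VerifyOops enabled,
//   __ verify_oop(rax);
// traps through the verify_oop stub if rax does not hold a well-formed oop.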
6079
6080
6081 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
6082 Register tmp,
6083 int offset) {
6084 intptr_t value = *delayed_value_addr;
6085 if (value != 0)
6086 return RegisterOrConstant(value + offset);
6087
6088 // load indirectly to solve generation ordering problem
6089 movptr(tmp, ExternalAddress((address) delayed_value_addr));
6090
6091 #ifdef ASSERT
6092 { Label L;
6093 testptr(tmp, tmp);
6094 if (WizardMode) {
6095 const char* buf = NULL;
6096 {
6097 ResourceMark rm;
6098 stringStream ss;
6099 ss.print("DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]);
// =============================================================================
// The same file after the Shenandoah changes (new Shenandoah includes,
// shenandoah_store_addr_check() calls, and jccb/jmpb replaced by the
// *_if_possible variants) follows.
// =============================================================================
28 #include "compiler/disassembler.hpp"
29 #include "gc/shared/cardTableModRefBS.hpp"
30 #include "gc/shared/collectedHeap.inline.hpp"
31 #include "interpreter/interpreter.hpp"
32 #include "memory/resourceArea.hpp"
33 #include "memory/universe.hpp"
34 #include "oops/klass.inline.hpp"
35 #include "prims/methodHandles.hpp"
36 #include "runtime/biasedLocking.hpp"
37 #include "runtime/interfaceSupport.hpp"
38 #include "runtime/objectMonitor.hpp"
39 #include "runtime/os.hpp"
40 #include "runtime/sharedRuntime.hpp"
41 #include "runtime/stubRoutines.hpp"
42 #include "runtime/thread.hpp"
43 #include "utilities/macros.hpp"
44 #if INCLUDE_ALL_GCS
45 #include "gc/g1/g1CollectedHeap.inline.hpp"
46 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
47 #include "gc/g1/heapRegion.hpp"
48 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
49 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
50 #endif // INCLUDE_ALL_GCS
51 #include "crc32c.h"
52 #ifdef COMPILER2
53 #include "opto/intrinsicnode.hpp"
54 #endif
55
56 #ifdef PRODUCT
57 #define BLOCK_COMMENT(str) /* nothing */
58 #define STOP(error) stop(error)
59 #else
60 #define BLOCK_COMMENT(str) block_comment(str)
61 #define STOP(error) block_comment(error); stop(error)
62 #endif
63
64 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
65
66 #ifdef ASSERT
67 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
68 #endif
69
1091
1092 bind(no_reserved_zone_enabling);
1093 }
1094
1095 int MacroAssembler::biased_locking_enter(Register lock_reg,
1096 Register obj_reg,
1097 Register swap_reg,
1098 Register tmp_reg,
1099 bool swap_reg_contains_mark,
1100 Label& done,
1101 Label* slow_case,
1102 BiasedLockingCounters* counters) {
1103 assert(UseBiasedLocking, "why call this otherwise?");
1104 assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
1105 assert(tmp_reg != noreg, "tmp_reg must be supplied");
1106 assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
1107 assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1108 Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes());
1109 NOT_LP64( Address saved_mark_addr(lock_reg, 0); )
1110
1111 shenandoah_store_addr_check(obj_reg);
1112
1113 if (PrintBiasedLockingStatistics && counters == NULL) {
1114 counters = BiasedLocking::counters();
1115 }
1116 // Biased locking
1117 // See whether the lock is currently biased toward our thread and
1118 // whether the epoch is still valid
1119 // Note that the runtime guarantees sufficient alignment of JavaThread
1120 // pointers to allow age to be placed into low bits
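// For reference, a sketch of the mark word layout assumed here (taken from the
// markOop.hpp comments; the widths shown are those of the 64-bit VM):
//   normal object:  unused:25 hash:31 cms_free:1 age:4 biased_lock:1 lock:2
//   biased object:  JavaThread*:54 epoch:2 cms_free:1 age:4 biased_lock:1 lock:2
// Because the thread pointer occupies the high bits, the alignment guarantee
// above keeps it clear of the epoch, age and lock bits below it.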
1121 // First check to see whether biasing is even enabled for this object
1122 Label cas_label;
1123 int null_check_offset = -1;
1124 if (!swap_reg_contains_mark) {
1125 null_check_offset = offset();
1126 movptr(swap_reg, mark_addr);
1127 }
1128 movptr(tmp_reg, swap_reg);
1129 andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place);
1130 cmpptr(tmp_reg, markOopDesc::biased_lock_pattern);
1131 jcc(Assembler::notEqual, cas_label);
1132 // The bias pattern is present in the object's header. Need to check
1156 andptr(header_reg, ~((int) markOopDesc::age_mask_in_place));
1157 if (counters != NULL) {
1158 cond_inc32(Assembler::zero,
1159 ExternalAddress((address) counters->biased_lock_entry_count_addr()));
1160 }
1161 jcc(Assembler::equal, done);
1162
1163 Label try_revoke_bias;
1164 Label try_rebias;
1165
1166 // At this point we know that the header has the bias pattern and
1167 // that we are not the bias owner in the current epoch. We need to
1168 // figure out more details about the state of the header in order to
1169 // know what operations can be legally performed on the object's
1170 // header.
1171
1172 // If the low three bits in the xor result aren't clear, that means
1173 // the prototype header is no longer biased and we have to revoke
1174 // the bias on this object.
1175 testptr(header_reg, markOopDesc::biased_lock_mask_in_place);
1176 jccb_if_possible(Assembler::notZero, try_revoke_bias);
1177
1178 // Biasing is still enabled for this data type. See whether the
1179 // epoch of the current bias is still valid, meaning that the epoch
1180 // bits of the mark word are equal to the epoch bits of the
1181 // prototype header. (Note that the prototype header's epoch bits
1182 // only change at a safepoint.) If not, attempt to rebias the object
1183 // toward the current thread. Note that we must be absolutely sure
1184 // that the current epoch is invalid in order to do this because
1185 // otherwise the manipulations it performs on the mark word are
1186 // illegal.
1187 testptr(header_reg, markOopDesc::epoch_mask_in_place);
1188 jccb_if_possible(Assembler::notZero, try_rebias);
1189
1190 // The epoch of the current bias is still valid but we know nothing
1191 // about the owner; it might be set or it might be clear. Try to
1192 // acquire the bias of the object using an atomic operation. If this
1193 // fails we will go into the runtime to revoke the object's bias.
1194 // Note that we first construct the presumed unbiased header so we
1195 // don't accidentally blow away another thread's valid bias.
1196 NOT_LP64( movptr(swap_reg, saved_mark_addr); )
1197 andptr(swap_reg,
1198 markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
1199 #ifdef _LP64
1200 movptr(tmp_reg, swap_reg);
1201 orptr(tmp_reg, r15_thread);
1202 #else
1203 get_thread(tmp_reg);
1204 orptr(tmp_reg, swap_reg);
1205 #endif
1206 if (os::is_MP()) {
1207 lock();
1208 }
1277 // removing the bias bit from the object's header.
1278 if (counters != NULL) {
1279 cond_inc32(Assembler::zero,
1280 ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
1281 }
1282
1283 bind(cas_label);
1284
1285 return null_check_offset;
1286 }
1287
1288 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1289 assert(UseBiasedLocking, "why call this otherwise?");
1290
1291 // Check for biased locking unlock case, which is a no-op
1292 // Note: we do not have to check the thread ID for two reasons.
1293 // First, the interpreter checks for IllegalMonitorStateException at
1294 // a higher level. Second, if the bias was revoked while we held the
1295 // lock, the object could not be rebiased toward another thread, so
1296 // the bias bit would be clear.
1297 shenandoah_store_addr_check(obj_reg); // Access mark word
1298 movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1299 andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
1300 cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
1301 jcc(Assembler::equal, done);
1302 }
1303
1304 #ifdef COMPILER2
1305
1306 #if INCLUDE_RTM_OPT
1307
1308 // Update rtm_counters based on abort status
1309 // input: abort_status
1310 // rtm_counters (RTMLockingCounters*)
1311 // flags are killed
1312 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
1313
1314 atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
1315 if (PrintPreciseRTMLockingStatistics) {
1316 for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
1317 Label check_abort;
1470 incrementl(retry_count_Reg); // clear z flag
1471 }
1472
1473 // Use RTM for normal stack locks
1474 // Input: objReg (object to lock)
1475 void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg,
1476 Register retry_on_abort_count_Reg,
1477 RTMLockingCounters* stack_rtm_counters,
1478 Metadata* method_data, bool profile_rtm,
1479 Label& DONE_LABEL, Label& IsInflated) {
1480 assert(UseRTMForStackLocks, "why call this otherwise?");
1481 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1482 assert(tmpReg == rax, "");
1483 assert(scrReg == rdx, "");
1484 Label L_rtm_retry, L_decrement_retry, L_on_abort;
1485
1486 if (RTMRetryCount > 0) {
1487 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1488 bind(L_rtm_retry);
1489 }
1490 shenandoah_store_addr_check(objReg); // Access mark word
1491 movptr(tmpReg, Address(objReg, 0));
1492 testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
1493 jcc(Assembler::notZero, IsInflated);
1494
1495 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1496 Label L_noincrement;
1497 if (RTMTotalCountIncrRate > 1) {
1498 // tmpReg, scrReg and flags are killed
1499 branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
1500 }
1501 assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
1502 atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg);
1503 bind(L_noincrement);
1504 }
1505 xbegin(L_on_abort);
1506 movptr(tmpReg, Address(objReg, 0)); // fetch markword
1507 andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
1508 cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked
1509 jcc(Assembler::equal, DONE_LABEL); // all done if unlocked
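      // Note: reaching DONE_LABEL via the branch above leaves us inside the
      // transaction started by xbegin(): the stack lock has been elided, and
      // the critical section runs transactionally until the matching
      // xend()/xabort() on the unlock path.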
1510
1547   // Without the cast to int32_t, a movptr will destroy r10, which typically holds obj
1548 movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1549 movptr(boxReg, tmpReg); // Save ObjectMonitor address
1550
1551 if (RTMRetryCount > 0) {
1552 movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy
1553 movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1554 bind(L_rtm_retry);
1555 }
1556 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1557 Label L_noincrement;
1558 if (RTMTotalCountIncrRate > 1) {
1559 // tmpReg, scrReg and flags are killed
1560 branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
1561 }
1562 assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1563 atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
1564 bind(L_noincrement);
1565 }
1566 xbegin(L_on_abort);
1567 shenandoah_store_addr_check(objReg); // Access mark word
1568 movptr(tmpReg, Address(objReg, 0));
1569 movptr(tmpReg, Address(tmpReg, owner_offset));
1570 testptr(tmpReg, tmpReg);
1571 jcc(Assembler::zero, DONE_LABEL);
1572 if (UseRTMXendForLockBusy) {
1573 xend();
1574 jmp(L_decrement_retry);
1575 }
1576 else {
1577 xabort(0);
1578 }
1579 bind(L_on_abort);
1580 Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
1581 if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1582 rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
1583 }
1584 if (RTMRetryCount > 0) {
1585 // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1586 rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1587 }
1694 // rax,: tmp -- KILLED
1695 // scr: tmp -- KILLED
1696 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
1697 Register scrReg, Register cx1Reg, Register cx2Reg,
1698 BiasedLockingCounters* counters,
1699 RTMLockingCounters* rtm_counters,
1700 RTMLockingCounters* stack_rtm_counters,
1701 Metadata* method_data,
1702 bool use_rtm, bool profile_rtm) {
1703 // Ensure the register assignments are disjoint
1704 assert(tmpReg == rax, "");
1705
1706 if (use_rtm) {
1707 assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
1708 } else {
1709 assert(cx1Reg == noreg, "");
1710 assert(cx2Reg == noreg, "");
1711 assert_different_registers(objReg, boxReg, tmpReg, scrReg);
1712 }
1713
1714 shenandoah_store_addr_check(objReg); // Access mark word
1715
1716 if (counters != NULL) {
1717 atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
1718 }
1719 if (EmitSync & 1) {
1720 // set box->dhw = markOopDesc::unused_mark()
1721 // Force all sync thru slow-path: slow_enter() and slow_exit()
1722 movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1723 cmpptr (rsp, (int32_t)NULL_WORD);
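    // rsp is never zero, so this comparison always leaves ZF=0; the caller
    // treats ZF=0 as "fast path failed" and falls into slow_enter().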
1724 } else {
1725 // Possible cases that we'll encounter in fast_lock
1726 // ------------------------------------------------
1727 // * Inflated
1728 // -- unlocked
1729 //    -- locked
1730 // = by self
1731 // = by other
1732 // * biased
1733 //    -- by self
1734 // -- by other
1735 // * neutral
1745 // it's stack-locked, biased or neutral
1746 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
1747 // order to reduce the number of conditional branches in the most common cases.
1748 // Beware -- there's a subtle invariant that fetch of the markword
1749 // at [FETCH], below, will never observe a biased encoding (*101b).
1750 // If this invariant is not held we risk exclusion (safety) failure.
1751 if (UseBiasedLocking && !UseOptoBiasInlining) {
1752 biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
1753 }
1754
1755 #if INCLUDE_RTM_OPT
1756 if (UseRTMForStackLocks && use_rtm) {
1757 rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
1758 stack_rtm_counters, method_data, profile_rtm,
1759 DONE_LABEL, IsInflated);
1760 }
1761 #endif // INCLUDE_RTM_OPT
1762
1763 movptr(tmpReg, Address(objReg, 0)); // [FETCH]
1764 testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
1765 jccb_if_possible(Assembler::notZero, IsInflated);
1766
1767 // Attempt stack-locking ...
1768 orptr (tmpReg, markOopDesc::unlocked_value);
1769 movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
1770 if (os::is_MP()) {
1771 lock();
1772 }
1773 cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
1774 if (counters != NULL) {
1775 cond_inc32(Assembler::equal,
1776 ExternalAddress((address)counters->fast_path_entry_count_addr()));
1777 }
1778 jcc(Assembler::equal, DONE_LABEL); // Success
1779
1780 // Recursive locking.
1781 // The object is stack-locked: markword contains stack pointer to BasicLock.
1782 // Locked by current thread if difference with current SP is less than one page.
1783 subptr(tmpReg, rsp);
1784     // The next instruction sets ZFlag == 1 (success) if the difference is less than one page.
1785 andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
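    // Effectively: ZF = (((mark - rsp) & (~(page_size - 1) | lock_bits)) == 0),
    // i.e. the displaced mark points into our own stack, within one page of
    // rsp and with the low tag bits clear -- a recursive stack-lock.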
1822 } else
1823 if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
1824 // register juggle because we need tmpReg for cmpxchgptr below
1825 movptr(scrReg, boxReg);
1826 movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
1827
1828 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1829 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1830 // prefetchw [eax + Offset(_owner)-2]
1831 prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1832 }
1833
1834 if ((EmitSync & 64) == 0) {
1835 // Optimistic form: consider XORL tmpReg,tmpReg
1836 movptr(tmpReg, NULL_WORD);
1837 } else {
1838 // Can suffer RTS->RTO upgrades on shared or cold $ lines
1839 // Test-And-CAS instead of CAS
1840 movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); // rax, = m->_owner
1841 testptr(tmpReg, tmpReg); // Locked ?
1842 jccb_if_possible(Assembler::notZero, DONE_LABEL);
1843 }
1844
1845 // Appears unlocked - try to swing _owner from null to non-null.
1846 // Ideally, I'd manifest "Self" with get_thread and then attempt
1847 // to CAS the register containing Self into m->Owner.
1848 // But we don't have enough registers, so instead we can either try to CAS
1849 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
1850 // we later store "Self" into m->Owner. Transiently storing a stack address
1851 // (rsp or the address of the box) into m->owner is harmless.
1852 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
1853 if (os::is_MP()) {
1854 lock();
1855 }
1856 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1857 movptr(Address(scrReg, 0), 3); // box->_displaced_header = 3
1858 // If we weren't able to swing _owner from NULL to the BasicLock
1859 // then take the slow path.
1860 jccb_if_possible(Assembler::notZero, DONE_LABEL);
1861 // update _owner from BasicLock to thread
1862 get_thread (scrReg); // beware: clobbers ICCs
1863 movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1864 xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
1865
1866 // If the CAS fails we can either retry or pass control to the slow-path.
1867 // We use the latter tactic.
1868 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1869 // If the CAS was successful ...
1870 // Self has acquired the lock
1871 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1872 // Intentional fall-through into DONE_LABEL ...
1873 } else {
1874 movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark())); // results in ST-before-CAS penalty
1875 movptr(boxReg, tmpReg);
1876
1877 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1878 if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1879 // prefetchw [eax + Offset(_owner)-2]
1880 prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1881 }
1882
1883 if ((EmitSync & 64) == 0) {
1884 // Optimistic form
1885 xorptr (tmpReg, tmpReg);
1886 } else {
1887 // Can suffer RTS->RTO upgrades on shared or cold $ lines
1888 movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); // rax, = m->_owner
1889 testptr(tmpReg, tmpReg); // Locked ?
1890 jccb_if_possible(Assembler::notZero, DONE_LABEL);
1891 }
1892
1893 // Appears unlocked - try to swing _owner from null to non-null.
1894 // Use either "Self" (in scr) or rsp as thread identity in _owner.
1895 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
1896 get_thread (scrReg);
1897 if (os::is_MP()) {
1898 lock();
1899 }
1900 cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1901
1902 // If the CAS fails we can either retry or pass control to the slow-path.
1903 // We use the latter tactic.
1904 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1905 // If the CAS was successful ...
1906 // Self has acquired the lock
1907 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1908 // Intentional fall-through into DONE_LABEL ...
1909 }
1910 #else // _LP64
1958 //
1959 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
1960 // B() doesn't have provably balanced locking so it runs in the interpreter.
1961 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
1962 // is still locked by A().
1963 //
1964 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
1965 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
1966 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
1967 // doesn't say what will occur if a program engages in such mixed-mode locking, however.
1968 // Arguably, given that the spec leaves the JNI case undefined, our implementation
1969 // could reasonably *avoid* checking owner in Fast_Unlock().
1970 // In the interest of performance we elide the m->Owner==Self check in unlock.
1971 // A perfectly viable alternative is to elide the owner check except when
1972 // Xcheck:jni is enabled.
1973
1974 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
1975 assert(boxReg == rax, "");
1976 assert_different_registers(objReg, boxReg, tmpReg);
1977
1978 shenandoah_store_addr_check(objReg); // Access mark word
1979
1980 if (EmitSync & 4) {
1981 // Disable - inhibit all inlining. Force control through the slow-path
1982 cmpptr (rsp, 0);
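    // rsp is never zero, so ZF=0 here and the caller always takes the
    // slow path (slow_exit()).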
1983 } else {
1984 Label DONE_LABEL, Stacked, CheckSucc;
1985
1986 // Critically, the biased locking test must have precedence over
1987 // and appear before the (box->dhw == 0) recursive stack-lock test.
1988 if (UseBiasedLocking && !UseOptoBiasInlining) {
1989 biased_locking_exit(objReg, tmpReg, DONE_LABEL);
1990 }
1991
1992 #if INCLUDE_RTM_OPT
1993 if (UseRTMForStackLocks && use_rtm) {
1994 assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1995 Label L_regular_unlock;
1996 movptr(tmpReg, Address(objReg, 0)); // fetch markword
1997 andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
1998 cmpptr(tmpReg, markOopDesc::unlocked_value); // bits = 001 unlocked
1999       jccb(Assembler::notEqual, L_regular_unlock); // if !HLE RegularLock
2000       xend();                                      // otherwise end...
2001       jmp(DONE_LABEL);                             // ... and we're done
2002 bind(L_regular_unlock);
2003 }
2004 #endif
2005
2006 cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
2007 jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
2008 movptr(tmpReg, Address(objReg, 0)); // Examine the object's markword
2009 testptr(tmpReg, markOopDesc::monitor_value); // Inflated?
2010 jccb (Assembler::zero, Stacked);
2011
2012 // It's inflated.
2013 #if INCLUDE_RTM_OPT
2014 if (use_rtm) {
2015 Label L_regular_inflated_unlock;
2016 int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
2017 movptr(boxReg, Address(tmpReg, owner_offset));
2018 testptr(boxReg, boxReg);
2019 jccb(Assembler::notZero, L_regular_inflated_unlock);
2020 xend();
2021 jmpb_if_possible(DONE_LABEL);
2022 bind(L_regular_inflated_unlock);
2023 }
2024 #endif
2025
2026 // Despite our balanced locking property we still check that m->_owner == Self
2027 // as java routines or native JNI code called by this thread might
2028 // have released the lock.
2029 // Refer to the comments in synchronizer.cpp for how we might encode extra
2030 // state in _succ so we can avoid fetching EntryList|cxq.
2031 //
2032 // I'd like to add more cases in fast_lock() and fast_unlock() --
2033 // such as recursive enter and exit -- but we have to be wary of
2034 // I$ bloat, T$ effects and BP$ effects.
2035 //
2036 // If there's no contention try a 1-0 exit. That is, exit without
2037 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
2038 // we detect and recover from the race that the 1-0 exit admits.
2039 //
2040 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
2041 // before it STs null into _owner, releasing the lock. Updates
2045     // IA32's memory model is TSO (stores retire in program order), so STs are ordered with respect to
2046 // each other and there's no need for an explicit barrier (fence).
2047 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
2048 #ifndef _LP64
2049 get_thread (boxReg);
2050 if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
2051 // prefetchw [ebx + Offset(_owner)-2]
2052 prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2053 }
2054
2055 // Note that we could employ various encoding schemes to reduce
2056 // the number of loads below (currently 4) to just 2 or 3.
2057 // Refer to the comments in synchronizer.cpp.
2058 // In practice the chain of fetches doesn't seem to impact performance, however.
2059 xorptr(boxReg, boxReg);
2060 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
2061 // Attempt to reduce branch density - AMD's branch predictor.
2062 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2063 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2064 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2065 jccb_if_possible(Assembler::notZero, DONE_LABEL);
2066 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
2067 jmpb_if_possible(DONE_LABEL);
2068 } else {
2069 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2070 jccb_if_possible(Assembler::notZero, DONE_LABEL);
2071 movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2072 orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2073 jccb (Assembler::notZero, CheckSucc);
2074 movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
2075 jmpb_if_possible(DONE_LABEL);
2076 }
2077
2078     // The following code fragment (EmitSync & 65536) improves the performance of
2079 // contended applications and contended synchronization microbenchmarks.
2080 // Unfortunately the emission of the code - even though not executed - causes regressions
2081 // in scimark and jetstream, evidently because of $ effects. Replacing the code
2082 // with an equal number of never-executed NOPs results in the same regression.
2083 // We leave it off by default.
2084
2085 if ((EmitSync & 65536) != 0) {
2086 Label LSuccess, LGoSlowPath ;
2087
2088 bind (CheckSucc);
2089
2090 // Optional pre-test ... it's safe to elide this
2091 cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2092 jccb(Assembler::zero, LGoSlowPath);
2093
2094 // We have a classic Dekker-style idiom:
2095 // ST m->_owner = 0 ; MEMBAR; LD m->_succ
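      // In pseudo-code, the recovery protocol below is roughly:
      //   if (m->_succ == NULL) goto SlowPath;     // optional pre-test
      //   m->_owner = NULL; MEMBAR;
      //   if (m->_succ != NULL) return success;    // successor still present
      //   if (CAS(&m->_owner, NULL, placeholder) != NULL) return success;  // someone else owns it
      //   goto SlowPath;                           // we re-own the lock; must provide succession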
    // Ratify _succ remains non-null
    cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);
    jccb  (Assembler::notZero, LSuccess);

    xorptr(boxReg, boxReg);                 // box is really EAX
    if (os::is_MP()) { lock(); }
    cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    // There's no successor so we tried to regrab the lock with the
    // placeholder value.  If that didn't work, then another thread
    // grabbed the lock so we're done (and exit was a success).
    jccb  (Assembler::notEqual, LSuccess);
    // Since we're low on registers we installed rsp as a placeholder in _owner.
    // Now install Self over rsp.  This is safe as we're transitioning from
    // non-null to non-null
    get_thread (boxReg);
    movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg);
    // Intentional fall-through into LGoSlowPath ...

    bind (LGoSlowPath);
    orptr(boxReg, 1);                       // set ICC.ZF=0 to indicate failure
    jmpb_if_possible(DONE_LABEL);

    bind (LSuccess);
    xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
    jmpb_if_possible(DONE_LABEL);
  }

  bind (Stacked);
  // It's not inflated and it's not recursively stack-locked and it's not biased.
  // It must be stack-locked.
  // Try to reset the header to the displaced header.
  // The "box" value on the stack is stable, so we can reload
  // and be assured we observe the same value as above.
  movptr(tmpReg, Address(boxReg, 0));
  if (os::is_MP()) {
    lock();
  }
  cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses RAX which is box
  // Intentional fall-thru into DONE_LABEL
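  // (The CAS above succeeds, setting ZF=1, only if the object header still
  // points at our box -- i.e. the object is still stack-locked by this thread --
  // in which case the displaced header in tmpReg is swung back into place.
  // On failure, ZF=0 steers DONE_LABEL consumers into the slow path.)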

  // DONE_LABEL is a hot target - we'd really like to place it at the
  // start of a cache line by padding with NOPs.
  // See the AMD and Intel software optimization manuals for the
  // most efficient "long" NOP encodings.
  // Unfortunately none of our alignment mechanisms suffice.
  if ((EmitSync & 65536) == 0) {
    bind (CheckSucc);
  }
#else // _LP64
  // It's inflated
  if (EmitSync & 1024) {
    // Emit code to check that _owner == Self
    // We could fold the _owner test into subsequent code more efficiently
    // than using a stand-alone check, but since _owner checking is off by
    // default we don't bother. We also might consider predicating the
    // _owner==Self check on Xcheck:jni or running on a debug build.
    movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    xorptr(boxReg, r15_thread);
  } else {
    xorptr(boxReg, boxReg);
  }
  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
  jccb_if_possible(Assembler::notZero, DONE_LABEL);
  movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
  orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
  jccb  (Assembler::notZero, CheckSucc);
  movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  jmpb_if_possible(DONE_LABEL);
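  // (MOV does not modify RFLAGS, so ZF here is still 1 from the orptr of
  // cxq|EntryList above; DONE_LABEL therefore observes a successful 1-0 exit.)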

  if ((EmitSync & 65536) == 0) {
    // Try to avoid passing control into the slow_path ...
    Label LSuccess, LGoSlowPath;
    bind (CheckSucc);

    // The following optional optimization can be elided if necessary.
    // Effectively: if (succ == null) goto SlowPath
    // The code reduces the window for a race and thus benefits performance.
    cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
    jccb  (Assembler::zero, LGoSlowPath);

    xorptr(boxReg, boxReg);
    if ((EmitSync & 16) && os::is_MP()) {
      xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    } else {
      movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
      if (os::is_MP()) {
        // Memory barrier/fence
        // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
        // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack,
        // which is cheaper on most modern parts.
        lock(); addl(Address(rsp, 0), 0);
      }
    }

    // Ratify that _succ is still non-null.
    cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
    jccb  (Assembler::notZero, LSuccess);

    // Try to reacquire the lock.
    // If that fails then the new owner is responsible for succession and this
    // thread needs to take no further action and can exit via the fast path (success).
    // If the re-acquire succeeds then pass control into the slow path.
    // As implemented, this latter mode is horrible because it generates more
    // coherence traffic on the lock *and* artificially extends the critical
    // section by passing control into the slow path.

    // box is really RAX -- the following CMPXCHG depends on that binding
    // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
    if (os::is_MP()) { lock(); }
    cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    // There's no successor so we tried to regrab the lock.
    // If that didn't work, then another thread grabbed the
    // lock so we're done (and exit was a success).
    jccb  (Assembler::notEqual, LSuccess);
    // Intentional fall-through into slow-path

    bind (LGoSlowPath);
    orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
    jmpb_if_possible(DONE_LABEL);

    bind (LSuccess);
    testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
    jmpb_if_possible(DONE_LABEL);
  }

  bind (Stacked);
  movptr(tmpReg, Address(boxReg, 0));       // re-fetch
  if (os::is_MP()) { lock(); }
  cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses RAX which is box
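  // (As in the 32-bit path: ZF=1 from the CAS means the displaced header was
  // restored and the unlock succeeded; ZF=0 sends the exit to the slow path.)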

  if (EmitSync & 65536) {
    bind (CheckSucc);
  }
#endif
  bind(DONE_LABEL);
  }
}
#endif // COMPILER2

void MacroAssembler::c2bool(Register x) {
  // implements x == 0 ? 0 : 1
  // note: must only look at least-significant byte of x
  //       since C-style booleans are stored in one byte
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop {");
#ifdef _LP64
  push(rscratch1);                    // save r10, trashed by movptr()
#endif
  push(rax);                          // save rax
  push(reg);                          // pass register argument
  ExternalAddress buffer((address) b);
  // avoid using pushptr, as it modifies scratch registers
  // and our contract is not to modify anything
  movptr(rax, buffer.addr());
  push(rax);
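  // (Stack at this point, top first: message pointer, the oop under test,
  // saved rax, and on LP64 the saved r10 beneath them.)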
  // call indirectly to solve generation ordering problem
  movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
  call(rax);
  // Caller pops the arguments (oop, message) and restores rax, r10
  BLOCK_COMMENT("} verify_oop");
}

void MacroAssembler::in_heap_check(Register raddr, Label& done) {
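  // Branches to 'done' when raddr lies outside [first_region_bottom, last_region_end),
  // i.e. when the address is not backed by the Shenandoah heap; falls through
  // for in-heap addresses.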
  ShenandoahHeap *h = (ShenandoahHeap *)Universe::heap();

  HeapWord* first_region_bottom = h->first_region_bottom();
  HeapWord* last_region_end = first_region_bottom + (ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize) * h->max_regions();

  cmpptr(raddr, (intptr_t) first_region_bottom);
  jcc(Assembler::less, done);
  cmpptr(raddr, (intptr_t) last_region_end);
  jcc(Assembler::greaterEqual, done);
}

void MacroAssembler::shenandoah_cset_check(Register raddr, Register tmp1, Register tmp2, Label& done) {
  // Test whether the oop is in the collection set (i.e. a from-space object
  // awaiting evacuation); if not, or if the GC has been cancelled, branch to 'done'.
  movptr(tmp1, raddr);
  shrptr(tmp1, ShenandoahHeapRegion::RegionSizeShift);
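  // (tmp1 now holds the region index: the in-cset table is a byte map with
  // one entry per heap region.)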
  movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
  movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
  testbool(tmp2);
  jcc(Assembler::zero, done);

  // Check for cancelled GC.
  movptr(tmp2, (intptr_t) ShenandoahHeap::cancelled_concgc_addr());
  movbool(tmp2, Address(tmp2, 0));
  testbool(tmp2);
  jcc(Assembler::notZero, done);
}

void MacroAssembler::_shenandoah_store_addr_check(Address addr, const char* msg, const char* file, int line) {
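  // Note: only the base register is inspected; any index or displacement is
  // assumed to stay within the same object.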
  _shenandoah_store_addr_check(addr.base(), msg, file, line);
}

void MacroAssembler::_shenandoah_store_addr_check(Register dst, const char* msg, const char* file, int line) {
  if (!(UseShenandoahGC && ShenandoahStoreCheck)) return;
  if (dst == rsp) return; // Stack-based target

  Register raddr = r9;
  Register tmp1 = r10;
  Register tmp2 = r11;

  Label done;

  pushf();
  push(raddr);
  push(tmp1);
  push(tmp2);

  movptr(raddr, dst);

  // Skip null targets.
  testptr(raddr, raddr);
  jcc(Assembler::zero, done);

  in_heap_check(raddr, done);
  shenandoah_cset_check(raddr, tmp1, tmp2, done);

  // Fail.
  pop(tmp2);
  pop(tmp1);
  pop(raddr);
  popf();
  const char* b = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("shenandoah_store_check: %s in file: %s line: %i", msg, file, line);
    b = code_string(ss.as_string());
  }
  stop(b);

  bind(done);

  pop(tmp2);
  pop(tmp1);
  pop(raddr);
  popf();
}

void MacroAssembler::_shenandoah_store_check(Register dst, Register value, const char* msg, const char* file, int line) {
  if (!(UseShenandoahGC && ShenandoahStoreCheck)) return;
  if (dst == rsp) return; // Stack-based target

  Register raddr = r8;
  Register rval = r9;
  Register tmp1 = r10;
  Register tmp2 = r11;

  // Push tmp regs and flags.
  pushf();
  push(raddr);
  push(rval);
  push(tmp1);
  push(tmp2);

  movptr(raddr, dst);
  movptr(rval, value);

  Label done;

  // If the target is not in-heap, skip the check.
  in_heap_check(raddr, done);

  // Test that the target oop is not in the collection set.
  shenandoah_cset_check(raddr, tmp1, tmp2, done);

  // Do the value-check only when concurrent mark is in progress.
  movptr(tmp1, (intptr_t) ShenandoahHeap::concurrent_mark_in_progress_addr());
  movbool(tmp1, Address(tmp1, 0));
  testbool(tmp1);
  jcc(Assembler::zero, done);

  // Null-check the value.
  testptr(rval, rval);
  jcc(Assembler::zero, done);

  // Test that the value oop is not in the collection set.
  shenandoah_cset_check(rval, tmp1, tmp2, done);
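  // Reaching here: the target and the stored value are both collection-set
  // references while marking is active, so the store must be reported.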

  // Failure.
  // Pop tmp regs and flags.
  pop(tmp2);
  pop(tmp1);
  pop(rval);
  pop(raddr);
  popf();
  const char* b = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("shenandoah_store_check: %s in file: %s line: %i", msg, file, line);
    b = code_string(ss.as_string());
  }
  stop(b);

  bind(done);

  // Pop tmp regs and flags.
  pop(tmp2);
  pop(tmp1);
  pop(rval);
  pop(raddr);
  popf();
}

void MacroAssembler::_shenandoah_store_check(Address addr, Register value, const char* msg, const char* file, int line) {
  _shenandoah_store_check(addr.base(), value, msg, file, line);
}

void MacroAssembler::_shenandoah_lock_check(Register dst, const char* msg, const char* file, int line) {
#ifdef ASSERT
  if (!(UseShenandoahGC && ShenandoahStoreCheck)) return;
  push(r8);
  movptr(r8, Address(dst, BasicObjectLock::obj_offset_in_bytes()));
  _shenandoah_store_addr_check(r8, msg, file, line);
  pop(r8);
#endif
}

RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
                                                      Register tmp,
                                                      int offset) {
  intptr_t value = *delayed_value_addr;
  if (value != 0)
    return RegisterOrConstant(value + offset);

  // load indirectly to solve generation ordering problem
  movptr(tmp, ExternalAddress((address) delayed_value_addr));
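  // (If the delayed value was not yet computed at code-generation time, the
  // slot is read at run time instead; the debug-only check below verifies that
  // the slot was in fact initialized before this code executes.)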

#ifdef ASSERT
  { Label L;
    testptr(tmp, tmp);
    if (WizardMode) {
      const char* buf = NULL;
      {
        ResourceMark rm;
        stringStream ss;
        ss.print("DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]);