
src/hotspot/cpu/x86/macroAssembler_x86.cpp

rev 58110 : v2.09a with 8235795, 8235931 and 8236035 extracted; rebased to jdk-14+28; merge with 8236035.patch.cr1; merge with 8235795.patch.cr1; merge with 8236035.patch.cr2; merge with 8235795.patch.cr2; merge with 8235795.patch.cr3.
rev 58111 : See CR9-to-CR10-changes; merge with jdk-15+11.


1268   return null_check_offset;
1269 }
1270 
1271 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1272   assert(UseBiasedLocking, "why call this otherwise?");
1273 
1274   // Check for biased locking unlock case, which is a no-op
1275   // Note: we do not have to check the thread ID for two reasons.
1276   // First, the interpreter checks for IllegalMonitorStateException at
1277   // a higher level. Second, if the bias was revoked while we held the
1278   // lock, the object could not be rebiased toward another thread, so
1279   // the bias bit would be clear.
1280   movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1281   andptr(temp_reg, markWord::biased_lock_mask_in_place);
1282   cmpptr(temp_reg, markWord::biased_lock_pattern);
1283   jcc(Assembler::equal, done);
1284 }
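
For reference, the test above reduces to a mask-and-compare on the low lock bits of the mark word. A minimal C++ sketch of the predicate (the 0x7 mask and 0x5 pattern mirror markWord::biased_lock_mask_in_place and markWord::biased_lock_pattern; illustrative only, not the HotSpot implementation):

#include <cstdint>

// Sketch only: if the low 3 mark word bits are 101, the object is biased and
// the biased-locking unlock is a no-op.
static bool is_biased_sketch(uintptr_t mark) {
  return (mark & 0x7) == 0x5;
}
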
1285 
1286 #ifdef COMPILER2
1287 
1288 // Increment the ObjectMonitor's ref_count for safety or force a branch
1289 // to 'done' with ICC.ZF=0 to indicate failure/take the slow path.
1290 void MacroAssembler::inc_om_ref_count(Register obj_reg, Register om_reg, Register tmp_reg, Label& done) {
1291   atomic_incl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
1292 
1293   Label LGoSlowPath;
1294   if (AsyncDeflateIdleMonitors) {
1295     // Race here if monitor is not owned! The above ref_count bump
1296     // will cause subsequent async deflation to skip it. However,
1297     // previous or concurrent async deflation is a race.
1298 
1299     // First check: if the owner field == DEFLATER_MARKER:
1300     movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1302     // DEFLATER_MARKER == reinterpret_cast<void*>(-1), which the compiler
1303     // will not accept as an immediate here, so we compare against the literal -1:
1303     cmpptr(tmp_reg, -1);
1304     // If marked for async deflation, then take the slow path. This is a
1305     // simpler check than what ObjectMonitorHandle::save_om_ptr() does
1306     // so ObjectMonitor::install_displaced_markword_in_object() doesn't
1307     // have to be implemented in macro assembler.
1308     jccb(Assembler::equal, LGoSlowPath);
1309 
1310     // Second check: if ref_count field <= 0:
1311     movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
1312     cmpptr(tmp_reg, 0);
1313     // If async deflation is in the process of bailing out, but has not
1314     // yet restored the ref_count field, then we take the slow path. We
1315     // want a stable ref_count value for the fast path.
1316     jccb(Assembler::lessEqual, LGoSlowPath);
1317 
1318     // Final check: if object field == obj_reg:
1319     cmpptr(obj_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(object)));
1320     // If the ObjectMonitor has been deflated and recycled, then take
1321     // the slow path.
1322     jccb(Assembler::notEqual, LGoSlowPath);
1323   }
1324 
1325   Label LRetToCaller;
1326   // We leave the ref_count incremented to protect the caller's code
1327   // paths against async deflation.
1328   jmpb(LRetToCaller);
1329 
1330   bind(LGoSlowPath);
1331   lock();
1332   decrementl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
1333   // Jump to 'done' with ICC.ZF=0 to indicate failure/take the slow path.
1334   orl(tmp_reg, 1);
1335   jmp(done);
1336 
1337   bind(LRetToCaller);
1338 }
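
As a reading aid, here is a hedged C++ sketch of the guard logic that inc_om_ref_count() emits. The struct, helper name and use of GCC atomic builtins are illustrative assumptions, not HotSpot code:

#include <cstdint>

// Illustrative stand-in for the ObjectMonitor fields touched above.
struct ObjectMonitorStub {
  void*    volatile owner;
  intptr_t volatile ref_count;
  void*    volatile object;
};

static void* const DEFLATER_MARKER_SKETCH = reinterpret_cast<void*>(-1);

// Returns true if the caller may continue on the fast path (ref_count stays
// incremented); false means the bump was undone and the slow path must be
// taken (the emitted code signals this by jumping to 'done' with ICC.ZF == 0).
static bool try_inc_om_ref_count_sketch(ObjectMonitorStub* om, void* obj,
                                        bool async_deflate_idle_monitors) {
  __sync_fetch_and_add(&om->ref_count, 1);          // atomic_incl
  if (async_deflate_idle_monitors) {
    if (om->owner == DEFLATER_MARKER_SKETCH ||      // marked for async deflation
        om->ref_count <= 0 ||                       // deflater bailing out, value unstable
        om->object != obj) {                        // monitor deflated and recycled
      __sync_fetch_and_sub(&om->ref_count, 1);      // undo the bump
      return false;                                 // LGoSlowPath
    }
  }
  return true;                                      // LRetToCaller
}
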
1339 
1340 #if INCLUDE_RTM_OPT
1341 
1342 // Update rtm_counters based on abort status
1343 // input: abort_status
1344 //        rtm_counters (RTMLockingCounters*)
1345 // flags are killed
1346 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
1347 
1348   atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
1349   if (PrintPreciseRTMLockingStatistics) {
1350     for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
1351       Label check_abort;
1352       testl(abort_status, (1<<i));
1353       jccb(Assembler::equal, check_abort);
1354       atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
1355       bind(check_abort);
1356     }
1357   }
1358 }
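
The abort_status value is the RTM abort status the hardware leaves in EAX (per the Intel SDM, bit 1 means the transaction may succeed on retry and bit 2 means a memory conflict, matching the 0x2/0x4 retry checks used elsewhere in this file). A plain C++ sketch of the counter update, with an illustrative counters struct (ABORT_STATUS_LIMIT assumed to cover the six architectural abort-cause bits):

#include <cstdint>

typedef uintptr_t uintx_sketch;

// Illustrative stand-in for RTMLockingCounters.
struct RTMLockingCountersSketch {
  uintx_sketch abort_count;
  uintx_sketch abortX_count[6];
};

static void rtm_counters_update_sketch(uint32_t abort_status,
                                       RTMLockingCountersSketch* c,
                                       bool print_precise_stats) {
  c->abort_count++;                       // atomic_incptr in the emitted code
  if (print_precise_stats) {
    for (int i = 0; i < 6; i++) {
      if (abort_status & (1u << i)) {     // testl(abort_status, 1 << i)
        c->abortX_count[i]++;             // one counter per abort cause
      }
    }
  }
}
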
1359 


1553   bind(L_decrement_retry);
1554   if (RTMRetryCount > 0) {
1555     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1556     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1557   }
1558 }
1559 
1560 // Use RTM for inflating locks
1561 // inputs: objReg (object to lock)
1562 //         boxReg (on-stack box address (displaced header location) - KILLED)
1563 //         tmpReg (ObjectMonitor address + markWord::monitor_value)
1564 void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
1565                                           Register scrReg, Register retry_on_busy_count_Reg,
1566                                           Register retry_on_abort_count_Reg,
1567                                           RTMLockingCounters* rtm_counters,
1568                                           Metadata* method_data, bool profile_rtm,
1569                                           Label& DONE_LABEL) {
1570   assert(UseRTMLocking, "why call this otherwise?");
1571   assert(tmpReg == rax, "");
1572   assert(scrReg == rdx, "");
1573   Label L_rtm_retry, L_decrement_retry, L_on_abort, L_local_done;
1574   int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
1575 
1576   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1577   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
1578 
1579   if (!HandshakeAfterDeflateIdleMonitors) {
1580     // Increment the ObjectMonitor's ref_count for safety or force the
1581     // enter slow path via DONE_LABEL.
1582     // In rtm_inflated_locking(), initially tmpReg contains the object's
1583     // mark word which, in this case, is the (ObjectMonitor* | monitor_value).
1584     // Also this code uses scrReg as its temporary register.
1585     inc_om_ref_count(objReg, tmpReg /* om_reg */, scrReg /* tmp_reg */, DONE_LABEL);
1586   }
1587 
1588   movptr(boxReg, tmpReg); // Save ObjectMonitor address
1589 
1590   if (RTMRetryCount > 0) {
1591     movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
1592     movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1593     bind(L_rtm_retry);
1594   }
1595   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1596     Label L_noincrement;
1597     if (RTMTotalCountIncrRate > 1) {
1598       // tmpReg, scrReg and flags are killed
1599       branch_on_random_using_rdtsc(tmpReg, scrReg, RTMTotalCountIncrRate, L_noincrement);
1600     }
1601     assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1602     atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
1603     bind(L_noincrement);
1604   }
1605   xbegin(L_on_abort);
1606   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
1607   movptr(tmpReg, Address(tmpReg, owner_offset));
1608   testptr(tmpReg, tmpReg);
1609   jcc(Assembler::zero, L_local_done);
1610   if (UseRTMXendForLockBusy) {
1611     xend();
1612     jmp(L_decrement_retry);
1613   }
1614   else {
1615     xabort(0);
1616   }
1617   bind(L_on_abort);
1618   Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
1619   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1620     rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
1621   }
1622   if (RTMRetryCount > 0) {
1623     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1624     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1625   }
1626 
1627   movptr(tmpReg, Address(boxReg, owner_offset)) ;
1628   testptr(tmpReg, tmpReg) ;
1629   jccb(Assembler::notZero, L_decrement_retry) ;
1630 
1631   // Appears unlocked - try to swing _owner from null to non-null.
1632   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1633 #ifdef _LP64
1634   Register threadReg = r15_thread;
1635 #else
1636   get_thread(scrReg);
1637   Register threadReg = scrReg;
1638 #endif
1639   lock();
1640   cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
1641 
1642   if (RTMRetryCount > 0) {
1643     // success: done, else retry
1644     jccb(Assembler::equal, L_local_done);
1645     bind(L_decrement_retry);
1646     // Spin and retry if lock is busy.
1647     rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
1648   }
1649   else {
1650     bind(L_decrement_retry);
1651   }
1652 
1653   // rtm_inflated_locking() exit paths come here except for a failed
1654   // inc_om_ref_count() which goes directly to DONE_LABEL.
1655   bind(L_local_done);
1656   if (!HandshakeAfterDeflateIdleMonitors) {
1657     pushf();  // Preserve flags.
1658     // Decrement the ObjectMonitor's ref_count.
1659     lock();
1660     decrementl(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
1661     popf();  // Restore flags so we have the proper ICC.ZF value.
1662   }
1663 
1664   jmp(DONE_LABEL) ;
1665 }
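
For readers less familiar with RTM, the transaction attempt above follows the usual lock-elision shape. A minimal sketch using the compiler intrinsics from <immintrin.h> (the emitted code uses xbegin/xend/xabort directly and wraps this core with retry, profiling and ref_count protection):

#include <immintrin.h>   // requires a compiler/CPU with RTM support (-mrtm)
#include <cstddef>

// Sketch only: try to elide the inflated lock; 'owner_addr' stands in for
// &ObjectMonitor::_owner. Returns true while still inside the transaction.
static bool try_rtm_elide_sketch(void* volatile* owner_addr) {
  if (_xbegin() == _XBEGIN_STARTED) {
    if (*owner_addr == NULL) {
      return true;        // lock appears free: run the critical section transactionally
    }
    _xabort(0);           // owner busy; the real code may xend() instead (UseRTMXendForLockBusy)
  }
  return false;           // abort path: profile, maybe retry, finally CAS _owner
}
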
1666 
1667 #endif //  INCLUDE_RTM_OPT
1668 
1669 // fast_lock and fast_unlock used by C2
1670 
1671 // Because the transitions from emitted code to the runtime
1672 // monitorenter/exit helper stubs are so slow it's critical that
1673 // we inline both the stack-locking fast path and the inflated fast path.
1674 //
1675 // See also: cmpFastLock and cmpFastUnlock.
1676 //
1677 // What follows is a specialized inline transliteration of the code
1678 // in enter() and exit(). If we're concerned about I$ bloat another
1679 // option would be to emit TrySlowEnter and TrySlowExit methods
1680 // at startup-time.  These methods would accept arguments as
1681 // (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
1682 // indications in the icc.ZFlag.  fast_lock and fast_unlock would simply
1683 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
1684 // In practice, however, the # of lock sites is bounded and is usually small.


1870   cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1871   movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
1872   // If we weren't able to swing _owner from NULL to the BasicLock
1873   // then take the slow path.
1874   jccb  (Assembler::notZero, DONE_LABEL);
1875   // update _owner from BasicLock to thread
1876   get_thread (scrReg);                    // beware: clobbers ICCs
1877   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1878   xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
1879 
1880   // If the CAS fails we can either retry or pass control to the slow path.
1881   // We use the latter tactic.
1882   // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1883   // If the CAS was successful ...
1884   //   Self has acquired the lock
1885   //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1886   // Intentional fall-through into DONE_LABEL ...
1887 #else // _LP64
1888   // It's inflated and we use scrReg for ObjectMonitor* in this section.
1889   movq(scrReg, tmpReg);
1890 
1891   // Unconditionally set box->_displaced_header = markWord::unused_mark().
1892   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1893   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
1894 
1895   if (!HandshakeAfterDeflateIdleMonitors) {
1896     // Increment the ObjectMonitor's ref_count for safety or force the
1897     // enter slow path via DONE_LABEL.
1898     // In fast_lock(), scrReg contains the object's mark word which,
1899     // in this case, is the (ObjectMonitor* | monitor_value). Also this
1900     // code uses tmpReg as its temporary register.
1901     inc_om_ref_count(objReg, scrReg /* om_reg */, tmpReg /* tmp_reg */, DONE_LABEL);
1902   }
1903 
1904   xorq(tmpReg, tmpReg);
1905   lock();
1906   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));



1907   // Intentional fall-through into DONE_LABEL ...
1908   // Propagate ICC.ZF from CAS above into DONE_LABEL.
1909 
1910   if (!HandshakeAfterDeflateIdleMonitors) {
1911     pushf();  // Preserve flags.
1912     // Decrement the ObjectMonitor's ref_count.
1913     lock();
1914     decrementl(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
1915     popf();  // Restore flags so we have the proper ICC.ZF value.
1916   }
1917 #endif // _LP64
1918 #if INCLUDE_RTM_OPT
1919   } // use_rtm()
1920 #endif
1921   // DONE_LABEL is a hot target - we'd really like to place it at the
1922   // start of cache line by padding with NOPs.
1923   // See the AMD and Intel software optimization manuals for the
1924   // most efficient "long" NOP encodings.
1925   // Unfortunately none of our alignment mechanisms suffice.
1926   bind(DONE_LABEL);
1927 
1928   // At DONE_LABEL the icc ZFlag is set as follows ...
1929   // fast_unlock uses the same protocol.
1930   // ZFlag == 1 -> Success
1931   // ZFlag == 0 -> Failure - force control through the slow path
1932 }
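
Putting the 64-bit inflated-enter path together, here is a hedged C++ sketch of what the block above computes and how the ZF protocol is consumed. It builds on the ObjectMonitorStub and try_inc_om_ref_count_sketch helpers from the sketch after inc_om_ref_count(); all names and the structure are illustrative, not HotSpot code:

// Sketch of the 64-bit inflated enter fast path (assumes
// !HandshakeAfterDeflateIdleMonitors and AsyncDeflateIdleMonitors):
//   return true  -> ZF == 1, lock acquired inline
//   return false -> ZF == 0, caller branches to the runtime slow path
static bool fast_lock_inflated_sketch(ObjectMonitorStub* om, void* obj, void* self) {
  if (!try_inc_om_ref_count_sketch(om, obj, /*async_deflate*/ true)) {
    return false;                                          // forced to the slow path
  }
  bool acquired =
      __sync_bool_compare_and_swap(&om->owner, (void*)0, self);  // cmpxchg r15_thread
  __sync_fetch_and_sub(&om->ref_count, 1);                 // flags preserved via pushf/popf
  return acquired;                                          // CAS result reaches DONE_LABEL
}
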
1933 
1934 // obj: object to unlock
1935 // box: box address (displaced header location), killed.  Must be EAX.
1936 // tmp: killed, cannot be obj nor box.


1976   }
1977 
1978 #if INCLUDE_RTM_OPT
1979   if (UseRTMForStackLocks && use_rtm) {
1980     assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1981     Label L_regular_unlock;
1982     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
1983     andptr(tmpReg, markWord::biased_lock_mask_in_place);              // look at 3 lock bits
1984     cmpptr(tmpReg, markWord::unlocked_value);                         // bits = 001 unlocked
1985     jccb(Assembler::notEqual, L_regular_unlock);                      // if !HLE RegularLock
1986     xend();                                                           // otherwise end...
1987     jmp(DONE_LABEL);                                                  // ... and we're done
1988     bind(L_regular_unlock);
1989   }
1990 #endif
1991 
1992   cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                   // Examine the displaced header
1993   jcc   (Assembler::zero, DONE_LABEL);                              // 0 indicates recursive stack-lock
1994   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
1995   testptr(tmpReg, markWord::monitor_value);                         // Inflated?
1996   jcc  (Assembler::zero, Stacked);
1997 
1998   // It's inflated.
1999 #if INCLUDE_RTM_OPT
2000   if (use_rtm) {
2001     Label L_regular_inflated_unlock;
2002     int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
2003     movptr(boxReg, Address(tmpReg, owner_offset));
2004     testptr(boxReg, boxReg);
2005     jccb(Assembler::notZero, L_regular_inflated_unlock);
2006     xend();
2007     jmp(DONE_LABEL);
2008     bind(L_regular_inflated_unlock);
2009   }
2010 #endif
2011 
2012   // Despite our balanced locking property we still check that m->_owner == Self
2013   // as java routines or native JNI code called by this thread might
2014   // have released the lock.
2015   // Refer to the comments in synchronizer.cpp for how we might encode extra
2016   // state in _succ so we can avoid fetching EntryList|cxq.
2017   //
2018   // I'd like to add more cases in fast_lock() and fast_unlock() --
2019   // such as recursive enter and exit -- but we have to be wary of
2020   // I$ bloat, T$ effects and BP$ effects.
2021   //
2022   // If there's no contention try a 1-0 exit.  That is, exit without
2023   // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
2024   // we detect and recover from the race that the 1-0 exit admits.
2025   //
2026   // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
2027   // before it STs null into _owner, releasing the lock.  Updates


2049 
2050   bind (Stacked);
2051   // It's not inflated and it's not recursively stack-locked and it's not biased.
2052   // It must be stack-locked.
2053   // Try to reset the header to displaced header.
2054   // The "box" value on the stack is stable, so we can reload
2055   // and be assured we observe the same value as above.
2056   movptr(tmpReg, Address(boxReg, 0));
2057   lock();
2058   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
2059   // Intentional fall-through into DONE_LABEL
2060 
2061   // DONE_LABEL is a hot target - we'd really like to place it at the
2062   // start of cache line by padding with NOPs.
2063   // See the AMD and Intel software optimization manuals for the
2064   // most efficient "long" NOP encodings.
2065   // Unfortunately none of our alignment mechanisms suffice.
2066   bind (CheckSucc);
2067 #else // _LP64
2068   // It's inflated
2069 
2070   if (!HandshakeAfterDeflateIdleMonitors) {
2071     // Increment the ObjectMonitor's ref_count for safety or force the
2072     // exit slow path via DONE_LABEL.
2073     // In fast_unlock(), tmpReg contains the object's mark word which,
2074     // in this case, is the (ObjectMonitor* | monitor_value). Also this
2075     // code uses boxReg as its temporary register.
2076     inc_om_ref_count(objReg, tmpReg /* om_reg */, boxReg /* tmp_reg */, DONE_LABEL);
2077   }
2078 
2079   // Try to avoid passing control into the slow path ...
2080   Label LSuccess, LGoSlowPath;
2081   xorptr(boxReg, boxReg);
2082   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2083   jccb(Assembler::notZero, LGoSlowPath);
2084   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2085   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2086   jccb  (Assembler::notZero, CheckSucc);
2087   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
2088   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2089   jmpb(LSuccess);
2090 


2091   bind  (CheckSucc);
2092 
2093   // The following optional optimization can be elided if necessary
2094   // Effectively: if (succ == null) goto slow path
2095   // The code reduces the window for a race, however,
2096   // and thus benefits performance.
2097   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2098   jccb  (Assembler::zero, LGoSlowPath);
2099 
2100   xorptr(boxReg, boxReg);
2101   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
2102   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2103 
2104   // Memory barrier/fence
2105   // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
2106   // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
2107   // This is faster on Nehalem and AMD Shanghai/Barcelona.
2108   // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
2109   // We might also restructure (ST Owner=0;barrier;LD _Succ) to
2110   // (mov box,0; xchgq box, &m->Owner; LD _succ) .
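
For concreteness, the "dummy locked add of 0 to the top-of-stack" mentioned above is the classic x86 fencing idiom shown below, a sketch using the standard lock()/addl() emitters (not a claim about the exact elided instruction sequence that follows):

  lock(); addl(Address(rsp, 0), 0);   // locked RMW on the stack top acts as a full StoreLoad fence
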


2119   // We need to ensure progress and succession.
2120   // Try to reacquire the lock.
2121   // If that fails then the new owner is responsible for succession and this
2122   // thread needs to take no further action and can exit via the fast path (success).
2123   // If the re-acquire succeeds then pass control into the slow path.
2124   // As implemented, this latter mode is horrible because we generate more
2125   // coherence traffic on the lock *and* artificially extend the critical section
2126   // length by virtue of passing control into the slow path.
2127 
2128   // box is really RAX -- the following CMPXCHG depends on that binding
2129   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
2130   lock();
2131   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2132   // There's no successor so we tried to regrab the lock.
2133   // If that didn't work, then another thread grabbed the
2134   // lock so we're done (and exit was a success).
2135   jccb  (Assembler::notEqual, LSuccess);
2136   // Intentional fall-through into slow path
2137 
2138   bind  (LGoSlowPath);
2139   if (!HandshakeAfterDeflateIdleMonitors) {
2140     lock();
2141     decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
2142   }
2143   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2144   jmpb  (DONE_LABEL);
2145 
2146   bind  (LSuccess);
2147   if (!HandshakeAfterDeflateIdleMonitors) {
2148     lock();
2149     decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
2150   }
2151   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
2152   jmpb  (DONE_LABEL);
2153 
2154   bind  (Stacked);
2155   movptr(tmpReg, Address (boxReg, 0));      // re-fetch
2156   lock();
2157   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
2158 
2159 #endif
2160   bind(DONE_LABEL);
2161 }
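
Finally, the inflated-exit logic above (recursion check, cxq/EntryList check, 1-0 exit, and the CheckSucc race-narrowing) reads as follows in plain C++. This is an illustrative sketch only: it ignores the ref_count protection, uses an ad-hoc struct whose field names mirror the ObjectMonitor fields referenced above, and stands in a hypothetical full_fence() for the locked add:

#include <cstdint>
#include <cstddef>

struct OMExitSketch {
  void* volatile owner;
  void* volatile cxq;
  void* volatile EntryList;
  void* volatile succ;
  intptr_t       recursions;
};

static void full_fence() { __sync_synchronize(); }   // stands in for the locked add

// true  -> ZF == 1, inline exit succeeded
// false -> ZF == 0, caller takes the slow path
static bool fast_unlock_inflated_sketch(OMExitSketch* m, void* self) {
  if (m->recursions != 0) return false;              // recursive exit: slow path
  if (m->cxq == NULL && m->EntryList == NULL) {
    m->owner = NULL;                                 // nobody queued: 1-0 exit
    return true;
  }
  if (m->succ == NULL) return false;                 // no heir presumptive: slow path
  m->owner = NULL;                                   // release the lock
  full_fence();                                      // ST owner; MEMBAR; LD succ
  if (m->succ != NULL) return true;                  // successor will ensure progress
  // No successor after all: try to re-acquire; if another thread already took
  // the lock it inherits the succession duty and our exit still counts as success.
  return !__sync_bool_compare_and_swap(&m->owner, (void*)0, self);
}
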
2162 #endif // COMPILER2
2163 
2164 void MacroAssembler::c2bool(Register x) {
2165   // implements x == 0 ? 0 : 1
2166   // note: must only look at least-significant byte of x
2167   //       since C-style booleans are stored in one byte
2168   //       only! (was bug)
2169   andl(x, 0xFF);
2170   setb(Assembler::notZero, x);




1268   return null_check_offset;
1269 }
1270 
1271 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1272   assert(UseBiasedLocking, "why call this otherwise?");
1273 
1274   // Check for biased locking unlock case, which is a no-op
1275   // Note: we do not have to check the thread ID for two reasons.
1276   // First, the interpreter checks for IllegalMonitorStateException at
1277   // a higher level. Second, if the bias was revoked while we held the
1278   // lock, the object could not be rebiased toward another thread, so
1279   // the bias bit would be clear.
1280   movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1281   andptr(temp_reg, markWord::biased_lock_mask_in_place);
1282   cmpptr(temp_reg, markWord::biased_lock_pattern);
1283   jcc(Assembler::equal, done);
1284 }
1285 
1286 #ifdef COMPILER2
1287 




















































1288 #if INCLUDE_RTM_OPT
1289 
1290 // Update rtm_counters based on abort status
1291 // input: abort_status
1292 //        rtm_counters (RTMLockingCounters*)
1293 // flags are killed
1294 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
1295 
1296   atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
1297   if (PrintPreciseRTMLockingStatistics) {
1298     for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
1299       Label check_abort;
1300       testl(abort_status, (1<<i));
1301       jccb(Assembler::equal, check_abort);
1302       atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
1303       bind(check_abort);
1304     }
1305   }
1306 }
1307 


1501   bind(L_decrement_retry);
1502   if (RTMRetryCount > 0) {
1503     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1504     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1505   }
1506 }
1507 
1508 // Use RTM for inflating locks
1509 // inputs: objReg (object to lock)
1510 //         boxReg (on-stack box address (displaced header location) - KILLED)
1511 //         tmpReg (ObjectMonitor address + markWord::monitor_value)
1512 void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
1513                                           Register scrReg, Register retry_on_busy_count_Reg,
1514                                           Register retry_on_abort_count_Reg,
1515                                           RTMLockingCounters* rtm_counters,
1516                                           Metadata* method_data, bool profile_rtm,
1517                                           Label& DONE_LABEL) {
1518   assert(UseRTMLocking, "why call this otherwise?");
1519   assert(tmpReg == rax, "");
1520   assert(scrReg == rdx, "");
1521   Label L_rtm_retry, L_decrement_retry, L_on_abort;
1522   int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
1523 
1524   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1525   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));










1526   movptr(boxReg, tmpReg); // Save ObjectMonitor address
1527 
1528   if (RTMRetryCount > 0) {
1529     movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
1530     movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1531     bind(L_rtm_retry);
1532   }
1533   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1534     Label L_noincrement;
1535     if (RTMTotalCountIncrRate > 1) {
1536       // tmpReg, scrReg and flags are killed
1537       branch_on_random_using_rdtsc(tmpReg, scrReg, RTMTotalCountIncrRate, L_noincrement);
1538     }
1539     assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1540     atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
1541     bind(L_noincrement);
1542   }
1543   xbegin(L_on_abort);
1544   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
1545   movptr(tmpReg, Address(tmpReg, owner_offset));
1546   testptr(tmpReg, tmpReg);
1547   jcc(Assembler::zero, DONE_LABEL);
1548   if (UseRTMXendForLockBusy) {
1549     xend();
1550     jmp(L_decrement_retry);
1551   }
1552   else {
1553     xabort(0);
1554   }
1555   bind(L_on_abort);
1556   Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
1557   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1558     rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
1559   }
1560   if (RTMRetryCount > 0) {
1561     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1562     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1563   }
1564 
1565   movptr(tmpReg, Address(boxReg, owner_offset)) ;
1566   testptr(tmpReg, tmpReg) ;
1567   jccb(Assembler::notZero, L_decrement_retry) ;
1568 
1569   // Appears unlocked - try to swing _owner from null to non-null.
1570   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1571 #ifdef _LP64
1572   Register threadReg = r15_thread;
1573 #else
1574   get_thread(scrReg);
1575   Register threadReg = scrReg;
1576 #endif
1577   lock();
1578   cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
1579 
1580   if (RTMRetryCount > 0) {
1581     // success: done, else retry
1582     jccb(Assembler::equal, DONE_LABEL) ;
1583     bind(L_decrement_retry);
1584     // Spin and retry if lock is busy.
1585     rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
1586   }
1587   else {
1588     bind(L_decrement_retry);
1589   }













1590 }
1591 
1592 #endif //  INCLUDE_RTM_OPT
1593 
1594 // fast_lock and fast_unlock used by C2
1595 
1596 // Because the transitions from emitted code to the runtime
1597 // monitorenter/exit helper stubs are so slow it's critical that
1598 // we inline both the stack-locking fast path and the inflated fast path.
1599 //
1600 // See also: cmpFastLock and cmpFastUnlock.
1601 //
1602 // What follows is a specialized inline transliteration of the code
1603 // in enter() and exit(). If we're concerned about I$ bloat another
1604 // option would be to emit TrySlowEnter and TrySlowExit methods
1605 // at startup-time.  These methods would accept arguments as
1606 // (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
1607 // indications in the icc.ZFlag.  fast_lock and fast_unlock would simply
1608 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
1609 // In practice, however, the # of lock sites is bounded and is usually small.


1795   cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1796   movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
1797   // If we weren't able to swing _owner from NULL to the BasicLock
1798   // then take the slow path.
1799   jccb  (Assembler::notZero, DONE_LABEL);
1800   // update _owner from BasicLock to thread
1801   get_thread (scrReg);                    // beware: clobbers ICCs
1802   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1803   xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
1804 
1805   // If the CAS fails we can either retry or pass control to the slow path.
1806   // We use the latter tactic.
1807   // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1808   // If the CAS was successful ...
1809   //   Self has acquired the lock
1810   //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1811   // Intentional fall-through into DONE_LABEL ...
1812 #else // _LP64
1813   // It's inflated and we use scrReg for ObjectMonitor* in this section.
1814   movq(scrReg, tmpReg);














1815   xorq(tmpReg, tmpReg);
1816   lock();
1817   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1818   // Unconditionally set box->_displaced_header = markWord::unused_mark().
1819   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1820   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
1821   // Intentional fall-through into DONE_LABEL ...
1822   // Propagate ICC.ZF from CAS above into DONE_LABEL.








1823 #endif // _LP64
1824 #if INCLUDE_RTM_OPT
1825   } // use_rtm()
1826 #endif
1827   // DONE_LABEL is a hot target - we'd really like to place it at the
1828   // start of cache line by padding with NOPs.
1829   // See the AMD and Intel software optimization manuals for the
1830   // most efficient "long" NOP encodings.
1831   // Unfortunately none of our alignment mechanisms suffice.
1832   bind(DONE_LABEL);
1833 
1834   // At DONE_LABEL the icc ZFlag is set as follows ...
1835   // fast_unlock uses the same protocol.
1836   // ZFlag == 1 -> Success
1837   // ZFlag == 0 -> Failure - force control through the slow path
1838 }
1839 
1840 // obj: object to unlock
1841 // box: box address (displaced header location), killed.  Must be EAX.
1842 // tmp: killed, cannot be obj nor box.


1882   }
1883 
1884 #if INCLUDE_RTM_OPT
1885   if (UseRTMForStackLocks && use_rtm) {
1886     assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1887     Label L_regular_unlock;
1888     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
1889     andptr(tmpReg, markWord::biased_lock_mask_in_place);              // look at 3 lock bits
1890     cmpptr(tmpReg, markWord::unlocked_value);                         // bits = 001 unlocked
1891     jccb(Assembler::notEqual, L_regular_unlock);                      // if !HLE RegularLock
1892     xend();                                                           // otherwise end...
1893     jmp(DONE_LABEL);                                                  // ... and we're done
1894     bind(L_regular_unlock);
1895   }
1896 #endif
1897 
1898   cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                   // Examine the displaced header
1899   jcc   (Assembler::zero, DONE_LABEL);                              // 0 indicates recursive stack-lock
1900   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
1901   testptr(tmpReg, markWord::monitor_value);                         // Inflated?
1902   jccb  (Assembler::zero, Stacked);
1903 
1904   // It's inflated.
1905 #if INCLUDE_RTM_OPT
1906   if (use_rtm) {
1907     Label L_regular_inflated_unlock;
1908     int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
1909     movptr(boxReg, Address(tmpReg, owner_offset));
1910     testptr(boxReg, boxReg);
1911     jccb(Assembler::notZero, L_regular_inflated_unlock);
1912     xend();
1913     jmpb(DONE_LABEL);
1914     bind(L_regular_inflated_unlock);
1915   }
1916 #endif
1917 
1918   // Despite our balanced locking property we still check that m->_owner == Self
1919   // as java routines or native JNI code called by this thread might
1920   // have released the lock.
1921   // Refer to the comments in synchronizer.cpp for how we might encode extra
1922   // state in _succ so we can avoid fetching EntryList|cxq.
1923   //
1924   // I'd like to add more cases in fast_lock() and fast_unlock() --
1925   // such as recursive enter and exit -- but we have to be wary of
1926   // I$ bloat, T$ effects and BP$ effects.
1927   //
1928   // If there's no contention try a 1-0 exit.  That is, exit without
1929   // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
1930   // we detect and recover from the race that the 1-0 exit admits.
1931   //
1932   // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
1933   // before it STs null into _owner, releasing the lock.  Updates


1955 
1956   bind (Stacked);
1957   // It's not inflated and it's not recursively stack-locked and it's not biased.
1958   // It must be stack-locked.
1959   // Try to reset the header to displaced header.
1960   // The "box" value on the stack is stable, so we can reload
1961   // and be assured we observe the same value as above.
1962   movptr(tmpReg, Address(boxReg, 0));
1963   lock();
1964   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
1965   // Intentional fall-through into DONE_LABEL
1966 
1967   // DONE_LABEL is a hot target - we'd really like to place it at the
1968   // start of cache line by padding with NOPs.
1969   // See the AMD and Intel software optimization manuals for the
1970   // most efficient "long" NOP encodings.
1971   // Unfortunately none of our alignment mechanisms suffice.
1972   bind (CheckSucc);
1973 #else // _LP64
1974   // It's inflated












1975   xorptr(boxReg, boxReg);
1976   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
1977   jccb  (Assembler::notZero, DONE_LABEL);
1978   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
1979   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
1980   jccb  (Assembler::notZero, CheckSucc);
1981   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1982   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
1983   jmpb  (DONE_LABEL);
1984 
1985   // Try to avoid passing control into the slow_path ...
1986   Label LSuccess, LGoSlowPath ;
1987   bind  (CheckSucc);
1988 
1989   // The following optional optimization can be elided if necessary
1990   // Effectively: if (succ == null) goto slow path
1991   // The code reduces the window for a race, however,
1992   // and thus benefits performance.
1993   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
1994   jccb  (Assembler::zero, LGoSlowPath);
1995 
1996   xorptr(boxReg, boxReg);
1997   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1998   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
1999 
2000   // Memory barrier/fence
2001   // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
2002   // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
2003   // This is faster on Nehalem and AMD Shanghai/Barcelona.
2004   // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
2005   // We might also restructure (ST Owner=0;barrier;LD _Succ) to
2006   // (mov box,0; xchgq box, &m->Owner; LD _succ) .


2015   // We need to ensure progress and succession.
2016   // Try to reacquire the lock.
2017   // If that fails then the new owner is responsible for succession and this
2018   // thread needs to take no further action and can exit via the fast path (success).
2019   // If the re-acquire succeeds then pass control into the slow path.
2020   // As implemented, this latter mode is horrible because we generate more
2021   // coherence traffic on the lock *and* artificially extend the critical section
2022   // length by virtue of passing control into the slow path.
2023 
2024   // box is really RAX -- the following CMPXCHG depends on that binding
2025   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
2026   lock();
2027   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2028   // There's no successor so we tried to regrab the lock.
2029   // If that didn't work, then another thread grabbed the
2030   // lock so we're done (and exit was a success).
2031   jccb  (Assembler::notEqual, LSuccess);
2032   // Intentional fall-through into slow path
2033 
2034   bind  (LGoSlowPath);




2035   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2036   jmpb  (DONE_LABEL);
2037 
2038   bind  (LSuccess);




2039   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
2040   jmpb  (DONE_LABEL);
2041 
2042   bind  (Stacked);
2043   movptr(tmpReg, Address (boxReg, 0));      // re-fetch
2044   lock();
2045   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
2046 
2047 #endif
2048   bind(DONE_LABEL);
2049 }
2050 #endif // COMPILER2
2051 
2052 void MacroAssembler::c2bool(Register x) {
2053   // implements x == 0 ? 0 : 1
2054   // note: must only look at least-significant byte of x
2055   //       since C-style booleans are stored in one byte
2056   //       only! (was bug)
2057   andl(x, 0xFF);
2058   setb(Assembler::notZero, x);

