src/hotspot/cpu/x86/macroAssembler_x86.cpp

rev 57595 : v2.09a with 8235795, 8235931 and 8236035 extracted; rebased to jdk-14+28; merge with 8236035.patch.cr1; merge with 8235795.patch.cr1; merge with 8236035.patch.cr2; merge with 8235795.patch.cr2; merge with 8235795.patch.cr3.

old/src/hotspot/cpu/x86/macroAssembler_x86.cpp

1279   return null_check_offset;
1280 }
1281 
1282 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1283   assert(UseBiasedLocking, "why call this otherwise?");
1284 
1285   // Check for biased locking unlock case, which is a no-op
1286   // Note: we do not have to check the thread ID for two reasons.
1287   // First, the interpreter checks for IllegalMonitorStateException at
1288   // a higher level. Second, if the bias was revoked while we held the
1289   // lock, the object could not be rebiased toward another thread, so
1290   // the bias bit would be clear.
1291   movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1292   andptr(temp_reg, markWord::biased_lock_mask_in_place);
1293   cmpptr(temp_reg, markWord::biased_lock_pattern);
1294   jcc(Assembler::equal, done);
1295 }
1296 
1297 #ifdef COMPILER2
1298 
1299 #if INCLUDE_RTM_OPT
1300 
1301 // Update rtm_counters based on abort status
1302 // input: abort_status
1303 //        rtm_counters (RTMLockingCounters*)
1304 // flags are killed
1305 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
1306 
1307   atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
1308   if (PrintPreciseRTMLockingStatistics) {
1309     for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
1310       Label check_abort;
1311       testl(abort_status, (1<<i));
1312       jccb(Assembler::equal, check_abort);
1313       atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
1314       bind(check_abort);
1315     }
1316   }
1317 }
1318 


1512   bind(L_decrement_retry);
1513   if (RTMRetryCount > 0) {
1514     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1515     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1516   }
1517 }
1518 
1519 // Use RTM for inflated locks
1520 // inputs: objReg (object to lock)
1521 //         boxReg (on-stack box address (displaced header location) - KILLED)
1522 //         tmpReg (ObjectMonitor address + markWord::monitor_value)
1523 void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
1524                                           Register scrReg, Register retry_on_busy_count_Reg,
1525                                           Register retry_on_abort_count_Reg,
1526                                           RTMLockingCounters* rtm_counters,
1527                                           Metadata* method_data, bool profile_rtm,
1528                                           Label& DONE_LABEL) {
1529   assert(UseRTMLocking, "why call this otherwise?");
1530   assert(tmpReg == rax, "");
1531   assert(scrReg == rdx, "");
1532   Label L_rtm_retry, L_decrement_retry, L_on_abort;
1533   int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
1534 
1535   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1536   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
1537   movptr(boxReg, tmpReg); // Save ObjectMonitor address
1538 
1539   if (RTMRetryCount > 0) {
1540     movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
1541     movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1542     bind(L_rtm_retry);
1543   }
1544   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1545     Label L_noincrement;
1546     if (RTMTotalCountIncrRate > 1) {
1547       // tmpReg, scrReg and flags are killed
1548       branch_on_random_using_rdtsc(tmpReg, scrReg, RTMTotalCountIncrRate, L_noincrement);
1549     }
1550     assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1551     atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
1552     bind(L_noincrement);
1553   }
1554   xbegin(L_on_abort);
1555   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
1556   movptr(tmpReg, Address(tmpReg, owner_offset));
1557   testptr(tmpReg, tmpReg);
1558   jcc(Assembler::zero, DONE_LABEL);
1559   if (UseRTMXendForLockBusy) {
1560     xend();
1561     jmp(L_decrement_retry);
1562   }
1563   else {
1564     xabort(0);
1565   }
1566   bind(L_on_abort);
1567   Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
1568   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1569     rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
1570   }
1571   if (RTMRetryCount > 0) {
1572     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1573     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1574   }
1575 
1576   movptr(tmpReg, Address(boxReg, owner_offset)) ;
1577   testptr(tmpReg, tmpReg) ;
1578   jccb(Assembler::notZero, L_decrement_retry) ;
1579 
1580   // Appears unlocked - try to swing _owner from null to non-null.
1581   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1582 #ifdef _LP64
1583   Register threadReg = r15_thread;
1584 #else
1585   get_thread(scrReg);
1586   Register threadReg = scrReg;
1587 #endif
1588   lock();
1589   cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
1590 
1591   if (RTMRetryCount > 0) {
1592     // success done else retry
1593     jccb(Assembler::equal, DONE_LABEL) ;
1594     bind(L_decrement_retry);
1595     // Spin and retry if lock is busy.
1596     rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
1597   }
1598   else {
1599     bind(L_decrement_retry);
1600   }
1601 }
1602 
1603 #endif //  INCLUDE_RTM_OPT
1604 
1605 // fast_lock and fast_unlock used by C2
1606 
1607 // Because the transitions from emitted code to the runtime
1608 // monitorenter/exit helper stubs are so slow it's critical that
1609 // we inline both the stack-locking fast path and the inflated fast path.
1610 //
1611 // See also: cmpFastLock and cmpFastUnlock.
1612 //
1613 // What follows is a specialized inline transliteration of the code
1614 // in enter() and exit(). If we're concerned about I$ bloat another
1615 // option would be to emit TrySlowEnter and TrySlowExit methods
1616 // at startup-time.  These methods would accept arguments as
1617 // (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
1618 // indications in the icc.ZFlag.  fast_lock and fast_unlock would simply
1619 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
1620 // In practice, however, the # of lock sites is bounded and is usually small.


1806   cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1807   movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
1808   // If we weren't able to swing _owner from NULL to the BasicLock
1809   // then take the slow path.
1810   jccb  (Assembler::notZero, DONE_LABEL);
1811   // update _owner from BasicLock to thread
1812   get_thread (scrReg);                    // beware: clobbers ICCs
1813   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1814   xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
1815 
1816   // If the CAS fails we can either retry or pass control to the slow path.
1817   // We use the latter tactic.
1818   // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1819   // If the CAS was successful ...
1820   //   Self has acquired the lock
1821   //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1822   // Intentional fall-through into DONE_LABEL ...
1823 #else // _LP64
1824   // It's inflated and we use scrReg for ObjectMonitor* in this section.
1825   movq(scrReg, tmpReg);
1826   xorq(tmpReg, tmpReg);
1827   lock();
1828   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1829   // Unconditionally set box->_displaced_header = markWord::unused_mark().
1830   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1831   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
1832   // Intentional fall-through into DONE_LABEL ...
1833   // Propagate ICC.ZF from CAS above into DONE_LABEL.








1834 #endif // _LP64
1835 #if INCLUDE_RTM_OPT
1836   } // use_rtm()
1837 #endif
1838   // DONE_LABEL is a hot target - we'd really like to place it at the
1839   // start of cache line by padding with NOPs.
1840   // See the AMD and Intel software optimization manuals for the
1841   // most efficient "long" NOP encodings.
1842   // Unfortunately none of our alignment mechanisms suffice.
1843   bind(DONE_LABEL);
1844 
1845   // At DONE_LABEL the icc ZFlag is set as follows ...
1846   // fast_unlock uses the same protocol.
1847   // ZFlag == 1 -> Success
1848   // ZFlag == 0 -> Failure - force control through the slow path
1849 }
1850 
1851 // obj: object to unlock
1852 // box: box address (displaced header location), killed.  Must be EAX.
1853 // tmp: killed, cannot be obj nor box.


1893   }
1894 
1895 #if INCLUDE_RTM_OPT
1896   if (UseRTMForStackLocks && use_rtm) {
1897     assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1898     Label L_regular_unlock;
1899     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
1900     andptr(tmpReg, markWord::biased_lock_mask_in_place);              // look at 3 lock bits
1901     cmpptr(tmpReg, markWord::unlocked_value);                         // bits = 001 unlocked
1902     jccb(Assembler::notEqual, L_regular_unlock);                      // if !HLE RegularLock
1903     xend();                                                           // otherwise end...
1904     jmp(DONE_LABEL);                                                  // ... and we're done
1905     bind(L_regular_unlock);
1906   }
1907 #endif
1908 
1909   cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                   // Examine the displaced header
1910   jcc   (Assembler::zero, DONE_LABEL);                              // 0 indicates recursive stack-lock
1911   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
1912   testptr(tmpReg, markWord::monitor_value);                         // Inflated?
1913   jccb  (Assembler::zero, Stacked);
1914 
1915   // It's inflated.
1916 #if INCLUDE_RTM_OPT
1917   if (use_rtm) {
1918     Label L_regular_inflated_unlock;
1919     int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
1920     movptr(boxReg, Address(tmpReg, owner_offset));
1921     testptr(boxReg, boxReg);
1922     jccb(Assembler::notZero, L_regular_inflated_unlock);
1923     xend();
1924     jmpb(DONE_LABEL);
1925     bind(L_regular_inflated_unlock);
1926   }
1927 #endif
1928 
1929   // Despite our balanced locking property we still check that m->_owner == Self
1930   // as java routines or native JNI code called by this thread might
1931   // have released the lock.
1932   // Refer to the comments in synchronizer.cpp for how we might encode extra
1933   // state in _succ so we can avoid fetching EntryList|cxq.
1934   //
1935   // I'd like to add more cases in fast_lock() and fast_unlock() --
1936   // such as recursive enter and exit -- but we have to be wary of
1937   // I$ bloat, T$ effects and BP$ effects.
1938   //
1939   // If there's no contention try a 1-0 exit.  That is, exit without
1940   // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
1941   // we detect and recover from the race that the 1-0 exit admits.
1942   //
1943   // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
1944   // before it STs null into _owner, releasing the lock.  Updates


1966 
1967   bind (Stacked);
1968   // It's not inflated and it's not recursively stack-locked and it's not biased.
1969   // It must be stack-locked.
1970   // Try to reset the header to displaced header.
1971   // The "box" value on the stack is stable, so we can reload
1972   // and be assured we observe the same value as above.
1973   movptr(tmpReg, Address(boxReg, 0));
1974   lock();
1975   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
1976   // Intentional fall-through into DONE_LABEL
1977 
1978   // DONE_LABEL is a hot target - we'd really like to place it at the
1979   // start of cache line by padding with NOPs.
1980   // See the AMD and Intel software optimization manuals for the
1981   // most efficient "long" NOP encodings.
1982   // Unfortunately none of our alignment mechanisms suffice.
1983   bind (CheckSucc);
1984 #else // _LP64
1985   // It's inflated
1986   xorptr(boxReg, boxReg);
1987   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
1988   jccb  (Assembler::notZero, DONE_LABEL);
1989   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
1990   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
1991   jccb  (Assembler::notZero, CheckSucc);
1992   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1993   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
1994   jmpb  (DONE_LABEL);
1995 
1996   // Try to avoid passing control into the slow_path ...
1997   Label LSuccess, LGoSlowPath ;
1998   bind  (CheckSucc);
1999 
2000   // The following optional optimization can be elided if necessary
2001   // Effectively: if (succ == null) goto slow path
2002   // The code reduces the window for a race, however,
2003   // and thus benefits performance.
2004   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2005   jccb  (Assembler::zero, LGoSlowPath);
2006 
2007   xorptr(boxReg, boxReg);
2008   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
2009   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2010 
2011   // Memory barrier/fence
2012   // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
2013   // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
2014   // This is faster on Nehalem and AMD Shanghai/Barcelona.
2015   // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
2016   // We might also restructure (ST Owner=0;barrier;LD _Succ) to
2017   // (mov box,0; xchgq box, &m->Owner; LD _succ) .


2026   // We need to ensure progress and succession.
2027   // Try to reacquire the lock.
2028   // If that fails then the new owner is responsible for succession and this
2029   // thread needs to take no further action and can exit via the fast path (success).
2030   // If the re-acquire succeeds then pass control into the slow path.
2031   // As implemented, this latter mode is horrible because we generate more
2032   // coherence traffic on the lock *and* artificially extend the critical section
2033   // length by virtue of passing control into the slow path.
2034 
2035   // box is really RAX -- the following CMPXCHG depends on that binding
2036   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
2037   lock();
2038   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2039   // There's no successor so we tried to regrab the lock.
2040   // If that didn't work, then another thread grabbed the
2041   // lock so we're done (and exit was a success).
2042   jccb  (Assembler::notEqual, LSuccess);
2043   // Intentional fall-through into slow path
2044 
2045   bind  (LGoSlowPath);




2046   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2047   jmpb  (DONE_LABEL);
2048 
2049   bind  (LSuccess);




2050   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
2051   jmpb  (DONE_LABEL);
2052 
2053   bind  (Stacked);
2054   movptr(tmpReg, Address (boxReg, 0));      // re-fetch
2055   lock();
2056   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
2057 
2058 #endif
2059   bind(DONE_LABEL);
2060 }
2061 #endif // COMPILER2
2062 
2063 void MacroAssembler::c2bool(Register x) {
2064   // implements x == 0 ? 0 : 1
2065   // note: must only look at least-significant byte of x
2066   //       since C-style booleans are stored in one byte
2067   //       only! (was bug)
2068   andl(x, 0xFF);
2069   setb(Assembler::notZero, x);

new/src/hotspot/cpu/x86/macroAssembler_x86.cpp

1279   return null_check_offset;
1280 }
1281 
1282 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1283   assert(UseBiasedLocking, "why call this otherwise?");
1284 
1285   // Check for biased locking unlock case, which is a no-op
1286   // Note: we do not have to check the thread ID for two reasons.
1287   // First, the interpreter checks for IllegalMonitorStateException at
1288   // a higher level. Second, if the bias was revoked while we held the
1289   // lock, the object could not be rebiased toward another thread, so
1290   // the bias bit would be clear.
1291   movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1292   andptr(temp_reg, markWord::biased_lock_mask_in_place);
1293   cmpptr(temp_reg, markWord::biased_lock_pattern);
1294   jcc(Assembler::equal, done);
1295 }
1296 
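As an aside, the andptr/cmpptr/jcc sequence in biased_locking_exit() above amounts to a single mask-and-compare on the low bits of the mark word. A minimal stand-alone C++ sketch of that test (the constants are assumptions modeled on the usual markWord bit layout, not taken from this file):

  #include <cstdint>

  // Assumed values modeled on the usual markWord layout: the low three bits are the
  // lock bits and 0b101 is the biased pattern (biased_lock bit | unlocked_value).
  static const uintptr_t biased_lock_mask_in_place = 0x7;
  static const uintptr_t biased_lock_pattern       = 0x5;

  // Mirrors the andptr/cmpptr/jcc(equal, done) sequence: when this returns true the
  // emitted code branches to 'done' and the unlock is a no-op.
  static bool biased_unlock_is_noop(uintptr_t mark_word) {
    return (mark_word & biased_lock_mask_in_place) == biased_lock_pattern;
  }
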
1297 #ifdef COMPILER2
1298 
1299 // Increment the ObjectMonitor's ref_count for safety or force a branch
1300 // to 'done' with ICC.ZF=0 to indicate failure/take the slow path.
1301 void MacroAssembler::inc_om_ref_count(Register obj_reg, Register om_reg, Register tmp_reg, Label& done) {
1302   atomic_incl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
1303 
1304   Label LGoSlowPath;
1305   if (AsyncDeflateIdleMonitors) {
1306     // Race here if monitor is not owned! The above ref_count bump
1307     // will cause subsequent async deflation to skip it. However,
1308     // previous or concurrent async deflation is a race.
1309 
1310     // First check: if the owner field == DEFLATER_MARKER:
1311     movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1312     // DEFLATER_MARKER == reinterpret_cast<void*>(-1), so the compiler
1313     // won't accept the define here; compare against -1 directly:
1314     cmpptr(tmp_reg, -1);
1315     // If marked for async deflation, then take the slow path. This is a
1316     // simpler check than what ObjectMonitorHandle::save_om_ptr() does
1317     // so ObjectMonitor::install_displaced_markword_in_object() doesn't
1318     // have to be implemented in macro assembler.
1319     jccb(Assembler::equal, LGoSlowPath);
1320 
1321     // Second check: if ref_count field <= 0:
1322     movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
1323     cmpptr(tmp_reg, 0);
1324     // If async deflation is in the process of bailing out, but has not
1325     // yet restored the ref_count field, then we take the slow path. We
1326     // want a stable ref_count value for the fast path.
1327     jccb(Assembler::lessEqual, LGoSlowPath);
1328 
1329     // Final check: if object field == obj_reg:
1330     cmpptr(obj_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(object)));
1331     // If the ObjectMonitor has been deflated and recycled, then take
1332     // the slow path.
1333     jccb(Assembler::notEqual, LGoSlowPath);
1334   }
1335 
1336   Label LRetToCaller;
1337   // We leave the ref_count incremented to protect the caller's code
1338   // paths against async deflation.
1339   jmpb(LRetToCaller);
1340 
1341   bind(LGoSlowPath);
1342   lock();
1343   decrementl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
1344   // Jump to 'done' with ICC.ZF=0 to indicate failure/take the slow path.
1345   orl(tmp_reg, 1);
1346   jmp(done);
1347 
1348   bind(LRetToCaller);
1349 }
1350 
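For readers following the async deflation changes, the guard emitted by inc_om_ref_count() can be summarized in plain C++. Everything below is an illustrative sketch: MonitorSketch, its field types, and guard_against_async_deflation() are stand-ins for ObjectMonitor, not code from this patch.

  #include <atomic>
  #include <cstdint>

  struct MonitorSketch {                 // hypothetical stand-in for ObjectMonitor
    std::atomic<void*>    owner;
    std::atomic<intptr_t> ref_count;
    std::atomic<void*>    object;
  };

  static void* const DEFLATER_MARKER = reinterpret_cast<void*>(intptr_t(-1));

  // Returns true when the fast path may proceed with ref_count left incremented;
  // false means "take the slow path" (the emitted code then decrements ref_count
  // again and forces ICC.ZF=0 before jumping to 'done').
  static bool guard_against_async_deflation(MonitorSketch* om, void* obj) {
    om->ref_count.fetch_add(1);                             // block later deflation
    if (om->owner.load() == DEFLATER_MARKER) return false;  // already being deflated
    if (om->ref_count.load() <= 0)           return false;  // deflation bailing out
    if (om->object.load() != obj)            return false;  // deflated and recycled
    return true;
  }
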
1351 #if INCLUDE_RTM_OPT
1352 
1353 // Update rtm_counters based on abort status
1354 // input: abort_status
1355 //        rtm_counters (RTMLockingCounters*)
1356 // flags are killed
1357 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
1358 
1359   atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
1360   if (PrintPreciseRTMLockingStatistics) {
1361     for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
1362       Label check_abort;
1363       testl(abort_status, (1<<i));
1364       jccb(Assembler::equal, check_abort);
1365       atomic_incptr(Address(rtm_counters, RTMLockingCounters::abortX_count_offset() + (i * sizeof(uintx))));
1366       bind(check_abort);
1367     }
1368   }
1369 }
1370 
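The loop above unrolls into one testl/jccb/increment triple per abort-status bit. What it computes, as a stand-alone C++ sketch (ABORT_STATUS_LIMIT and the counter arguments are assumptions standing in for RTMLockingCounters; abort_status is the RAX value reported on an RTM abort):

  #include <cstdint>

  enum { ABORT_STATUS_LIMIT = 6 };   // assumed: x86 RTM reports six abort-reason bits

  static void rtm_counters_update_sketch(uint32_t abort_status,
                                         uint64_t* abort_count,
                                         uint64_t abortX_count[ABORT_STATUS_LIMIT],
                                         bool print_precise_stats) {
    ++*abort_count;                                  // total abort counter
    if (print_precise_stats) {
      for (int i = 0; i < ABORT_STATUS_LIMIT; i++) {
        if (abort_status & (1u << i)) {              // the emitted testl/jccb(equal) pair
          ++abortX_count[i];                         // per-reason abort counter
        }
      }
    }
  }
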


1564   bind(L_decrement_retry);
1565   if (RTMRetryCount > 0) {
1566     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1567     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1568   }
1569 }
1570 
1571 // Use RTM for inflated locks
1572 // inputs: objReg (object to lock)
1573 //         boxReg (on-stack box address (displaced header location) - KILLED)
1574 //         tmpReg (ObjectMonitor address + markWord::monitor_value)
1575 void MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, Register tmpReg,
1576                                           Register scrReg, Register retry_on_busy_count_Reg,
1577                                           Register retry_on_abort_count_Reg,
1578                                           RTMLockingCounters* rtm_counters,
1579                                           Metadata* method_data, bool profile_rtm,
1580                                           Label& DONE_LABEL) {
1581   assert(UseRTMLocking, "why call this otherwise?");
1582   assert(tmpReg == rax, "");
1583   assert(scrReg == rdx, "");
1584   Label L_rtm_retry, L_decrement_retry, L_on_abort, L_local_done;
1585   int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
1586 
1587   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1588   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
1589 
1590   if (!HandshakeAfterDeflateIdleMonitors) {
1591     // Increment the ObjectMonitor's ref_count for safety or force the
1592     // enter slow path via DONE_LABEL.
1593     // In rtm_inflated_locking(), initially tmpReg contains the object's
1594     // mark word which, in this case, is the (ObjectMonitor* | monitor_value).
1595     // Also this code uses scrReg as its temporary register.
1596     inc_om_ref_count(objReg, tmpReg /* om_reg */, scrReg /* tmp_reg */, DONE_LABEL);
1597   }
1598 
1599   movptr(boxReg, tmpReg); // Save ObjectMonitor address
1600 
1601   if (RTMRetryCount > 0) {
1602     movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
1603     movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1604     bind(L_rtm_retry);
1605   }
1606   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1607     Label L_noincrement;
1608     if (RTMTotalCountIncrRate > 1) {
1609       // tmpReg, scrReg and flags are killed
1610       branch_on_random_using_rdtsc(tmpReg, scrReg, RTMTotalCountIncrRate, L_noincrement);
1611     }
1612     assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1613     atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
1614     bind(L_noincrement);
1615   }
1616   xbegin(L_on_abort);
1617   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
1618   movptr(tmpReg, Address(tmpReg, owner_offset));
1619   testptr(tmpReg, tmpReg);
1620   jcc(Assembler::zero, L_local_done);
1621   if (UseRTMXendForLockBusy) {
1622     xend();
1623     jmp(L_decrement_retry);
1624   }
1625   else {
1626     xabort(0);
1627   }
1628   bind(L_on_abort);
1629   Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
1630   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1631     rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
1632   }
1633   if (RTMRetryCount > 0) {
1634     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1635     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1636   }
1637 
1638   movptr(tmpReg, Address(boxReg, owner_offset)) ;
1639   testptr(tmpReg, tmpReg) ;
1640   jccb(Assembler::notZero, L_decrement_retry) ;
1641 
1642   // Appears unlocked - try to swing _owner from null to non-null.
1643   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1644 #ifdef _LP64
1645   Register threadReg = r15_thread;
1646 #else
1647   get_thread(scrReg);
1648   Register threadReg = scrReg;
1649 #endif
1650   lock();
1651   cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
1652 
1653   if (RTMRetryCount > 0) {
1654     // success done else retry
1655     jccb(Assembler::equal, L_local_done);
1656     bind(L_decrement_retry);
1657     // Spin and retry if lock is busy.
1658     rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
1659   }
1660   else {
1661     bind(L_decrement_retry);
1662   }
1663 
1664   // rtm_inflated_locking() exit paths come here except for a failed
1665   // inc_om_ref_count() which goes directly to DONE_LABEL.
1666   bind(L_local_done);
1667   if (!HandshakeAfterDeflateIdleMonitors) {
1668     pushf();  // Preserve flags.
1669     // Decrement the ObjectMonitor's ref_count.
1670     lock();
1671     decrementl(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
1672     popf();  // Restore flags so we have the proper ICC.ZF value.
1673   }
1674 
1675   jmp(DONE_LABEL) ;
1676 }
1677 
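To summarize the control flow of rtm_inflated_locking() above, here is a simplified stand-alone sketch using the RTM intrinsics from <immintrin.h> (requires an RTM-enabled build, e.g. -mrtm). MonitorSketch, the single retry counter, and the helper name are illustrative stand-ins; the real code also distinguishes retry-on-busy from retry-on-abort, profiles the abort status, and manages the ref_count as shown above.

  #include <immintrin.h>
  #include <atomic>

  struct MonitorSketch { std::atomic<void*> owner; };  // hypothetical ObjectMonitor stand-in

  // Returns true when the inflated lock is held on return (ZF=1 on the fast path above).
  // On transactional success the transaction is deliberately left open; it is committed
  // by the matching unlock, and any later change to 'owner' aborts it.
  static bool rtm_inflated_lock_sketch(MonitorSketch* om, void* self, int retry_count) {
    for (int attempts = retry_count; ; ) {
      unsigned status = _xbegin();
      if (status == _XBEGIN_STARTED) {
        if (om->owner.load(std::memory_order_relaxed) == nullptr) {
          return true;                  // speculate through the critical section
        }
        _xabort(0);                     // lock is busy: abort the transaction
      }
      // Abort path: 'status' could be profiled here; retry only a bounded number of times.
      if (attempts-- <= 0) break;
    }
    // Fallback: try to swing _owner from null to self with a CAS; otherwise slow path.
    void* expected = nullptr;
    return om->owner.compare_exchange_strong(expected, self);
  }
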
1678 #endif //  INCLUDE_RTM_OPT
1679 
1680 // fast_lock and fast_unlock used by C2
1681 
1682 // Because the transitions from emitted code to the runtime
1683 // monitorenter/exit helper stubs are so slow it's critical that
1684 // we inline both the stack-locking fast path and the inflated fast path.
1685 //
1686 // See also: cmpFastLock and cmpFastUnlock.
1687 //
1688 // What follows is a specialized inline transliteration of the code
1689 // in enter() and exit(). If we're concerned about I$ bloat another
1690 // option would be to emit TrySlowEnter and TrySlowExit methods
1691 // at startup-time.  These methods would accept arguments as
1692 // (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
1693 // indications in the icc.ZFlag.  fast_lock and fast_unlock would simply
1694 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
1695 // In practice, however, the # of lock sites is bounded and is usually small.
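The success/failure contract mentioned above (and restated at DONE_LABEL below) is simply: the fast path leaves ICC.ZF=1 on success and ZF=0 when the caller must fall into the runtime monitorenter/monitorexit stub. Modeled with a boolean, the call-site shape is as follows (all names here are hypothetical stand-ins, not the cmpFastLock node itself):

  // Stubbed sketch of how the generated call site consumes the ZF result.
  static bool fast_lock_sketch(void* obj)     { (void)obj; return false; } // pretend: contended
  static void runtime_monitorenter(void* obj) { (void)obj; /* slow-path stub */ }

  static void monitor_enter(void* obj) {
    if (!fast_lock_sketch(obj)) {   // ZF==0 -> failure: force control through the slow path
      runtime_monitorenter(obj);
    }
    // ZF==1 -> success: the lock is held, continue into the synchronized region.
  }
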


1881   cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1882   movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
1883   // If we weren't able to swing _owner from NULL to the BasicLock
1884   // then take the slow path.
1885   jccb  (Assembler::notZero, DONE_LABEL);
1886   // update _owner from BasicLock to thread
1887   get_thread (scrReg);                    // beware: clobbers ICCs
1888   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1889   xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
1890 
1891   // If the CAS fails we can either retry or pass control to the slow path.
1892   // We use the latter tactic.
1893   // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1894   // If the CAS was successful ...
1895   //   Self has acquired the lock
1896   //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1897   // Intentional fall-through into DONE_LABEL ...
1898 #else // _LP64
1899   // It's inflated and we use scrReg for ObjectMonitor* in this section.
1900   movq(scrReg, tmpReg);
1901 


1902   // Unconditionally set box->_displaced_header = markWord::unused_mark().
1903   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
1904   movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
1905 
1906   if (!HandshakeAfterDeflateIdleMonitors) {
1907     // Increment the ObjectMonitor's ref_count for safety or force the
1908     // enter slow path via DONE_LABEL.
1909     // In fast_lock(), scrReg contains the object's mark word which,
1910     // in this case, is the (ObjectMonitor* | monitor_value). Also this
1911     // code uses tmpReg as its temporary register.
1912     inc_om_ref_count(objReg, scrReg /* om_reg */, tmpReg /* tmp_reg */, DONE_LABEL);
1913   }
1914 
1915   xorq(tmpReg, tmpReg);
1916   lock();
1917   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1918   // Intentional fall-through into DONE_LABEL ...
1919   // Propagate ICC.ZF from CAS above into DONE_LABEL.
1920 
1921   if (!HandshakeAfterDeflateIdleMonitors) {
1922     pushf();  // Preserve flags.
1923     // Decrement the ObjectMonitor's ref_count.
1924     lock();
1925     decrementl(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
1926     popf();  // Restore flags so we have the proper ICC.ZF value.
1927   }
1928 #endif // _LP64
1929 #if INCLUDE_RTM_OPT
1930   } // use_rtm()
1931 #endif
1932   // DONE_LABEL is a hot target - we'd really like to place it at the
1933   // start of cache line by padding with NOPs.
1934   // See the AMD and Intel software optimization manuals for the
1935   // most efficient "long" NOP encodings.
1936   // Unfortunately none of our alignment mechanisms suffice.
1937   bind(DONE_LABEL);
1938 
1939   // At DONE_LABEL the icc ZFlag is set as follows ...
1940   // fast_unlock uses the same protocol.
1941   // ZFlag == 1 -> Success
1942   // ZFlag == 0 -> Failure - force control through the slow path
1943 }
1944 
1945 // obj: object to unlock
1946 // box: box address (displaced header location), killed.  Must be EAX.
1947 // tmp: killed, cannot be obj nor box.


1987   }
1988 
1989 #if INCLUDE_RTM_OPT
1990   if (UseRTMForStackLocks && use_rtm) {
1991     assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1992     Label L_regular_unlock;
1993     movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword
1994     andptr(tmpReg, markWord::biased_lock_mask_in_place);              // look at 3 lock bits
1995     cmpptr(tmpReg, markWord::unlocked_value);                         // bits = 001 unlocked
1996     jccb(Assembler::notEqual, L_regular_unlock);                      // if !HLE RegularLock
1997     xend();                                                           // otherwise end...
1998     jmp(DONE_LABEL);                                                  // ... and we're done
1999     bind(L_regular_unlock);
2000   }
2001 #endif
2002 
2003   cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                   // Examine the displaced header
2004   jcc   (Assembler::zero, DONE_LABEL);                              // 0 indicates recursive stack-lock
2005   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
2006   testptr(tmpReg, markWord::monitor_value);                         // Inflated?
2007   jcc  (Assembler::zero, Stacked);
2008 
2009   // It's inflated.
2010 #if INCLUDE_RTM_OPT
2011   if (use_rtm) {
2012     Label L_regular_inflated_unlock;
2013     int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
2014     movptr(boxReg, Address(tmpReg, owner_offset));
2015     testptr(boxReg, boxReg);
2016     jccb(Assembler::notZero, L_regular_inflated_unlock);
2017     xend();
2018     jmp(DONE_LABEL);
2019     bind(L_regular_inflated_unlock);
2020   }
2021 #endif
2022 
2023   // Despite our balanced locking property we still check that m->_owner == Self
2024   // as java routines or native JNI code called by this thread might
2025   // have released the lock.
2026   // Refer to the comments in synchronizer.cpp for how we might encode extra
2027   // state in _succ so we can avoid fetching EntryList|cxq.
2028   //
2029   // I'd like to add more cases in fast_lock() and fast_unlock() --
2030   // such as recursive enter and exit -- but we have to be wary of
2031   // I$ bloat, T$ effects and BP$ effects.
2032   //
2033   // If there's no contention try a 1-0 exit.  That is, exit without
2034   // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
2035   // we detect and recover from the race that the 1-0 exit admits.
2036   //
2037   // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
2038   // before it STs null into _owner, releasing the lock.  Updates


2060 
2061   bind (Stacked);
2062   // It's not inflated and it's not recursively stack-locked and it's not biased.
2063   // It must be stack-locked.
2064   // Try to reset the header to displaced header.
2065   // The "box" value on the stack is stable, so we can reload
2066   // and be assured we observe the same value as above.
2067   movptr(tmpReg, Address(boxReg, 0));
2068   lock();
2069   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
2070   // Intentional fall-through into DONE_LABEL
2071 
2072   // DONE_LABEL is a hot target - we'd really like to place it at the
2073   // start of cache line by padding with NOPs.
2074   // See the AMD and Intel software optimization manuals for the
2075   // most efficient "long" NOP encodings.
2076   // Unfortunately none of our alignment mechanisms suffice.
2077   bind (CheckSucc);
2078 #else // _LP64
2079   // It's inflated
2080 
2081   if (!HandshakeAfterDeflateIdleMonitors) {
2082     // Increment the ObjectMonitor's ref_count for safety or force the
2083     // exit slow path via DONE_LABEL.
2084     // In fast_unlock(), tmpReg contains the object's mark word which,
2085     // in this case, is the (ObjectMonitor* | monitor_value). Also this
2086     // code uses boxReg as its temporary register.
2087     inc_om_ref_count(objReg, tmpReg /* om_reg */, boxReg /* tmp_reg */, DONE_LABEL);
2088   }
2089 
2090   // Try to avoid passing control into the slow path ...
2091   Label LSuccess, LGoSlowPath;
2092   xorptr(boxReg, boxReg);
2093   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2094   jccb(Assembler::notZero, LGoSlowPath);
2095   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2096   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2097   jccb  (Assembler::notZero, CheckSucc);
2098   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
2099   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2100   jmpb(LSuccess);
2101 


2102   bind  (CheckSucc);
2103 
2104   // The following optional optimization can be elided if necessary
2105   // Effectively: if (succ == null) goto slow path
2106   // The code reduces the window for a race, however,
2107   // and thus benefits performance.
2108   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2109   jccb  (Assembler::zero, LGoSlowPath);
2110 
2111   xorptr(boxReg, boxReg);
2112   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
2113   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2114 
2115   // Memory barrier/fence
2116   // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
2117   // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
2118   // This is faster on Nehalem and AMD Shanghai/Barcelona.
2119   // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
2120   // We might also restructure (ST Owner=0;barrier;LD _Succ) to
2121   // (mov box,0; xchgq box, &m->Owner; LD _succ) .
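As an aside, the "dummy locked add of 0 to the top-of-stack" described above is the standard x86 StoreLoad fence idiom (cheaper than MFENCE on the processors named). A hedged sketch of the idea, not the elided code from this hunk:

  #include <atomic>

  // In MacroAssembler terms this would be something like:
  //   lock(); addl(Address(rsp, 0), 0);   // lock add $0,(%rsp): ST Owner; fence; LD _succ
  // The portable C++ equivalent of that ordering point:
  static void store_load_fence_sketch() {
    std::atomic_thread_fence(std::memory_order_seq_cst);
  }
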


2130   // We need to ensure progress and succession.
2131   // Try to reacquire the lock.
2132   // If that fails then the new owner is responsible for succession and this
2133   // thread needs to take no further action and can exit via the fast path (success).
2134   // If the re-acquire succeeds then pass control into the slow path.
2135   // As implemented, this latter mode is horrible because we generate more
2136   // coherence traffic on the lock *and* artificially extend the critical section
2137   // length by virtue of passing control into the slow path.
2138 
2139   // box is really RAX -- the following CMPXCHG depends on that binding
2140   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
2141   lock();
2142   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2143   // There's no successor so we tried to regrab the lock.
2144   // If that didn't work, then another thread grabbed the
2145   // lock so we're done (and exit was a success).
2146   jccb  (Assembler::notEqual, LSuccess);
2147   // Intentional fall-through into slow path
2148 
2149   bind  (LGoSlowPath);
2150   if (!HandshakeAfterDeflateIdleMonitors) {
2151     lock();
2152     decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
2153   }
2154   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2155   jmpb  (DONE_LABEL);
2156 
2157   bind  (LSuccess);
2158   if (!HandshakeAfterDeflateIdleMonitors) {
2159     lock();
2160     decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
2161   }
2162   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
2163   jmpb  (DONE_LABEL);
2164 
2165   bind  (Stacked);
2166   movptr(tmpReg, Address (boxReg, 0));      // re-fetch
2167   lock();
2168   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
2169 
2170 #endif
2171   bind(DONE_LABEL);
2172 }
2173 #endif // COMPILER2
2174 
2175 void MacroAssembler::c2bool(Register x) {
2176   // implements x == 0 ? 0 : 1
2177   // note: must only look at least-significant byte of x
2178   //       since C-style booleans are stored in one byte
2179   //       only! (was bug)
2180   andl(x, 0xFF);
2181   setb(Assembler::notZero, x);

