
src/cpu/x86/vm/macroAssembler_x86.cpp

rev 12152 : [mq]: verification.patch


  28 #include "compiler/disassembler.hpp"
  29 #include "gc/shared/cardTableModRefBS.hpp"
  30 #include "gc/shared/collectedHeap.inline.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "memory/resourceArea.hpp"
  33 #include "memory/universe.hpp"
  34 #include "oops/klass.inline.hpp"
  35 #include "prims/methodHandles.hpp"
  36 #include "runtime/biasedLocking.hpp"
  37 #include "runtime/interfaceSupport.hpp"
  38 #include "runtime/objectMonitor.hpp"
  39 #include "runtime/os.hpp"
  40 #include "runtime/sharedRuntime.hpp"
  41 #include "runtime/stubRoutines.hpp"
  42 #include "runtime/thread.hpp"
  43 #include "utilities/macros.hpp"
  44 #if INCLUDE_ALL_GCS
  45 #include "gc/g1/g1CollectedHeap.inline.hpp"
  46 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
  47 #include "gc/g1/heapRegion.hpp"


  48 #endif // INCLUDE_ALL_GCS
  49 #include "crc32c.h"
  50 #ifdef COMPILER2
  51 #include "opto/intrinsicnode.hpp"
  52 #endif
  53 
  54 #ifdef PRODUCT
  55 #define BLOCK_COMMENT(str) /* nothing */
  56 #define STOP(error) stop(error)
  57 #else
  58 #define BLOCK_COMMENT(str) block_comment(str)
  59 #define STOP(error) block_comment(error); stop(error)
  60 #endif
  61 
  62 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  63 
  64 #ifdef ASSERT
  65 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
  66 #endif
  67 


1089 
1090     bind(no_reserved_zone_enabling);
1091 }
1092 
1093 int MacroAssembler::biased_locking_enter(Register lock_reg,
1094                                          Register obj_reg,
1095                                          Register swap_reg,
1096                                          Register tmp_reg,
1097                                          bool swap_reg_contains_mark,
1098                                          Label& done,
1099                                          Label* slow_case,
1100                                          BiasedLockingCounters* counters) {
1101   assert(UseBiasedLocking, "why call this otherwise?");
1102   assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
1103   assert(tmp_reg != noreg, "tmp_reg must be supplied");
1104   assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
1105   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1106   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
1107   NOT_LP64( Address saved_mark_addr(lock_reg, 0); )
1108 


1109   if (PrintBiasedLockingStatistics && counters == NULL) {
1110     counters = BiasedLocking::counters();
1111   }
1112   // Biased locking
1113   // See whether the lock is currently biased toward our thread and
1114   // whether the epoch is still valid
1115   // Note that the runtime guarantees sufficient alignment of JavaThread
1116   // pointers to allow age to be placed into low bits
1117   // First check to see whether biasing is even enabled for this object
1118   Label cas_label;
1119   int null_check_offset = -1;
1120   if (!swap_reg_contains_mark) {
1121     null_check_offset = offset();
1122     movptr(swap_reg, mark_addr);
1123   }
1124   movptr(tmp_reg, swap_reg);
1125   andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place);
1126   cmpptr(tmp_reg, markOopDesc::biased_lock_pattern);
1127   jcc(Assembler::notEqual, cas_label);
1128   // The bias pattern is present in the object's header. Need to check


1152   andptr(header_reg, ~((int) markOopDesc::age_mask_in_place));
1153   if (counters != NULL) {
1154     cond_inc32(Assembler::zero,
1155                ExternalAddress((address) counters->biased_lock_entry_count_addr()));
1156   }
1157   jcc(Assembler::equal, done);
1158 
1159   Label try_revoke_bias;
1160   Label try_rebias;
1161 
1162   // At this point we know that the header has the bias pattern and
1163   // that we are not the bias owner in the current epoch. We need to
1164   // figure out more details about the state of the header in order to
1165   // know what operations can be legally performed on the object's
1166   // header.
1167 
1168   // If the low three bits in the xor result aren't clear, that means
1169   // the prototype header is no longer biased and we have to revoke
1170   // the bias on this object.
1171   testptr(header_reg, markOopDesc::biased_lock_mask_in_place);
1172   jccb(Assembler::notZero, try_revoke_bias);
1173 
1174   // Biasing is still enabled for this data type. See whether the
1175   // epoch of the current bias is still valid, meaning that the epoch
1176   // bits of the mark word are equal to the epoch bits of the
1177   // prototype header. (Note that the prototype header's epoch bits
1178   // only change at a safepoint.) If not, attempt to rebias the object
1179   // toward the current thread. Note that we must be absolutely sure
1180   // that the current epoch is invalid in order to do this because
1181   // otherwise the manipulations it performs on the mark word are
1182   // illegal.
1183   testptr(header_reg, markOopDesc::epoch_mask_in_place);
1184   jccb(Assembler::notZero, try_rebias);
1185 
1186   // The epoch of the current bias is still valid but we know nothing
1187   // about the owner; it might be set or it might be clear. Try to
1188   // acquire the bias of the object using an atomic operation. If this
1189   // fails we will go in to the runtime to revoke the object's bias.
1190   // Note that we first construct the presumed unbiased header so we
1191   // don't accidentally blow away another thread's valid bias.
1192   NOT_LP64( movptr(swap_reg, saved_mark_addr); )
1193   andptr(swap_reg,
1194          markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
1195 #ifdef _LP64
1196   movptr(tmp_reg, swap_reg);
1197   orptr(tmp_reg, r15_thread);
1198 #else
1199   get_thread(tmp_reg);
1200   orptr(tmp_reg, swap_reg);
1201 #endif
1202   if (os::is_MP()) {
1203     lock();
1204   }


1273   // removing the bias bit from the object's header.
1274   if (counters != NULL) {
1275     cond_inc32(Assembler::zero,
1276                ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
1277   }
1278 
1279   bind(cas_label);
1280 
1281   return null_check_offset;
1282 }
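
A plain-C++ sketch of the low-bit triage this routine emits, for readers who do not have markOop.hpp at hand. Illustrative only: the constants mirror markOopDesc's documented layout (low three bits = two lock bits plus one biased-lock bit), and the names below are stand-ins, not the real types.

    #include <cstdint>

    // 101b in the low three bits means "biased or biasable"; 001b is a neutral
    // (unlocked) mark; a set monitor bit (010b) means the lock is inflated.
    const intptr_t biased_lock_mask_in_place = 0x7;
    const intptr_t biased_lock_pattern       = 0x5;   // 101b

    inline bool has_bias_pattern(intptr_t mark) {
      return (mark & biased_lock_mask_in_place) == biased_lock_pattern;
    }
    // When the pattern is absent, biased_locking_enter() falls through to
    // cas_label and the regular stack-locking CAS; when present, it goes on to
    // compare the bias owner and epoch against the klass prototype header.
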
1283 
1284 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1285   assert(UseBiasedLocking, "why call this otherwise?");
1286 
1287   // Check for biased locking unlock case, which is a no-op
1288   // Note: we do not have to check the thread ID for two reasons.
1289   // First, the interpreter checks for IllegalMonitorStateException at
1290   // a higher level. Second, if the bias was revoked while we held the
1291   // lock, the object could not be rebiased toward another thread, so
1292   // the bias bit would be clear.

1293   movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1294   andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
1295   cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
1296   jcc(Assembler::equal, done);
1297 }
1298 
1299 #ifdef COMPILER2
1300 
1301 #if INCLUDE_RTM_OPT
1302 
1303 // Update rtm_counters based on abort status
1304 // input: abort_status
1305 //        rtm_counters (RTMLockingCounters*)
1306 // flags are killed
1307 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
1308 
1309   atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
1310   if (PrintPreciseRTMLockingStatistics) {
1311     for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
1312       Label check_abort;


1465   incrementl(retry_count_Reg); // clear z flag
1466 }
1467 
1468 // Use RTM for normal stack locks
1469 // Input: objReg (object to lock)
1470 void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg,
1471                                        Register retry_on_abort_count_Reg,
1472                                        RTMLockingCounters* stack_rtm_counters,
1473                                        Metadata* method_data, bool profile_rtm,
1474                                        Label& DONE_LABEL, Label& IsInflated) {
1475   assert(UseRTMForStackLocks, "why call this otherwise?");
1476   assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1477   assert(tmpReg == rax, "");
1478   assert(scrReg == rdx, "");
1479   Label L_rtm_retry, L_decrement_retry, L_on_abort;
1480 
1481   if (RTMRetryCount > 0) {
1482     movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1483     bind(L_rtm_retry);
1484   }

1485   movptr(tmpReg, Address(objReg, 0));
1486   testptr(tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
1487   jcc(Assembler::notZero, IsInflated);
1488 
1489   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1490     Label L_noincrement;
1491     if (RTMTotalCountIncrRate > 1) {
1492       // tmpReg, scrReg and flags are killed
1493       branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
1494     }
1495     assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
1496     atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg);
1497     bind(L_noincrement);
1498   }
1499   xbegin(L_on_abort);
1500   movptr(tmpReg, Address(objReg, 0));       // fetch markword
1501   andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
1502   cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
1503   jcc(Assembler::equal, DONE_LABEL);        // all done if unlocked
1504 
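
For reference, the structure above (xbegin, re-check inside the transaction that the mark word still reads 001/unlocked, abort otherwise) is the standard RTM lock-elision pattern. A minimal user-level analogue using the <immintrin.h> intrinsics, assuming RTM hardware and -mrtm; the lock type is hypothetical and not HotSpot code:

    #include <immintrin.h>
    #include <atomic>

    struct ElidedSpinlock {                      // hypothetical, for illustration only
      std::atomic<int> word{0};                  // 0 plays the role of the 001/unlocked mark

      bool try_elided_lock() {
        if (_xbegin() == _XBEGIN_STARTED) {      // analogous to xbegin(L_on_abort)
          if (word.load(std::memory_order_relaxed) == 0) {
            return true;                         // held transactionally; nothing is stored
          }
          _xabort(0x1);                          // lock busy: abort, fall back to real locking
        }
        return false;                            // abort path: retry or take the CAS slow path
      }

      void elided_unlock() { _xend(); }          // commit; the elided critical section retires
    };
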


1541   // Without cast to int32_t a movptr will destroy r10 which is typically obj
1542   movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1543   movptr(boxReg, tmpReg); // Save ObjectMonitor address
1544 
1545   if (RTMRetryCount > 0) {
1546     movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
1547     movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1548     bind(L_rtm_retry);
1549   }
1550   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1551     Label L_noincrement;
1552     if (RTMTotalCountIncrRate > 1) {
1553       // tmpReg, scrReg and flags are killed
1554       branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
1555     }
1556     assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1557     atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
1558     bind(L_noincrement);
1559   }
1560   xbegin(L_on_abort);

1561   movptr(tmpReg, Address(objReg, 0));
1562   movptr(tmpReg, Address(tmpReg, owner_offset));
1563   testptr(tmpReg, tmpReg);
1564   jcc(Assembler::zero, DONE_LABEL);
1565   if (UseRTMXendForLockBusy) {
1566     xend();
1567     jmp(L_decrement_retry);
1568   }
1569   else {
1570     xabort(0);
1571   }
1572   bind(L_on_abort);
1573   Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
1574   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1575     rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
1576   }
1577   if (RTMRetryCount > 0) {
1578     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1579     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1580   }


1687 // rax,: tmp -- KILLED
1688 // scr: tmp -- KILLED
1689 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
1690                                Register scrReg, Register cx1Reg, Register cx2Reg,
1691                                BiasedLockingCounters* counters,
1692                                RTMLockingCounters* rtm_counters,
1693                                RTMLockingCounters* stack_rtm_counters,
1694                                Metadata* method_data,
1695                                bool use_rtm, bool profile_rtm) {
1696   // Ensure the register assignments are disjoint
1697   assert(tmpReg == rax, "");
1698 
1699   if (use_rtm) {
1700     assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
1701   } else {
1702     assert(cx1Reg == noreg, "");
1703     assert(cx2Reg == noreg, "");
1704     assert_different_registers(objReg, boxReg, tmpReg, scrReg);
1705   }
1706 


1707   if (counters != NULL) {
1708     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
1709   }
1710   if (EmitSync & 1) {
1711       // set box->dhw = markOopDesc::unused_mark()
1712       // Force all sync thru slow-path: slow_enter() and slow_exit()
1713       movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1714       cmpptr (rsp, (int32_t)NULL_WORD);
1715   } else {
1716     // Possible cases that we'll encounter in fast_lock
1717     // ------------------------------------------------
1718     // * Inflated
1719     //    -- unlocked
1720     //    -- Locked
1721     //       = by self
1722     //       = by other
1723     // * biased
1724     //    -- by Self
1725     //    -- by other
1726     // * neutral


1736     // it's stack-locked, biased or neutral
1737     // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
1738     // order to reduce the number of conditional branches in the most common cases.
1739     // Beware -- there's a subtle invariant that fetch of the markword
1740     // at [FETCH], below, will never observe a biased encoding (*101b).
1741     // If this invariant is not held we risk exclusion (safety) failure.
1742     if (UseBiasedLocking && !UseOptoBiasInlining) {
1743       biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
1744     }
1745 
1746 #if INCLUDE_RTM_OPT
1747     if (UseRTMForStackLocks && use_rtm) {
1748       rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
1749                         stack_rtm_counters, method_data, profile_rtm,
1750                         DONE_LABEL, IsInflated);
1751     }
1752 #endif // INCLUDE_RTM_OPT
1753 
1754     movptr(tmpReg, Address(objReg, 0));          // [FETCH]
1755     testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
1756     jccb(Assembler::notZero, IsInflated);
1757 
1758     // Attempt stack-locking ...
1759     orptr (tmpReg, markOopDesc::unlocked_value);
1760     movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
1761     if (os::is_MP()) {
1762       lock();
1763     }
1764     cmpxchgptr(boxReg, Address(objReg, 0));      // Updates tmpReg
1765     if (counters != NULL) {
1766       cond_inc32(Assembler::equal,
1767                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
1768     }
1769     jcc(Assembler::equal, DONE_LABEL);           // Success
1770 
1771     // Recursive locking.
1772     // The object is stack-locked: markword contains stack pointer to BasicLock.
1773     // Locked by current thread if difference with current SP is less than one page.
1774     subptr(tmpReg, rsp);
1775     // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
1776     andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
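
Putting the two pieces above together (the optimistic CAS of the BasicLock address into the header, and the SP-distance test for recursion), the fast path reads roughly as the following C++ sketch. The types and the exact page/low-bit mask are simplified stand-ins for the emitted code:

    #include <atomic>
    #include <cstdint>

    struct BasicLockSketch { intptr_t displaced_header; };   // stand-in for BasicLock

    bool stack_lock_sketch(std::atomic<intptr_t>& obj_mark, BasicLockSketch* box,
                           intptr_t sp, intptr_t page_size) {
      intptr_t unlocked = obj_mark.load() | 0x1;   // force the 001/unlocked pattern
      box->displaced_header = unlocked;            // "Anticipate successful CAS"
      intptr_t expected = unlocked;
      if (obj_mark.compare_exchange_strong(expected, (intptr_t)box)) {
        return true;                               // stack-locked: header now points at box
      }
      // CAS failed; 'expected' holds the current mark. If it is a pointer into
      // our own stack (within about one page above sp), this is a recursive
      // lock, recorded by a zero displaced header.
      if ((uintptr_t)(expected - sp) < (uintptr_t)page_size) {
        box->displaced_header = 0;
        return true;
      }
      return false;                                // inflated or contended: slow path
    }
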


1813     } else
1814     if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
1815        // register juggle because we need tmpReg for cmpxchgptr below
1816        movptr(scrReg, boxReg);
1817        movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
1818 
1819        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1820        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1821           // prefetchw [eax + Offset(_owner)-2]
1822           prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1823        }
1824 
1825        if ((EmitSync & 64) == 0) {
1826          // Optimistic form: consider XORL tmpReg,tmpReg
1827          movptr(tmpReg, NULL_WORD);
1828        } else {
1829          // Can suffer RTS->RTO upgrades on shared or cold $ lines
1830          // Test-And-CAS instead of CAS
1831          movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));   // rax, = m->_owner
1832          testptr(tmpReg, tmpReg);                   // Locked ?
1833          jccb  (Assembler::notZero, DONE_LABEL);
1834        }
1835 
1836        // Appears unlocked - try to swing _owner from null to non-null.
1837        // Ideally, I'd manifest "Self" with get_thread and then attempt
1838        // to CAS the register containing Self into m->Owner.
1839        // But we don't have enough registers, so instead we can either try to CAS
1840        // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
1841        // we later store "Self" into m->Owner.  Transiently storing a stack address
1842        // (rsp or the address of the box) into  m->owner is harmless.
1843        // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1844        if (os::is_MP()) {
1845          lock();
1846        }
1847        cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1848        movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
1849        // If we weren't able to swing _owner from NULL to the BasicLock
1850        // then take the slow path.
1851        jccb  (Assembler::notZero, DONE_LABEL);
1852        // update _owner from BasicLock to thread
1853        get_thread (scrReg);                    // beware: clobbers ICCs
1854        movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1855        xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
1856 
1857        // If the CAS fails we can either retry or pass control to the slow-path.
1858        // We use the latter tactic.
1859        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1860        // If the CAS was successful ...
1861        //   Self has acquired the lock
1862        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1863        // Intentional fall-through into DONE_LABEL ...
1864     } else {
1865        movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark()));  // results in ST-before-CAS penalty
1866        movptr(boxReg, tmpReg);
1867 
1868        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1869        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1870           // prefetchw [eax + Offset(_owner)-2]
1871           prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1872        }
1873 
1874        if ((EmitSync & 64) == 0) {
1875          // Optimistic form
1876          xorptr  (tmpReg, tmpReg);
1877        } else {
1878          // Can suffer RTS->RTO upgrades on shared or cold $ lines
1879          movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));   // rax, = m->_owner
1880          testptr(tmpReg, tmpReg);                   // Locked ?
1881          jccb  (Assembler::notZero, DONE_LABEL);
1882        }
1883 
1884        // Appears unlocked - try to swing _owner from null to non-null.
1885        // Use either "Self" (in scr) or rsp as thread identity in _owner.
1886        // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1887        get_thread (scrReg);
1888        if (os::is_MP()) {
1889          lock();
1890        }
1891        cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1892 
1893        // If the CAS fails we can either retry or pass control to the slow-path.
1894        // We use the latter tactic.
1895        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1896        // If the CAS was successful ...
1897        //   Self has acquired the lock
1898        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1899        // Intentional fall-through into DONE_LABEL ...
1900     }
1901 #else // _LP64


1949 //
1950 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
1951 // B() doesn't have provably balanced locking so it runs in the interpreter.
1952 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
1953 // is still locked by A().
1954 //
1955 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
1956 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
1957 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
1958 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
1959 // Arguably given that the spec legislates the JNI case as undefined our implementation
1960 // could reasonably *avoid* checking owner in Fast_Unlock().
1961 // In the interest of performance we elide m->Owner==Self check in unlock.
1962 // A perfectly viable alternative is to elide the owner check except when
1963 // Xcheck:jni is enabled.
1964 
1965 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
1966   assert(boxReg == rax, "");
1967   assert_different_registers(objReg, boxReg, tmpReg);
1968 


1969   if (EmitSync & 4) {
1970     // Disable - inhibit all inlining.  Force control through the slow-path
1971     cmpptr (rsp, 0);
1972   } else {
1973     Label DONE_LABEL, Stacked, CheckSucc;
1974 
1975     // Critically, the biased locking test must have precedence over
1976     // and appear before the (box->dhw == 0) recursive stack-lock test.
1977     if (UseBiasedLocking && !UseOptoBiasInlining) {
1978        biased_locking_exit(objReg, tmpReg, DONE_LABEL);
1979     }
1980 
1981 #if INCLUDE_RTM_OPT
1982     if (UseRTMForStackLocks && use_rtm) {
1983       assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1984       Label L_regular_unlock;
1985       movptr(tmpReg, Address(objReg, 0));           // fetch markword
1986       andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
1987       cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
1988       jccb(Assembler::notEqual, L_regular_unlock);  // if !HLE RegularLock


1990       jmp(DONE_LABEL);                              // ... and we're done
1991       bind(L_regular_unlock);
1992     }
1993 #endif
1994 
1995     cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
1996     jcc   (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
1997     movptr(tmpReg, Address(objReg, 0));             // Examine the object's markword
1998     testptr(tmpReg, markOopDesc::monitor_value);    // Inflated?
1999     jccb  (Assembler::zero, Stacked);
2000 
2001     // It's inflated.
2002 #if INCLUDE_RTM_OPT
2003     if (use_rtm) {
2004       Label L_regular_inflated_unlock;
2005       int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
2006       movptr(boxReg, Address(tmpReg, owner_offset));
2007       testptr(boxReg, boxReg);
2008       jccb(Assembler::notZero, L_regular_inflated_unlock);
2009       xend();
2010       jmpb(DONE_LABEL);
2011       bind(L_regular_inflated_unlock);
2012     }
2013 #endif
2014 
2015     // Despite our balanced locking property we still check that m->_owner == Self
2016     // as java routines or native JNI code called by this thread might
2017     // have released the lock.
2018     // Refer to the comments in synchronizer.cpp for how we might encode extra
2019     // state in _succ so we can avoid fetching EntryList|cxq.
2020     //
2021     // I'd like to add more cases in fast_lock() and fast_unlock() --
2022     // such as recursive enter and exit -- but we have to be wary of
2023     // I$ bloat, T$ effects and BP$ effects.
2024     //
2025     // If there's no contention try a 1-0 exit.  That is, exit without
2026     // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
2027     // we detect and recover from the race that the 1-0 exit admits.
2028     //
2029     // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
2030     // before it STs null into _owner, releasing the lock.  Updates


2034     // IA32's memory-model is SPO, so STs are ordered with respect to
2035     // each other and there's no need for an explicit barrier (fence).
2036     // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
2037 #ifndef _LP64
2038     get_thread (boxReg);
2039     if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
2040       // prefetchw [ebx + Offset(_owner)-2]
2041       prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2042     }
2043 
2044     // Note that we could employ various encoding schemes to reduce
2045     // the number of loads below (currently 4) to just 2 or 3.
2046     // Refer to the comments in synchronizer.cpp.
2047     // In practice the chain of fetches doesn't seem to impact performance, however.
2048     xorptr(boxReg, boxReg);
2049     if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
2050        // Attempt to reduce branch density - AMD's branch predictor.
2051        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2052        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2053        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2054        jccb  (Assembler::notZero, DONE_LABEL);
2055        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
2056        jmpb  (DONE_LABEL);
2057     } else {
2058        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2059        jccb  (Assembler::notZero, DONE_LABEL);
2060        movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2061        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2062        jccb  (Assembler::notZero, CheckSucc);
2063        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
2064        jmpb  (DONE_LABEL);
2065     }
2066 
2067     // The following code fragment (EmitSync & 65536) improves the performance of
2068     // contended applications and contended synchronization microbenchmarks.
2069     // Unfortunately the emission of the code - even though not executed - causes regressions
2070     // in scimark and jetstream, evidently because of $ effects.  Replacing the code
2071     // with an equal number of never-executed NOPs results in the same regression.
2072     // We leave it off by default.
2073 
2074     if ((EmitSync & 65536) != 0) {
2075        Label LSuccess, LGoSlowPath ;
2076 
2077        bind  (CheckSucc);
2078 
2079        // Optional pre-test ... it's safe to elide this
2080        cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2081        jccb(Assembler::zero, LGoSlowPath);
2082 
2083        // We have a classic Dekker-style idiom:
2084        //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
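
In C++11 memory-model terms, the idiom named here (release the owner, full fence, then ratify the successor) looks like the sketch below. The monitor type is a stand-in; the point is the store-load barrier between clearing _owner and re-reading _succ:

    #include <atomic>

    struct MonitorSketch {                         // hypothetical fields, illustration only
      std::atomic<void*> owner{nullptr};
      std::atomic<void*> succ{nullptr};
    };

    // Returns true if a successor is still visible after the release (exit is
    // complete); false means the exiting thread must try to re-acquire _owner
    // and take the slow path, mirroring LGoSlowPath above.
    bool one_zero_exit(MonitorSketch* m) {
      m->owner.store(nullptr, std::memory_order_release);   // ST m->_owner = 0
      std::atomic_thread_fence(std::memory_order_seq_cst);  // MEMBAR (store-load barrier)
      return m->succ.load(std::memory_order_acquire) != nullptr;  // LD m->_succ
    }
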


2114        // Ratify _succ remains non-null
2115        cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);
2116        jccb  (Assembler::notZero, LSuccess);
2117 
2118        xorptr(boxReg, boxReg);                  // box is really EAX
2119        if (os::is_MP()) { lock(); }
2120        cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2121        // There's no successor so we tried to regrab the lock with the
2122        // placeholder value. If that didn't work, then another thread
2123        // grabbed the lock so we're done (and exit was a success).
2124        jccb  (Assembler::notEqual, LSuccess);
2125        // Since we're low on registers we installed rsp as a placeholder in _owner.
2126        // Now install Self over rsp.  This is safe as we're transitioning from
2127        // non-null to non-null.
2128        get_thread (boxReg);
2129        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg);
2130        // Intentional fall-through into LGoSlowPath ...
2131 
2132        bind  (LGoSlowPath);
2133        orptr(boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2134        jmpb  (DONE_LABEL);
2135 
2136        bind  (LSuccess);
2137        xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
2138        jmpb  (DONE_LABEL);
2139     }
2140 
2141     bind (Stacked);
2142     // It's not inflated and it's not recursively stack-locked and it's not biased.
2143     // It must be stack-locked.
2144     // Try to reset the header to displaced header.
2145     // The "box" value on the stack is stable, so we can reload
2146     // and be assured we observe the same value as above.
2147     movptr(tmpReg, Address(boxReg, 0));
2148     if (os::is_MP()) {
2149       lock();
2150     }
2151     cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2152     // Intentional fall-through into DONE_LABEL
2153 
2154     // DONE_LABEL is a hot target - we'd really like to place it at the
2155     // start of cache line by padding with NOPs.
2156     // See the AMD and Intel software optimization manuals for the
2157     // most efficient "long" NOP encodings.
2158     // Unfortunately none of our alignment mechanisms suffice.
2159     if ((EmitSync & 65536) == 0) {
2160        bind (CheckSucc);
2161     }
2162 #else // _LP64
2163     // It's inflated
2164     if (EmitSync & 1024) {
2165       // Emit code to check that _owner == Self
2166       // We could fold the _owner test into subsequent code more efficiently
2167       // than using a stand-alone check, but since _owner checking is off by
2168       // default we don't bother. We also might consider predicating the
2169       // _owner==Self check on Xcheck:jni or running on a debug build.
2170       movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2171       xorptr(boxReg, r15_thread);
2172     } else {
2173       xorptr(boxReg, boxReg);
2174     }
2175     orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2176     jccb  (Assembler::notZero, DONE_LABEL);
2177     movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2178     orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2179     jccb  (Assembler::notZero, CheckSucc);
2180     movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2181     jmpb  (DONE_LABEL);
2182 
2183     if ((EmitSync & 65536) == 0) {
2184       // Try to avoid passing control into the slow_path ...
2185       Label LSuccess, LGoSlowPath ;
2186       bind  (CheckSucc);
2187 
2188       // The following optional optimization can be elided if necessary
2189       // Effectively: if (succ == null) goto SlowPath
2190       // The code reduces the window for a race, however,
2191       // and thus benefits performance.
2192       cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2193       jccb  (Assembler::zero, LGoSlowPath);
2194 
2195       xorptr(boxReg, boxReg);
2196       if ((EmitSync & 16) && os::is_MP()) {
2197         xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2198       } else {
2199         movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2200         if (os::is_MP()) {
2201           // Memory barrier/fence


2218       // Try to reacquire the lock.
2219       // If that fails then the new owner is responsible for succession and this
2220       // thread needs to take no further action and can exit via the fast path (success).
2221       // If the re-acquire succeeds then pass control into the slow path.
2222       // As implemented, this latter mode is horrible because we generated more
2223       // coherence traffic on the lock *and* artificially extended the critical section
2224       // length by virtue of passing control into the slow path.
2225 
2226       // box is really RAX -- the following CMPXCHG depends on that binding
2227       // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
2228       if (os::is_MP()) { lock(); }
2229       cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2230       // There's no successor so we tried to regrab the lock.
2231       // If that didn't work, then another thread grabbed the
2232       // lock so we're done (and exit was a success).
2233       jccb  (Assembler::notEqual, LSuccess);
2234       // Intentional fall-through into slow-path
2235 
2236       bind  (LGoSlowPath);
2237       orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2238       jmpb  (DONE_LABEL);
2239 
2240       bind  (LSuccess);
2241       testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
2242       jmpb  (DONE_LABEL);
2243     }
2244 
2245     bind  (Stacked);
2246     movptr(tmpReg, Address (boxReg, 0));      // re-fetch
2247     if (os::is_MP()) { lock(); }
2248     cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2249 
2250     if (EmitSync & 65536) {
2251        bind (CheckSucc);
2252     }
2253 #endif
2254     bind(DONE_LABEL);
2255   }
2256 }
2257 #endif // COMPILER2
2258 
2259 void MacroAssembler::c2bool(Register x) {
2260   // implements x == 0 ? 0 : 1
2261   // note: must only look at least-significant byte of x
2262   //       since C-style booleans are stored in one byte
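
The contract spelled out in this comment is, in C terms (illustrative only):

    // Normalize a C-style (one-byte) boolean to exactly 0 or 1, looking only at
    // the least-significant byte of the register value.
    static inline int c2bool_sketch(intptr_t x) {
      return (x & 0xFF) != 0 ? 1 : 0;
    }
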


6060     b = code_string(ss.as_string());
6061   }
6062   BLOCK_COMMENT("verify_oop {");
6063 #ifdef _LP64
6064   push(rscratch1);                    // save r10, trashed by movptr()
6065 #endif
6066   push(rax);                          // save rax,
6067   push(reg);                          // pass register argument
6068   ExternalAddress buffer((address) b);
6069   // avoid using pushptr, as it modifies scratch registers
6070   // and our contract is not to modify anything
6071   movptr(rax, buffer.addr());
6072   push(rax);
6073   // call indirectly to solve generation ordering problem
6074   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
6075   call(rax);
6076   // Caller pops the arguments (oop, message) and restores rax, r10
6077   BLOCK_COMMENT("} verify_oop");
6078 }
6079 
6080 
6081 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
6082                                                       Register tmp,
6083                                                       int offset) {
6084   intptr_t value = *delayed_value_addr;
6085   if (value != 0)
6086     return RegisterOrConstant(value + offset);
6087 
6088   // load indirectly to solve generation ordering problem
6089   movptr(tmp, ExternalAddress((address) delayed_value_addr));
6090 
6091 #ifdef ASSERT
6092   { Label L;
6093     testptr(tmp, tmp);
6094     if (WizardMode) {
6095       const char* buf = NULL;
6096       {
6097         ResourceMark rm;
6098         stringStream ss;
6099         ss.print("DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]);




src/cpu/x86/vm/macroAssembler_x86.cpp (patched version, rev 12152: verification.patch)

  28 #include "compiler/disassembler.hpp"
  29 #include "gc/shared/cardTableModRefBS.hpp"
  30 #include "gc/shared/collectedHeap.inline.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "memory/resourceArea.hpp"
  33 #include "memory/universe.hpp"
  34 #include "oops/klass.inline.hpp"
  35 #include "prims/methodHandles.hpp"
  36 #include "runtime/biasedLocking.hpp"
  37 #include "runtime/interfaceSupport.hpp"
  38 #include "runtime/objectMonitor.hpp"
  39 #include "runtime/os.hpp"
  40 #include "runtime/sharedRuntime.hpp"
  41 #include "runtime/stubRoutines.hpp"
  42 #include "runtime/thread.hpp"
  43 #include "utilities/macros.hpp"
  44 #if INCLUDE_ALL_GCS
  45 #include "gc/g1/g1CollectedHeap.inline.hpp"
  46 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
  47 #include "gc/g1/heapRegion.hpp"
  48 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  49 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  50 #endif // INCLUDE_ALL_GCS
  51 #include "crc32c.h"
  52 #ifdef COMPILER2
  53 #include "opto/intrinsicnode.hpp"
  54 #endif
  55 
  56 #ifdef PRODUCT
  57 #define BLOCK_COMMENT(str) /* nothing */
  58 #define STOP(error) stop(error)
  59 #else
  60 #define BLOCK_COMMENT(str) block_comment(str)
  61 #define STOP(error) block_comment(error); stop(error)
  62 #endif
  63 
  64 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  65 
  66 #ifdef ASSERT
  67 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
  68 #endif
  69 


1091 
1092     bind(no_reserved_zone_enabling);
1093 }
1094 
1095 int MacroAssembler::biased_locking_enter(Register lock_reg,
1096                                          Register obj_reg,
1097                                          Register swap_reg,
1098                                          Register tmp_reg,
1099                                          bool swap_reg_contains_mark,
1100                                          Label& done,
1101                                          Label* slow_case,
1102                                          BiasedLockingCounters* counters) {
1103   assert(UseBiasedLocking, "why call this otherwise?");
1104   assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq");
1105   assert(tmp_reg != noreg, "tmp_reg must be supplied");
1106   assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg);
1107   assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout");
1108   Address mark_addr      (obj_reg, oopDesc::mark_offset_in_bytes());
1109   NOT_LP64( Address saved_mark_addr(lock_reg, 0); )
1110 
1111   shenandoah_store_addr_check(obj_reg);
1112 
1113   if (PrintBiasedLockingStatistics && counters == NULL) {
1114     counters = BiasedLocking::counters();
1115   }
1116   // Biased locking
1117   // See whether the lock is currently biased toward our thread and
1118   // whether the epoch is still valid
1119   // Note that the runtime guarantees sufficient alignment of JavaThread
1120   // pointers to allow age to be placed into low bits
1121   // First check to see whether biasing is even enabled for this object
1122   Label cas_label;
1123   int null_check_offset = -1;
1124   if (!swap_reg_contains_mark) {
1125     null_check_offset = offset();
1126     movptr(swap_reg, mark_addr);
1127   }
1128   movptr(tmp_reg, swap_reg);
1129   andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place);
1130   cmpptr(tmp_reg, markOopDesc::biased_lock_pattern);
1131   jcc(Assembler::notEqual, cas_label);
1132   // The bias pattern is present in the object's header. Need to check


1156   andptr(header_reg, ~((int) markOopDesc::age_mask_in_place));
1157   if (counters != NULL) {
1158     cond_inc32(Assembler::zero,
1159                ExternalAddress((address) counters->biased_lock_entry_count_addr()));
1160   }
1161   jcc(Assembler::equal, done);
1162 
1163   Label try_revoke_bias;
1164   Label try_rebias;
1165 
1166   // At this point we know that the header has the bias pattern and
1167   // that we are not the bias owner in the current epoch. We need to
1168   // figure out more details about the state of the header in order to
1169   // know what operations can be legally performed on the object's
1170   // header.
1171 
1172   // If the low three bits in the xor result aren't clear, that means
1173   // the prototype header is no longer biased and we have to revoke
1174   // the bias on this object.
1175   testptr(header_reg, markOopDesc::biased_lock_mask_in_place);
1176   jccb_if_possible(Assembler::notZero, try_revoke_bias);
1177 
1178   // Biasing is still enabled for this data type. See whether the
1179   // epoch of the current bias is still valid, meaning that the epoch
1180   // bits of the mark word are equal to the epoch bits of the
1181   // prototype header. (Note that the prototype header's epoch bits
1182   // only change at a safepoint.) If not, attempt to rebias the object
1183   // toward the current thread. Note that we must be absolutely sure
1184   // that the current epoch is invalid in order to do this because
1185   // otherwise the manipulations it performs on the mark word are
1186   // illegal.
1187   testptr(header_reg, markOopDesc::epoch_mask_in_place);
1188   jccb_if_possible(Assembler::notZero, try_rebias);
1189 
1190   // The epoch of the current bias is still valid but we know nothing
1191   // about the owner; it might be set or it might be clear. Try to
1192   // acquire the bias of the object using an atomic operation. If this
1193   // fails we will go in to the runtime to revoke the object's bias.
1194   // Note that we first construct the presumed unbiased header so we
1195   // don't accidentally blow away another thread's valid bias.
1196   NOT_LP64( movptr(swap_reg, saved_mark_addr); )
1197   andptr(swap_reg,
1198          markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
1199 #ifdef _LP64
1200   movptr(tmp_reg, swap_reg);
1201   orptr(tmp_reg, r15_thread);
1202 #else
1203   get_thread(tmp_reg);
1204   orptr(tmp_reg, swap_reg);
1205 #endif
1206   if (os::is_MP()) {
1207     lock();
1208   }


1277   // removing the bias bit from the object's header.
1278   if (counters != NULL) {
1279     cond_inc32(Assembler::zero,
1280                ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
1281   }
1282 
1283   bind(cas_label);
1284 
1285   return null_check_offset;
1286 }
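
This hunk swaps several jccb uses for jccb_if_possible. jccb encodes the short conditional jump (8-bit displacement, reach of roughly +/-127 bytes) while jcc uses the 32-bit near form; the extra verification code this patch can emit plausibly pushes some branch targets out of short range. The helper's definition is not shown in this webrev; the following is only a guess at its shape, with an assumed build switch:

    // Hypothetical illustration; not the definition from the patch.
    // Assumption: some verification mode can lengthen the code between a branch
    // and its label, so the short (rel8) form is only used when that mode is off.
    #ifdef ASSUME_VERIFICATION_MAY_LENGTHEN_BRANCHES   // assumed switch, illustrative
      #define jccb_if_possible(cc, L) jcc(cc, L)       // near jump (rel32): always reaches
    #else
      #define jccb_if_possible(cc, L) jccb(cc, L)      // short jump (rel8): smaller encoding
    #endif
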
1287 
1288 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1289   assert(UseBiasedLocking, "why call this otherwise?");
1290 
1291   // Check for biased locking unlock case, which is a no-op
1292   // Note: we do not have to check the thread ID for two reasons.
1293   // First, the interpreter checks for IllegalMonitorStateException at
1294   // a higher level. Second, if the bias was revoked while we held the
1295   // lock, the object could not be rebiased toward another thread, so
1296   // the bias bit would be clear.
1297   shenandoah_store_addr_check(obj_reg); // Access mark word
1298   movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1299   andptr(temp_reg, markOopDesc::biased_lock_mask_in_place);
1300   cmpptr(temp_reg, markOopDesc::biased_lock_pattern);
1301   jcc(Assembler::equal, done);
1302 }
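
Every lock and unlock fast path in the patched file now begins with shenandoah_store_addr_check(obj) before the mark word is read or CAS-ed. Its body is not part of this hunk; given the patch title (verification.patch), it is presumably a debug-time check that the object whose header is about to be written is a sane, current address. The sketch below is purely hypothetical and only illustrates the kind of invariant such a helper might assert:

    #include <cassert>

    // Hypothetical, debug-only analogue; the name in_from_space and the exact
    // invariant are assumptions, not the MacroAssembler code the patch emits.
    static void store_addr_check_sketch(const void* obj,
                                        bool (*in_from_space)(const void*)) {
      assert(obj != nullptr && "lock fast path expects a non-null object");
      // A Shenandoah-style verifier would also require that obj is the current
      // (to-space) copy before its mark word is CAS-ed:
      assert(!in_from_space(obj) && "must not CAS the header of a stale from-space copy");
    }
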
1303 
1304 #ifdef COMPILER2
1305 
1306 #if INCLUDE_RTM_OPT
1307 
1308 // Update rtm_counters based on abort status
1309 // input: abort_status
1310 //        rtm_counters (RTMLockingCounters*)
1311 // flags are killed
1312 void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters) {
1313 
1314   atomic_incptr(Address(rtm_counters, RTMLockingCounters::abort_count_offset()));
1315   if (PrintPreciseRTMLockingStatistics) {
1316     for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
1317       Label check_abort;


1470   incrementl(retry_count_Reg); // clear z flag
1471 }
1472 
1473 // Use RTM for normal stack locks
1474 // Input: objReg (object to lock)
1475 void MacroAssembler::rtm_stack_locking(Register objReg, Register tmpReg, Register scrReg,
1476                                        Register retry_on_abort_count_Reg,
1477                                        RTMLockingCounters* stack_rtm_counters,
1478                                        Metadata* method_data, bool profile_rtm,
1479                                        Label& DONE_LABEL, Label& IsInflated) {
1480   assert(UseRTMForStackLocks, "why call this otherwise?");
1481   assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1482   assert(tmpReg == rax, "");
1483   assert(scrReg == rdx, "");
1484   Label L_rtm_retry, L_decrement_retry, L_on_abort;
1485 
1486   if (RTMRetryCount > 0) {
1487     movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1488     bind(L_rtm_retry);
1489   }
1490   shenandoah_store_addr_check(objReg); // Access mark word
1491   movptr(tmpReg, Address(objReg, 0));
1492   testptr(tmpReg, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
1493   jcc(Assembler::notZero, IsInflated);
1494 
1495   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1496     Label L_noincrement;
1497     if (RTMTotalCountIncrRate > 1) {
1498       // tmpReg, scrReg and flags are killed
1499       branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
1500     }
1501     assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
1502     atomic_incptr(ExternalAddress((address)stack_rtm_counters->total_count_addr()), scrReg);
1503     bind(L_noincrement);
1504   }
1505   xbegin(L_on_abort);
1506   movptr(tmpReg, Address(objReg, 0));       // fetch markword
1507   andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
1508   cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
1509   jcc(Assembler::equal, DONE_LABEL);        // all done if unlocked
1510 


1547   // Without cast to int32_t a movptr will destroy r10 which is typically obj
1548   movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1549   movptr(boxReg, tmpReg); // Save ObjectMonitor address
1550 
1551   if (RTMRetryCount > 0) {
1552     movl(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy
1553     movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
1554     bind(L_rtm_retry);
1555   }
1556   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1557     Label L_noincrement;
1558     if (RTMTotalCountIncrRate > 1) {
1559       // tmpReg, scrReg and flags are killed
1560       branch_on_random_using_rdtsc(tmpReg, scrReg, (int)RTMTotalCountIncrRate, L_noincrement);
1561     }
1562     assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1563     atomic_incptr(ExternalAddress((address)rtm_counters->total_count_addr()), scrReg);
1564     bind(L_noincrement);
1565   }
1566   xbegin(L_on_abort);
1567   shenandoah_store_addr_check(objReg); // Access mark word
1568   movptr(tmpReg, Address(objReg, 0));
1569   movptr(tmpReg, Address(tmpReg, owner_offset));
1570   testptr(tmpReg, tmpReg);
1571   jcc(Assembler::zero, DONE_LABEL);
1572   if (UseRTMXendForLockBusy) {
1573     xend();
1574     jmp(L_decrement_retry);
1575   }
1576   else {
1577     xabort(0);
1578   }
1579   bind(L_on_abort);
1580   Register abort_status_Reg = tmpReg; // status of abort is stored in RAX
1581   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1582     rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
1583   }
1584   if (RTMRetryCount > 0) {
1585     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1586     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1587   }


1694 // rax,: tmp -- KILLED
1695 // scr: tmp -- KILLED
1696 void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg,
1697                                Register scrReg, Register cx1Reg, Register cx2Reg,
1698                                BiasedLockingCounters* counters,
1699                                RTMLockingCounters* rtm_counters,
1700                                RTMLockingCounters* stack_rtm_counters,
1701                                Metadata* method_data,
1702                                bool use_rtm, bool profile_rtm) {
1703   // Ensure the register assignments are disjoint
1704   assert(tmpReg == rax, "");
1705 
1706   if (use_rtm) {
1707     assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg);
1708   } else {
1709     assert(cx1Reg == noreg, "");
1710     assert(cx2Reg == noreg, "");
1711     assert_different_registers(objReg, boxReg, tmpReg, scrReg);
1712   }
1713 
1714   shenandoah_store_addr_check(objReg); // Access mark word
1715 
1716   if (counters != NULL) {
1717     atomic_incl(ExternalAddress((address)counters->total_entry_count_addr()), scrReg);
1718   }
1719   if (EmitSync & 1) {
1720       // set box->dhw = markOopDesc::unused_mark()
1721       // Force all sync thru slow-path: slow_enter() and slow_exit()
1722       movptr (Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1723       cmpptr (rsp, (int32_t)NULL_WORD);
1724   } else {
1725     // Possible cases that we'll encounter in fast_lock
1726     // ------------------------------------------------
1727     // * Inflated
1728     //    -- unlocked
1729     //    -- Locked
1730     //       = by self
1731     //       = by other
1732     // * biased
1733     //    -- by Self
1734     //    -- by other
1735     // * neutral


1745     // it's stack-locked, biased or neutral
1746     // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
1747     // order to reduce the number of conditional branches in the most common cases.
1748     // Beware -- there's a subtle invariant that fetch of the markword
1749     // at [FETCH], below, will never observe a biased encoding (*101b).
1750     // If this invariant is not held we risk exclusion (safety) failure.
1751     if (UseBiasedLocking && !UseOptoBiasInlining) {
1752       biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
1753     }
1754 
1755 #if INCLUDE_RTM_OPT
1756     if (UseRTMForStackLocks && use_rtm) {
1757       rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
1758                         stack_rtm_counters, method_data, profile_rtm,
1759                         DONE_LABEL, IsInflated);
1760     }
1761 #endif // INCLUDE_RTM_OPT
1762 
1763     movptr(tmpReg, Address(objReg, 0));          // [FETCH]
1764     testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
1765     jccb_if_possible(Assembler::notZero, IsInflated);
1766 
1767     // Attempt stack-locking ...
1768     orptr (tmpReg, markOopDesc::unlocked_value);
1769     movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
1770     if (os::is_MP()) {
1771       lock();
1772     }
1773     cmpxchgptr(boxReg, Address(objReg, 0));      // Updates tmpReg
1774     if (counters != NULL) {
1775       cond_inc32(Assembler::equal,
1776                  ExternalAddress((address)counters->fast_path_entry_count_addr()));
1777     }
1778     jcc(Assembler::equal, DONE_LABEL);           // Success
1779 
1780     // Recursive locking.
1781     // The object is stack-locked: markword contains stack pointer to BasicLock.
1782     // Locked by current thread if difference with current SP is less than one page.
1783     subptr(tmpReg, rsp);
1784     // Next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
1785     andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );


1822     } else
1823     if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
1824        // register juggle because we need tmpReg for cmpxchgptr below
1825        movptr(scrReg, boxReg);
1826        movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
1827 
1828        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1829        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1830           // prefetchw [eax + Offset(_owner)-2]
1831           prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1832        }
1833 
1834        if ((EmitSync & 64) == 0) {
1835          // Optimistic form: consider XORL tmpReg,tmpReg
1836          movptr(tmpReg, NULL_WORD);
1837        } else {
1838          // Can suffer RTS->RTO upgrades on shared or cold $ lines
1839          // Test-And-CAS instead of CAS
1840          movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));   // rax, = m->_owner
1841          testptr(tmpReg, tmpReg);                   // Locked ?
1842          jccb_if_possible(Assembler::notZero, DONE_LABEL);
1843        }
1844 
1845        // Appears unlocked - try to swing _owner from null to non-null.
1846        // Ideally, I'd manifest "Self" with get_thread and then attempt
1847        // to CAS the register containing Self into m->Owner.
1848        // But we don't have enough registers, so instead we can either try to CAS
1849        // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
1850        // we later store "Self" into m->Owner.  Transiently storing a stack address
1851        // (rsp or the address of the box) into  m->owner is harmless.
1852        // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1853        if (os::is_MP()) {
1854          lock();
1855        }
1856        cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1857        movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
1858        // If we weren't able to swing _owner from NULL to the BasicLock
1859        // then take the slow path.
1860        jccb_if_possible(Assembler::notZero, DONE_LABEL);
1861        // update _owner from BasicLock to thread
1862        get_thread (scrReg);                    // beware: clobbers ICCs
1863        movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1864        xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
1865 
1866        // If the CAS fails we can either retry or pass control to the slow-path.
1867        // We use the latter tactic.
1868        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1869        // If the CAS was successful ...
1870        //   Self has acquired the lock
1871        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1872        // Intentional fall-through into DONE_LABEL ...
1873     } else {
1874        movptr(Address(boxReg, 0), intptr_t(markOopDesc::unused_mark()));  // results in ST-before-CAS penalty
1875        movptr(boxReg, tmpReg);
1876 
1877        // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
1878        if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
1879           // prefetchw [eax + Offset(_owner)-2]
1880           prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1881        }
1882 
1883        if ((EmitSync & 64) == 0) {
1884          // Optimistic form
1885          xorptr  (tmpReg, tmpReg);
1886        } else {
1887          // Can suffer RTS->RTO upgrades on shared or cold $ lines
1888          movptr(tmpReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));   // rax, = m->_owner
1889          testptr(tmpReg, tmpReg);                   // Locked ?
1890          jccb_if_possible(Assembler::notZero, DONE_LABEL);
1891        }
1892 
1893        // Appears unlocked - try to swing _owner from null to non-null.
1894        // Use either "Self" (in scr) or rsp as thread identity in _owner.
1895        // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1896        get_thread (scrReg);
1897        if (os::is_MP()) {
1898          lock();
1899        }
1900        cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1901 
1902        // If the CAS fails we can either retry or pass control to the slow-path.
1903        // We use the latter tactic.
1904        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1905        // If the CAS was successful ...
1906        //   Self has acquired the lock
1907        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1908        // Intentional fall-through into DONE_LABEL ...
1909     }
1910 #else // _LP64


1958 //
1959 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
1960 // B() doesn't have provably balanced locking so it runs in the interpreter.
1961 // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
1962 // is still locked by A().
1963 //
1964 // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
1965 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
1966 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
1967 // doesn't say what will occur if a program engages in such mixed-mode locking, however.
1968 // Arguably, given that the spec legislates the JNI case as undefined, our implementation
1969 // could reasonably *avoid* checking the owner in Fast_Unlock().
1970 // In the interest of performance we elide the m->Owner==Self check in unlock.
1971 // A perfectly viable alternative is to elide the owner check except when
1972 // Xcheck:jni is enabled.
1973 
1974 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
1975   assert(boxReg == rax, "");
1976   assert_different_registers(objReg, boxReg, tmpReg);
1977 
1978   shenandoah_store_addr_check(objReg); // Access mark word
1979 
1980   if (EmitSync & 4) {
1981     // Disable - inhibit all inlining.  Force control through the slow-path
1982     cmpptr (rsp, 0);
1983   } else {
1984     Label DONE_LABEL, Stacked, CheckSucc;
1985 
1986     // Critically, the biased locking test must have precedence over
1987     // and appear before the (box->dhw == 0) recursive stack-lock test.
1988     if (UseBiasedLocking && !UseOptoBiasInlining) {
1989        biased_locking_exit(objReg, tmpReg, DONE_LABEL);
1990     }
1991 
1992 #if INCLUDE_RTM_OPT
1993     if (UseRTMForStackLocks && use_rtm) {
1994       assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
1995       Label L_regular_unlock;
1996       movptr(tmpReg, Address(objReg, 0));           // fetch markword
1997       andptr(tmpReg, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
1998       cmpptr(tmpReg, markOopDesc::unlocked_value);            // bits = 001 unlocked
1999       jccb(Assembler::notEqual, L_regular_unlock);  // if !HLE RegularLock


2001       jmp(DONE_LABEL);                              // ... and we're done
2002       bind(L_regular_unlock);
2003     }
2004 #endif
2005 
2006     cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
2007     jcc   (Assembler::zero, DONE_LABEL);            // 0 indicates recursive stack-lock
2008     movptr(tmpReg, Address(objReg, 0));             // Examine the object's markword
2009     testptr(tmpReg, markOopDesc::monitor_value);    // Inflated?
2010     jccb  (Assembler::zero, Stacked);
2011 
2012     // It's inflated.
2013 #if INCLUDE_RTM_OPT
2014     if (use_rtm) {
2015       Label L_regular_inflated_unlock;
2016       int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
2017       movptr(boxReg, Address(tmpReg, owner_offset));
2018       testptr(boxReg, boxReg);
2019       jccb(Assembler::notZero, L_regular_inflated_unlock);
2020       xend();
2021       jmpb_if_possible(DONE_LABEL);
2022       bind(L_regular_inflated_unlock);
2023     }
2024 #endif
2025 
2026     // Despite our balanced locking property we still check that m->_owner == Self
2027     // as java routines or native JNI code called by this thread might
2028     // have released the lock.
2029     // Refer to the comments in synchronizer.cpp for how we might encode extra
2030     // state in _succ so we can avoid fetching EntryList|cxq.
2031     //
2032     // I'd like to add more cases in fast_lock() and fast_unlock() --
2033     // such as recursive enter and exit -- but we have to be wary of
2034     // I$ bloat, T$ effects and BP$ effects.
2035     //
2036     // If there's no contention try a 1-0 exit.  That is, exit without
2037     // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
2038     // we detect and recover from the race that the 1-0 exit admits.
2039     //
2040     // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
2041     // before it STs null into _owner, releasing the lock.  Updates


2045     // IA32's memory-model is SPO, so STs are ordered with respect to
2046     // each other and there's no need for an explicit barrier (fence).
2047     // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
2048 #ifndef _LP64
2049     get_thread (boxReg);
2050     if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
2051       // prefetchw [ebx + Offset(_owner)-2]
2052       prefetchw(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2053     }
2054 
2055     // Note that we could employ various encoding schemes to reduce
2056     // the number of loads below (currently 4) to just 2 or 3.
2057     // Refer to the comments in synchronizer.cpp.
2058     // In practice the chain of fetches doesn't seem to impact performance, however.
2059     xorptr(boxReg, boxReg);
2060     if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
2061        // Attempt to reduce branch density - AMD's branch predictor.
2062        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2063        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2064        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2065        jccb_if_possible(Assembler::notZero, DONE_LABEL);
2066        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
2067        jmpb_if_possible(DONE_LABEL);
2068     } else {
2069        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2070        jccb_if_possible(Assembler::notZero, DONE_LABEL);
2071        movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2072        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2073        jccb  (Assembler::notZero, CheckSucc);
2074        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
2075        jmpb_if_possible(DONE_LABEL);
2076     }
2077 
2078     // The following code fragment (EmitSync & 65536) improves the performance of
2079     // contended applications and contended synchronization microbenchmarks.
2080     // Unfortunately the emission of the code - even though not executed - causes regressions
2081     // in scimark and jetstream, evidently because of $ effects.  Replacing the code
2082     // with an equal number of never-executed NOPs results in the same regression.
2083     // We leave it off by default.
2084 
2085     if ((EmitSync & 65536) != 0) {
2086        Label LSuccess, LGoSlowPath ;
2087 
2088        bind  (CheckSucc);
2089 
2090        // Optional pre-test ... it's safe to elide this
2091        cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2092        jccb(Assembler::zero, LGoSlowPath);
2093 
2094        // We have a classic Dekker-style idiom:
2095        //    ST m->_owner = 0 ; MEMBAR; LD m->_succ


2125        // Ratify _succ remains non-null
2126        cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);
2127        jccb  (Assembler::notZero, LSuccess);
2128 
2129        xorptr(boxReg, boxReg);                  // box is really EAX
2130        if (os::is_MP()) { lock(); }
2131        cmpxchgptr(rsp, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2132        // There's no successor so we tried to regrab the lock with the
2133        // placeholder value. If that didn't work, then another thread
2134        // grabbed the lock so we're done (and exit was a success).
2135        jccb  (Assembler::notEqual, LSuccess);
2136        // Since we're low on registers we installed rsp as a placeholder in _owner.
2137        // Now install Self over rsp.  This is safe as we're transitioning from
2138        // non-null to non-null
2139        get_thread (boxReg);
2140        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg);
2141        // Intentional fall-through into LGoSlowPath ...
2142 
2143        bind  (LGoSlowPath);
2144        orptr(boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2145        jmpb_if_possible(DONE_LABEL);
2146 
2147        bind  (LSuccess);
2148        xorptr(boxReg, boxReg);                 // set ICC.ZF=1 to indicate success
2149        jmpb_if_possible(DONE_LABEL);
2150     }
2151 
2152     bind (Stacked);
2153     // It's not inflated and it's not recursively stack-locked and it's not biased.
2154     // It must be stack-locked.
2155     // Try to reset the header to displaced header.
2156     // The "box" value on the stack is stable, so we can reload
2157     // and be assured we observe the same value as above.
2158     movptr(tmpReg, Address(boxReg, 0));
2159     if (os::is_MP()) {
2160       lock();
2161     }
2162     cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2163     // Intentional fall-through into DONE_LABEL
2164 
2165     // DONE_LABEL is a hot target - we'd really like to place it at the
2166     // start of a cache line by padding with NOPs.
2167     // See the AMD and Intel software optimization manuals for the
2168     // most efficient "long" NOP encodings.
2169     // Unfortunately none of our alignment mechanisms suffice.
2170     if ((EmitSync & 65536) == 0) {
2171        bind (CheckSucc);
2172     }
2173 #else // _LP64
2174     // It's inflated
2175     if (EmitSync & 1024) {
2176       // Emit code to check that _owner == Self
2177       // We could fold the _owner test into subsequent code more efficiently
2178       // than using a stand-alone check, but since _owner checking is off by
2179       // default we don't bother. We also might consider predicating the
2180       // _owner==Self check on Xcheck:jni or running on a debug build.
2181       movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2182       xorptr(boxReg, r15_thread);
2183     } else {
2184       xorptr(boxReg, boxReg);
2185     }
2186     orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2187     jccb_if_possible(Assembler::notZero, DONE_LABEL);
2188     movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2189     orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2190     jccb  (Assembler::notZero, CheckSucc);
2191     movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2192     jmpb_if_possible(DONE_LABEL);
2193 
2194     if ((EmitSync & 65536) == 0) {
2195       // Try to avoid passing control into the slow_path ...
2196       Label LSuccess, LGoSlowPath ;
2197       bind  (CheckSucc);
2198 
2199       // The following optional optimization can be elided if necessary
2200       // Effectively: if (succ == null) goto SlowPath
2201       // The code reduces the window for a race, however,
2202       // and thus benefits performance.
2203       cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2204       jccb  (Assembler::zero, LGoSlowPath);
2205 
2206       xorptr(boxReg, boxReg);
2207       if ((EmitSync & 16) && os::is_MP()) {
2208         xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2209       } else {
2210         movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2211         if (os::is_MP()) {
2212           // Memory barrier/fence


2229       // Try to reacquire the lock.
2230       // If that fails then the new owner is responsible for succession and this
2231       // thread needs to take no further action and can exit via the fast path (success).
2232       // If the re-acquire succeeds then pass control into the slow path.
2233       // As implemented, this latter mode is horrible because we generate more
2234       // coherence traffic on the lock *and* artificially extend the critical section
2235       // length by virtue of passing control into the slow path.
2236 
2237       // box is really RAX -- the following CMPXCHG depends on that binding
2238       // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
2239       if (os::is_MP()) { lock(); }
2240       cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2241       // There's no successor so we tried to regrab the lock.
2242       // If that didn't work, then another thread grabbed the
2243       // lock so we're done (and exit was a success).
2244       jccb  (Assembler::notEqual, LSuccess);
2245       // Intentional fall-through into slow-path
2246 
2247       bind  (LGoSlowPath);
2248       orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2249       jmpb_if_possible(DONE_LABEL);
2250 
2251       bind  (LSuccess);
2252       testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
2253       jmpb_if_possible  (DONE_LABEL);
2254     }
2255 
2256     bind  (Stacked);
2257     movptr(tmpReg, Address (boxReg, 0));      // re-fetch
2258     if (os::is_MP()) { lock(); }
2259     cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box
2260 
2261     if (EmitSync & 65536) {
2262        bind (CheckSucc);
2263     }
2264 #endif
2265     bind(DONE_LABEL);
2266   }
2267 }
2268 #endif // COMPILER2
2269 
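As a companion to the block comments inside fast_unlock(), here is a hedged C++ sketch of the 1-0 exit protocol the code above emits on both the LP64 and non-LP64 paths: release _owner with a plain store, fence, re-read _succ, and only re-grab the lock (and go slow) when no successor is visible. It reuses the invented ToyMonitor from the earlier sketch; the seq_cst fence stands in for the locked instruction used as the MEMBAR in the Dekker idiom.

// Returns true when the exit completed on the fast path; false means the
// caller must take the slow path and wake a waiter.
inline bool toy_fast_exit(ToyMonitor* m, void* self) {
  if (m->recursions != 0) return false;                     // recursion goes to the slow path
  if (m->cxq.load() == nullptr && m->entry_list.load() == nullptr) {
    m->owner.store(nullptr, std::memory_order_release);     // nobody queued: plain release
    return true;
  }
  if (m->succ.load() == nullptr) return false;              // no heir presumptive: go slow
  // 1-0 exit with the Dekker idiom: ST _owner = NULL; MEMBAR; LD _succ.
  m->owner.store(nullptr, std::memory_order_release);
  std::atomic_thread_fence(std::memory_order_seq_cst);
  if (m->succ.load() != nullptr) return true;               // the successor will re-drive progress
  // Successor vanished: try to re-grab the lock.  Losing the CAS means some
  // other thread now owns the monitor, so the exit still counts as a success.
  void* expected = nullptr;
  if (!m->owner.compare_exchange_strong(expected, self)) return true;
  return false;                                             // re-owned: slow path must pick a successor
}
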
2270 void MacroAssembler::c2bool(Register x) {
2271   // implements x == 0 ? 0 : 1
2272   // note: must only look at least-significant byte of x
2273   //       since C-style booleans are stored in one byte
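
For illustration only, the documented contract of c2bool() can be restated in one line of C++ (toy_c2bool is an invented name, not the code the assembler emits):

// C-level restatement of the contract: only the low byte of x is significant.
static inline int toy_c2bool(int x) { return (x & 0xFF) != 0 ? 1 : 0; }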


6071     b = code_string(ss.as_string());
6072   }
6073   BLOCK_COMMENT("verify_oop {");
6074 #ifdef _LP64
6075   push(rscratch1);                    // save r10, trashed by movptr()
6076 #endif
6077   push(rax);                          // save rax,
6078   push(reg);                          // pass register argument
6079   ExternalAddress buffer((address) b);
6080   // avoid using pushptr, as it modifies scratch registers
6081   // and our contract is not to modify anything
6082   movptr(rax, buffer.addr());
6083   push(rax);
6084   // call indirectly to solve generation ordering problem
6085   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
6086   call(rax);
6087   // Caller pops the arguments (oop, message) and restores rax, r10
6088   BLOCK_COMMENT("} verify_oop");
6089 }
6090 
6091 void MacroAssembler::in_heap_check(Register raddr, Label& done) {
6092   ShenandoahHeap *h = (ShenandoahHeap *)Universe::heap();
6093 
6094   HeapWord* first_region_bottom = h->first_region_bottom();
6095   HeapWord* last_region_end = first_region_bottom + (ShenandoahHeapRegion::RegionSizeBytes / HeapWordSize) * h->max_regions();
6096 
6097   cmpptr(raddr, (intptr_t) first_region_bottom);
6098   jcc(Assembler::less, done);
6099   cmpptr(raddr, (intptr_t) last_region_end);
6100   jcc(Assembler::greaterEqual, done);
6101 
6102 }
6103 
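With the comparison fixed to use last_region_end, the range test in_heap_check() emits corresponds to the predicate below. This is a hedged sketch with invented names (toy_in_heap), not Shenandoah's API; the emitted code branches to done, skipping the failure report, exactly when the predicate is false.

#include <cstdint>

// Invented helper mirroring in_heap_check(): an address is treated as in-heap
// when it lies in [first_region_bottom, last_region_end).
inline bool toy_in_heap(uintptr_t addr,
                        uintptr_t first_region_bottom,
                        uintptr_t last_region_end) {
  return addr >= first_region_bottom && addr < last_region_end;
}
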
6104 void MacroAssembler::shenandoah_cset_check(Register raddr, Register tmp1, Register tmp2, Label& done) {
6105   // Test that oop is not in to-space.
6106   movptr(tmp1, raddr);
6107   shrptr(tmp1, ShenandoahHeapRegion::RegionSizeShift);
6108   movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
6109   movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
6110   testbool(tmp2);
6111   jcc(Assembler::zero, done);
6112 
6113   // Check for cancelled GC.
6114   movptr(tmp2, (intptr_t) ShenandoahHeap::cancelled_concgc_addr());
6115   movbool(tmp2, Address(tmp2, 0));
6116   testbool(tmp2);
6117   jcc(Assembler::notZero, done);
6118 
6119 }
6120 
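The collection-set test above boils down to one byte-map lookup indexed by the region number of the address, plus a separate branch to done when the concurrent GC has been cancelled. A hedged sketch of just the lookup, with invented names (toy_in_cset, in_cset_map):

// Invented stand-in for the table at ShenandoahHeap::in_cset_fast_test_addr():
// one byte per region, indexed by (address >> region_size_shift).
inline bool toy_in_cset(uintptr_t addr, const uint8_t* in_cset_map,
                        int region_size_shift) {
  return in_cset_map[addr >> region_size_shift] != 0;
}
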
6121 void MacroAssembler::_shenandoah_store_addr_check(Address addr, const char* msg, const char* file, int line) {
6122   _shenandoah_store_addr_check(addr.base(), msg, file, line);
6123 }
6124 
6125 void MacroAssembler::_shenandoah_store_addr_check(Register dst, const char* msg, const char* file, int line) {
6126   if (! UseShenandoahGC && ! ShenandoahStoreCheck) return;
6127   if (dst == rsp) return; // Stack-based target
6128 
6129   Register raddr = r9;
6130   Register tmp1 = r10;
6131   Register tmp2 = r11;
6132 
6133   Label done;
6134 
6135   pushf();
6136   push(raddr);
6137   push(tmp1);
6138   push(tmp2);
6139 
6140   movptr(raddr, dst);
6141 
6142   // Check null.
6143   testptr(raddr, raddr);
6144   jcc(Assembler::zero, done);
6145 
6146   in_heap_check(raddr, done);
6147   shenandoah_cset_check(raddr, tmp1, tmp2, done);
6148 
6149   // Fail.
6150   pop(tmp2);
6151   pop(tmp1);
6152   pop(raddr);
6153   popf();
6154   const char* b = NULL;
6155   {
6156     ResourceMark rm;
6157     stringStream ss;
6158     ss.print("shenandoah_store_check: %s in file: %s line: %i", msg, file, line);
6159     b = code_string(ss.as_string());
6160   }
6161   stop(b);
6162 
6163   bind(done);
6164 
6165   pop(tmp2);
6166   pop(tmp1);
6167   pop(raddr);
6168   popf();
6169 }
6170 
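Combining the helpers sketched above, the condition _shenandoah_store_addr_check() verifies can be summarized as follows. Again a hedged sketch with invented toy_* names; the emitted code calls stop() exactly when this predicate is false.

// The target of a store passes the check when it is NULL, outside the heap,
// or not in the collection set, or when the concurrent GC has been cancelled.
inline bool toy_store_addr_ok(uintptr_t dst,
                              uintptr_t first_region_bottom, uintptr_t last_region_end,
                              const uint8_t* in_cset_map, int region_size_shift,
                              bool gc_cancelled) {
  if (dst == 0) return true;                                               // null targets are skipped
  if (!toy_in_heap(dst, first_region_bottom, last_region_end)) return true;
  if (!toy_in_cset(dst, in_cset_map, region_size_shift)) return true;
  return gc_cancelled;                                                     // to-space target tolerated only if GC was cancelled
}
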
6171 void MacroAssembler::_shenandoah_store_check(Register dst, Register value, const char* msg, const char* file, int line) {
6172   if (! UseShenandoahGC && ! ShenandoahStoreCheck) return;
6173   if (dst == rsp) return; // Stack-based target
6174 
6175   Register raddr = r8;
6176   Register rval =  r9;
6177   Register tmp1 = r10;
6178   Register tmp2 = r11;
6179 
6180   // Push tmp regs and flags.
6181   pushf();
6182   push(raddr);
6183   push(rval);
6184   push(tmp1);
6185   push(tmp2);
6186 
6187   movptr(raddr, dst);
6188   movptr(rval, value);
6189 
6190   Label done;
6191 
6192   // If not in-heap target, skip check.
6193   in_heap_check(raddr, done);
6194 
6195   // Test that target oop is not in to-space.
6196   shenandoah_cset_check(raddr, tmp1, tmp2, done);
6197 
6198   // Do value-check only when concurrent mark is in progress.
6199   movptr(tmp1, (intptr_t) ShenandoahHeap::concurrent_mark_in_progress_addr());
6200   movbool(tmp1, Address(tmp1, 0));
6201   testbool(tmp1);
6202   jcc(Assembler::zero, done);
6203 
6204   // Null-check value.
6205   testptr(rval, rval);
6206   jcc(Assembler::zero, done);
6207 
6208   // Test that value oop is not in to-space.
6209   shenandoah_cset_check(rval, tmp1, tmp2, done);
6210 
6211   // Failure.
6212   // Pop tmp regs and flags.
6213   pop(tmp2);
6214   pop(tmp1);
6215   pop(rval);
6216   pop(raddr);
6217   popf();
6218   const char* b = NULL;
6219   {
6220     ResourceMark rm;
6221     stringStream ss;
6222     ss.print("shenandoah_store_check: %s in file: %s line: %i", msg, file, line);
6223     b = code_string(ss.as_string());
6224   }
6225   stop(b);
6226 
6227   bind(done);
6228 
6229   // Pop tmp regs and flags.
6230   pop(tmp2);
6231   pop(tmp1);
6232   pop(rval);
6233   pop(raddr);
6234   popf();
6235 }
6236 
6237 void MacroAssembler::_shenandoah_store_check(Address addr, Register value, const char* msg, const char* file, int line) {
6238   _shenandoah_store_check(addr.base(), value, msg, file, line);
6239 }
6240 
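The value-carrying variant layers one more condition on top of the address check: while concurrent marking is in progress, a non-null stored value must not point into the collection set either. A hedged sketch of just that extra condition, reusing the invented toy_in_cset helper:

inline bool toy_store_value_ok(uintptr_t value, bool concurrent_mark_in_progress,
                               const uint8_t* in_cset_map, int region_size_shift) {
  if (!concurrent_mark_in_progress) return true;   // the value check only applies during concurrent mark
  if (value == 0) return true;                     // null values are skipped
  return !toy_in_cset(value, in_cset_map, region_size_shift);
}
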
6241 void MacroAssembler::_shenandoah_lock_check(Register dst, const char* msg, const char* file, int line) {
6242 #ifdef ASSERT
6243   if (! UseShenandoahGC && ! ShenandoahStoreCheck) return;
6244 
6245   push(r8);
6246   movptr(r8, Address(dst, BasicObjectLock::obj_offset_in_bytes()));
6247   _shenandoah_store_addr_check(r8, msg, file, line);
6248   pop(r8);
6249 #endif
6250 }
6251 
6252 RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
6253                                                       Register tmp,
6254                                                       int offset) {
6255   intptr_t value = *delayed_value_addr;
6256   if (value != 0)
6257     return RegisterOrConstant(value + offset);
6258 
6259   // load indirectly to solve generation ordering problem
6260   movptr(tmp, ExternalAddress((address) delayed_value_addr));
6261 
6262 #ifdef ASSERT
6263   { Label L;
6264     testptr(tmp, tmp);
6265     if (WizardMode) {
6266       const char* buf = NULL;
6267       {
6268         ResourceMark rm;
6269         stringStream ss;
6270         ss.print("DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]);

