src/hotspot/cpu/x86/macroAssembler_x86.cpp

1013     Assembler::andpd(dst, Address(rscratch1, 0));
1014   }
1015 }
1016 
1017 void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
1018   // Used in sign-masking with aligned address.
1019   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
1020   if (reachable(src)) {
1021     Assembler::andps(dst, as_Address(src));
1022   } else {
1023     lea(rscratch1, src);
1024     Assembler::andps(dst, Address(rscratch1, 0));
1025   }
1026 }
1027 
1028 void MacroAssembler::andptr(Register dst, int32_t imm32) {
1029   LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
1030 }
1031 
1032 void MacroAssembler::atomic_incl(Address counter_addr) {
1033   if (os::is_MP())
1034     lock();
1035   incrementl(counter_addr);
1036 }
1037 
1038 void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) {
1039   if (reachable(counter_addr)) {
1040     atomic_incl(as_Address(counter_addr));
1041   } else {
1042     lea(scr, counter_addr);
1043     atomic_incl(Address(scr, 0));
1044   }
1045 }
1046 
1047 #ifdef _LP64
1048 void MacroAssembler::atomic_incq(Address counter_addr) {
1049   if (os::is_MP())
1050     lock();
1051   incrementq(counter_addr);
1052 }
1053 
1054 void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) {
1055   if (reachable(counter_addr)) {
1056     atomic_incq(as_Address(counter_addr));
1057   } else {
1058     lea(scr, counter_addr);
1059     atomic_incq(Address(scr, 0));
1060   }
1061 }
1062 #endif
1063 
1064 // Writes to successive stack pages until the given offset is reached, checking
1065 // for stack overflow + shadow pages.  This clobbers tmp.
1066 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
1067   movptr(tmp, rsp);
1068   // Bang stack for total size given plus shadow page size.
1069   // Bang one page at a time because large size can bang beyond yellow and


1196   // illegal.
1197   testptr(header_reg, markOopDesc::epoch_mask_in_place);
1198   jccb(Assembler::notZero, try_rebias);
1199 
1200   // The epoch of the current bias is still valid but we know nothing
1201   // about the owner; it might be set or it might be clear. Try to
1202   // acquire the bias of the object using an atomic operation. If this
1203   // fails we will go in to the runtime to revoke the object's bias.
1204   // Note that we first construct the presumed unbiased header so we
1205   // don't accidentally blow away another thread's valid bias.
1206   NOT_LP64( movptr(swap_reg, saved_mark_addr); )
1207   andptr(swap_reg,
1208          markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
1209 #ifdef _LP64
1210   movptr(tmp_reg, swap_reg);
1211   orptr(tmp_reg, r15_thread);
1212 #else
1213   get_thread(tmp_reg);
1214   orptr(tmp_reg, swap_reg);
1215 #endif
1216   if (os::is_MP()) {
1217     lock();
1218   }
1219   cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
1220   // If the biasing toward our thread failed, this means that
1221   // another thread succeeded in biasing it toward itself and we
1222   // need to revoke that bias. The revocation will occur in the
1223   // interpreter runtime in the slow case.
1224   if (counters != NULL) {
1225     cond_inc32(Assembler::zero,
1226                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
1227   }
1228   if (slow_case != NULL) {
1229     jcc(Assembler::notZero, *slow_case);
1230   }
1231   jmp(done);
1232 
1233   bind(try_rebias);
1234   // At this point we know the epoch has expired, meaning that the
1235   // current "bias owner", if any, is actually invalid. Under these
1236   // circumstances _only_, we are allowed to use the current header's
1237   // value as the comparison value when doing the cas to acquire the
1238   // bias in the current epoch. In other words, we allow transfer of
1239   // the bias from one thread to another directly in this situation.
1240   //
1241   // FIXME: due to a lack of registers we currently blow away the age
1242   // bits in this situation. Should attempt to preserve them.
1243   load_prototype_header(tmp_reg, obj_reg);
1244 #ifdef _LP64
1245   orptr(tmp_reg, r15_thread);
1246 #else
1247   get_thread(swap_reg);
1248   orptr(tmp_reg, swap_reg);
1249   movptr(swap_reg, saved_mark_addr);
1250 #endif
1251   if (os::is_MP()) {
1252     lock();
1253   }
1254   cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
1255   // If the biasing toward our thread failed, then another thread
1256   // succeeded in biasing it toward itself and we need to revoke that
1257   // bias. The revocation will occur in the runtime in the slow case.
1258   if (counters != NULL) {
1259     cond_inc32(Assembler::zero,
1260                ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
1261   }
1262   if (slow_case != NULL) {
1263     jcc(Assembler::notZero, *slow_case);
1264   }
1265   jmp(done);
1266 
1267   bind(try_revoke_bias);
1268   // The prototype mark in the klass doesn't have the bias bit set any
1269   // more, indicating that objects of this data type are not supposed
1270   // to be biased any more. We are going to try to reset the mark of
1271   // this object to the prototype value and fall through to the
1272   // CAS-based locking scheme. Note that if our CAS fails, it means
1273   // that another thread raced us for the privilege of revoking the
1274   // bias of this particular object, so it's okay to continue in the
1275   // normal locking code.
1276   //
1277   // FIXME: due to a lack of registers we currently blow away the age
1278   // bits in this situation. Should attempt to preserve them.
1279   NOT_LP64( movptr(swap_reg, saved_mark_addr); )
1280   load_prototype_header(tmp_reg, obj_reg);
1281   if (os::is_MP()) {
1282     lock();
1283   }
1284   cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
1285   // Fall through to the normal CAS-based lock, because no matter what
1286   // the result of the above CAS, some thread must have succeeded in
1287   // removing the bias bit from the object's header.
1288   if (counters != NULL) {
1289     cond_inc32(Assembler::zero,
1290                ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
1291   }
1292 
1293   bind(cas_label);
1294 
1295   return null_check_offset;
1296 }
1297 
1298 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1299   assert(UseBiasedLocking, "why call this otherwise?");
1300 
1301   // Check for biased locking unlock case, which is a no-op
1302   // Note: we do not have to check the thread ID for two reasons.
1303   // First, the interpreter checks for IllegalMonitorStateException at


1359   }
1360   // Abort ratio calculation only if abort_count > RTMAbortThreshold
1361   //   Aborted transactions = abort_count * 100
1362   //   All transactions = total_count *  RTMTotalCountIncrRate
1363   //   Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
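       // Worked example (assuming the default flag values RTMAbortThreshold=1000,
       //   RTMTotalCountIncrRate=64, RTMAbortRatio=50): abort_count=2000, total_count=50
       //   gives 2000*100 = 200000 >= 50*64*50 = 160000, so the no_rtm bit would be set.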
1364 
1365   movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset()));
1366   cmpptr(tmpReg, RTMAbortThreshold);
1367   jccb(Assembler::below, L_check_always_rtm2);
1368   imulptr(tmpReg, tmpReg, 100);
1369 
1370   Register scrReg = rtm_counters_Reg;
1371   movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
1372   imulptr(scrReg, scrReg, RTMTotalCountIncrRate);
1373   imulptr(scrReg, scrReg, RTMAbortRatio);
1374   cmpptr(tmpReg, scrReg);
1375   jccb(Assembler::below, L_check_always_rtm1);
1376   if (method_data != NULL) {
1377     // set rtm_state to "no rtm" in MDO
1378     mov_metadata(tmpReg, method_data);
1379     if (os::is_MP()) {
1380       lock();
1381     }
1382     orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM);
1383   }
1384   jmpb(L_done);
1385   bind(L_check_always_rtm1);
1386   // Reload RTMLockingCounters* address
1387   lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
1388   bind(L_check_always_rtm2);
1389   movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
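       // With the assumed defaults (RTMLockingThreshold=10000, RTMTotalCountIncrRate=64) the
       // cut-off below is 10000/64 = 156 sampled transactions before rtm_state is set to UseRTM.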
1390   cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
1391   jccb(Assembler::below, L_done);
1392   if (method_data != NULL) {
1393     // set rtm_state to "always rtm" in MDO
1394     mov_metadata(tmpReg, method_data);
1395     if (os::is_MP()) {
1396       lock();
1397     }
1398     orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM);
1399   }
1400   bind(L_done);
1401 }
1402 
1403 // Update counters and perform abort ratio calculation
1404 // input:  abort_status_Reg
1405 // rtm_counters_Reg, flags are killed
1406 void MacroAssembler::rtm_profiling(Register abort_status_Reg,
1407                                    Register rtm_counters_Reg,
1408                                    RTMLockingCounters* rtm_counters,
1409                                    Metadata* method_data,
1410                                    bool profile_rtm) {
1411 
1412   assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1413   // update rtm counters based on rax value at abort
1414   // reads abort_status_Reg, updates flags
1415   lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
1416   rtm_counters_update(abort_status_Reg, rtm_counters_Reg);
1417   if (profile_rtm) {


1588   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1589     rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
1590   }
1591   if (RTMRetryCount > 0) {
1592     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1593     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1594   }
1595 
1596   movptr(tmpReg, Address(boxReg, owner_offset)) ;
1597   testptr(tmpReg, tmpReg) ;
1598   jccb(Assembler::notZero, L_decrement_retry) ;
1599 
1600   // Appears unlocked - try to swing _owner from null to non-null.
1601   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1602 #ifdef _LP64
1603   Register threadReg = r15_thread;
1604 #else
1605   get_thread(scrReg);
1606   Register threadReg = scrReg;
1607 #endif
1608   if (os::is_MP()) {
1609     lock();
1610   }
1611   cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
1612 
1613   if (RTMRetryCount > 0) {
1614     // if the CAS succeeded we are done, otherwise retry
1615     jccb(Assembler::equal, DONE_LABEL) ;
1616     bind(L_decrement_retry);
1617     // Spin and retry if lock is busy.
1618     rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
1619   }
1620   else {
1621     bind(L_decrement_retry);
1622   }
1623 }
1624 
1625 #endif //  INCLUDE_RTM_OPT
1626 
1627 // Fast_Lock and Fast_Unlock used by C2
1628 
1629 // Because the transitions from emitted code to the runtime
1630 // monitorenter/exit helper stubs are so slow it's critical that


1750   // If this invariant is not held we risk exclusion (safety) failure.
1751   if (UseBiasedLocking && !UseOptoBiasInlining) {
1752     biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
1753   }
1754 
1755 #if INCLUDE_RTM_OPT
1756   if (UseRTMForStackLocks && use_rtm) {
1757     rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
1758                       stack_rtm_counters, method_data, profile_rtm,
1759                       DONE_LABEL, IsInflated);
1760   }
1761 #endif // INCLUDE_RTM_OPT
1762 
1763   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
1764   testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
1765   jccb(Assembler::notZero, IsInflated);
1766 
1767   // Attempt stack-locking ...
1768   orptr (tmpReg, markOopDesc::unlocked_value);
1769   movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
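       // The displaced (unlocked) mark is stored into the on-stack BasicLock up front, so if
       // the CAS below installs boxReg into the object's header, the box already holds the
       // correct displaced header word.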
1770   if (os::is_MP()) {
1771     lock();
1772   }
1773   cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
1774   if (counters != NULL) {
1775     cond_inc32(Assembler::equal,
1776                ExternalAddress((address)counters->fast_path_entry_count_addr()));
1777   }
1778   jcc(Assembler::equal, DONE_LABEL);           // Success
1779 
1780   // Recursive locking.
1781   // The object is stack-locked: markword contains stack pointer to BasicLock.
1782   // Locked by current thread if difference with current SP is less than one page.
1783   subptr(tmpReg, rsp);
1784   // The next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
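       // The mask keeps only the low alignment bits and the bits at or above the page size, so
       // the AND leaves zero exactly when the displaced header is a small, aligned offset above
       // the current SP, i.e. a recursive stack-lock by this thread.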
1785   andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
1786   movptr(Address(boxReg, 0), tmpReg);
1787   if (counters != NULL) {
1788     cond_inc32(Assembler::equal,
1789                ExternalAddress((address)counters->fast_path_entry_count_addr()));
1790   }
1791   jmp(DONE_LABEL);
1792 


1809   //   set box->_displaced_header = markOopDesc::unused_mark().  Any non-0 value suffices.
1810   // This is convenient but results in a ST-before-CAS penalty.  The following CAS suffers
1811   // additional latency as we have another ST in the store buffer that must drain.
1812 
1813   // avoid ST-before-CAS
1814   // register juggle because we need tmpReg for cmpxchgptr below
1815   movptr(scrReg, boxReg);
1816   movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
1817 
1818   // Optimistic form: consider XORL tmpReg,tmpReg
1819   movptr(tmpReg, NULL_WORD);
1820 
1821   // Appears unlocked - try to swing _owner from null to non-null.
1822   // Ideally, I'd manifest "Self" with get_thread and then attempt
1823   // to CAS the register containing Self into m->Owner.
1824   // But we don't have enough registers, so instead we can either try to CAS
1825   // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
1826   // we later store "Self" into m->Owner.  Transiently storing a stack address
1827   // (rsp or the address of the box) into  m->owner is harmless.
1828   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1829   if (os::is_MP()) {
1830     lock();
1831   }
1832   cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1833   movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
1834   // If we weren't able to swing _owner from NULL to the BasicLock
1835   // then take the slow path.
1836   jccb  (Assembler::notZero, DONE_LABEL);
1837   // update _owner from BasicLock to thread
1838   get_thread (scrReg);                    // beware: clobbers ICCs
1839   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1840   xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
1841 
1842   // If the CAS fails we can either retry or pass control to the slow-path.
1843   // We use the latter tactic.
1844   // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1845   // If the CAS was successful ...
1846   //   Self has acquired the lock
1847   //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1848   // Intentional fall-through into DONE_LABEL ...
1849 #else // _LP64
1850   // It's inflated
1851   movq(scrReg, tmpReg);
1852   xorq(tmpReg, tmpReg);
1853 
1854   if (os::is_MP()) {
1855     lock();
1856   }
1857   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1858   // Unconditionally set box->_displaced_header = markOopDesc::unused_mark().
1859   // Without cast to int32_t movptr will destroy r10 which is typically obj.
1860   movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1861   // Intentional fall-through into DONE_LABEL ...
1862   // Propagate ICC.ZF from CAS above into DONE_LABEL.
1863 #endif // _LP64
1864 #if INCLUDE_RTM_OPT
1865   } // use_rtm()
1866 #endif
1867   // DONE_LABEL is a hot target - we'd really like to place it at the
1868   // start of cache line by padding with NOPs.
1869   // See the AMD and Intel software optimization manuals for the
1870   // most efficient "long" NOP encodings.
1871   // Unfortunately none of our alignment mechanisms suffice.
1872   bind(DONE_LABEL);
1873 
1874   // At DONE_LABEL the icc ZFlag is set as follows ...
1875   // Fast_Unlock uses the same protocol.
1876   // ZFlag == 1 -> Success


1983   // Note that we could employ various encoding schemes to reduce
1984   // the number of loads below (currently 4) to just 2 or 3.
1985   // Refer to the comments in synchronizer.cpp.
1986   // In practice the chain of fetches doesn't seem to impact performance, however.
1987   xorptr(boxReg, boxReg);
1988   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
1989   jccb  (Assembler::notZero, DONE_LABEL);
1990   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
1991   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
1992   jccb  (Assembler::notZero, CheckSucc);
1993   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
1994   jmpb  (DONE_LABEL);
1995 
1996   bind (Stacked);
1997   // It's not inflated and it's not recursively stack-locked and it's not biased.
1998   // It must be stack-locked.
1999   // Try to reset the header to displaced header.
2000   // The "box" value on the stack is stable, so we can reload
2001   // and be assured we observe the same value as above.
2002   movptr(tmpReg, Address(boxReg, 0));
2003   if (os::is_MP()) {
2004     lock();
2005   }
2006   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
2007   // Intentional fall-through into DONE_LABEL
2008 
2009   // DONE_LABEL is a hot target - we'd really like to place it at the
2010   // start of cache line by padding with NOPs.
2011   // See the AMD and Intel software optimization manuals for the
2012   // most efficient "long" NOP encodings.
2013   // Unfortunately none of our alignment mechanisms suffice.
2014   bind (CheckSucc);
2015 #else // _LP64
2016   // It's inflated
2017   xorptr(boxReg, boxReg);
2018   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
2019   jccb  (Assembler::notZero, DONE_LABEL);
2020   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
2021   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2022   jccb  (Assembler::notZero, CheckSucc);
2023   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2024   jmpb  (DONE_LABEL);
2025 
2026   // Try to avoid passing control into the slow_path ...
2027   Label LSuccess, LGoSlowPath ;
2028   bind  (CheckSucc);
2029 
2030   // The following optional optimization can be elided if necessary
2031   // Effectively: if (succ == null) goto SlowPath
2032   // The code reduces the window for a race, however,
2033   // and thus benefits performance.
2034   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2035   jccb  (Assembler::zero, LGoSlowPath);
2036 
2037   xorptr(boxReg, boxReg);
2038   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2039   if (os::is_MP()) {
2040     // Memory barrier/fence
2041     // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
2042     // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
2043     // This is faster on Nehalem and AMD Shanghai/Barcelona.
2044     // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
2045     // We might also restructure (ST Owner=0;barrier;LD _Succ) to
2046     // (mov box,0; xchgq box, &m->Owner; LD _succ) .
2047     lock(); addl(Address(rsp, 0), 0);
2048   }
2049   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2050   jccb  (Assembler::notZero, LSuccess);
2051 
2052   // Rare inopportune interleaving - race.
2053   // The successor vanished in the small window above.
2054   // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
2055   // We need to ensure progress and succession.
2056   // Try to reacquire the lock.
2057   // If that fails then the new owner is responsible for succession and this
2058   // thread needs to take no further action and can exit via the fast path (success).
2059   // If the re-acquire succeeds then pass control into the slow path.
2060   // As implemented, this latter mode is horrible because we generate more
2061   // coherence traffic on the lock *and* artificially extend the critical section
2062   // length by virtue of passing control into the slow path.
2063 
2064   // box is really RAX -- the following CMPXCHG depends on that binding
2065   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
2066   if (os::is_MP()) { lock(); }
2067   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2068   // There's no successor so we tried to regrab the lock.
2069   // If that didn't work, then another thread grabbed the
2070   // lock so we're done (and exit was a success).
2071   jccb  (Assembler::notEqual, LSuccess);
2072   // Intentional fall-through into slow-path
2073 
2074   bind  (LGoSlowPath);
2075   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2076   jmpb  (DONE_LABEL);
2077 
2078   bind  (LSuccess);
2079   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
2080   jmpb  (DONE_LABEL);
2081 
2082   bind  (Stacked);
2083   movptr(tmpReg, Address (boxReg, 0));      // re-fetch
2084   if (os::is_MP()) { lock(); }
2085   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
2086 
2087 #endif
2088   bind(DONE_LABEL);
2089 }
2090 #endif // COMPILER2
2091 
2092 void MacroAssembler::c2bool(Register x) {
2093   // implements x == 0 ? 0 : 1
2094   // note: must only look at least-significant byte of x
2095   //       since C-style booleans are stored in one byte
2096   //       only! (was bug)
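     // e.g. x == 0x0100 -> low byte 0x00 -> result 0; x == 0x0101 -> low byte 0x01 -> result 1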
2097   andl(x, 0xFF);
2098   setb(Assembler::notZero, x);
2099 }
2100 
2101 // Would not be needed if the AddressLiteral version had a different name
2102 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
2103   Assembler::call(L, rtype);
2104 }


2616 void MacroAssembler::cmpoop(Register src1, Register src2) {
2617   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2618   bs->obj_equals(this, src1, src2);
2619 }
2620 
2621 void MacroAssembler::cmpoop(Register src1, Address src2) {
2622   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2623   bs->obj_equals(this, src1, src2);
2624 }
2625 
2626 #ifdef _LP64
2627 void MacroAssembler::cmpoop(Register src1, jobject src2) {
2628   movoop(rscratch1, src2);
2629   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2630   bs->obj_equals(this, src1, rscratch1);
2631 }
2632 #endif
2633 
2634 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
2635   if (reachable(adr)) {
2636     if (os::is_MP())
2637       lock();
2638     cmpxchgptr(reg, as_Address(adr));
2639   } else {
2640     lea(rscratch1, adr);
2641     if (os::is_MP())
2642       lock();
2643     cmpxchgptr(reg, Address(rscratch1, 0));
2644   }
2645 }
2646 
2647 void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
2648   LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
2649 }
2650 
2651 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
2652   if (reachable(src)) {
2653     Assembler::comisd(dst, as_Address(src));
2654   } else {
2655     lea(rscratch1, src);
2656     Assembler::comisd(dst, Address(rscratch1, 0));
2657   }
2658 }
2659 
2660 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
2661   if (reachable(src)) {




1013     Assembler::andpd(dst, Address(rscratch1, 0));
1014   }
1015 }
1016 
1017 void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
1018   // Used in sign-masking with aligned address.
1019   assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
1020   if (reachable(src)) {
1021     Assembler::andps(dst, as_Address(src));
1022   } else {
1023     lea(rscratch1, src);
1024     Assembler::andps(dst, Address(rscratch1, 0));
1025   }
1026 }
1027 
1028 void MacroAssembler::andptr(Register dst, int32_t imm32) {
1029   LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
1030 }
1031 
1032 void MacroAssembler::atomic_incl(Address counter_addr) {

1033   lock();
1034   incrementl(counter_addr);
1035 }
1036 
1037 void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register scr) {
1038   if (reachable(counter_addr)) {
1039     atomic_incl(as_Address(counter_addr));
1040   } else {
1041     lea(scr, counter_addr);
1042     atomic_incl(Address(scr, 0));
1043   }
1044 }
1045 
1046 #ifdef _LP64
1047 void MacroAssembler::atomic_incq(Address counter_addr) {

1048   lock();
1049   incrementq(counter_addr);
1050 }
1051 
1052 void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register scr) {
1053   if (reachable(counter_addr)) {
1054     atomic_incq(as_Address(counter_addr));
1055   } else {
1056     lea(scr, counter_addr);
1057     atomic_incq(Address(scr, 0));
1058   }
1059 }
1060 #endif
1061 
1062 // Writes to successive stack pages until the given offset is reached, checking
1063 // for stack overflow + shadow pages.  This clobbers tmp.
1064 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
1065   movptr(tmp, rsp);
1066   // Bang stack for total size given plus shadow page size.
1067   // Bang one page at a time because large size can bang beyond yellow and


1194   // illegal.
1195   testptr(header_reg, markOopDesc::epoch_mask_in_place);
1196   jccb(Assembler::notZero, try_rebias);
1197 
1198   // The epoch of the current bias is still valid but we know nothing
1199   // about the owner; it might be set or it might be clear. Try to
1200   // acquire the bias of the object using an atomic operation. If this
1201   // fails we will go in to the runtime to revoke the object's bias.
1202   // Note that we first construct the presumed unbiased header so we
1203   // don't accidentally blow away another thread's valid bias.
1204   NOT_LP64( movptr(swap_reg, saved_mark_addr); )
1205   andptr(swap_reg,
1206          markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place);
1207 #ifdef _LP64
1208   movptr(tmp_reg, swap_reg);
1209   orptr(tmp_reg, r15_thread);
1210 #else
1211   get_thread(tmp_reg);
1212   orptr(tmp_reg, swap_reg);
1213 #endif

1214   lock();

1215   cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
1216   // If the biasing toward our thread failed, this means that
1217   // another thread succeeded in biasing it toward itself and we
1218   // need to revoke that bias. The revocation will occur in the
1219   // interpreter runtime in the slow case.
1220   if (counters != NULL) {
1221     cond_inc32(Assembler::zero,
1222                ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr()));
1223   }
1224   if (slow_case != NULL) {
1225     jcc(Assembler::notZero, *slow_case);
1226   }
1227   jmp(done);
1228 
1229   bind(try_rebias);
1230   // At this point we know the epoch has expired, meaning that the
1231   // current "bias owner", if any, is actually invalid. Under these
1232   // circumstances _only_, we are allowed to use the current header's
1233   // value as the comparison value when doing the cas to acquire the
1234   // bias in the current epoch. In other words, we allow transfer of
1235   // the bias from one thread to another directly in this situation.
1236   //
1237   // FIXME: due to a lack of registers we currently blow away the age
1238   // bits in this situation. Should attempt to preserve them.
1239   load_prototype_header(tmp_reg, obj_reg);
1240 #ifdef _LP64
1241   orptr(tmp_reg, r15_thread);
1242 #else
1243   get_thread(swap_reg);
1244   orptr(tmp_reg, swap_reg);
1245   movptr(swap_reg, saved_mark_addr);
1246 #endif

1247   lock();

1248   cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
1249   // If the biasing toward our thread failed, then another thread
1250   // succeeded in biasing it toward itself and we need to revoke that
1251   // bias. The revocation will occur in the runtime in the slow case.
1252   if (counters != NULL) {
1253     cond_inc32(Assembler::zero,
1254                ExternalAddress((address) counters->rebiased_lock_entry_count_addr()));
1255   }
1256   if (slow_case != NULL) {
1257     jcc(Assembler::notZero, *slow_case);
1258   }
1259   jmp(done);
1260 
1261   bind(try_revoke_bias);
1262   // The prototype mark in the klass doesn't have the bias bit set any
1263   // more, indicating that objects of this data type are not supposed
1264   // to be biased any more. We are going to try to reset the mark of
1265   // this object to the prototype value and fall through to the
1266   // CAS-based locking scheme. Note that if our CAS fails, it means
1267   // that another thread raced us for the privilege of revoking the
1268   // bias of this particular object, so it's okay to continue in the
1269   // normal locking code.
1270   //
1271   // FIXME: due to a lack of registers we currently blow away the age
1272   // bits in this situation. Should attempt to preserve them.
1273   NOT_LP64( movptr(swap_reg, saved_mark_addr); )
1274   load_prototype_header(tmp_reg, obj_reg);

1275   lock();

1276   cmpxchgptr(tmp_reg, mark_addr); // compare tmp_reg and swap_reg
1277   // Fall through to the normal CAS-based lock, because no matter what
1278   // the result of the above CAS, some thread must have succeeded in
1279   // removing the bias bit from the object's header.
1280   if (counters != NULL) {
1281     cond_inc32(Assembler::zero,
1282                ExternalAddress((address) counters->revoked_lock_entry_count_addr()));
1283   }
1284 
1285   bind(cas_label);
1286 
1287   return null_check_offset;
1288 }
1289 
1290 void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) {
1291   assert(UseBiasedLocking, "why call this otherwise?");
1292 
1293   // Check for biased locking unlock case, which is a no-op
1294   // Note: we do not have to check the thread ID for two reasons.
1295   // First, the interpreter checks for IllegalMonitorStateException at


1351   }
1352   // Abort ratio calculation only if abort_count > RTMAbortThreshold
1353   //   Aborted transactions = abort_count * 100
1354   //   All transactions = total_count *  RTMTotalCountIncrRate
1355   //   Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
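       // Worked example (assuming the default flag values RTMAbortThreshold=1000,
       //   RTMTotalCountIncrRate=64, RTMAbortRatio=50): abort_count=2000, total_count=50
       //   gives 2000*100 = 200000 >= 50*64*50 = 160000, so the no_rtm bit would be set.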
1356 
1357   movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::abort_count_offset()));
1358   cmpptr(tmpReg, RTMAbortThreshold);
1359   jccb(Assembler::below, L_check_always_rtm2);
1360   imulptr(tmpReg, tmpReg, 100);
1361 
1362   Register scrReg = rtm_counters_Reg;
1363   movptr(scrReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
1364   imulptr(scrReg, scrReg, RTMTotalCountIncrRate);
1365   imulptr(scrReg, scrReg, RTMAbortRatio);
1366   cmpptr(tmpReg, scrReg);
1367   jccb(Assembler::below, L_check_always_rtm1);
1368   if (method_data != NULL) {
1369     // set rtm_state to "no rtm" in MDO
1370     mov_metadata(tmpReg, method_data);

1371     lock();

1372     orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), NoRTM);
1373   }
1374   jmpb(L_done);
1375   bind(L_check_always_rtm1);
1376   // Reload RTMLockingCounters* address
1377   lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
1378   bind(L_check_always_rtm2);
1379   movptr(tmpReg, Address(rtm_counters_Reg, RTMLockingCounters::total_count_offset()));
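       // With the assumed defaults (RTMLockingThreshold=10000, RTMTotalCountIncrRate=64) the
       // cut-off below is 10000/64 = 156 sampled transactions before rtm_state is set to UseRTM.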
1380   cmpptr(tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
1381   jccb(Assembler::below, L_done);
1382   if (method_data != NULL) {
1383     // set rtm_state to "always rtm" in MDO
1384     mov_metadata(tmpReg, method_data);

1385     lock();

1386     orl(Address(tmpReg, MethodData::rtm_state_offset_in_bytes()), UseRTM);
1387   }
1388   bind(L_done);
1389 }
1390 
1391 // Update counters and perform abort ratio calculation
1392 // input:  abort_status_Reg
1393 // rtm_counters_Reg, flags are killed
1394 void MacroAssembler::rtm_profiling(Register abort_status_Reg,
1395                                    Register rtm_counters_Reg,
1396                                    RTMLockingCounters* rtm_counters,
1397                                    Metadata* method_data,
1398                                    bool profile_rtm) {
1399 
1400   assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
1401   // update rtm counters based on rax value at abort
1402   // reads abort_status_Reg, updates flags
1403   lea(rtm_counters_Reg, ExternalAddress((address)rtm_counters));
1404   rtm_counters_update(abort_status_Reg, rtm_counters_Reg);
1405   if (profile_rtm) {


1576   if (PrintPreciseRTMLockingStatistics || profile_rtm) {
1577     rtm_profiling(abort_status_Reg, scrReg, rtm_counters, method_data, profile_rtm);
1578   }
1579   if (RTMRetryCount > 0) {
1580     // retry on lock abort if abort status is 'can retry' (0x2) or 'memory conflict' (0x4)
1581     rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
1582   }
1583 
1584   movptr(tmpReg, Address(boxReg, owner_offset)) ;
1585   testptr(tmpReg, tmpReg) ;
1586   jccb(Assembler::notZero, L_decrement_retry) ;
1587 
1588   // Appears unlocked - try to swing _owner from null to non-null.
1589   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
1590 #ifdef _LP64
1591   Register threadReg = r15_thread;
1592 #else
1593   get_thread(scrReg);
1594   Register threadReg = scrReg;
1595 #endif

1596   lock();

1597   cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
1598 
1599   if (RTMRetryCount > 0) {
1600     // if the CAS succeeded we are done, otherwise retry
1601     jccb(Assembler::equal, DONE_LABEL) ;
1602     bind(L_decrement_retry);
1603     // Spin and retry if lock is busy.
1604     rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
1605   }
1606   else {
1607     bind(L_decrement_retry);
1608   }
1609 }
1610 
1611 #endif //  INCLUDE_RTM_OPT
1612 
1613 // Fast_Lock and Fast_Unlock used by C2
1614 
1615 // Because the transitions from emitted code to the runtime
1616 // monitorenter/exit helper stubs are so slow it's critical that


1736   // If this invariant is not held we risk exclusion (safety) failure.
1737   if (UseBiasedLocking && !UseOptoBiasInlining) {
1738     biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters);
1739   }
1740 
1741 #if INCLUDE_RTM_OPT
1742   if (UseRTMForStackLocks && use_rtm) {
1743     rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg,
1744                       stack_rtm_counters, method_data, profile_rtm,
1745                       DONE_LABEL, IsInflated);
1746   }
1747 #endif // INCLUDE_RTM_OPT
1748 
1749   movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));          // [FETCH]
1750   testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased
1751   jccb(Assembler::notZero, IsInflated);
1752 
1753   // Attempt stack-locking ...
1754   orptr (tmpReg, markOopDesc::unlocked_value);
1755   movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
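       // The displaced (unlocked) mark is stored into the on-stack BasicLock up front, so if
       // the CAS below installs boxReg into the object's header, the box already holds the
       // correct displaced header word.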

1756   lock();

1757   cmpxchgptr(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()));      // Updates tmpReg
1758   if (counters != NULL) {
1759     cond_inc32(Assembler::equal,
1760                ExternalAddress((address)counters->fast_path_entry_count_addr()));
1761   }
1762   jcc(Assembler::equal, DONE_LABEL);           // Success
1763 
1764   // Recursive locking.
1765   // The object is stack-locked: markword contains stack pointer to BasicLock.
1766   // Locked by current thread if difference with current SP is less than one page.
1767   subptr(tmpReg, rsp);
1768   // The next instruction sets ZFlag == 1 (Success) if the difference is less than one page.
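       // The mask keeps only the low alignment bits and the bits at or above the page size, so
       // the AND leaves zero exactly when the displaced header is a small, aligned offset above
       // the current SP, i.e. a recursive stack-lock by this thread.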
1769   andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - os::vm_page_size())) );
1770   movptr(Address(boxReg, 0), tmpReg);
1771   if (counters != NULL) {
1772     cond_inc32(Assembler::equal,
1773                ExternalAddress((address)counters->fast_path_entry_count_addr()));
1774   }
1775   jmp(DONE_LABEL);
1776 


1793   //   set box->_displaced_header = markOopDesc::unused_mark().  Any non-0 value suffices.
1794   // This is convenient but results in a ST-before-CAS penalty.  The following CAS suffers
1795   // additional latency as we have another ST in the store buffer that must drain.
1796 
1797   // avoid ST-before-CAS
1798   // register juggle because we need tmpReg for cmpxchgptr below
1799   movptr(scrReg, boxReg);
1800   movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
1801 
1802   // Optimistic form: consider XORL tmpReg,tmpReg
1803   movptr(tmpReg, NULL_WORD);
1804 
1805   // Appears unlocked - try to swing _owner from null to non-null.
1806   // Ideally, I'd manifest "Self" with get_thread and then attempt
1807   // to CAS the register containing Self into m->Owner.
1808   // But we don't have enough registers, so instead we can either try to CAS
1809   // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
1810   // we later store "Self" into m->Owner.  Transiently storing a stack address
1811   // (rsp or the address of the box) into  m->owner is harmless.
1812   // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.

1813   lock();

1814   cmpxchgptr(scrReg, Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1815   movptr(Address(scrReg, 0), 3);          // box->_displaced_header = 3
1816   // If we weren't able to swing _owner from NULL to the BasicLock
1817   // then take the slow path.
1818   jccb  (Assembler::notZero, DONE_LABEL);
1819   // update _owner from BasicLock to thread
1820   get_thread (scrReg);                    // beware: clobbers ICCs
1821   movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
1822   xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
1823 
1824   // If the CAS fails we can either retry or pass control to the slow-path.
1825   // We use the latter tactic.
1826   // Pass the CAS result in the icc.ZFlag into DONE_LABEL
1827   // If the CAS was successful ...
1828   //   Self has acquired the lock
1829   //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
1830   // Intentional fall-through into DONE_LABEL ...
1831 #else // _LP64
1832   // It's inflated
1833   movq(scrReg, tmpReg);
1834   xorq(tmpReg, tmpReg);
1835 

1836   lock();

1837   cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
1838   // Unconditionally set box->_displaced_header = markOopDesc::unused_mark().
1839   // Without cast to int32_t movptr will destroy r10 which is typically obj.
1840   movptr(Address(boxReg, 0), (int32_t)intptr_t(markOopDesc::unused_mark()));
1841   // Intentional fall-through into DONE_LABEL ...
1842   // Propagate ICC.ZF from CAS above into DONE_LABEL.
1843 #endif // _LP64
1844 #if INCLUDE_RTM_OPT
1845   } // use_rtm()
1846 #endif
1847   // DONE_LABEL is a hot target - we'd really like to place it at the
1848   // start of cache line by padding with NOPs.
1849   // See the AMD and Intel software optimization manuals for the
1850   // most efficient "long" NOP encodings.
1851   // Unfortunately none of our alignment mechanisms suffice.
1852   bind(DONE_LABEL);
1853 
1854   // At DONE_LABEL the icc ZFlag is set as follows ...
1855   // Fast_Unlock uses the same protocol.
1856   // ZFlag == 1 -> Success


1963   // Note that we could employ various encoding schemes to reduce
1964   // the number of loads below (currently 4) to just 2 or 3.
1965   // Refer to the comments in synchronizer.cpp.
1966   // In practice the chain of fetches doesn't seem to impact performance, however.
1967   xorptr(boxReg, boxReg);
1968   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
1969   jccb  (Assembler::notZero, DONE_LABEL);
1970   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
1971   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
1972   jccb  (Assembler::notZero, CheckSucc);
1973   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
1974   jmpb  (DONE_LABEL);
1975 
1976   bind (Stacked);
1977   // It's not inflated and it's not recursively stack-locked and it's not biased.
1978   // It must be stack-locked.
1979   // Try to reset the header to displaced header.
1980   // The "box" value on the stack is stable, so we can reload
1981   // and be assured we observe the same value as above.
1982   movptr(tmpReg, Address(boxReg, 0));

1983   lock();

1984   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
1985   // Intentional fall-through into DONE_LABEL
1986 
1987   // DONE_LABEL is a hot target - we'd really like to place it at the
1988   // start of cache line by padding with NOPs.
1989   // See the AMD and Intel software optimization manuals for the
1990   // most efficient "long" NOP encodings.
1991   // Unfortunately none of our alignment mechanisms suffice.
1992   bind (CheckSucc);
1993 #else // _LP64
1994   // It's inflated
1995   xorptr(boxReg, boxReg);
1996   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
1997   jccb  (Assembler::notZero, DONE_LABEL);
1998   movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
1999   orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
2000   jccb  (Assembler::notZero, CheckSucc);
2001   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2002   jmpb  (DONE_LABEL);
2003 
2004   // Try to avoid passing control into the slow_path ...
2005   Label LSuccess, LGoSlowPath ;
2006   bind  (CheckSucc);
2007 
2008   // The following optional optimization can be elided if necessary
2009   // Effectively: if (succ == null) goto SlowPath
2010   // The code reduces the window for a race, however,
2011   // and thus benefits performance.
2012   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2013   jccb  (Assembler::zero, LGoSlowPath);
2014 
2015   xorptr(boxReg, boxReg);
2016   movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
2017 
2018   // Memory barrier/fence
2019   // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
2020   // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
2021   // This is faster on Nehalem and AMD Shanghai/Barcelona.
2022   // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
2023   // We might also restructure (ST Owner=0;barrier;LD _Succ) to
2024   // (mov box,0; xchgq box, &m->Owner; LD _succ) .
2025   lock(); addl(Address(rsp, 0), 0);
2026 
2027   cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
2028   jccb  (Assembler::notZero, LSuccess);
2029 
2030   // Rare inopportune interleaving - race.
2031   // The successor vanished in the small window above.
2032   // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
2033   // We need to ensure progress and succession.
2034   // Try to reacquire the lock.
2035   // If that fails then the new owner is responsible for succession and this
2036   // thread needs to take no further action and can exit via the fast path (success).
2037   // If the re-acquire succeeds then pass control into the slow path.
2038   // As implemented, this latter mode is horrible because we generate more
2039   // coherence traffic on the lock *and* artificially extend the critical section
2040   // length by virtue of passing control into the slow path.
2041 
2042   // box is really RAX -- the following CMPXCHG depends on that binding
2043   // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
2044   lock();
2045   cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
2046   // There's no successor so we tried to regrab the lock.
2047   // If that didn't work, then another thread grabbed the
2048   // lock so we're done (and exit was a success).
2049   jccb  (Assembler::notEqual, LSuccess);
2050   // Intentional fall-through into slow-path
2051 
2052   bind  (LGoSlowPath);
2053   orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
2054   jmpb  (DONE_LABEL);
2055 
2056   bind  (LSuccess);
2057   testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
2058   jmpb  (DONE_LABEL);
2059 
2060   bind  (Stacked);
2061   movptr(tmpReg, Address (boxReg, 0));      // re-fetch
2062   lock();
2063   cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box
2064 
2065 #endif
2066   bind(DONE_LABEL);
2067 }
2068 #endif // COMPILER2
2069 
2070 void MacroAssembler::c2bool(Register x) {
2071   // implements x == 0 ? 0 : 1
2072   // note: must only look at least-significant byte of x
2073   //       since C-style booleans are stored in one byte
2074   //       only! (was bug)
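     // e.g. x == 0x0100 -> low byte 0x00 -> result 0; x == 0x0101 -> low byte 0x01 -> result 1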
2075   andl(x, 0xFF);
2076   setb(Assembler::notZero, x);
2077 }
2078 
2079 // Would not be needed if the AddressLiteral version had a different name
2080 void MacroAssembler::call(Label& L, relocInfo::relocType rtype) {
2081   Assembler::call(L, rtype);
2082 }


2594 void MacroAssembler::cmpoop(Register src1, Register src2) {
2595   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2596   bs->obj_equals(this, src1, src2);
2597 }
2598 
2599 void MacroAssembler::cmpoop(Register src1, Address src2) {
2600   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2601   bs->obj_equals(this, src1, src2);
2602 }
2603 
2604 #ifdef _LP64
2605 void MacroAssembler::cmpoop(Register src1, jobject src2) {
2606   movoop(rscratch1, src2);
2607   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2608   bs->obj_equals(this, src1, rscratch1);
2609 }
2610 #endif
2611 
2612 void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) {
2613   if (reachable(adr)) {

2614     lock();
2615     cmpxchgptr(reg, as_Address(adr));
2616   } else {
2617     lea(rscratch1, adr);

2618     lock();
2619     cmpxchgptr(reg, Address(rscratch1, 0));
2620   }
2621 }
2622 
2623 void MacroAssembler::cmpxchgptr(Register reg, Address adr) {
2624   LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr));
2625 }
2626 
2627 void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) {
2628   if (reachable(src)) {
2629     Assembler::comisd(dst, as_Address(src));
2630   } else {
2631     lea(rscratch1, src);
2632     Assembler::comisd(dst, Address(rscratch1, 0));
2633   }
2634 }
2635 
2636 void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) {
2637   if (reachable(src)) {

