src/hotspot/cpu/x86/macroAssembler_x86.cpp
rev 56775 : imported patch 8230876.patch
rev 56776 : v2.00 -> v2.07 (CR7/v2.07/10-for-jdk14) patches combined into one; merge with 8230876.patch (2019.10.17) and jdk-14+21.
rev 56777 : See CR7-to-CR8-changes.
*** 1294,1303 ****
--- 1294,1355 ----
jcc(Assembler::equal, done);
}
#ifdef COMPILER2
+ // Increment the ObjectMonitor's ref_count for safety, or force a branch
+ // to 'done' with ICC.ZF=0 to indicate failure and take the slow path.
+ void MacroAssembler::inc_om_ref_count(Register obj_reg, Register om_reg, Register tmp_reg, Label& done) {
+ atomic_incl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+
+ Label LGoSlowPath;
+ if (AsyncDeflateIdleMonitors) {
+ // There is a race here if the monitor is not owned! The ref_count
+ // bump above causes subsequent async deflation to skip this monitor,
+ // but async deflation that began earlier or runs concurrently can
+ // still race with us.
+
+ // First check: if the owner field == DEFLATER_MARKER:
+ movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+ // DEFLATER_MARKER == reinterpret_cast<void*>(-1), which the compiler
+ // won't accept as an immediate operand, so we use the literal -1 here:
+ cmpptr(tmp_reg, -1);
+ // If marked for async deflation, then take the slow path. This is a
+ // simpler check than what ObjectMonitorHandle::save_om_ptr() does, so
+ // ObjectMonitor::install_displaced_markword_in_object() does not have
+ // to be implemented in the macro assembler.
+ jccb(Assembler::equal, LGoSlowPath);
+
+ // Second check: if ref_count field <= 0:
+ movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+ cmpptr(tmp_reg, 0);
+ // If async deflation is in the process of bailing out, but has not
+ // yet restored the ref_count field, then we take the slow path. We
+ // want a stable ref_count value for the fast path.
+ jccb(Assembler::lessEqual, LGoSlowPath);
+
+ // Final check: if object field == obj_reg:
+ cmpptr(obj_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(object)));
+ // If the ObjectMonitor has been deflated and recycled, then take
+ // the slow path.
+ jccb(Assembler::notEqual, LGoSlowPath);
+ }
+
+ Label LRetToCaller;
+ // We leave the ref_count incremented to protect the caller's code
+ // paths against async deflation.
+ jmpb(LRetToCaller);
+
+ bind(LGoSlowPath);
+ lock();
+ decrementl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+ // Jump to 'done' with ICC.ZF=0 to indicate failure/take the slow path.
+ orl(tmp_reg, 1);
+ jmp(done);
+
+ bind(LRetToCaller);
+ }
+
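For readers who don't parse x86 assembler quickly, here is a minimal C++ sketch of the protocol inc_om_ref_count() implements. The Monitor struct and field names below are hypothetical stand-ins for the real ObjectMonitor layout; only the control flow mirrors the code above.

#include <atomic>
#include <cstdint>

// Hypothetical stand-in for ObjectMonitor; only the fields the assembler
// touches (owner, ref_count, object) are modeled here.
struct Monitor {
  std::atomic<void*>   owner;
  std::atomic<int32_t> ref_count;
  std::atomic<void*>   object;
};

static void* const kDeflaterMarker = reinterpret_cast<void*>(-1);

// Returns true if the caller may proceed on the fast path with ref_count
// held; returns false if the bump was undone and the slow path (the branch
// to 'done' with ICC.ZF=0) must be taken.
bool inc_om_ref_count(Monitor* om, void* obj, bool async_deflate_idle_monitors) {
  om->ref_count.fetch_add(1);                   // atomic_incl(... ref_count ...)
  if (async_deflate_idle_monitors) {
    if (om->owner.load() == kDeflaterMarker ||  // first check: marked for deflation
        om->ref_count.load() <= 0 ||            // second check: deflation bailing out
        om->object.load() != obj) {             // final check: deflated and recycled
      om->ref_count.fetch_sub(1);               // lock(); decrementl(...)
      return false;                             // LGoSlowPath
    }
  }
  // Leave ref_count incremented to protect the caller against async deflation.
  return true;                                  // LRetToCaller
}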
#if INCLUDE_RTM_OPT
// Update rtm_counters based on abort status
// input: abort_status
// rtm_counters (RTMLockingCounters*)
*** 1527,1541 ****
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL) {
assert(UseRTMLocking, "why call this otherwise?");
assert(tmpReg == rax, "");
assert(scrReg == rdx, "");
! Label L_rtm_retry, L_decrement_retry, L_on_abort;
int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
! // Without cast to int32_t a movptr will destroy r10 which is typically obj
movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
movptr(boxReg, tmpReg); // Save ObjectMonitor address
if (RTMRetryCount > 0) {
movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy
movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
--- 1579,1603 ----
Metadata* method_data, bool profile_rtm,
Label& DONE_LABEL) {
assert(UseRTMLocking, "why call this otherwise?");
assert(tmpReg == rax, "");
assert(scrReg == rdx, "");
! Label L_rtm_retry, L_decrement_retry, L_on_abort, L_local_done;
int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
! // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
+
+ if (!HandshakeAfterDeflateIdleMonitors) {
+ // Increment the ObjectMonitor's ref_count for safety or force the
+ // enter slow path via DONE_LABEL.
+ // In rtm_inflated_locking(), initially tmpReg contains the object's
+ // mark word which, in this case, is the (ObjectMonitor* | monitor_value).
+ // Also this code uses scrReg as its temporary register.
+ inc_om_ref_count(objReg, tmpReg /* om_reg */, scrReg /* tmp_reg */, DONE_LABEL);
+ }
+
movptr(boxReg, tmpReg); // Save ObjectMonitor address
if (RTMRetryCount > 0) {
movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy
movl(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
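In C++ terms, and reusing the hypothetical Monitor and inc_om_ref_count() sketch above, the shape this hunk gives rtm_inflated_locking() is roughly the following; the RTM transaction itself (xbegin/xend) has no portable C++ analogue and is elided.

// Hypothetical shape only: bump ref_count before touching the monitor and
// fail straight to the slow path (the ZF=0 analogue) if the bump fails.
bool rtm_inflated_locking_shape(Monitor* om, void* obj,
                                bool handshake_after_deflate_idle_monitors) {
  if (!handshake_after_deflate_idle_monitors) {
    if (!inc_om_ref_count(om, obj, /*async_deflate_idle_monitors=*/true)) {
      return false;  // the jump to DONE_LABEL with ICC.ZF=0
    }
  }
  bool success = false;  // RTM attempt + CAS on om->owner, elided
  // ... all success and failure exits below funnel through L_local_done ...
  if (!handshake_after_deflate_idle_monitors) {
    om->ref_count.fetch_sub(1);  // every exit path drops the reference
  }
  return success;
}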
*** 1553,1563 ****
}
xbegin(L_on_abort);
movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
movptr(tmpReg, Address(tmpReg, owner_offset));
testptr(tmpReg, tmpReg);
! jcc(Assembler::zero, DONE_LABEL);
if (UseRTMXendForLockBusy) {
xend();
jmp(L_decrement_retry);
}
else {
--- 1615,1625 ----
}
xbegin(L_on_abort);
movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
movptr(tmpReg, Address(tmpReg, owner_offset));
testptr(tmpReg, tmpReg);
! jcc(Assembler::zero, L_local_done);
if (UseRTMXendForLockBusy) {
xend();
jmp(L_decrement_retry);
}
else {
*** 1588,1623 ****
lock();
cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
if (RTMRetryCount > 0) {
// success done else retry
! jccb(Assembler::equal, DONE_LABEL) ;
bind(L_decrement_retry);
// Spin and retry if lock is busy.
rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
}
else {
bind(L_decrement_retry);
}
}
#endif // INCLUDE_RTM_OPT
! // Fast_Lock and Fast_Unlock used by C2
// Because the transitions from emitted code to the runtime
// monitorenter/exit helper stubs are so slow it's critical that
! // we inline both the stack-locking fast-path and the inflated fast path.
//
// See also: cmpFastLock and cmpFastUnlock.
//
// What follows is a specialized inline transliteration of the code
// in enter() and exit(). If we're concerned about I$ bloat another
// option would be to emit TrySlowEnter and TrySlowExit methods
// at startup-time. These methods would accept arguments as
// (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
! // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
// In practice, however, the # of lock sites is bounded and is usually small.
// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
// if the processor uses simple bimodal branch predictors keyed by EIP
// Since the helper routines would be called from multiple synchronization
--- 1650,1698 ----
lock();
cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
if (RTMRetryCount > 0) {
// success done else retry
! jccb(Assembler::equal, L_local_done);
bind(L_decrement_retry);
// Spin and retry if lock is busy.
rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
}
else {
bind(L_decrement_retry);
}
+
+ // rtm_inflated_locking() exit paths come here, except for a failed
+ // inc_om_ref_count(), which goes directly to DONE_LABEL.
+ bind(L_local_done);
+ if (!HandshakeAfterDeflateIdleMonitors) {
+ pushf(); // Preserve flags.
+ // Decrement the ObjectMonitor's ref_count.
+ lock();
+ decrementl(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+ popf(); // Restore flags so we have the proper ICC.ZF value.
+ }
+
+ jmp(DONE_LABEL);
}
#endif // INCLUDE_RTM_OPT
! // fast_lock and fast_unlock used by C2
// Because the transitions from emitted code to the runtime
// monitorenter/exit helper stubs are so slow it's critical that
! // we inline both the stack-locking fast path and the inflated fast path.
//
// See also: cmpFastLock and cmpFastUnlock.
//
// What follows is a specialized inline transliteration of the code
// in enter() and exit(). If we're concerned about I$ bloat another
// option would be to emit TrySlowEnter and TrySlowExit methods
// at startup-time. These methods would accept arguments as
// (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
! // indications in the icc.ZFlag. fast_lock and fast_unlock would simply
// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
// In practice, however, the # of lock sites is bounded and is usually small.
// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
// if the processor uses simple bimodal branch predictors keyed by EIP
// Since the helper routines would be called from multiple synchronization
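The pushf()/popf() bracket at L_local_done deserves a note: a locked decrementl() updates the arithmetic flags, which would destroy the ICC.ZF success/failure value that the code after DONE_LABEL reads. A C++ analogue of that save/mutate/restore discipline, with hypothetical names:

#include <atomic>

// 'zf' stands in for the flags register encoding success (true <=> ZF==1).
bool local_done_shape(std::atomic<int>& ref_count, bool zf,
                      bool handshake_after_deflate_idle_monitors) {
  if (!handshake_after_deflate_idle_monitors) {
    const bool saved_zf = zf;  // pushf(): preserve flags
    ref_count.fetch_sub(1);    // lock(); decrementl(): clobbers flags
    zf = saved_zf;             // popf(): restore the proper ICC.ZF value
  }
  return zf;                   // jmp(DONE_LABEL) with ZF intact
}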
*** 1632,1643 ****
// to (a) prevent compiler-JIT reordering of non-volatile accesses, and
// (b) explicit barriers or fence operations.
//
// TODO:
//
! // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
! // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
// Given TLAB allocation, Self is usually manifested in a register, so passing it into
// the lock operators would typically be faster than reifying Self.
//
// * Ideally I'd define the primitives as:
// fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
--- 1707,1718 ----
// to (a) prevent compiler-JIT reordering of non-volatile accesses, and
// (b) explicit barriers or fence operations.
//
// TODO:
//
! // * Arrange for C2 to pass "Self" into fast_lock and fast_unlock in one of the registers (scr).
! // This avoids manifesting the Self pointer in the fast_lock and fast_unlock terminals.
// Given TLAB allocation, Self is usually manifested in a register, so passing it into
// the lock operators would typically be faster than reifying Self.
//
// * Ideally I'd define the primitives as:
// fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
*** 1659,1676 ****
// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
//
// * use jccb and jmpb instead of jcc and jmp to improve code density.
// But beware of excessive branch density on AMD Opterons.
//
! // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
! // or failure of the fast-path. If the fast-path fails then we pass
! // control to the slow-path, typically in C. In Fast_Lock and
! // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
// will emit a conditional branch immediately after the node.
// So we have branches to branches and lots of ICC.ZF games.
// Instead, it might be better to have C2 pass a "FailureLabel"
! // into Fast_Lock and Fast_Unlock. In the case of success, control
// will drop through the node. ICC.ZF is undefined at exit.
// In the case of failure, the node will branch directly to the
// FailureLabel
--- 1734,1751 ----
// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
//
// * use jccb and jmpb instead of jcc and jmp to improve code density.
// But beware of excessive branch density on AMD Opterons.
//
! // * Both fast_lock and fast_unlock set the ICC.ZF to indicate success
! // or failure of the fast path. If the fast path fails then we pass
! // control to the slow path, typically in C. In fast_lock and
! // fast_unlock we often branch to DONE_LABEL, just to find that C2
// will emit a conditional branch immediately after the node.
// So we have branches to branches and lots of ICC.ZF games.
// Instead, it might be better to have C2 pass a "FailureLabel"
! // into fast_lock and fast_unlock. In the case of success, control
// will drop through the node. ICC.ZF is undefined at exit.
// In the case of failure, the node will branch directly to the
// FailureLabel
*** 1811,1839 ****
// update _owner from BasicLock to thread
get_thread (scrReg); // beware: clobbers ICCs
movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
! // If the CAS fails we can either retry or pass control to the slow-path.
// We use the latter tactic.
// Pass the CAS result in the icc.ZFlag into DONE_LABEL
// If the CAS was successful ...
// Self has acquired the lock
// Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
// Intentional fall-through into DONE_LABEL ...
#else // _LP64
! // It's inflated
movq(scrReg, tmpReg);
- xorq(tmpReg, tmpReg);
- lock();
- cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
// Unconditionally set box->_displaced_header = markWord::unused_mark().
! // Without cast to int32_t movptr will destroy r10 which is typically obj.
movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
// Intentional fall-through into DONE_LABEL ...
// Propagate ICC.ZF from CAS above into DONE_LABEL.
#endif // _LP64
#if INCLUDE_RTM_OPT
} // use_rtm()
#endif
// DONE_LABEL is a hot target - we'd really like to place it at the
--- 1886,1932 ----
// update _owner from BasicLock to thread
get_thread (scrReg); // beware: clobbers ICCs
movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
xorptr(boxReg, boxReg); // set icc.ZFlag = 1 to indicate success
! // If the CAS fails we can either retry or pass control to the slow path.
// We use the latter tactic.
// Pass the CAS result in the icc.ZFlag into DONE_LABEL
// If the CAS was successful ...
// Self has acquired the lock
// Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
// Intentional fall-through into DONE_LABEL ...
#else // _LP64
! // It's inflated and we use scrReg for ObjectMonitor* in this section.
movq(scrReg, tmpReg);
// Unconditionally set box->_displaced_header = markWord::unused_mark().
! // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
+
+ if (!HandshakeAfterDeflateIdleMonitors) {
+ // Increment the ObjectMonitor's ref_count for safety or force the
+ // enter slow path via DONE_LABEL.
+ // In fast_lock(), scrReg contains the object's mark word which,
+ // in this case, is the (ObjectMonitor* | monitor_value). Also this
+ // code uses tmpReg as its temporary register.
+ inc_om_ref_count(objReg, scrReg /* om_reg */, tmpReg /* tmp_reg */, DONE_LABEL);
+ }
+
+ xorq(tmpReg, tmpReg);
+ lock();
+ cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
// Intentional fall-through into DONE_LABEL ...
// Propagate ICC.ZF from CAS above into DONE_LABEL.
+
+ if (!HandshakeAfterDeflateIdleMonitors) {
+ pushf(); // Preserve flags.
+ // Decrement the ObjectMonitor's ref_count.
+ lock();
+ decrementl(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+ popf(); // Restore flags so we have the proper ICC.ZF value.
+ }
#endif // _LP64
#if INCLUDE_RTM_OPT
} // use_rtm()
#endif
// DONE_LABEL is a hot target - we'd really like to place it at the
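Assembling the pieces, the 64-bit inflated fast_lock path in this hunk has roughly the following shape in the sketch style used above (Monitor and inc_om_ref_count() are the hypothetical stand-ins from earlier). The CAS on owner is the actual acquisition, and its result is the ZF value that the bracketed ref_count decrement must not disturb:

// true <=> lock acquired (ZF==1); false <=> take the slow path (ZF==0).
bool fast_lock_inflated_shape(Monitor* om, void* obj, void* self_thread,
                              bool handshake_after_deflate_idle_monitors) {
  if (!handshake_after_deflate_idle_monitors) {
    if (!inc_om_ref_count(om, obj, /*async_deflate_idle_monitors=*/true)) {
      return false;  // failed bump: straight to DONE_LABEL with ZF==0
    }
  }
  void* expected = nullptr;  // xorq(tmpReg, tmpReg)
  bool zf = om->owner.compare_exchange_strong(expected, self_thread);
  if (!handshake_after_deflate_idle_monitors) {
    om->ref_count.fetch_sub(1);  // pushf()/decrementl()/popf(): 'zf' preserved
  }
  return zf;  // propagate the CAS result into DONE_LABEL
}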
*** 1842,1863 ****
// most efficient "long" NOP encodings.
// Unfortunately none of our alignment mechanisms suffice.
bind(DONE_LABEL);
// At DONE_LABEL the icc ZFlag is set as follows ...
! // Fast_Unlock uses the same protocol.
// ZFlag == 1 -> Success
! // ZFlag == 0 -> Failure - force control through the slow-path
}
// obj: object to unlock
// box: box address (displaced header location), killed. Must be EAX.
// tmp: killed, cannot be obj nor box.
//
// Some commentary on balanced locking:
//
! // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
// Methods that don't have provably balanced locking are forced to run in the
// interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
// The interpreter provides two properties:
// I1: At return-time the interpreter automatically and quietly unlocks any
// objects acquired the current activation (frame). Recall that the
--- 1935,1956 ----
// most efficient "long" NOP encodings.
// Unfortunately none of our alignment mechanisms suffice.
bind(DONE_LABEL);
// At DONE_LABEL the icc ZFlag is set as follows ...
! // fast_unlock uses the same protocol.
// ZFlag == 1 -> Success
! // ZFlag == 0 -> Failure - force control through the slow path
}
// obj: object to unlock
// box: box address (displaced header location), killed. Must be EAX.
// tmp: killed, cannot be obj nor box.
//
// Some commentary on balanced locking:
//
! // fast_lock and fast_unlock are emitted only for provably balanced lock sites.
// Methods that don't have provably balanced locking are forced to run in the
// interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
// The interpreter provides two properties:
// I1: At return-time the interpreter automatically and quietly unlocks any
// objects acquired the current activation (frame). Recall that the
*** 1874,1884 ****
// The only other source of unbalanced locking would be JNI. The "Java Native Interface:
// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
// should not be unlocked by "normal" java-level locking and vice-versa. The specification
// doesn't specify what will occur if a program engages in such mixed-mode locking, however.
// Arguably given that the spec legislates the JNI case as undefined our implementation
! // could reasonably *avoid* checking owner in Fast_Unlock().
// In the interest of performance we elide m->Owner==Self check in unlock.
// A perfectly viable alternative is to elide the owner check except when
// Xcheck:jni is enabled.
void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
--- 1967,1977 ----
// The only other source of unbalanced locking would be JNI. The "Java Native Interface:
// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
// should not be unlocked by "normal" java-level locking and vice-versa. The specification
// doesn't specify what will occur if a program engages in such mixed-mode locking, however.
// Arguably given that the spec legislates the JNI case as undefined our implementation
! // could reasonably *avoid* checking owner in fast_unlock().
// In the interest of performance we elide m->Owner==Self check in unlock.
// A perfectly viable alternative is to elide the owner check except when
// Xcheck:jni is enabled.
void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
*** 1909,1930 ****
cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
testptr(tmpReg, markWord::monitor_value); // Inflated?
! jccb (Assembler::zero, Stacked);
// It's inflated.
#if INCLUDE_RTM_OPT
if (use_rtm) {
Label L_regular_inflated_unlock;
int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
movptr(boxReg, Address(tmpReg, owner_offset));
testptr(boxReg, boxReg);
jccb(Assembler::notZero, L_regular_inflated_unlock);
xend();
! jmpb(DONE_LABEL);
bind(L_regular_inflated_unlock);
}
#endif
// Despite our balanced locking property we still check that m->_owner == Self
--- 2002,2023 ----
cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD); // Examine the displaced header
jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock
movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
testptr(tmpReg, markWord::monitor_value); // Inflated?
! jcc (Assembler::zero, Stacked);
// It's inflated.
#if INCLUDE_RTM_OPT
if (use_rtm) {
Label L_regular_inflated_unlock;
int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
movptr(boxReg, Address(tmpReg, owner_offset));
testptr(boxReg, boxReg);
jccb(Assembler::notZero, L_regular_inflated_unlock);
xend();
! jmp(DONE_LABEL);
bind(L_regular_inflated_unlock);
}
#endif
// Despite our balanced locking property we still check that m->_owner == Self
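Note that the jccb-to-jcc and jmpb-to-jmp widenings in this hunk are functional, not stylistic: jccb and jmpb emit branches with 8-bit displacements (a reach of roughly +/-127 bytes), and the ref_count code this patch inserts between these branches and their targets can push the targets out of short-branch range, so the 32-bit-displacement forms are required.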
*** 1939,1949 ****
//
// If there's no contention try a 1-0 exit. That is, exit without
// a costly MEMBAR or CAS. See synchronizer.cpp for details on how
// we detect and recover from the race that the 1-0 exit admits.
//
! // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
// before it STs null into _owner, releasing the lock. Updates
// to data protected by the critical section must be visible before
// we drop the lock (and thus before any other thread could acquire
// the lock and observe the fields protected by the lock).
// IA32's memory-model is SPO, so STs are ordered with respect to
--- 2032,2042 ----
//
// If there's no contention try a 1-0 exit. That is, exit without
// a costly MEMBAR or CAS. See synchronizer.cpp for details on how
// we detect and recover from the race that the 1-0 exit admits.
//
! // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
// before it STs null into _owner, releasing the lock. Updates
// to data protected by the critical section must be visible before
// we drop the lock (and thus before any other thread could acquire
// the lock and observe the fields protected by the lock).
// IA32's memory-model is SPO, so STs are ordered with respect to
*** 1982,2012 ****
// most efficient "long" NOP encodings.
// Unfortunately none of our alignment mechanisms suffice.
bind (CheckSucc);
#else // _LP64
// It's inflated
xorptr(boxReg, boxReg);
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
! jccb (Assembler::notZero, DONE_LABEL);
movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
jccb (Assembler::notZero, CheckSucc);
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
! jmpb (DONE_LABEL);
- // Try to avoid passing control into the slow_path ...
- Label LSuccess, LGoSlowPath ;
bind (CheckSucc);
// The following optional optimization can be elided if necessary
! // Effectively: if (succ == null) goto SlowPath
// The code reduces the window for a race, however,
// and thus benefits performance.
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
jccb (Assembler::zero, LGoSlowPath);
xorptr(boxReg, boxReg);
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
// Memory barrier/fence
// Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
// Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
--- 2075,2117 ----
// most efficient "long" NOP encodings.
// Unfortunately none of our alignment mechanisms suffice.
bind (CheckSucc);
#else // _LP64
// It's inflated
+
+ if (!HandshakeAfterDeflateIdleMonitors) {
+ // Increment the ObjectMonitor's ref_count for safety or force the
+ // exit slow path via DONE_LABEL.
+ // In fast_unlock(), tmpReg contains the object's mark word which,
+ // in this case, is the (ObjectMonitor* | monitor_value). Also this
+ // code uses boxReg as its temporary register.
+ inc_om_ref_count(objReg, tmpReg /* om_reg */, boxReg /* tmp_reg */, DONE_LABEL);
+ }
+
+ // Try to avoid passing control into the slow path ...
+ Label LSuccess, LGoSlowPath;
xorptr(boxReg, boxReg);
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
! jccb(Assembler::notZero, LGoSlowPath);
movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
jccb (Assembler::notZero, CheckSucc);
+ // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
! jmpb(LSuccess);
bind (CheckSucc);
// The following optional optimization can be elided if necessary
! // Effectively: if (succ == null) goto slow path
// The code reduces the window for a race, however,
// and thus benefits performance.
cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
jccb (Assembler::zero, LGoSlowPath);
xorptr(boxReg, boxReg);
+ // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
// Memory barrier/fence
// Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
// Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
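The restructured exit path is the classic 1-0 unlock with a Dekker-style pivot. A hedged C++ sketch, extending the hypothetical Monitor from earlier with cxq/EntryList/succ/recursions stand-ins; the real code runs under far stricter constraints than std::atomic conveys:

#include <atomic>
#include <cstdint>

struct MonitorWithQueues : Monitor {
  std::atomic<void*>    cxq, entry_list, succ;
  std::atomic<intptr_t> recursions;
};

// true <=> LSuccess (ZF==1); false <=> LGoSlowPath (ZF==0).
bool fast_unlock_inflated_shape(MonitorWithQueues* om, void* self_thread) {
  if (om->recursions.load() != 0) return false;       // recursive: LGoSlowPath
  if (om->cxq.load() == nullptr && om->entry_list.load() == nullptr) {
    om->owner.store(nullptr);                         // uncontended 1-0 exit
    return true;                                      // jmpb(LSuccess)
  }
  // CheckSucc: without a successor being woken, don't risk the race.
  if (om->succ.load() == nullptr) return false;       // LGoSlowPath
  om->owner.store(nullptr);                           // ST owner = NULL ...
  std::atomic_thread_fence(std::memory_order_seq_cst); // ... MEMBAR (locked add) ...
  if (om->succ.load() != nullptr) return true;        // ... LD succ: it will retry
  // No successor after all: try to regrab the lock ourselves.
  void* expected = nullptr;
  if (!om->owner.compare_exchange_strong(expected, self_thread)) {
    return true;   // another thread grabbed it; our exit still succeeded
  }
  return false;    // we re-own the lock; fall through into the slow path
}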
*** 2037,2053 ****
cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
// There's no successor so we tried to regrab the lock.
// If that didn't work, then another thread grabbed the
// lock so we're done (and exit was a success).
jccb (Assembler::notEqual, LSuccess);
! // Intentional fall-through into slow-path
bind (LGoSlowPath);
orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
jmpb (DONE_LABEL);
bind (LSuccess);
testl (boxReg, 0); // set ICC.ZF=1 to indicate success
jmpb (DONE_LABEL);
bind (Stacked);
movptr(tmpReg, Address (boxReg, 0)); // re-fetch
--- 2142,2166 ----
cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
// There's no successor so we tried to regrab the lock.
// If that didn't work, then another thread grabbed the
// lock so we're done (and exit was a success).
jccb (Assembler::notEqual, LSuccess);
! // Intentional fall-through into slow path
bind (LGoSlowPath);
+ if (!HandshakeAfterDeflateIdleMonitors) {
+ lock();
+ decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+ }
orl (boxReg, 1); // set ICC.ZF=0 to indicate failure
jmpb (DONE_LABEL);
bind (LSuccess);
+ if (!HandshakeAfterDeflateIdleMonitors) {
+ lock();
+ decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+ }
testl (boxReg, 0); // set ICC.ZF=1 to indicate success
jmpb (DONE_LABEL);
bind (Stacked);
movptr(tmpReg, Address (boxReg, 0)); // re-fetch
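A final design note on the LGoSlowPath/LSuccess decrements just above: unlike the L_local_done path in rtm_inflated_locking(), no pushf()/popf() bracket is needed here, because on each path the locked decrementl() executes before the flag-setting instruction (orl() forces ZF=0 for failure, testl() forces ZF=1 for success), so the flags the decrement clobbers are about to be rewritten anyway.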