
src/hotspot/cpu/x86/macroAssembler_x86.cpp

rev 56635 : v2.00 -> v2.05 (CR5/v2.05/8-for-jdk13) patches combined into one; merge with 8229212.patch; merge with jdk-14+11; merge with 8230184.patch; merge with 8230876.patch; merge with jdk-14+15; merge with jdk-14+18.
rev 56639 : loosen a couple more counter checks due to races observed in testing; simplify om_release() extraction of mid since list head or cur_mid_in_use is marked; simplify deflate_monitor_list() extraction of mid since there are no parallel deleters due to the safepoint; simplify deflate_monitor_list_using_JT() extraction of mid since list head or cur_mid_in_use is marked; prepend_block_to_lists() - simplify based on David H's comments; does not need load_acquire() or release_store() because of the cmpxchg(); prepend_to_common() - simplify to use mark_next_loop() for m and use mark_list_head() and release_store() for the non-empty list case; add more debugging for "Non-balanced monitor enter/exit" failure mode; fix race in inflate() in the "CASE: neutral" code path; install_displaced_markword_in_object() does not need to clear the header field since that is handled when the ObjectMonitor is moved from the global free list; LSuccess should clear boxReg to set ICC.ZF=1 to avoid depending on existing boxReg contents; update fast_unlock() to detect when object no longer refers to the same ObjectMonitor and take fast path exit instead; clarify fast_lock() code where we detect when object no longer refers to the same ObjectMonitor; add/update comments for movptr() calls where we move a literal into an Address; remove set_owner(); refactor setting of owner field into set_owner_from(2 versions), set_owner_from_BasicLock(), and try_set_owner_from(); the new functions include monitorinflation+owner logging; extract debug code from v2.06 and v2.07 and move to v2.07.debug; change 'jccb' -> 'jcc' and 'jmpb' -> 'jmp' as needed; checkpoint initial version of MacroAssembler::inc_om_ref_count(); update LP64 MacroAssembler::fast_lock() and fast_unlock() to use inc_om_ref_count(); fast_lock() return flag setting logic can use 'testptr(tmpReg, tmpReg)' instead of 'cmpptr(tmpReg, 0)' since that's more efficient; fast_unlock() LSuccess return flag setting logic can use 'testl (boxReg, 0)' instead of 'xorptr(boxReg, boxReg)' since that's more efficient; cleanup "fast-path" vs "fast path" and "slow-path" vs "slow path"; update MacroAssembler::rtm_inflated_locking() to use inc_om_ref_count(); update MacroAssembler::fast_lock() to preserve the flags before decrementing ref_count and restore the flags afterwards; this is more clean than depending on the contents of rax/tmpReg; coleenp CR - refactor async monitor deflation work from ServiceThread::service_thread_entry() to ObjectSynchronizer::deflate_idle_monitors_using_JT(); rehn,eosterlund CR - add support for HandshakeAfterDeflateIdleMonitors for platforms that don't have ObjectMonitor ref_count support implemented in C2 fast_lock() and fast_unlock().

*** 1294,1303 ****
--- 1294,1355 ----
    jcc(Assembler::equal, done);
  }
  
  #ifdef COMPILER2
  
+ // Increment the ObjectMonitor's ref_count for safety or force a branch
+ // to 'done' with ICC.ZF=0 to indicate failure/take the slow path.
+ void MacroAssembler::inc_om_ref_count(Register obj_reg, Register om_reg, Register tmp_reg, Label& done) {
+   atomic_incl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+ 
+   Label LGoSlowPath;
+   if (AsyncDeflateIdleMonitors) {
+     // Race here if monitor is not owned! The above ref_count bump
+     // will cause subsequent async deflation to skip it. However,
+     // previous or concurrent async deflation is a race.
+ 
+     // First check: if the owner field == DEFLATER_MARKER:
+     movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+     // DEFLATER_MARKER == reinterpret_cast<void*>(-1) so the compiler
+     // doesn't like to use the define here:
+     cmpptr(tmp_reg, -1);
+     // If marked for async deflation, then take the slow path. This is a
+     // simpler check than what ObjectMonitorHandle::save_om_ptr() does
+     // so ObjectMonitor::install_displaced_markword_in_object() doesn't
+     // have to be implemented in macro assembler.
+     jccb(Assembler::equal, LGoSlowPath);
+ 
+     // Second check: if ref_count field <= 0:
+     movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+     cmpptr(tmp_reg, 0);
+     // If async deflation is in the process of bailing out, but has not
+     // yet restored the ref_count field, then we take the slow path. We
+     // want a stable ref_count value for the fast path.
+     jccb(Assembler::lessEqual, LGoSlowPath);
+ 
+     // Final check: if object field == obj_reg:
+     cmpptr(obj_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(object)));
+     // If the ObjectMonitor has been deflated and recycled, then take
+     // the slow path.
+     jccb(Assembler::notEqual, LGoSlowPath);
+   }
+ 
+   Label LRetToCaller;
+   // We leave the ref_count incremented to protect the caller's code
+   // paths against async deflation.
+   jmpb(LRetToCaller);
+ 
+   bind(LGoSlowPath);
+   lock();
+   decrementl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+   // Jump to 'done' with ICC.ZF=0 to indicate failure/take the slow path.
+   orl(tmp_reg, 1);
+   jmp(done);
+ 
+   bind(LRetToCaller);
+ }
+ 
  #if INCLUDE_RTM_OPT
  
  // Update rtm_counters based on abort status
  // input: abort_status
  //        rtm_counters (RTMLockingCounters*)
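For readers following the new inc_om_ref_count() helper above, the guard it emits can be modeled in plain C++ roughly as follows. This is a minimal sketch, not HotSpot code: OmModel, DEFLATER_MARKER_VALUE and try_protect() are hypothetical stand-ins for the real ObjectMonitor fields and helpers, and the std::atomic operations only approximate the emitted lock-prefixed instructions.

    #include <atomic>
    #include <cstdint>

    // Hypothetical stand-in for the ObjectMonitor fields touched by the fast path.
    struct OmModel {
      std::atomic<void*>   owner;      // holds DEFLATER_MARKER_VALUE while being async-deflated
      std::atomic<int32_t> ref_count;  // > 0 keeps async deflation away
      std::atomic<void*>   object;     // backref to the associated Java object
    };

    static void* const DEFLATER_MARKER_VALUE = reinterpret_cast<void*>(-1);

    // Returns true when the monitor is protected (ref_count stays bumped),
    // false when the caller must take the slow path (ref_count restored).
    bool try_protect(OmModel* om, void* obj) {
      om->ref_count.fetch_add(1);                       // atomic_incl(ref_count)
      if (om->owner.load() == DEFLATER_MARKER_VALUE ||  // first check: being deflated?
          om->ref_count.load() <= 0 ||                  // second check: deflater bailing out?
          om->object.load() != obj) {                   // final check: recycled for another object?
        om->ref_count.fetch_sub(1);                     // lock(); decrementl(ref_count)
        return false;                                   // ICC.ZF=0 -> slow path
      }
      return true;                                      // leave ref_count bumped for the caller
    }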
*** 1527,1540 ****
                                            Metadata* method_data, bool profile_rtm,
                                            Label& DONE_LABEL) {
    assert(UseRTMLocking, "why call this otherwise?");
    assert(tmpReg == rax, "");
    assert(scrReg == rdx, "");
!   Label L_rtm_retry, L_decrement_retry, L_on_abort;
    int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
  
!   // Without cast to int32_t a movptr will destroy r10 which is typically obj
    movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
    movptr(boxReg, tmpReg); // Save ObjectMonitor address
  
    if (RTMRetryCount > 0) {
      movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy
--- 1579,1601 ----
                                            Metadata* method_data, bool profile_rtm,
                                            Label& DONE_LABEL) {
    assert(UseRTMLocking, "why call this otherwise?");
    assert(tmpReg == rax, "");
    assert(scrReg == rdx, "");
!   Label L_rtm_retry, L_decrement_retry, L_on_abort, L_local_done;
    int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
  
!   if (!HandshakeAfterDeflateIdleMonitors) {
!     // Increment the ObjectMonitor's ref_count for safety or force the
!     // enter slow path via DONE_LABEL.
!     // In rtm_inflated_locking(), initially tmpReg contains the object's
!     // mark word which, in this case, is the (ObjectMonitor* | monitor_value).
!     // Also this code uses scrReg as its temporary register.
!     inc_om_ref_count(objReg, tmpReg /* om_reg */, scrReg /* tmp_reg */, DONE_LABEL);
!   }
! 
!   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
    movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
    movptr(boxReg, tmpReg); // Save ObjectMonitor address
  
    if (RTMRetryCount > 0) {
      movl(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy
*** 1553,1563 ****
    }
    xbegin(L_on_abort);
    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
    movptr(tmpReg, Address(tmpReg, owner_offset));
    testptr(tmpReg, tmpReg);
!   jcc(Assembler::zero, DONE_LABEL);
    if (UseRTMXendForLockBusy) {
      xend();
      jmp(L_decrement_retry);
    }
    else {
--- 1614,1624 ----
    }
    xbegin(L_on_abort);
    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()));
    movptr(tmpReg, Address(tmpReg, owner_offset));
    testptr(tmpReg, tmpReg);
!   jcc(Assembler::zero, L_local_done);
    if (UseRTMXendForLockBusy) {
      xend();
      jmp(L_decrement_retry);
    }
    else {
*** 1588,1623 ****
    lock();
    cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
  
    if (RTMRetryCount > 0) {
      // success done else retry
!     jccb(Assembler::equal, DONE_LABEL) ;
      bind(L_decrement_retry);
      // Spin and retry if lock is busy.
      rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
    }
    else {
      bind(L_decrement_retry);
    }
  }
  
  #endif //  INCLUDE_RTM_OPT
  
! // Fast_Lock and Fast_Unlock used by C2
  
  // Because the transitions from emitted code to the runtime
  // monitorenter/exit helper stubs are so slow it's critical that
! // we inline both the stack-locking fast-path and the inflated fast path.
  //
  // See also: cmpFastLock and cmpFastUnlock.
  //
  // What follows is a specialized inline transliteration of the code
  // in enter() and exit(). If we're concerned about I$ bloat another
  // option would be to emit TrySlowEnter and TrySlowExit methods
  // at startup-time. These methods would accept arguments as
  // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
! // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
  // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  // In practice, however, the # of lock sites is bounded and is usually small.
  // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  // if the processor uses simple bimodal branch predictors keyed by EIP
  // Since the helper routines would be called from multiple synchronization
--- 1649,1697 ----
    lock();
    cmpxchgptr(threadReg, Address(boxReg, owner_offset)); // Updates tmpReg
  
    if (RTMRetryCount > 0) {
      // success done else retry
!     jccb(Assembler::equal, L_local_done);
      bind(L_decrement_retry);
      // Spin and retry if lock is busy.
      rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry);
    }
    else {
      bind(L_decrement_retry);
    }
+ 
+   // rtm_inflated_locking() exit paths come here except for a failed
+   // inc_om_ref_count() which goes directly to DONE_LABEL.
+   bind(L_local_done);
+   if (!HandshakeAfterDeflateIdleMonitors) {
+     pushf();  // Preserve flags.
+     // Decrement the ObjectMonitor's ref_count.
+     lock();
+     decrementl(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+     popf();  // Restore flags so we have the proper ICC.ZF value.
+   }
+ 
+   jmp(DONE_LABEL) ;
  }
  
  #endif //  INCLUDE_RTM_OPT
  
! // fast_lock and fast_unlock used by C2
  
  // Because the transitions from emitted code to the runtime
  // monitorenter/exit helper stubs are so slow it's critical that
! // we inline both the stack-locking fast path and the inflated fast path.
  //
  // See also: cmpFastLock and cmpFastUnlock.
  //
  // What follows is a specialized inline transliteration of the code
  // in enter() and exit(). If we're concerned about I$ bloat another
  // option would be to emit TrySlowEnter and TrySlowExit methods
  // at startup-time. These methods would accept arguments as
  // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
! // indications in the icc.ZFlag. fast_lock and fast_unlock would simply
  // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
  // In practice, however, the # of lock sites is bounded and is usually small.
  // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  // if the processor uses simple bimodal branch predictors keyed by EIP
  // Since the helper routines would be called from multiple synchronization
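The shape of the protected enter path above (bump the ref_count, attempt the owner CAS, then drop the ref_count without disturbing the success/failure flags that flow into DONE_LABEL) can be sketched in C++ as below. The RTM retry machinery is collapsed away and only the ref_count bracketing is shown; the struct and function names are hypothetical and follow the conventions of the earlier sketch.

    #include <atomic>
    #include <cstdint>

    // Hypothetical stand-ins; see the earlier sketch for the full guard checks.
    struct OmEnterModel {
      std::atomic<void*>   owner;
      std::atomic<int32_t> ref_count;
    };

    // Models the protected inflated-enter attempt: the boolean result plays
    // the role of ICC.ZF at DONE_LABEL (true == ZF=1 == fast-path success).
    bool inflated_enter_model(OmEnterModel* om, void* self,
                              bool handshake_after_deflate /* HandshakeAfterDeflateIdleMonitors */) {
      if (!handshake_after_deflate) {
        om->ref_count.fetch_add(1);          // inc_om_ref_count() (guard checks elided here)
      }
      void* expected = nullptr;              // cmpxchgptr(threadReg, owner): NULL -> self
      bool success = om->owner.compare_exchange_strong(expected, self);
      if (!handshake_after_deflate) {
        // The locked decrement would clobber the flags, hence the pushf()/popf()
        // pair in the assembler; here the result is already captured in 'success'.
        om->ref_count.fetch_sub(1);
      }
      return success;                        // propagated to DONE_LABEL as ICC.ZF
    }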
*** 1632,1643 ****
  // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  //    (b) explicit barriers or fence operations.
  //
  // TODO:
  //
! // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
! //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
  //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  //    the lock operators would typically be faster than reifying Self.
  //
  // *  Ideally I'd define the primitives as:
  //      fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
--- 1706,1717 ----
  // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
  //    (b) explicit barriers or fence operations.
  //
  // TODO:
  //
! // *  Arrange for C2 to pass "Self" into fast_lock and fast_unlock in one of the registers (scr).
! //    This avoids manifesting the Self pointer in the fast_lock and fast_unlock terminals.
  //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
  //    the lock operators would typically be faster than reifying Self.
  //
  // *  Ideally I'd define the primitives as:
  //      fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
*** 1659,1676 ****
  //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
  //
  // *  use jccb and jmpb instead of jcc and jmp to improve code density.
  //    But beware of excessive branch density on AMD Opterons.
  //
! // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
! //    or failure of the fast-path. If the fast-path fails then we pass
! //    control to the slow-path, typically in C. In Fast_Lock and
! //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
  //    will emit a conditional branch immediately after the node.
  //    So we have branches to branches and lots of ICC.ZF games.
  //    Instead, it might be better to have C2 pass a "FailureLabel"
! //    into Fast_Lock and Fast_Unlock. In the case of success, control
  //    will drop through the node. ICC.ZF is undefined at exit.
  //    In the case of failure, the node will branch directly to the
  //    FailureLabel
  
  
--- 1733,1750 ----
  //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
  //
  // *  use jccb and jmpb instead of jcc and jmp to improve code density.
  //    But beware of excessive branch density on AMD Opterons.
  //
! // *  Both fast_lock and fast_unlock set the ICC.ZF to indicate success
! //    or failure of the fast path. If the fast path fails then we pass
! //    control to the slow path, typically in C. In fast_lock and
! //    fast_unlock we often branch to DONE_LABEL, just to find that C2
  //    will emit a conditional branch immediately after the node.
  //    So we have branches to branches and lots of ICC.ZF games.
  //    Instead, it might be better to have C2 pass a "FailureLabel"
! //    into fast_lock and fast_unlock. In the case of success, control
  //    will drop through the node. ICC.ZF is undefined at exit.
  //    In the case of failure, the node will branch directly to the
  //    FailureLabel
  
  
*** 1811,1839 ****
        // update _owner from BasicLock to thread
        get_thread (scrReg);                    // beware: clobbers ICCs
        movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
        xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
  
!       // If the CAS fails we can either retry or pass control to the slow-path.
        // We use the latter tactic.
        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
        // If the CAS was successful ...
        //   Self has acquired the lock
        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
        // Intentional fall-through into DONE_LABEL ...
  #else // _LP64
!       // It's inflated
        movq(scrReg, tmpReg);
-       xorq(tmpReg, tmpReg);
  
        lock();
        cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
        // Unconditionally set box->_displaced_header = markWord::unused_mark().
!       // Without cast to int32_t movptr will destroy r10 which is typically obj.
        movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
        // Intentional fall-through into DONE_LABEL ...
        // Propagate ICC.ZF from CAS above into DONE_LABEL.
  #endif // _LP64
  #if INCLUDE_RTM_OPT
    } // use_rtm()
  #endif
    // DONE_LABEL is a hot target - we'd really like to place it at the
--- 1885,1930 ----
        // update _owner from BasicLock to thread
        get_thread (scrReg);                    // beware: clobbers ICCs
        movptr(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), scrReg);
        xorptr(boxReg, boxReg);                 // set icc.ZFlag = 1 to indicate success
  
!       // If the CAS fails we can either retry or pass control to the slow path.
        // We use the latter tactic.
        // Pass the CAS result in the icc.ZFlag into DONE_LABEL
        // If the CAS was successful ...
        //   Self has acquired the lock
        //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
        // Intentional fall-through into DONE_LABEL ...
  #else // _LP64
!       // It's inflated and we use scrReg for ObjectMonitor* in this section.
        movq(scrReg, tmpReg);
+       if (!HandshakeAfterDeflateIdleMonitors) {
+         // Increment the ObjectMonitor's ref_count for safety or force the
+         // enter slow path via DONE_LABEL.
+         // In fast_lock(), scrReg contains the object's mark word which,
+         // in this case, is the (ObjectMonitor* | monitor_value). Also this
+         // code uses tmpReg as its temporary register.
+         inc_om_ref_count(objReg, scrReg /* om_reg */, tmpReg /* tmp_reg */, DONE_LABEL);
+       }
+ 
+       xorq(tmpReg, tmpReg);
  
        lock();
        cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
        // Unconditionally set box->_displaced_header = markWord::unused_mark().
!       // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
        movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value()));
        // Intentional fall-through into DONE_LABEL ...
        // Propagate ICC.ZF from CAS above into DONE_LABEL.
+ 
+       if (!HandshakeAfterDeflateIdleMonitors) {
+         pushf();  // Preserve flags.
+         // Decrement the ObjectMonitor's ref_count.
+         lock();
+         decrementl(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+         popf();   // Restore flags so we have the proper ICC.ZF value.
+       }
  #endif // _LP64
  #if INCLUDE_RTM_OPT
    } // use_rtm()
  #endif
    // DONE_LABEL is a hot target - we'd really like to place it at the
*** 1842,1863 ****
    // most efficient "long" NOP encodings.
    // Unfortunately none of our alignment mechanisms suffice.
    bind(DONE_LABEL);
  
    // At DONE_LABEL the icc ZFlag is set as follows ...
!   // Fast_Unlock uses the same protocol.
    // ZFlag == 1 -> Success
!   // ZFlag == 0 -> Failure - force control through the slow-path
  }
  
  // obj: object to unlock
  // box: box address (displaced header location), killed. Must be EAX.
  // tmp: killed, cannot be obj nor box.
  //
  // Some commentary on balanced locking:
  //
! // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
  // Methods that don't have provably balanced locking are forced to run in the
  // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  // The interpreter provides two properties:
  // I1: At return-time the interpreter automatically and quietly unlocks any
  //     objects acquired the current activation (frame). Recall that the
--- 1933,1954 ----
    // most efficient "long" NOP encodings.
    // Unfortunately none of our alignment mechanisms suffice.
    bind(DONE_LABEL);
  
    // At DONE_LABEL the icc ZFlag is set as follows ...
!   // fast_unlock uses the same protocol.
    // ZFlag == 1 -> Success
!   // ZFlag == 0 -> Failure - force control through the slow path
  }
  
  // obj: object to unlock
  // box: box address (displaced header location), killed. Must be EAX.
  // tmp: killed, cannot be obj nor box.
  //
  // Some commentary on balanced locking:
  //
! // fast_lock and fast_unlock are emitted only for provably balanced lock sites.
  // Methods that don't have provably balanced locking are forced to run in the
  // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
  // The interpreter provides two properties:
  // I1: At return-time the interpreter automatically and quietly unlocks any
  //     objects acquired the current activation (frame). Recall that the
*** 1874,1884 ****
  // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
  // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
  // should not be unlocked by "normal" java-level locking and vice-versa. The specification
  // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
  // Arguably given that the spec legislates the JNI case as undefined our implementation
! // could reasonably *avoid* checking owner in Fast_Unlock().
  // In the interest of performance we elide m->Owner==Self check in unlock.
  // A perfectly viable alternative is to elide the owner check except when
  // Xcheck:jni is enabled.
  
  void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
--- 1965,1975 ----
  // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
  // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
  // should not be unlocked by "normal" java-level locking and vice-versa. The specification
  // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
  // Arguably given that the spec legislates the JNI case as undefined our implementation
! // could reasonably *avoid* checking owner in fast_unlock().
  // In the interest of performance we elide m->Owner==Self check in unlock.
  // A perfectly viable alternative is to elide the owner check except when
  // Xcheck:jni is enabled.
  
  void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
*** 1909,1930 ****
    cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                   // Examine the displaced header
    jcc   (Assembler::zero, DONE_LABEL);                              // 0 indicates recursive stack-lock
    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
    testptr(tmpReg, markWord::monitor_value);                         // Inflated?
!   jccb  (Assembler::zero, Stacked);
  
    // It's inflated.
  
  #if INCLUDE_RTM_OPT
    if (use_rtm) {
      Label L_regular_inflated_unlock;
      int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
      movptr(boxReg, Address(tmpReg, owner_offset));
      testptr(boxReg, boxReg);
      jccb(Assembler::notZero, L_regular_inflated_unlock);
      xend();
!     jmpb(DONE_LABEL);
      bind(L_regular_inflated_unlock);
    }
  #endif
  
    // Despite our balanced locking property we still check that m->_owner == Self
--- 2000,2021 ----
    cmpptr(Address(boxReg, 0), (int32_t)NULL_WORD);                   // Examine the displaced header
    jcc   (Assembler::zero, DONE_LABEL);                              // 0 indicates recursive stack-lock
    movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword
    testptr(tmpReg, markWord::monitor_value);                         // Inflated?
!   jcc   (Assembler::zero, Stacked);
  
    // It's inflated.
  
  #if INCLUDE_RTM_OPT
    if (use_rtm) {
      Label L_regular_inflated_unlock;
      int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner);
      movptr(boxReg, Address(tmpReg, owner_offset));
      testptr(boxReg, boxReg);
      jccb(Assembler::notZero, L_regular_inflated_unlock);
      xend();
!     jmp(DONE_LABEL);
      bind(L_regular_inflated_unlock);
    }
  #endif
  
    // Despite our balanced locking property we still check that m->_owner == Self
*** 1939,1949 ****
    //
    // If there's no contention try a 1-0 exit. That is, exit without
    // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
    // we detect and recover from the race that the 1-0 exit admits.
    //
!   // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
    // before it STs null into _owner, releasing the lock. Updates
    // to data protected by the critical section must be visible before
    // we drop the lock (and thus before any other thread could acquire
    // the lock and observe the fields protected by the lock).
    // IA32's memory-model is SPO, so STs are ordered with respect to
--- 2030,2040 ----
    //
    // If there's no contention try a 1-0 exit. That is, exit without
    // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
    // we detect and recover from the race that the 1-0 exit admits.
    //
!   // Conceptually fast_unlock() must execute a STST|LDST "release" barrier
    // before it STs null into _owner, releasing the lock. Updates
    // to data protected by the critical section must be visible before
    // we drop the lock (and thus before any other thread could acquire
    // the lock and observe the fields protected by the lock).
    // IA32's memory-model is SPO, so STs are ordered with respect to
*** 1982,2012 ****
    // most efficient "long" NOP encodings.
    // Unfortunately none of our alignment mechanisms suffice.
    bind  (CheckSucc);
  #else // _LP64
    // It's inflated
    xorptr(boxReg, boxReg);
    orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
!   jccb  (Assembler::notZero, DONE_LABEL);
    movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
    orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
    jccb  (Assembler::notZero, CheckSucc);
    movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
!   jmpb  (DONE_LABEL);
  
-   // Try to avoid passing control into the slow_path ...
-   Label LSuccess, LGoSlowPath ;
    bind  (CheckSucc);
  
    // The following optional optimization can be elided if necessary
!   // Effectively: if (succ == null) goto SlowPath
    // The code reduces the window for a race, however,
    // and thus benefits performance.
    cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
    jccb  (Assembler::zero, LGoSlowPath);
  
    xorptr(boxReg, boxReg);
    movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  
    // Memory barrier/fence
    // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
    // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
--- 2073,2115 ----
    // most efficient "long" NOP encodings.
    // Unfortunately none of our alignment mechanisms suffice.
    bind  (CheckSucc);
  #else // _LP64
    // It's inflated
+ 
+   if (!HandshakeAfterDeflateIdleMonitors) {
+     // Increment the ObjectMonitor's ref_count for safety or force the
+     // exit slow path via DONE_LABEL.
+     // In fast_unlock(), tmpReg contains the object's mark word which,
+     // in this case, is the (ObjectMonitor* | monitor_value). Also this
+     // code uses boxReg as its temporary register.
+     inc_om_ref_count(objReg, tmpReg /* om_reg */, boxReg /* tmp_reg */, DONE_LABEL);
+   }
+ 
+   // Try to avoid passing control into the slow path ...
+   Label LSuccess, LGoSlowPath;
    xorptr(boxReg, boxReg);
    orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
!   jccb(Assembler::notZero, LGoSlowPath);
    movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
    orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
    jccb  (Assembler::notZero, CheckSucc);
+   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
    movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
!   jmpb(LSuccess);
  
    bind  (CheckSucc);
  
    // The following optional optimization can be elided if necessary
!   // Effectively: if (succ == null) goto slow path
    // The code reduces the window for a race, however,
    // and thus benefits performance.
    cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
    jccb  (Assembler::zero, LGoSlowPath);
  
    xorptr(boxReg, boxReg);
+   // Without cast to int32_t this style of movptr will destroy r10 which is typically obj.
    movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
  
    // Memory barrier/fence
    // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
    // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
*** 2037,2053 ****
    cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    // There's no successor so we tried to regrab the lock.
    // If that didn't work, then another thread grabbed the
    // lock so we're done (and exit was a success).
    jccb  (Assembler::notEqual, LSuccess);
!   // Intentional fall-through into slow-path
  
    bind  (LGoSlowPath);
    orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
    jmpb  (DONE_LABEL);
  
    bind  (LSuccess);
    testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
    jmpb  (DONE_LABEL);
  
    bind  (Stacked);
    movptr(tmpReg, Address (boxReg, 0));    // re-fetch
--- 2140,2164 ----
    cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
    // There's no successor so we tried to regrab the lock.
    // If that didn't work, then another thread grabbed the
    // lock so we're done (and exit was a success).
    jccb  (Assembler::notEqual, LSuccess);
!   // Intentional fall-through into slow path
  
    bind  (LGoSlowPath);
+   if (!HandshakeAfterDeflateIdleMonitors) {
+     lock();
+     decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+   }
    orl   (boxReg, 1);                      // set ICC.ZF=0 to indicate failure
    jmpb  (DONE_LABEL);
  
    bind  (LSuccess);
+   if (!HandshakeAfterDeflateIdleMonitors) {
+     lock();
+     decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count)));
+   }
    testl (boxReg, 0);                      // set ICC.ZF=1 to indicate success
    jmpb  (DONE_LABEL);
  
    bind  (Stacked);
    movptr(tmpReg, Address (boxReg, 0));    // re-fetch
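Putting the fast_unlock() hunks above together, the LP64 inflated-exit decision (the 1-0 exit plus the succ re-check around the Dekker-style fence) can be approximated in C++ as follows. This is an illustrative model only: OmExitModel and its fields are hypothetical stand-ins, the queue fields are reduced to integers, and the ref_count protection shown in the diff is omitted for brevity. Returning true corresponds to reaching LSuccess (ICC.ZF=1), false to LGoSlowPath (ICC.ZF=0).

    #include <atomic>
    #include <cstdint>

    // Hypothetical stand-ins for the ObjectMonitor fields read on the exit fast path.
    struct OmExitModel {
      std::atomic<void*>     owner;
      std::atomic<intptr_t>  recursions;
      std::atomic<uintptr_t> cxq;        // non-zero when threads are queued
      std::atomic<uintptr_t> EntryList;  // non-zero when threads are queued
      std::atomic<void*>     succ;       // heir presumptive
    };

    bool inflated_exit_model(OmExitModel* m, void* self) {
      if (m->recursions.load() != 0) {
        return false;                                      // jccb(notZero, LGoSlowPath)
      }
      if ((m->cxq.load() | m->EntryList.load()) == 0) {
        m->owner.store(nullptr);                           // 1-0 exit: no waiters, just drop the lock
        return true;                                       // jmpb(LSuccess)
      }
      if (m->succ.load() == nullptr) {
        return false;                                      // no apparent successor -> slow path
      }
      m->owner.store(nullptr);                             // ST owner = NULL
      std::atomic_thread_fence(std::memory_order_seq_cst); // Dekker pivot: lock addl([rsp], 0)
      if (m->succ.load() != nullptr) {
        return true;                                       // successor will take over -> success
      }
      // Rare race: the successor vanished. Try to regrab the lock.
      void* expected = nullptr;
      if (!m->owner.compare_exchange_strong(expected, self)) {
        return true;                                       // another thread got the lock -> exit was a success
      }
      return false;                                        // we re-acquired it -> must exit via the slow path
    }

In the assembler, LGoSlowPath forces ICC.ZF=0 with orl(boxReg, 1) (a nonzero result clears ZF) and LSuccess forces ICC.ZF=1 with testl(boxReg, 0) (ANDing with zero sets ZF). With this change both labels first drop the ref_count when HandshakeAfterDeflateIdleMonitors is off, which is why the 1-0 exit now jumps to LSuccess instead of straight to DONE_LABEL.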