--- old/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2019-12-11 14:52:00.000000000 -0500 +++ new/src/hotspot/cpu/x86/macroAssembler_x86.cpp 2019-12-11 14:52:00.000000000 -0500 @@ -1296,6 +1296,58 @@ #ifdef COMPILER2 +// Increment the ObjectMonitor's ref_count for safety or force a branch +// to 'done' with ICC.ZF=0 to indicate failure/take the slow path (ref_count is restored first; tmp_reg may be clobbered). +void MacroAssembler::inc_om_ref_count(Register obj_reg, Register om_reg, Register tmp_reg, Label& done) { + atomic_incl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count))); + + Label LGoSlowPath; + if (AsyncDeflateIdleMonitors) { + // Race here if monitor is not owned! The above ref_count bump + // will cause subsequent async deflation to skip it. However, + // previous or concurrent async deflation is a race. + + // First check: if the owner field == DEFLATER_MARKER: + movptr(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + // DEFLATER_MARKER == reinterpret_cast<void*>(-1) so the compiler + // doesn't like to use the define here: + cmpptr(tmp_reg, -1); + // If marked for async deflation, then take the slow path. This is a + // simpler check than what ObjectMonitorHandle::save_om_ptr() does + // so ObjectMonitor::install_displaced_markword_in_object() doesn't + // have to be implemented in macro assembler. + jccb(Assembler::equal, LGoSlowPath); + + // Second check: if ref_count field <= 0 (32-bit load/compare to match the width used by atomic_incl()/decrementl() on this field): + movl(tmp_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count))); + cmpl(tmp_reg, 0); + // If async deflation is in the process of bailing out, but has not + // yet restored the ref_count field, then we take the slow path. We + // want a stable ref_count value for the fast path. + jccb(Assembler::lessEqual, LGoSlowPath); + + // Final check: if object field != obj_reg: + cmpptr(obj_reg, Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(object))); + // If the ObjectMonitor has been deflated and recycled, then take + // the slow path. 
+ jccb(Assembler::notEqual, LGoSlowPath); + } + + Label LRetToCaller; + // We leave the ref_count incremented to protect the caller's code + // paths against async deflation. + jmpb(LRetToCaller); + + bind(LGoSlowPath); + lock(); + decrementl(Address(om_reg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count))); + // Jump to 'done' with ICC.ZF=0 to indicate failure/take the slow path. + orl(tmp_reg, 1); + jmp(done); + + bind(LRetToCaller); +} + #if INCLUDE_RTM_OPT // Update rtm_counters based on abort status @@ -1529,11 +1581,21 @@ assert(UseRTMLocking, "why call this otherwise?"); assert(tmpReg == rax, ""); assert(scrReg == rdx, ""); - Label L_rtm_retry, L_decrement_retry, L_on_abort; + Label L_rtm_retry, L_decrement_retry, L_on_abort, L_local_done; int owner_offset = OM_OFFSET_NO_MONITOR_VALUE_TAG(owner); // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value())); + + if (!HandshakeAfterDeflateIdleMonitors) { + // Increment the ObjectMonitor's ref_count for safety or force the + // enter slow path via DONE_LABEL. + // In rtm_inflated_locking(), initially tmpReg contains the object's + // mark word which, in this case, is the (ObjectMonitor* | monitor_value). + // Also this code uses scrReg as its temporary register. 
+ inc_om_ref_count(objReg, tmpReg /* om_reg */, scrReg /* tmp_reg */, DONE_LABEL); + } + movptr(boxReg, tmpReg); // Save ObjectMonitor address if (RTMRetryCount > 0) { @@ -1555,7 +1617,7 @@ movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); movptr(tmpReg, Address(tmpReg, owner_offset)); testptr(tmpReg, tmpReg); - jcc(Assembler::zero, DONE_LABEL); + jcc(Assembler::zero, L_local_done); if (UseRTMXendForLockBusy) { xend(); jmp(L_decrement_retry); @@ -1590,7 +1652,7 @@ if (RTMRetryCount > 0) { // success done else retry - jccb(Assembler::equal, DONE_LABEL) ; + jccb(Assembler::equal, L_local_done); bind(L_decrement_retry); // Spin and retry if lock is busy. rtm_retry_lock_on_busy(retry_on_busy_count_Reg, boxReg, tmpReg, scrReg, L_rtm_retry); @@ -1598,6 +1660,19 @@ else { bind(L_decrement_retry); } + + // rtm_inflated_locking() exit paths come here except for a failed + // inc_om_ref_count() which goes directly to DONE_LABEL. + bind(L_local_done); + if (!HandshakeAfterDeflateIdleMonitors) { + pushf(); // Preserve flags. + // Decrement the ObjectMonitor's ref_count. + lock(); + decrementl(Address(boxReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count))); + popf(); // Restore flags so we have the proper ICC.ZF value. + } + + jmp(DONE_LABEL) ; } #endif // INCLUDE_RTM_OPT @@ -1823,14 +1898,33 @@ #else // _LP64 // It's inflated and we use scrReg for ObjectMonitor* in this section. movq(scrReg, tmpReg); - xorq(tmpReg, tmpReg); - lock(); - cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + // Unconditionally set box->_displaced_header = markWord::unused_mark(). // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. movptr(Address(boxReg, 0), (int32_t)intptr_t(markWord::unused_mark().value())); + + if (!HandshakeAfterDeflateIdleMonitors) { + // Increment the ObjectMonitor's ref_count for safety or force the + // enter slow path via DONE_LABEL. 
+ // In fast_lock(), scrReg contains the object's mark word which, + // in this case, is the (ObjectMonitor* | monitor_value). Also this + // code uses tmpReg as its temporary register. + inc_om_ref_count(objReg, scrReg /* om_reg */, tmpReg /* tmp_reg */, DONE_LABEL); + } + + xorq(tmpReg, tmpReg); + lock(); + cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); // Intentional fall-through into DONE_LABEL ... // Propagate ICC.ZF from CAS above into DONE_LABEL. + + if (!HandshakeAfterDeflateIdleMonitors) { + pushf(); // Preserve flags. + // Decrement the ObjectMonitor's ref_count. + lock(); + decrementl(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count))); + popf(); // Restore flags so we have the proper ICC.ZF value. + } #endif // _LP64 #if INCLUDE_RTM_OPT } // use_rtm() @@ -1910,7 +2004,7 @@ jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword testptr(tmpReg, markWord::monitor_value); // Inflated? - jccb (Assembler::zero, Stacked); + jcc (Assembler::zero, Stacked); // It's inflated. #if INCLUDE_RTM_OPT @@ -1921,7 +2015,7 @@ testptr(boxReg, boxReg); jccb(Assembler::notZero, L_regular_inflated_unlock); xend(); - jmpb(DONE_LABEL); + jmp(DONE_LABEL); bind(L_regular_inflated_unlock); } #endif @@ -1983,18 +2077,28 @@ bind (CheckSucc); #else // _LP64 // It's inflated + + if (!HandshakeAfterDeflateIdleMonitors) { + // Increment the ObjectMonitor's ref_count for safety or force the + // exit slow path via DONE_LABEL. + // In fast_unlock(), tmpReg contains the object's mark word which, + // in this case, is the (ObjectMonitor* | monitor_value). Also this + // code uses boxReg as its temporary register. + inc_om_ref_count(objReg, tmpReg /* om_reg */, boxReg /* tmp_reg */, DONE_LABEL); + } + + // Try to avoid passing control into the slow path ... 
+ Label LSuccess, LGoSlowPath; xorptr(boxReg, boxReg); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); - jccb (Assembler::notZero, DONE_LABEL); + jccb(Assembler::notZero, LGoSlowPath); movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); jccb (Assembler::notZero, CheckSucc); // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD); - jmpb (DONE_LABEL); + jmpb(LSuccess); - // Try to avoid passing control into the slow_path ... - Label LSuccess, LGoSlowPath ; bind (CheckSucc); // The following optional optimization can be elided if necessary @@ -2043,10 +2147,18 @@ // Intentional fall-through into slow path bind (LGoSlowPath); + if (!HandshakeAfterDeflateIdleMonitors) { + lock(); + decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count))); + } orl (boxReg, 1); // set ICC.ZF=0 to indicate failure jmpb (DONE_LABEL); bind (LSuccess); + if (!HandshakeAfterDeflateIdleMonitors) { + lock(); + decrementl(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(ref_count))); + } testl (boxReg, 0); // set ICC.ZF=1 to indicate success jmpb (DONE_LABEL);