--- old/src/cpu/x86/vm/macroAssembler_x86.cpp 2016-10-25 10:40:02.828773328 +0200 +++ new/src/cpu/x86/vm/macroAssembler_x86.cpp 2016-10-25 10:40:02.777773294 +0200 @@ -2033,7 +2033,7 @@ cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box bind(DONE_LABEL); } else { - Label DONE_LABEL, Stacked, CheckSucc; + Label DONE_LABEL, Stacked; // Critically, the biased locking test must have precedence over // and appear before the (box->dhw == 0) recursive stack-lock test. @@ -2227,6 +2227,8 @@ bind (CheckSucc); } #else // _LP64 + Label LGoSlowPath; + // It's inflated movptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); xorptr(boxReg, r15_thread); @@ -2234,50 +2236,23 @@ jccb (Assembler::notZero, DONE_LABEL); movptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)); orptr (boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)); - jccb (Assembler::notZero, CheckSucc); + orptr (boxReg, Address (tmpReg, ObjectMonitor::trace_exit_stack_offset_in_bytes()-2)); + jccb (Assembler::notZero, LGoSlowPath); movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD); jmpb (DONE_LABEL); - if ((EmitSync & 65536) == 0) { - Label LSuccess, LGoSlowPath ; - bind (CheckSucc); - cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD); - jccb (Assembler::zero, LGoSlowPath); - - // I'd much rather use lock:andl m->_owner, 0 as it's faster than the - // the explicit ST;MEMBAR combination, but masm doesn't currently support - // "ANDQ M,IMM". Don't use MFENCE here. lock:add to TOS, xchg, etc - // are all faster when the write buffer is populated. - movptr (Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), (int32_t)NULL_WORD); - if (os::is_MP()) { - lock (); addl (Address(rsp, 0), 0); - } - cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), (int32_t)NULL_WORD); - jccb (Assembler::notZero, LSuccess); - - movptr (boxReg, (int32_t)NULL_WORD); // box is really EAX - if (os::is_MP()) { lock(); } - cmpxchgptr(r15_thread, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)); - jccb (Assembler::notEqual, LSuccess); - // Intentional fall-through into slow-path - - bind (LGoSlowPath); - orl (boxReg, 1); // set ICC.ZF=0 to indicate failure - jmpb (DONE_LABEL); - - bind (LSuccess); - testl (boxReg, 0); // set ICC.ZF=1 to indicate success - jmpb (DONE_LABEL); - } + // We had a fast path here for when _succ was set, but event tracing + // requires always generating an event when there are any threads + // blocked on the monitor. Hence we enter the slow path. + bind (LGoSlowPath); + orl (boxReg, 1); // set ICC.ZF=0 to indicate failure + jmpb (DONE_LABEL); bind (Stacked); movptr(tmpReg, Address (boxReg, 0)); // re-fetch if (os::is_MP()) { lock(); } cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses RAX which is box - if (EmitSync & 65536) { - bind (CheckSucc); - } #endif bind(DONE_LABEL); // Avoid branch to branch on AMD processors