# HG changeset patch # User shade # Date 1569875951 -7200 # Mon Sep 30 22:39:11 2019 +0200 # Node ID e9a75b54079365d5cbbcdae354f52a33eb3e5271 # Parent 72f4863bcabbd8cbb0e19f9c7b04b69b8f8bb28b [backport] 8222766: Shenandoah: streamline post-LRB CAS barrier (x86) Reviewed-by: rkennke diff --git a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp --- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp +++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp @@ -39,7 +39,7 @@ #include "oops/objArrayKlass.hpp" #include "runtime/sharedRuntime.hpp" #include "vmreg_x86.inline.hpp" - +#include "utilities/macros.hpp" #if INCLUDE_ALL_GCS #include "shenandoahBarrierSetAssembler_x86.hpp" #endif @@ -2005,11 +2005,11 @@ if (UseShenandoahGC && ShenandoahCASBarrier) { Register tmp1 = op->tmp1()->as_register(); Register tmp2 = op->tmp2()->as_register(); - + Register res = op->result_opr()->as_register(); __ encode_heap_oop(cmpval); __ mov(rscratch1, newval); __ encode_heap_oop(rscratch1); - ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(_masm, NULL, Address(addr, 0), cmpval, rscratch1, true, tmp1, tmp2); + ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(_masm, res, Address(addr, 0), cmpval, rscratch1, false, tmp1, tmp2); } else #endif { @@ -2029,7 +2029,8 @@ if (UseShenandoahGC && ShenandoahCASBarrier) { Register tmp1 = op->tmp1()->as_register(); Register tmp2 = op->tmp2()->as_register(); - ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(_masm, NULL, Address(addr, 0), cmpval, newval, true, tmp1, tmp2); + Register res = op->result_opr()->as_register(); + ShenandoahBarrierSetAssembler::bsasm()->cmpxchg_oop(_masm, res, Address(addr, 0), cmpval, newval, false, tmp1, tmp2); } else #endif { diff --git a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp --- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp +++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp @@ -792,8 +792,10 @@ LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience if (type == 
objectType) { #if INCLUDE_ALL_GCS - if (UseShenandoahGC) { - __ cas_obj(addr, cmp.result(), val.result(), new_register(T_OBJECT), new_register(T_OBJECT)); + if (UseShenandoahGC && ShenandoahCASBarrier) { + LIR_Opr result = rlock_result(x); + __ cas_obj(addr, cmp.result(), val.result(), new_register(T_OBJECT), new_register(T_OBJECT), result); + return; } else #endif { diff --git a/src/cpu/x86/vm/shenandoahBarrierSetAssembler_x86.cpp b/src/cpu/x86/vm/shenandoahBarrierSetAssembler_x86.cpp --- a/src/cpu/x86/vm/shenandoahBarrierSetAssembler_x86.cpp +++ b/src/cpu/x86/vm/shenandoahBarrierSetAssembler_x86.cpp @@ -104,53 +104,6 @@ } } -void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { - assert(ShenandoahCASBarrier, "should be enabled"); - Label is_null; - __ testptr(dst, dst); - __ jcc(Assembler::zero, is_null); - resolve_forward_pointer_not_null(masm, dst, tmp); - __ bind(is_null); -} - -void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { - assert(ShenandoahCASBarrier || ShenandoahLoadRefBarrier, "should be enabled"); - // The below loads the mark word, checks if the lowest two bits are - // set, and if so, clear the lowest two bits and copy the result - // to dst. Otherwise it leaves dst alone. - // Implementing this is surprisingly awkward. I do it here by: - // - Inverting the mark word - // - Test lowest two bits == 0 - // - If so, set the lowest two bits - // - Invert the result back, and copy to dst - - bool borrow_reg = (tmp == noreg); - if (borrow_reg) { - // No free registers available. Make one useful. 
- tmp = LP64_ONLY(rscratch1) NOT_LP64(rdx); - if (tmp == dst) { - tmp = LP64_ONLY(rscratch2) NOT_LP64(rcx); - } - __ push(tmp); - } - - assert_different_registers(dst, tmp); - - Label done; - __ movptr(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); - __ notptr(tmp); - __ testb(tmp, markOopDesc::marked_value); - __ jccb(Assembler::notZero, done); - __ orptr(tmp, markOopDesc::marked_value); - __ notptr(tmp); - __ mov(dst, tmp); - __ bind(done); - - if (borrow_reg) { - __ pop(tmp); - } -} - void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst) { assert(ShenandoahLoadRefBarrier, "Should be enabled"); @@ -288,8 +241,9 @@ bool exchange, Register tmp1, Register tmp2) { assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled"); assert(oldval == rax, "must be in rax for implicit use in cmpxchg"); + assert_different_registers(oldval, newval, tmp1, tmp2); - Label retry, done; + Label L_success, L_failure; // Remember oldval for retry logic below #ifdef _LP64 @@ -301,8 +255,10 @@ __ movptr(tmp1, oldval); } - // Step 1. Try to CAS with given arguments. If successful, then we are done, - // and can safely return. + // Step 1. Fast-path. + // + // Try to CAS with given arguments. If successful, then we are done. + if (os::is_MP()) __ lock(); #ifdef _LP64 if (UseCompressedOops) { @@ -312,21 +268,32 @@ { __ cmpxchgptr(newval, addr); } - __ jcc(Assembler::equal, done, true); + __ jcc(Assembler::equal, L_success); // Step 2. CAS had failed. This may be a false negative. // // The trouble comes when we compare the to-space pointer with the from-space - // pointer to the same object. To resolve this, it will suffice to resolve both - // oldval and the value from memory -- this will give both to-space pointers. + // pointer to the same object. To resolve this, it will suffice to resolve + // the value from memory -- this will give both to-space pointers. // If they mismatch, then it was a legitimate failure. 
// + // Before reaching to resolve sequence, see if we can avoid the whole shebang + // with filters. + + // Filter: when offending in-memory value is NULL, the failure is definitely legitimate + __ testptr(oldval, oldval); + __ jcc(Assembler::zero, L_failure); + + // Filter: when heap is stable, the failure is definitely legitimate #ifdef _LP64 - if (UseCompressedOops) { - __ decode_heap_oop(tmp1); - } + const Register thread = r15_thread; +#else + const Register thread = tmp2; + __ get_thread(thread); #endif - resolve_forward_pointer(masm, tmp1); + Address gc_state(thread, in_bytes(JavaThread::gc_state_offset())); + __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); + __ jcc(Assembler::zero, L_failure); #ifdef _LP64 if (UseCompressedOops) { @@ -337,18 +304,70 @@ { __ movptr(tmp2, oldval); } - resolve_forward_pointer(masm, tmp2); + + // Decode offending in-memory value. + // Test if-forwarded + __ testb(Address(tmp2, oopDesc::mark_offset_in_bytes()), markOopDesc::marked_value); + __ jcc(Assembler::noParity, L_failure); // When odd number of bits, then not forwarded + __ jcc(Assembler::zero, L_failure); // When it is 00, then also not forwarded + + // Load and mask forwarding pointer + __ movptr(tmp2, Address(tmp2, oopDesc::mark_offset_in_bytes())); + __ shrptr(tmp2, 2); + __ shlptr(tmp2, 2); +#ifdef _LP64 + if (UseCompressedOops) { + __ decode_heap_oop(tmp1); // decode for comparison + } +#endif + + // Now we have the forwarded offender in tmp2. + // Compare and if they don't match, we have legitimate failure __ cmpptr(tmp1, tmp2); - __ jcc(Assembler::notEqual, done, true); + __ jcc(Assembler::notEqual, L_failure); + + // Step 3. Need to fix the memory ptr before continuing. + // + // At this point, we have from-space oldval in the register, and its to-space + // address is in tmp2. Let's try to update it into memory. We don't care if it + // succeeds or not. If it does, then the retrying CAS would see it and succeed. 
+ // If this fixup fails, this means somebody else beat us to it, and necessarily + // with to-space ptr store. We still have to do the retry, because the GC might + // have updated the reference for us. - // Step 3. Try to CAS again with resolved to-space pointers. +#ifdef _LP64 + if (UseCompressedOops) { + __ encode_heap_oop(tmp2); // previously decoded at step 2. + } +#endif + + if (os::is_MP()) __ lock(); +#ifdef _LP64 + if (UseCompressedOops) { + __ cmpxchgl(tmp2, addr); + } else +#endif + { + __ cmpxchgptr(tmp2, addr); + } + + // Step 4. Try to CAS again. // - // Corner case: it may happen that somebody stored the from-space pointer - // to memory while we were preparing for retry. Therefore, we can fail again - // on retry, and so need to do this in loop, always resolving the failure - // witness. - __ bind(retry); + // This is guaranteed not to have false negatives, because oldval is definitely + // to-space, and memory pointer is to-space as well. Nothing is able to store + // from-space ptr into memory anymore. Make sure oldval is restored, after being + // garbled during retries. + // +#ifdef _LP64 + if (UseCompressedOops) { + __ movl(oldval, tmp2); + } else +#endif + { + __ movptr(oldval, tmp2); + } + if (os::is_MP()) __ lock(); #ifdef _LP64 if (UseCompressedOops) { @@ -358,41 +377,28 @@ { __ cmpxchgptr(newval, addr); } - __ jcc(Assembler::equal, done, true); + if (!exchange) { + __ jccb(Assembler::equal, L_success); // fastpath, peeking into Step 5, no need to jump + } -#ifdef _LP64 - if (UseCompressedOops) { - __ movl(tmp2, oldval); - __ decode_heap_oop(tmp2); - } else -#endif - { - __ movptr(tmp2, oldval); - } - resolve_forward_pointer(masm, tmp2); - - __ cmpptr(tmp1, tmp2); - __ jcc(Assembler::equal, retry, true); + // Step 5. If we need a boolean result out of CAS, set the flag appropriately + // and promote the result. Note that we handle the flag from both the 1st and 2nd CAS. 
+ // Otherwise, failure witness for CAE is in oldval on all paths, and we can return. - // Step 4. If we need a boolean result out of CAS, check the flag again, - // and promote the result. Note that we handle the flag from both the CAS - // itself and from the retry loop. - __ bind(done); - if (!exchange) { + if (exchange) { + __ bind(L_failure); + __ bind(L_success); + } else { assert(res != NULL, "need result register"); -#ifdef _LP64 - __ setb(Assembler::equal, res); - __ movzbl(res, res); -#else - // Need something else to clean the result, because some registers - // do not have byte encoding that movzbl wants. Cannot do the xor first, - // because it modifies the flags. - Label res_non_zero; + + Label exit; + __ bind(L_failure); + __ xorptr(res, res); + __ jmpb(exit); + + __ bind(L_success); __ movptr(res, 1); - __ jcc(Assembler::equal, res_non_zero, true); - __ xorptr(res, res); - __ bind(res_non_zero); -#endif + __ bind(exit); } } diff --git a/src/cpu/x86/vm/shenandoahBarrierSetAssembler_x86.hpp b/src/cpu/x86/vm/shenandoahBarrierSetAssembler_x86.hpp --- a/src/cpu/x86/vm/shenandoahBarrierSetAssembler_x86.hpp +++ b/src/cpu/x86/vm/shenandoahBarrierSetAssembler_x86.hpp @@ -36,9 +36,6 @@ class ShenandoahBarrierSetAssembler : public CHeapObj { private: - void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); - void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); - void load_reference_barrier_not_null(MacroAssembler* masm, Register dst); void storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp);