--- old/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp 2019-05-08 16:25:21.105928603 +0200 +++ new/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp 2019-05-08 16:25:20.893931687 +0200 @@ -211,18 +211,34 @@ __ bind(done); } -void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst) { +void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); Label is_null; __ cbz(dst, is_null); - resolve_forward_pointer_not_null(masm, dst); + resolve_forward_pointer_not_null(masm, dst, tmp); __ bind(is_null); } -// IMPORTANT: This must preserve all registers, even rscratch1 and rscratch2. -void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst) { +// IMPORTANT: This must preserve all registers, even rscratch1 and rscratch2, except those explicitly +// passed in. +void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { assert(ShenandoahLoadRefBarrier || ShenandoahCASBarrier, "Should be enabled"); - __ ldr(dst, Address(dst, ShenandoahForwarding::byte_offset())); + // The below loads the mark word, checks if the lowest two bits are + // set, and if so, clear the lowest two bits and copy the result + // to dst. Otherwise it leaves dst alone. + // Implementing this is surprisingly awkward. 
I do it here by: + // - Inverting the mark word + // - Test lowest two bits == 0 + // - If so, set the lowest two bits + // - Invert the result back, and copy to dst + Label done; + __ ldr(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); + __ eon(tmp, tmp, zr); + __ ands(zr, tmp, markOopDesc::lock_mask_in_place); + __ br(Assembler::NE, done); + __ orr(tmp, tmp, markOopDesc::marked_value); + __ eon(dst, tmp, zr); + __ bind(done); } void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Register tmp) { @@ -343,43 +359,9 @@ } -void ShenandoahBarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, - Register var_size_in_bytes, - int con_size_in_bytes, - Register t1, - Register t2, - Label& slow_case) { - - assert_different_registers(obj, t2); - assert_different_registers(obj, var_size_in_bytes); - Register end = t2; - - __ ldr(obj, Address(rthread, JavaThread::tlab_top_offset())); - if (var_size_in_bytes == noreg) { - __ lea(end, Address(obj, (int) (con_size_in_bytes + ShenandoahForwarding::byte_size()))); - } else { - __ add(var_size_in_bytes, var_size_in_bytes, ShenandoahForwarding::byte_size()); - __ lea(end, Address(obj, var_size_in_bytes)); - } - __ ldr(rscratch1, Address(rthread, JavaThread::tlab_end_offset())); - __ cmp(end, rscratch1); - __ br(Assembler::HI, slow_case); - - // update the tlab top pointer - __ str(end, Address(rthread, JavaThread::tlab_top_offset())); - - __ add(obj, obj, ShenandoahForwarding::byte_size()); - __ str(obj, Address(obj, ShenandoahForwarding::byte_offset())); - - // recover var_size_in_bytes if necessary - if (var_size_in_bytes == end) { - __ sub(var_size_in_bytes, var_size_in_bytes, obj); - } -} - void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, bool acquire, bool release, bool weak, bool is_cae, - Register result) { + Register tmp, Register result) { Register tmp1 = rscratch1; Register tmp2 = 
rscratch2; bool is_narrow = UseCompressedOops; @@ -415,8 +397,8 @@ __ decode_heap_oop(tmp1, tmp1); __ decode_heap_oop(tmp2, tmp2); } - resolve_forward_pointer(masm, tmp1); - resolve_forward_pointer(masm, tmp2); + resolve_forward_pointer(masm, tmp1, tmp); + resolve_forward_pointer(masm, tmp2, tmp); __ cmp(tmp1, tmp2); // Retry with expected now being the value we just loaded from addr. __ br(Assembler::EQ, retry); @@ -569,7 +551,7 @@ __ bind(work); __ mov(rscratch2, r0); - resolve_forward_pointer_not_null(cgen->assembler(), r0); + resolve_forward_pointer_not_null(cgen->assembler(), r0, rscratch1); __ cmp(rscratch2, r0); __ br(Assembler::NE, done); --- old/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp 2019-05-08 16:25:21.626921025 +0200 +++ new/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp 2019-05-08 16:25:21.414924109 +0200 @@ -54,8 +54,8 @@ bool tosca_live, bool expand_call); - void resolve_forward_pointer(MacroAssembler* masm, Register dst); - void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst); + void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp); + void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp); void load_reference_barrier(MacroAssembler* masm, Register dst, Register tmp); void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Register tmp); @@ -80,15 +80,8 @@ Register dst, Address src, Register tmp1, Register tmp_thread); virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Address dst, Register val, Register tmp1, Register tmp2); - virtual void tlab_allocate(MacroAssembler* masm, Register obj, - Register var_size_in_bytes, - int con_size_in_bytes, - Register t1, - Register t2, - Label& slow_case); - void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, - bool acquire, bool release, bool weak, bool is_cae, Register 
result); + bool acquire, bool release, bool weak, bool is_cae, Register tmp, Register result); virtual void barrier_stubs_init(); }; --- old/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetC1_aarch64.cpp 2019-05-08 16:25:22.190912822 +0200 +++ new/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetC1_aarch64.cpp 2019-05-08 16:25:21.980915877 +0200 @@ -36,6 +36,7 @@ Register cmpval = _cmp_value->as_register(); Register tmp1 = _tmp1->as_register(); Register tmp2 = _tmp2->as_register(); + Register tmp3 = _tmp3->as_register(); Register result = result_opr()->as_register(); ShenandoahBarrierSet::assembler()->storeval_barrier(masm->masm(), newval, rscratch2); @@ -47,7 +48,7 @@ newval = tmp2; } - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, result); + ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), addr, cmpval, newval, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, tmp3, result); } #undef __ @@ -72,10 +73,11 @@ LIR_Opr t1 = gen->new_register(T_OBJECT); LIR_Opr t2 = gen->new_register(T_OBJECT); + LIR_Opr t3 = gen->new_register(T_OBJECT); LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); LIR_Opr result = gen->new_register(T_INT); - __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), t1, t2, result)); + __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), t1, t2, t3, result)); return result; } } --- old/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad 2019-05-08 16:25:22.750904677 +0200 +++ new/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoah_aarch64.ad 2019-05-08 16:25:22.540907731 +0200 @@ -26,47 +26,49 @@ %} encode %{ - enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{ + enc_class aarch64_enc_cmpxchg_oop_shenandoah(memory mem, iRegP oldval, 
iRegP newval, iRegPNoSp tmp, iRegPNoSp tmp2, iRegINoSp res) %{ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); Register tmp = $tmp$$Register; __ mov(tmp, $oldval$$Register); // Must not clobber oldval. ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register); + /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, + $tmp2$$Register, $res$$Register); %} - enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegINoSp res) %{ + enc_class aarch64_enc_cmpxchg_acq_oop_shenandoah(memory mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegPNoSp tmp2, iRegINoSp res) %{ MacroAssembler _masm(&cbuf); guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding"); Register tmp = $tmp$$Register; __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register); + /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, + $tmp2$$Register, $res$$Register); %} %} -instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +instruct compareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegPNoSp tmp2, rFlagsReg cr) %{ match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP tmp, TEMP tmp2, KILL cr); format %{ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" %} - ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, tmp, res)); + ins_encode(aarch64_enc_cmpxchg_oop_shenandoah(mem, oldval, newval, tmp, tmp2, res)); ins_pipe(pipe_slow); %} -instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +instruct compareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, iRegNNoSp tmp2, rFlagsReg cr) %{ match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP tmp, TEMP tmp2, KILL cr); format %{ "cmpxchgw_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" @@ -75,36 +77,36 @@ ins_encode %{ Register tmp = $tmp$$Register; __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register); + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $tmp2$$Register, $res$$Register); %} ins_pipe(pipe_slow); %} -instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +instruct compareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegPNoSp tmp2, rFlagsReg cr) %{ predicate(needs_acquiring_load_exclusive(n)); match(Set res (ShenandoahCompareAndSwapP mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP tmp, TEMP tmp2, KILL cr); format %{ "cmpxchg_acq_shenandoah_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" %} - ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, tmp, res)); + ins_encode(aarch64_enc_cmpxchg_acq_oop_shenandoah(mem, oldval, newval, tmp, tmp2, res)); ins_pipe(pipe_slow); %} -instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +instruct compareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, iRegNNoSp tmp2, rFlagsReg cr) %{ predicate(needs_acquiring_load_exclusive(n)); match(Set res (ShenandoahCompareAndSwapN mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP tmp, TEMP tmp2, KILL cr); format %{ "cmpxchgw_acq_shenandoah_narrow_oop $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" @@ -113,16 +115,16 @@ ins_encode %{ Register tmp = $tmp$$Register; __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
- ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $res$$Register); + ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ false, $tmp2$$Register, $res$$Register); %} ins_pipe(pipe_slow); %} -instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +instruct compareAndExchangeN_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, iRegNNoSp tmp2, rFlagsReg cr) %{ match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP_DEF res, TEMP tmp, KILL cr); + effect(TEMP_DEF res, TEMP tmp, TEMP tmp2, KILL cr); format %{ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" %} @@ -130,15 +132,15 @@ Register tmp = $tmp$$Register; __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $res$$Register); + /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $tmp2$$Register, $res$$Register); %} ins_pipe(pipe_slow); %} -instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +instruct compareAndExchangeP_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegPNoSp tmp2, rFlagsReg cr) %{ match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP_DEF res, TEMP tmp, KILL cr); + effect(TEMP_DEF res, TEMP tmp, TEMP tmp2, KILL cr); format %{ "cmpxchg_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" %} @@ -146,16 +148,16 @@ Register tmp = $tmp$$Register; __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $res$$Register); + /*acquire*/ false, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $tmp2$$Register, $res$$Register); %} ins_pipe(pipe_slow); %} -instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +instruct compareAndExchangeNAcq_shenandoah(iRegNNoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, iRegNNoSp tmp2, rFlagsReg cr) %{ predicate(needs_acquiring_load_exclusive(n)); match(Set res (ShenandoahCompareAndExchangeN mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); - effect(TEMP_DEF res, TEMP tmp, KILL cr); + effect(TEMP_DEF res, TEMP tmp, TEMP tmp2, KILL cr); format %{ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" %} @@ -163,16 +165,16 @@ Register tmp = $tmp$$Register; __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $res$$Register); + /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $tmp2$$Register, $res$$Register); %} ins_pipe(pipe_slow); %} -instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +instruct compareAndExchangePAcq_shenandoah(iRegPNoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegPNoSp tmp2, rFlagsReg cr) %{ predicate(needs_acquiring_load_exclusive(n)); match(Set res (ShenandoahCompareAndExchangeP mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); - effect(TEMP_DEF res, TEMP tmp, KILL cr); + effect(TEMP_DEF res, TEMP tmp, TEMP tmp2, KILL cr); format %{ "cmpxchg_acq_shenandoah $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval with temp $tmp" %} @@ -180,15 +182,15 @@ Register tmp = $tmp$$Register; __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $res$$Register); + /*acquire*/ true, /*release*/ true, /*weak*/ false, /*is_cae*/ true, $tmp2$$Register, $res$$Register); %} ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +instruct weakCompareAndSwapN_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, iRegNNoSp tmp2, rFlagsReg cr) %{ match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP tmp, TEMP tmp2, KILL cr); format %{ "cmpxchgw_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)" @@ -197,15 +199,15 @@ Register tmp = $tmp$$Register; __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $res$$Register); + /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $tmp2$$Register, $res$$Register); %} ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +instruct weakCompareAndSwapP_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegPNoSp tmp2, rFlagsReg cr) %{ match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ins_cost(2 * VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP tmp, TEMP tmp2, KILL cr); format %{ "cmpxchg_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval" %} @@ -213,16 +215,16 @@ Register tmp = $tmp$$Register; __ mov(tmp, $oldval$$Register); // Must not clobber oldval. ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $res$$Register); + /*acquire*/ false, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $tmp2$$Register, $res$$Register); %} ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, rFlagsReg cr) %{ +instruct weakCompareAndSwapNAcq_shenandoah(iRegINoSp res, indirect mem, iRegN oldval, iRegN newval, iRegNNoSp tmp, iRegNNoSp tmp2, rFlagsReg cr) %{ predicate(needs_acquiring_load_exclusive(n)); match(Set res (ShenandoahWeakCompareAndSwapN mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP tmp, TEMP tmp2, KILL cr); format %{ "cmpxchgw_acq_shenandoah $res = $mem, $oldval, $newval\t# (narrow oop, weak) if $mem == $oldval then $mem <-- $newval" "csetw $res, EQ\t# $res <-- (EQ ? 
1 : 0)" @@ -231,16 +233,16 @@ Register tmp = $tmp$$Register; __ mov(tmp, $oldval$$Register); // Must not clobber oldval. ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $res$$Register); + /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $tmp2$$Register, $res$$Register); %} ins_pipe(pipe_slow); %} -instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, rFlagsReg cr) %{ +instruct weakCompareAndSwapPAcq_shenandoah(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, iRegPNoSp tmp, iRegPNoSp tmp2, rFlagsReg cr) %{ predicate(needs_acquiring_load_exclusive(n)); match(Set res (ShenandoahWeakCompareAndSwapP mem (Binary oldval newval))); ins_cost(VOLATILE_REF_COST); - effect(TEMP tmp, KILL cr); + effect(TEMP tmp, TEMP tmp2, KILL cr); format %{ "cmpxchg_acq_shenandoah $res = $mem, $oldval, $newval\t# (ptr, weak) if $mem == $oldval then $mem <-- $newval" "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)" @@ -249,7 +251,7 @@ Register tmp = $tmp$$Register; __ mov(tmp, $oldval$$Register); // Must not clobber oldval. 
ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $mem$$Register, tmp, $newval$$Register, - /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $res$$Register); + /*acquire*/ true, /*release*/ true, /*weak*/ true, /*is_cae*/ false, $tmp2$$Register, $res$$Register); %} ins_pipe(pipe_slow); %} --- old/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp 2019-05-08 16:25:23.273897070 +0200 +++ new/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp 2019-05-08 16:25:23.062900139 +0200 @@ -23,7 +23,6 @@ #include "precompiled.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -#include "gc/shenandoah/shenandoahForwarding.hpp" #include "gc/shenandoah/shenandoahHeap.hpp" #include "gc/shenandoah/shenandoahHeapRegion.hpp" #include "gc/shenandoah/shenandoahHeuristics.hpp" @@ -313,18 +312,34 @@ __ bind(done); } -void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst) { +void ShenandoahBarrierSetAssembler::resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp) { assert(ShenandoahCASBarrier, "should be enabled"); Label is_null; __ testptr(dst, dst); __ jcc(Assembler::zero, is_null); - resolve_forward_pointer_not_null(masm, dst); + resolve_forward_pointer_not_null(masm, dst, tmp); __ bind(is_null); } -void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst) { +void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp) { assert(ShenandoahCASBarrier || ShenandoahLoadRefBarrier, "should be enabled"); - __ movptr(dst, Address(dst, ShenandoahForwarding::byte_offset())); + // The below loads the mark word, checks if the lowest two bits are + // set, and if so, clear the lowest two bits and copy the result + // to dst. Otherwise it leaves dst alone. + // Implementing this is surprisingly awkward. 
I do it here by: + // - Inverting the mark word + // - Test lowest two bits == 0 + // - If so, set the lowest two bits + // - Invert the result back, and copy to dst + Label done; + __ movptr(tmp, Address(dst, oopDesc::mark_offset_in_bytes())); + __ notptr(tmp); + __ testb(tmp, markOopDesc::marked_value); + __ jccb(Assembler::notZero, done); + __ orptr(tmp, markOopDesc::marked_value); + __ notptr(tmp); + __ mov(dst, tmp); + __ bind(done); } @@ -334,13 +349,7 @@ Label done; Address gc_state(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); - __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED | ShenandoahHeap::EVACUATION | ShenandoahHeap::TRAVERSAL); - __ jccb(Assembler::zero, done); - - // Heap is unstable, need to perform the resolve even if LRB is inactive - resolve_forward_pointer_not_null(masm, dst); - - __ testb(gc_state, ShenandoahHeap::EVACUATION | ShenandoahHeap::TRAVERSAL); + __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); __ jccb(Assembler::zero, done); if (dst != rax) { @@ -475,55 +484,6 @@ } } -void ShenandoahBarrierSetAssembler::tlab_allocate(MacroAssembler* masm, - Register thread, Register obj, - Register var_size_in_bytes, - int con_size_in_bytes, - Register t1, Register t2, - Label& slow_case) { - assert_different_registers(obj, t1, t2); - assert_different_registers(obj, var_size_in_bytes, t1); - Register end = t2; - if (!thread->is_valid()) { -#ifdef _LP64 - thread = r15_thread; -#else - assert(t1->is_valid(), "need temp reg"); - thread = t1; - __ get_thread(thread); -#endif - } - - __ verify_tlab(); - - __ movptr(obj, Address(thread, JavaThread::tlab_top_offset())); - if (var_size_in_bytes == noreg) { - __ lea(end, Address(obj, con_size_in_bytes + ShenandoahForwarding::byte_size())); - } else { - __ addptr(var_size_in_bytes, ShenandoahForwarding::byte_size()); - __ lea(end, Address(obj, var_size_in_bytes, Address::times_1)); - } - __ cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); - __ jcc(Assembler::above, 
slow_case); - - // update the tlab top pointer - __ movptr(Address(thread, JavaThread::tlab_top_offset()), end); - - // Initialize brooks pointer -#ifdef _LP64 - __ incrementq(obj, ShenandoahForwarding::byte_size()); -#else - __ incrementl(obj, ShenandoahForwarding::byte_size()); -#endif - __ movptr(Address(obj, ShenandoahForwarding::byte_offset()), obj); - - // recover var_size_in_bytes if necessary - if (var_size_in_bytes == end) { - __ subptr(var_size_in_bytes, obj); - } - __ verify_tlab(); -} - // Special Shenandoah CAS implementation that handles false negatives // due to concurrent evacuation. #ifndef _LP64 @@ -536,7 +496,7 @@ #else void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, Register res, Address addr, Register oldval, Register newval, - bool exchange, Register tmp1, Register tmp2) { + bool exchange, Register tmp1, Register tmp2, Register tmp3) { assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled"); assert(oldval == rax, "must be in rax for implicit use in cmpxchg"); @@ -569,7 +529,7 @@ if (UseCompressedOops) { __ decode_heap_oop(tmp1); } - resolve_forward_pointer(masm, tmp1); + resolve_forward_pointer(masm, tmp1, tmp3); if (UseCompressedOops) { __ movl(tmp2, oldval); @@ -577,7 +537,7 @@ } else { __ movptr(tmp2, oldval); } - resolve_forward_pointer(masm, tmp2); + resolve_forward_pointer(masm, tmp2, tmp3); __ cmpptr(tmp1, tmp2); __ jcc(Assembler::notEqual, done, true); @@ -603,7 +563,7 @@ } else { __ movptr(tmp2, oldval); } - resolve_forward_pointer(masm, tmp2); + resolve_forward_pointer(masm, tmp2, tmp3); __ cmpptr(tmp1, tmp2); __ jcc(Assembler::equal, retry, true); @@ -851,7 +811,7 @@ address start = __ pc(); #ifdef _LP64 - Label not_done; + Label resolve_oop, slow_path; // We use RDI, which also serves as argument register for slow call. 
// RAX always holds the src object ptr, except after the slow call and @@ -873,13 +833,31 @@ // unlive: rdi __ testbool(r8); // unlive: r8 - __ jccb(Assembler::notZero, not_done); + __ jccb(Assembler::notZero, resolve_oop); + + __ pop(r8); + __ pop(rdi); + __ ret(0); + + __ bind(resolve_oop); + + __ movptr(r8, Address(rax, oopDesc::mark_offset_in_bytes())); + // Test if both lowest bits are set. We trick it by negating the bits + // then test for both bits clear. + __ notptr(r8); + __ testb(r8, markOopDesc::marked_value); + __ jccb(Assembler::notZero, slow_path); + // Clear both lower bits. It's still inverted, so set them, and then invert back. + __ orptr(r8, markOopDesc::marked_value); + __ notptr(r8); + // At this point, r8 contains the decoded forwarding pointer. + __ mov(rax, r8); __ pop(r8); __ pop(rdi); __ ret(0); - __ bind(not_done); + __ bind(slow_path); __ push(rcx); __ push(rdx); --- old/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp 2019-05-08 16:25:23.801889390 +0200 +++ new/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp 2019-05-08 16:25:23.591892444 +0200 @@ -55,8 +55,8 @@ bool tosca_live, bool expand_call); - void resolve_forward_pointer(MacroAssembler* masm, Register dst); - void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst); + void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp); + void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp); void load_reference_barrier_not_null(MacroAssembler* masm, Register dst); @@ -81,7 +81,7 @@ void cmpxchg_oop(MacroAssembler* masm, Register res, Address addr, Register oldval, Register newval, - bool exchange, Register tmp1, Register tmp2); + bool exchange, Register tmp1, Register tmp2, Register tmp3); virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Register src, Register dst, Register count); virtual void arraycopy_epilogue(MacroAssembler* 
masm, DecoratorSet decorators, BasicType type, @@ -91,13 +91,6 @@ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, Address dst, Register val, Register tmp1, Register tmp2); - virtual void tlab_allocate(MacroAssembler* masm, - Register thread, Register obj, - Register var_size_in_bytes, - int con_size_in_bytes, - Register t1, Register t2, - Label& slow_case); - virtual void barrier_stubs_init(); }; --- old/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetC1_x86.cpp 2019-05-08 16:25:24.355881332 +0200 +++ new/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetC1_x86.cpp 2019-05-08 16:25:24.148884343 +0200 @@ -36,6 +36,7 @@ Register cmpval = _cmp_value->as_register(); Register tmp1 = _tmp1->as_register(); Register tmp2 = _tmp2->as_register(); + Register tmp3 = _tmp3->as_register(); Register result = result_opr()->as_register(); assert(cmpval == rax, "wrong register"); assert(newval != NULL, "new val must be register"); @@ -53,7 +54,7 @@ newval = rscratch1; } - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), result, Address(addr, 0), cmpval, newval, false, tmp1, tmp2); + ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), result, Address(addr, 0), cmpval, newval, false, tmp1, tmp2, tmp3); } #undef __ @@ -78,10 +79,11 @@ LIR_Opr t1 = gen->new_register(T_OBJECT); LIR_Opr t2 = gen->new_register(T_OBJECT); + LIR_Opr t3 = gen->new_register(T_OBJECT); LIR_Opr addr = access.resolved_addr()->as_address_ptr()->base(); LIR_Opr result = gen->new_register(T_INT); - __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), t1, t2, result)); + __ append(new LIR_OpShenandoahCompareAndSwap(addr, cmp_value.result(), new_value.result(), t1, t2, t3, result)); return result; } } --- old/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad 2019-05-08 16:25:24.915873187 +0200 +++ new/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_64.ad 2019-05-08 16:25:24.705876241 +0200 @@ -28,14 +28,14 
@@ instruct compareAndSwapP_shenandoah(rRegI res, memory mem_ptr, - rRegP tmp1, rRegP tmp2, + rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegP oldval, rRegP newval, rFlagsReg cr) %{ predicate(VM_Version::supports_cx8()); match(Set res (ShenandoahCompareAndSwapP mem_ptr (Binary oldval newval))); match(Set res (ShenandoahWeakCompareAndSwapP mem_ptr (Binary oldval newval))); - effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr, KILL oldval); format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} @@ -43,7 +43,7 @@ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, false, // swap - $tmp1$$Register, $tmp2$$Register + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register ); %} ins_pipe( pipe_cmpxchg ); @@ -51,12 +51,12 @@ instruct compareAndSwapN_shenandoah(rRegI res, memory mem_ptr, - rRegP tmp1, rRegP tmp2, + rRegP tmp1, rRegP tmp2, rRegP tmp3, rax_RegN oldval, rRegN newval, rFlagsReg cr) %{ match(Set res (ShenandoahCompareAndSwapN mem_ptr (Binary oldval newval))); match(Set res (ShenandoahWeakCompareAndSwapN mem_ptr (Binary oldval newval))); - effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr, KILL oldval); format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} @@ -64,7 +64,7 @@ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, false, // swap - $tmp1$$Register, $tmp2$$Register + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register ); %} ins_pipe( pipe_cmpxchg ); @@ -72,10 +72,10 @@ instruct compareAndExchangeN_shenandoah(memory mem_ptr, rax_RegN oldval, rRegN newval, - rRegP tmp1, rRegP tmp2, + rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) %{ match(Set oldval (ShenandoahCompareAndExchangeN mem_ptr (Binary oldval newval))); - effect(TEMP tmp1, TEMP tmp2, KILL cr); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); format %{ 
"shenandoah_cas_oop $mem_ptr,$newval" %} @@ -83,7 +83,7 @@ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, NULL, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, true, // exchange - $tmp1$$Register, $tmp2$$Register + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register ); %} ins_pipe( pipe_cmpxchg ); @@ -91,12 +91,12 @@ instruct compareAndExchangeP_shenandoah(memory mem_ptr, rax_RegP oldval, rRegP newval, - rRegP tmp1, rRegP tmp2, + rRegP tmp1, rRegP tmp2, rRegP tmp3, rFlagsReg cr) %{ predicate(VM_Version::supports_cx8()); match(Set oldval (ShenandoahCompareAndExchangeP mem_ptr (Binary oldval newval))); - effect(KILL cr, TEMP tmp1, TEMP tmp2); + effect(KILL cr, TEMP tmp1, TEMP tmp2, TEMP tmp3); ins_cost(1000); format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} @@ -105,7 +105,7 @@ ShenandoahBarrierSet::assembler()->cmpxchg_oop(&_masm, NULL, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, true, // exchange - $tmp1$$Register, $tmp2$$Register + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register ); %} ins_pipe( pipe_cmpxchg ); --- old/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.hpp 2019-05-08 16:25:25.488864852 +0200 +++ new/src/hotspot/share/gc/shenandoah/c1/shenandoahBarrierSetC1.hpp 2019-05-08 16:25:25.277867921 +0200 @@ -126,22 +126,25 @@ LIR_Opr _new_value; LIR_Opr _tmp1; LIR_Opr _tmp2; + LIR_Opr _tmp3; public: LIR_OpShenandoahCompareAndSwap(LIR_Opr addr, LIR_Opr cmp_value, LIR_Opr new_value, - LIR_Opr t1, LIR_Opr t2, LIR_Opr result) + LIR_Opr t1, LIR_Opr t2, LIR_Opr t3, LIR_Opr result) : LIR_Op(lir_none, result, NULL) // no info , _addr(addr) , _cmp_value(cmp_value) , _new_value(new_value) , _tmp1(t1) - , _tmp2(t2) { } + , _tmp2(t2) + , _tmp3(t3) { } LIR_Opr addr() const { return _addr; } LIR_Opr cmp_value() const { return _cmp_value; } LIR_Opr new_value() const { return _new_value; } LIR_Opr tmp1() const { return _tmp1; } LIR_Opr tmp2() const { return _tmp2; } + LIR_Opr tmp3() const { return _tmp3; } virtual void 
visit(LIR_OpVisitState* state) { assert(_addr->is_valid(), "used"); @@ -156,6 +159,7 @@ state->do_temp(_new_value); if (_tmp1->is_valid()) state->do_temp(_tmp1); if (_tmp2->is_valid()) state->do_temp(_tmp2); + if (_tmp3->is_valid()) state->do_temp(_tmp3); if (_result->is_valid()) state->do_output(_result); } @@ -167,6 +171,7 @@ new_value()->print(out); out->print(" "); tmp1()->print(out); out->print(" "); tmp2()->print(out); out->print(" "); + tmp3()->print(out); out->print(" "); } #ifndef PRODUCT virtual const char* name() const { --- old/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp 2019-05-08 16:25:26.069856401 +0200 +++ new/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp 2019-05-08 16:25:25.846859645 +0200 @@ -23,7 +23,6 @@ #include "precompiled.hpp" #include "gc/shared/barrierSet.hpp" -#include "gc/shenandoah/shenandoahForwarding.hpp" #include "gc/shenandoah/shenandoahHeap.hpp" #include "gc/shenandoah/shenandoahHeuristics.hpp" #include "gc/shenandoah/shenandoahRuntime.hpp" @@ -710,30 +709,6 @@ BarrierSetC2::clone(kit, src, dst, size, is_array); } -Node* ShenandoahBarrierSetC2::obj_allocate(PhaseMacroExpand* macro, Node* ctrl, Node* mem, Node* toobig_false, Node* size_in_bytes, - Node*& i_o, Node*& needgc_ctrl, - Node*& fast_oop_ctrl, Node*& fast_oop_rawmem, - intx prefetch_lines) const { - PhaseIterGVN& igvn = macro->igvn(); - - // Allocate several words more for the Shenandoah brooks pointer. - size_in_bytes = new AddXNode(size_in_bytes, igvn.MakeConX(ShenandoahForwarding::byte_size())); - macro->transform_later(size_in_bytes); - - Node* fast_oop = BarrierSetC2::obj_allocate(macro, ctrl, mem, toobig_false, size_in_bytes, - i_o, needgc_ctrl, fast_oop_ctrl, fast_oop_rawmem, - prefetch_lines); - - // Bump up object for Shenandoah brooks pointer. 
- fast_oop = new AddPNode(macro->top(), fast_oop, igvn.MakeConX(ShenandoahForwarding::byte_size())); - macro->transform_later(fast_oop); - - // Initialize Shenandoah brooks pointer to point to the object itself. - fast_oop_rawmem = macro->make_store(fast_oop_ctrl, fast_oop_rawmem, fast_oop, ShenandoahForwarding::byte_offset(), fast_oop, T_OBJECT); - - return fast_oop; -} - // Support for GC barriers emitted during parsing bool ShenandoahBarrierSetC2::is_gc_barrier_node(Node* node) const { if (node->Opcode() == Op_ShenandoahLoadReferenceBarrier) return true; --- old/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp 2019-05-08 16:25:26.653847907 +0200 +++ new/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.hpp 2019-05-08 16:25:26.440851005 +0200 @@ -108,11 +108,6 @@ // This is the entry-point for the backend to perform accesses through the Access API. virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const; - virtual Node* obj_allocate(PhaseMacroExpand* macro, Node* ctrl, Node* mem, Node* toobig_false, Node* size_in_bytes, - Node*& i_o, Node*& needgc_ctrl, - Node*& fast_oop_ctrl, Node*& fast_oop_rawmem, - intx prefetch_lines) const; - // These are general helper methods used by C2 virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const; virtual void clone_barrier_at_expansion(ArrayCopyNode* ac, Node* call, PhaseIterGVN& igvn) const; --- old/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp 2019-05-08 16:25:27.223839617 +0200 +++ new/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp 2019-05-08 16:25:27.006842773 +0200 @@ -26,7 +26,6 @@ #include "gc/shenandoah/c2/shenandoahSupport.hpp" #include "gc/shenandoah/c2/shenandoahBarrierSetC2.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -#include "gc/shenandoah/shenandoahForwarding.hpp" #include "gc/shenandoah/shenandoahHeap.hpp" #include 
"gc/shenandoah/shenandoahHeapRegion.hpp" #include "gc/shenandoah/shenandoahRuntime.hpp" @@ -1458,7 +1457,7 @@ assert(val->bottom_type()->make_oopptr(), "need oop"); assert(val->bottom_type()->make_oopptr()->const_oop() == NULL, "expect non-constant"); - enum { _heap_stable = 1, _not_cset, _not_equal, _evac_path, _null_path, PATH_LIMIT }; + enum { _heap_stable = 1, _not_cset, _fwded, _evac_path, _null_path, PATH_LIMIT }; Node* region = new RegionNode(PATH_LIMIT); Node* val_phi = new PhiNode(region, uncasted_val->bottom_type()->is_oopptr()); Node* raw_mem_phi = PhiNode::make(region, raw_mem, Type::MEMORY, TypeRawPtr::BOTTOM); @@ -1508,36 +1507,47 @@ IfNode* iff = unc_ctrl->in(0)->as_If(); phase->igvn().replace_input_of(iff, 1, phase->igvn().intcon(1)); } - Node* addr = new AddPNode(new_val, uncasted_val, phase->igvn().MakeConX(ShenandoahForwarding::byte_offset())); + Node* addr = new AddPNode(new_val, new_val, phase->igvn().MakeConX(oopDesc::mark_offset_in_bytes())); phase->register_new_node(addr, ctrl); - assert(val->bottom_type()->isa_oopptr(), "what else?"); - const TypePtr* obj_type = val->bottom_type()->is_oopptr(); - const TypePtr* adr_type = TypeRawPtr::BOTTOM; - Node* fwd = new LoadPNode(ctrl, raw_mem, addr, adr_type, obj_type, MemNode::unordered); - phase->register_new_node(fwd, ctrl); + assert(new_val->bottom_type()->isa_oopptr(), "what else?"); + Node* markword = new LoadXNode(ctrl, raw_mem, addr, TypeRawPtr::BOTTOM, TypeX_X, MemNode::unordered); + phase->register_new_node(markword, ctrl); + + // Test if object is forwarded. This is the case if lowest two bits are set. 
+ Node* masked = new AndXNode(markword, phase->igvn().MakeConX(markOopDesc::lock_mask_in_place)); + phase->register_new_node(masked, ctrl); + Node* cmp = new CmpXNode(masked, phase->igvn().MakeConX(markOopDesc::marked_value)); + phase->register_new_node(cmp, ctrl); // Only branch to LRB stub if object is not forwarded; otherwise reply with fwd ptr - Node* cmp = new CmpPNode(fwd, new_val); - phase->register_new_node(cmp, ctrl); - Node* bol = new BoolNode(cmp, BoolTest::eq); + Node* bol = new BoolNode(cmp, BoolTest::eq); // Equals 3 means it's forwarded phase->register_new_node(bol, ctrl); IfNode* iff = new IfNode(ctrl, bol, PROB_UNLIKELY(0.999), COUNT_UNKNOWN); - if (reg2_ctrl == NULL) reg2_ctrl = iff; phase->register_control(iff, loop, ctrl); - Node* if_not_eq = new IfFalseNode(iff); - phase->register_control(if_not_eq, loop, iff); - Node* if_eq = new IfTrueNode(iff); - phase->register_control(if_eq, loop, iff); + Node* if_fwd = new IfTrueNode(iff); + phase->register_control(if_fwd, loop, iff); + Node* if_not_fwd = new IfFalseNode(iff); + phase->register_control(if_not_fwd, loop, iff); + + // Decode forward pointer. + Node* masked2 = new AndXNode(markword, phase->igvn().MakeConX(~markOopDesc::lock_mask_in_place)); + phase->register_new_node(masked2, if_fwd); + Node* fwdraw = new CastX2PNode(masked2); + fwdraw->init_req(0, if_fwd); + phase->register_new_node(fwdraw, if_fwd); + Node* fwd = new CheckCastPPNode(NULL, fwdraw, val->bottom_type()); + phase->register_new_node(fwd, if_fwd); // Wire up not-equal-path in slots 3. 
- region->init_req(_not_equal, if_not_eq); - val_phi->init_req(_not_equal, fwd); - raw_mem_phi->init_req(_not_equal, raw_mem); + region->init_req(_fwded, if_fwd); + val_phi->init_req(_fwded, fwd); + raw_mem_phi->init_req(_fwded, raw_mem); // Call wb-stub and wire up that path in slots 4 Node* result_mem = NULL; - ctrl = if_eq; + ctrl = if_not_fwd; + fwd = new_val; call_lrb_stub(ctrl, fwd, result_mem, raw_mem, phase); region->init_req(_evac_path, ctrl); val_phi->init_req(_evac_path, fwd); @@ -3182,6 +3192,8 @@ case Op_CastP2X: case Op_SafePoint: case Op_EncodeISOArray: + case Op_LoadKlass: + case Op_LoadNKlass: strength = STRONG; break; case Op_LoadB: @@ -3233,8 +3245,6 @@ break; case Op_Conv2B: case Op_LoadRange: - case Op_LoadKlass: - case Op_LoadNKlass: // NONE, i.e. leave current strength as is break; case Op_AddP: --- old/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp 2019-05-08 16:25:27.815831006 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahAsserts.cpp 2019-05-08 16:25:27.607834031 +0200 @@ -250,7 +250,7 @@ file, line); } - size_t alloc_size = obj->size() + ShenandoahForwarding::word_size(); + size_t alloc_size = obj->size(); if (alloc_size > ShenandoahHeapRegion::humongous_threshold_words()) { size_t idx = r->region_number(); size_t num_regions = ShenandoahHeapRegion::required_regions(alloc_size * HeapWordSize); --- old/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp 2019-05-08 16:25:28.388822672 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahBarrierSet.cpp 2019-05-08 16:25:28.171825828 +0200 @@ -26,7 +26,6 @@ #include "gc/shenandoah/shenandoahBarrierSet.hpp" #include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" #include "gc/shenandoah/shenandoahCollectorPolicy.hpp" -#include "gc/shenandoah/shenandoahForwarding.hpp" #include "gc/shenandoah/shenandoahHeap.inline.hpp" #include "gc/shenandoah/shenandoahHeuristics.hpp" #include "gc/shenandoah/shenandoahTraversalGC.hpp" @@ -262,7 +261,7 @@ ShenandoahHeapRegion* r = 
_heap->heap_region_containing(obj); assert(r->is_cset(), "sanity"); - HeapWord* cur = (HeapWord*)obj + obj->size() + ShenandoahForwarding::word_size(); + HeapWord* cur = (HeapWord*)obj + obj->size(); size_t count = 0; while ((cur < r->top()) && ctx->is_marked(oop(cur)) && (count++ < max)) { @@ -270,7 +269,7 @@ if (oopDesc::equals_raw(cur_oop, resolve_forwarded_not_null(cur_oop))) { _heap->evacuate_object(cur_oop, thread); } - cur = cur + cur_oop->size() + ShenandoahForwarding::word_size(); + cur = cur + cur_oop->size(); } } --- old/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.inline.hpp 2019-05-08 16:25:28.963814308 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.inline.hpp 2019-05-08 16:25:28.749817421 +0200 @@ -25,7 +25,6 @@ #define SHARE_GC_SHENANDOAH_SHENANDOAHCONCURRENTMARK_INLINE_HPP #include "gc/shenandoah/shenandoahAsserts.hpp" -#include "gc/shenandoah/shenandoahForwarding.hpp" #include "gc/shenandoah/shenandoahBarrierSet.inline.hpp" #include "gc/shenandoah/shenandoahConcurrentMark.hpp" #include "gc/shenandoah/shenandoahMarkingContext.inline.hpp" @@ -69,7 +68,7 @@ inline void ShenandoahConcurrentMark::count_liveness(jushort* live_data, oop obj) { size_t region_idx = _heap->heap_region_index_containing(obj); ShenandoahHeapRegion* region = _heap->get_region(region_idx); - size_t size = obj->size() + ShenandoahForwarding::word_size(); + size_t size = obj->size(); if (!region->is_humongous_start()) { assert(!region->is_humongous(), "Cannot have continuations here"); --- old/src/hotspot/share/gc/shenandoah/shenandoahForwarding.hpp 2019-05-08 16:25:29.531806047 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahForwarding.hpp 2019-05-08 16:25:29.314809203 +0200 @@ -28,68 +28,11 @@ #include "utilities/globalDefinitions.hpp" class ShenandoahForwarding { - /* - * Notes: - * - * a. 
It is important to have byte_offset and word_offset return constant - * expressions, because that will allow to constant-fold forwarding ptr - * accesses. This is not a problem in JIT compilers that would generate - * the code once, but it is problematic in GC hotpath code. - * - * b. With filler object mechanics, we may need to allocate more space for - * the forwarding ptr to meet alignment requirements for objects. This - * means *_offset and *_size calls are NOT interchangeable. The accesses - * to forwarding ptrs should always be via *_offset. Storage size - * calculations should always be via *_size. - */ - public: - /* Offset from the object start, in HeapWords. */ - static inline int word_offset() { - return -1; // exactly one HeapWord - } - - /* Offset from the object start, in bytes. */ - static inline int byte_offset() { - return -HeapWordSize; // exactly one HeapWord - } - - /* Allocated size, in HeapWords. */ - static inline uint word_size() { - return (uint) MinObjAlignment; - } - - /* Allocated size, in bytes */ - static inline uint byte_size() { - return (uint) MinObjAlignmentInBytes; - } - - /* Assert basic stuff once at startup. */ - static void initial_checks() { - guarantee (MinObjAlignment > 0, "sanity, word_size is correct"); - guarantee (MinObjAlignmentInBytes > 0, "sanity, byte_size is correct"); - } - - /* Initializes forwarding pointer (to self). - */ - static inline void initialize(oop obj); - /* Gets forwardee from the given object. */ static inline oop get_forwardee(oop obj); - /* Tries to atomically update forwardee in $holder object to $update. - * Assumes $holder points at itself. - * Asserts $holder is in from-space. - * Asserts $update is in to-space. - */ - static inline oop try_update_forwardee(oop obj, oop update); - - /* Sets raw value for forwardee slot. - * THIS IS DANGEROUS: USERS HAVE TO INITIALIZE/SET FORWARDEE BACK AFTER THEY ARE DONE. 
- */ - static inline void set_forwardee_raw(oop obj, HeapWord* update); - /* Returns the raw value from forwardee slot. */ static inline HeapWord* get_forwardee_raw(oop obj); @@ -99,8 +42,18 @@ */ static inline HeapWord* get_forwardee_raw_unchecked(oop obj); -private: - static inline HeapWord** forward_ptr_addr(oop obj); + /** + * Returns true if the object is forwarded, false otherwise. + */ + static inline bool is_forwarded(oop obj); + + /* Tries to atomically update forwardee in $holder object to $update. + * Assumes $holder points at itself. + * Asserts $holder is in from-space. + * Asserts $update is in to-space. + */ + static inline oop try_update_forwardee(oop obj, oop update); + }; #endif // SHARE_GC_SHENANDOAH_SHENANDOAHFORWARDING_HPP --- old/src/hotspot/share/gc/shenandoah/shenandoahForwarding.inline.hpp 2019-05-08 16:25:30.105797698 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahForwarding.inline.hpp 2019-05-08 16:25:29.886800883 +0200 @@ -26,40 +26,45 @@ #include "gc/shenandoah/shenandoahAsserts.hpp" #include "gc/shenandoah/shenandoahForwarding.hpp" +#include "oops/markOop.inline.hpp" #include "runtime/atomic.hpp" -inline HeapWord** ShenandoahForwarding::forward_ptr_addr(oop obj) { - return (HeapWord**)((HeapWord*) obj + word_offset()); -} - -inline void ShenandoahForwarding::initialize(oop obj) { - shenandoah_assert_in_heap(NULL, obj); - *forward_ptr_addr(obj) = (HeapWord*) obj; -} - -inline void ShenandoahForwarding::set_forwardee_raw(oop obj, HeapWord* update) { - shenandoah_assert_in_heap(NULL, obj); - *forward_ptr_addr(obj) = update; -} - inline HeapWord* ShenandoahForwarding::get_forwardee_raw(oop obj) { shenandoah_assert_in_heap(NULL, obj); - return *forward_ptr_addr(obj); + return get_forwardee_raw_unchecked(obj); } inline HeapWord* ShenandoahForwarding::get_forwardee_raw_unchecked(oop obj) { - return *forward_ptr_addr(obj); + markOop mark = obj->mark_raw(); + if (mark->is_marked()) { + return (HeapWord*) mark->clear_lock_bits(); + } 
else { + return (HeapWord*) obj; + } } inline oop ShenandoahForwarding::get_forwardee(oop obj) { shenandoah_assert_correct(NULL, obj); - return oop(*forward_ptr_addr(obj)); + return oop(get_forwardee_raw_unchecked(obj)); +} + +inline bool ShenandoahForwarding::is_forwarded(oop obj) { + return obj->mark_raw()->is_marked(); } inline oop ShenandoahForwarding::try_update_forwardee(oop obj, oop update) { - oop result = (oop) Atomic::cmpxchg(update, (oop*)forward_ptr_addr(obj), obj); - shenandoah_assert_correct_except(NULL, obj, !oopDesc::equals_raw(result, obj)); - return result; + markOop old_mark = obj->mark_raw(); + if (old_mark->is_marked()) { + return (oop) old_mark->clear_lock_bits(); + } + + markOop new_mark = markOopDesc::encode_pointer_as_mark(update); + markOop prev_mark = obj->cas_set_mark_raw(new_mark, old_mark); + if (prev_mark == old_mark) { + return obj; + } else { + return (oop) prev_mark->clear_lock_bits(); + } } #endif // SHARE_GC_SHENANDOAH_SHENANDOAHFORWARDING_INLINE_HPP --- old/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp 2019-05-08 16:25:30.670789480 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp 2019-05-08 16:25:30.456792592 +0200 @@ -32,7 +32,6 @@ #include "gc/shenandoah/shenandoahAllocTracker.hpp" #include "gc/shenandoah/shenandoahBarrierSet.hpp" -#include "gc/shenandoah/shenandoahForwarding.hpp" #include "gc/shenandoah/shenandoahClosures.inline.hpp" #include "gc/shenandoah/shenandoahCollectionSet.hpp" #include "gc/shenandoah/shenandoahCollectorPolicy.hpp" @@ -131,7 +130,6 @@ }; jint ShenandoahHeap::initialize() { - ShenandoahForwarding::initial_checks(); initialize_heuristics(); @@ -854,49 +852,6 @@ return _free_set->allocate(req, in_new_region); } -class ShenandoahMemAllocator : public MemAllocator { -private: - MemAllocator& _initializer; -public: - ShenandoahMemAllocator(MemAllocator& initializer, Klass* klass, size_t word_size, Thread* thread) : - MemAllocator(klass, word_size + ShenandoahForwarding::word_size(), 
thread), - _initializer(initializer) {} - -protected: - virtual HeapWord* mem_allocate(Allocation& allocation) const { - HeapWord* result = MemAllocator::mem_allocate(allocation); - // Initialize brooks-pointer - if (result != NULL) { - result += ShenandoahForwarding::word_size(); - ShenandoahForwarding::initialize(oop(result)); - assert(! ShenandoahHeap::heap()->in_collection_set(result), "never allocate in targetted region"); - } - return result; - } - - virtual oop initialize(HeapWord* mem) const { - return _initializer.initialize(mem); - } -}; - -oop ShenandoahHeap::obj_allocate(Klass* klass, int size, TRAPS) { - ObjAllocator initializer(klass, size, THREAD); - ShenandoahMemAllocator allocator(initializer, klass, size, THREAD); - return allocator.allocate(); -} - -oop ShenandoahHeap::array_allocate(Klass* klass, int size, int length, bool do_zero, TRAPS) { - ObjArrayAllocator initializer(klass, size, length, do_zero, THREAD); - ShenandoahMemAllocator allocator(initializer, klass, size, THREAD); - return allocator.allocate(); -} - -oop ShenandoahHeap::class_allocate(Klass* klass, int size, TRAPS) { - ClassAllocator initializer(klass, size, THREAD); - ShenandoahMemAllocator allocator(initializer, klass, size, THREAD); - return allocator.allocate(); -} - HeapWord* ShenandoahHeap::mem_allocate(size_t size, bool* gc_overhead_limit_was_exceeded) { ShenandoahAllocRequest req = ShenandoahAllocRequest::for_shared(size); @@ -939,15 +894,6 @@ return NULL; } -void ShenandoahHeap::fill_with_dummy_object(HeapWord* start, HeapWord* end, bool zap) { - HeapWord* obj = tlab_post_allocation_setup(start); - CollectedHeap::fill_with_object(obj, end); -} - -size_t ShenandoahHeap::min_dummy_object_size() const { - return CollectedHeap::min_dummy_object_size() + ShenandoahForwarding::word_size(); -} - class ShenandoahConcurrentEvacuateRegionObjectClosure : public ObjectClosure { private: ShenandoahHeap* const _heap; @@ -958,7 +904,7 @@ void do_object(oop p) { 
shenandoah_assert_marked(NULL, p); - if (oopDesc::equals_raw(p, ShenandoahBarrierSet::resolve_forwarded_not_null(p))) { + if (!p->is_forwarded()) { _heap->evacuate_object(p, _thread); } } @@ -1038,8 +984,8 @@ void ShenandoahHeap::trash_humongous_region_at(ShenandoahHeapRegion* start) { assert(start->is_humongous_start(), "reclaim regions starting with the first one"); - oop humongous_obj = oop(start->bottom() + ShenandoahForwarding::word_size()); - size_t size = humongous_obj->size() + ShenandoahForwarding::word_size(); + oop humongous_obj = oop(start->bottom()); + size_t size = humongous_obj->size(); size_t required_regions = ShenandoahHeapRegion::required_regions(size * HeapWordSize); size_t index = start->region_number() + required_regions - 1; @@ -1848,13 +1794,6 @@ set_gc_state_mask(EVACUATION, in_progress); } -HeapWord* ShenandoahHeap::tlab_post_allocation_setup(HeapWord* obj) { - // Initialize Brooks pointer for the next object - HeapWord* result = obj + ShenandoahForwarding::word_size(); - ShenandoahForwarding::initialize(oop(result)); - return result; -} - void ShenandoahHeap::ref_processing_init() { assert(_max_workers > 0, "Sanity"); @@ -2817,11 +2756,3 @@ } } } - -size_t ShenandoahHeap::obj_size(oop obj) const { - return CollectedHeap::obj_size(obj) + ShenandoahForwarding::word_size(); -} - -ptrdiff_t ShenandoahHeap::cell_header_size() const { - return ShenandoahForwarding::byte_size(); -} --- old/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp 2019-05-08 16:25:31.260780898 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp 2019-05-08 16:25:31.045784025 +0200 @@ -519,9 +519,6 @@ bool is_in(const void* p) const; - size_t obj_size(oop obj) const; - virtual ptrdiff_t cell_header_size() const; - void collect(GCCause::Cause cause); void do_full_collection(bool clear_all_soft_refs); @@ -575,10 +572,6 @@ size_t size, Metaspace::MetadataType mdtype); - oop obj_allocate(Klass* klass, int size, TRAPS); - oop array_allocate(Klass* klass, int size, 
int length, bool do_zero, TRAPS); - oop class_allocate(Klass* klass, int size, TRAPS); - void notify_mutator_alloc_words(size_t words, bool waste); // Shenandoah supports TLAB allocation @@ -590,10 +583,6 @@ size_t max_tlab_size() const; size_t tlab_used(Thread* ignored) const; - HeapWord* tlab_post_allocation_setup(HeapWord* obj); - void fill_with_dummy_object(HeapWord* start, HeapWord* end, bool zap); - size_t min_dummy_object_size() const; - void resize_tlabs(); void ensure_parsability(bool retire_tlabs); --- old/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp 2019-05-08 16:25:31.827772651 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp 2019-05-08 16:25:31.612775778 +0200 @@ -234,50 +234,45 @@ assert(ShenandoahThreadLocalData::is_evac_allowed(thread), "must be enclosed in oom-evac scope"); - size_t size_no_fwdptr = (size_t) p->size(); - size_t size_with_fwdptr = size_no_fwdptr + ShenandoahForwarding::word_size(); + size_t size = p->size(); assert(!heap_region_containing(p)->is_humongous(), "never evacuate humongous objects"); bool alloc_from_gclab = true; - HeapWord* filler = NULL; + HeapWord* copy = NULL; #ifdef ASSERT if (ShenandoahOOMDuringEvacALot && (os::random() & 1) == 0) { // Simulate OOM every ~2nd slow-path call - filler = NULL; + copy = NULL; } else { #endif if (UseTLAB) { - filler = allocate_from_gclab(thread, size_with_fwdptr); + copy = allocate_from_gclab(thread, size); } - if (filler == NULL) { - ShenandoahAllocRequest req = ShenandoahAllocRequest::for_shared_gc(size_with_fwdptr); - filler = allocate_memory(req); + if (copy == NULL) { + ShenandoahAllocRequest req = ShenandoahAllocRequest::for_shared_gc(size); + copy = allocate_memory(req); alloc_from_gclab = false; } #ifdef ASSERT } #endif - if (filler == NULL) { - control_thread()->handle_alloc_failure_evac(size_with_fwdptr); + if (copy == NULL) { + control_thread()->handle_alloc_failure_evac(size); _oom_evac_handler.handle_out_of_memory_during_evacuation(); 
return ShenandoahBarrierSet::resolve_forwarded(p); } - // Copy the object and initialize its forwarding ptr: - HeapWord* copy = filler + ShenandoahForwarding::word_size(); - oop copy_val = oop(copy); - - Copy::aligned_disjoint_words((HeapWord*) p, copy, size_no_fwdptr); - ShenandoahForwarding::initialize(oop(copy)); + // Copy the object: + Copy::aligned_disjoint_words((HeapWord*) p, copy, size); // Try to install the new forwarding pointer. + oop copy_val = oop(copy); oop result = ShenandoahForwarding::try_update_forwardee(p, copy_val); - if (oopDesc::equals_raw(result, p)) { // Successfully evacuated. Our copy is now the public one! shenandoah_assert_correct(NULL, copy_val); @@ -295,11 +290,11 @@ // have to explicitly overwrite the copy with the filler object. With that overwrite, // we have to keep the fwdptr initialized and pointing to our (stale) copy. if (alloc_from_gclab) { - ShenandoahThreadLocalData::gclab(thread)->undo_allocation(filler, size_with_fwdptr); + ShenandoahThreadLocalData::gclab(thread)->undo_allocation(copy, size); } else { - fill_with_object(copy, size_no_fwdptr); + fill_with_object(copy, size); + shenandoah_assert_correct(NULL, copy_val); } - shenandoah_assert_correct(NULL, copy_val); shenandoah_assert_correct(NULL, result); return result; } @@ -370,7 +365,6 @@ template inline void ShenandoahHeap::marked_object_iterate(ShenandoahHeapRegion* region, T* cl, HeapWord* limit) { - assert(ShenandoahForwarding::word_offset() < 0, "skip_delta calculation below assumes the forwarding ptr is before obj"); assert(! 
region->is_humongous_continuation(), "no humongous continuation regions here"); ShenandoahMarkingContext* const ctx = complete_marking_context(); @@ -379,10 +373,9 @@ MarkBitMap* mark_bit_map = ctx->mark_bit_map(); HeapWord* tams = ctx->top_at_mark_start(region); - size_t skip_bitmap_delta = ShenandoahForwarding::word_size() + 1; - size_t skip_objsize_delta = ShenandoahForwarding::word_size() /* + actual obj.size() below */; - HeapWord* start = region->bottom() + ShenandoahForwarding::word_size(); - HeapWord* end = MIN2(tams + ShenandoahForwarding::word_size(), region->end()); + size_t skip_bitmap_delta = 1; + HeapWord* start = region->bottom(); + HeapWord* end = MIN2(tams, region->end()); // Step 1. Scan below the TAMS based on bitmap data. HeapWord* limit_bitmap = MIN2(limit, tams); @@ -412,7 +405,7 @@ do { avail = 0; for (int c = 0; (c < dist) && (cb < limit_bitmap); c++) { - Prefetch::read(cb, ShenandoahForwarding::byte_offset()); + Prefetch::read(cb, oopDesc::mark_offset_in_bytes()); slots[avail++] = cb; cb += skip_bitmap_delta; if (cb < limit_bitmap) { @@ -447,16 +440,16 @@ // Step 2. Accurate size-based traversal, happens past the TAMS. // This restarts the scan at TAMS, which makes sure we traverse all objects, // regardless of what happened at Step 1. 
- HeapWord* cs = tams + ShenandoahForwarding::word_size(); + HeapWord* cs = tams; while (cs < limit) { - assert (cs > tams, "only objects past TAMS here: " PTR_FORMAT " (" PTR_FORMAT ")", p2i(cs), p2i(tams)); + assert (cs >= tams, "only objects past TAMS here: " PTR_FORMAT " (" PTR_FORMAT ")", p2i(cs), p2i(tams)); assert (cs < limit, "only objects below limit here: " PTR_FORMAT " (" PTR_FORMAT ")", p2i(cs), p2i(limit)); oop obj = oop(cs); assert(oopDesc::is_oop(obj), "sanity"); assert(ctx->is_marked(obj), "object expected to be marked"); int size = obj->size(); cl->do_object(obj); - cs += size + skip_objsize_delta; + cs += size; } } --- old/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp 2019-05-08 16:25:32.394764404 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahHeapRegion.cpp 2019-05-08 16:25:32.185767444 +0200 @@ -23,7 +23,6 @@ #include "precompiled.hpp" #include "memory/allocation.hpp" -#include "gc/shenandoah/shenandoahForwarding.hpp" #include "gc/shenandoah/shenandoahHeapRegionSet.inline.hpp" #include "gc/shenandoah/shenandoahHeap.inline.hpp" #include "gc/shenandoah/shenandoahHeapRegion.hpp" @@ -452,12 +451,12 @@ void ShenandoahHeapRegion::oop_iterate_objects(OopIterateClosure* blk) { assert(! is_humongous(), "no humongous region here"); - HeapWord* obj_addr = bottom() + ShenandoahForwarding::word_size(); + HeapWord* obj_addr = bottom(); HeapWord* t = top(); // Could call objects iterate, but this is easier. while (obj_addr < t) { oop obj = oop(obj_addr); - obj_addr += obj->oop_iterate_size(blk) + ShenandoahForwarding::word_size(); + obj_addr += obj->oop_iterate_size(blk); } } @@ -466,7 +465,7 @@ // Find head. 
ShenandoahHeapRegion* r = humongous_start_region(); assert(r->is_humongous_start(), "need humongous head here"); - oop obj = oop(r->bottom() + ShenandoahForwarding::word_size()); + oop obj = oop(r->bottom()); obj->oop_iterate(blk, MemRegion(bottom(), top())); } @@ -505,11 +504,11 @@ if (p >= top()) { return top(); } else { - HeapWord* last = bottom() + ShenandoahForwarding::word_size(); + HeapWord* last = bottom(); HeapWord* cur = last; while (cur <= p) { last = cur; - cur += oop(cur)->size() + ShenandoahForwarding::word_size(); + cur += oop(cur)->size(); } shenandoah_assert_correct(NULL, oop(last)); return last; --- old/src/hotspot/share/gc/shenandoah/shenandoahHeuristics.cpp 2019-05-08 16:25:32.955756244 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahHeuristics.cpp 2019-05-08 16:25:32.743759328 +0200 @@ -24,7 +24,6 @@ #include "precompiled.hpp" #include "gc/shared/gcCause.hpp" -#include "gc/shenandoah/shenandoahForwarding.hpp" #include "gc/shenandoah/shenandoahCollectionSet.inline.hpp" #include "gc/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc/shenandoah/shenandoahHeap.inline.hpp" @@ -164,7 +163,7 @@ // Reclaim humongous regions here, and count them as the immediate garbage #ifdef ASSERT bool reg_live = region->has_live(); - bool bm_live = ctx->is_marked(oop(region->bottom() + ShenandoahForwarding::word_size())); + bool bm_live = ctx->is_marked(oop(region->bottom())); assert(reg_live == bm_live, "Humongous liveness and marks should agree. 
Region live: %s; Bitmap live: %s; Region Live Words: " SIZE_FORMAT, BOOL_TO_STR(reg_live), BOOL_TO_STR(bm_live), region->get_live_data_words()); --- old/src/hotspot/share/gc/shenandoah/shenandoahMarkCompact.cpp 2019-05-08 16:25:33.515748099 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahMarkCompact.cpp 2019-05-08 16:25:33.303751182 +0200 @@ -25,7 +25,6 @@ #include "code/codeCache.hpp" #include "gc/shared/gcTraceTime.inline.hpp" -#include "gc/shenandoah/shenandoahForwarding.inline.hpp" #include "gc/shenandoah/shenandoahConcurrentMark.inline.hpp" #include "gc/shenandoah/shenandoahCollectionSet.hpp" #include "gc/shenandoah/shenandoahFreeSet.hpp" @@ -228,6 +227,7 @@ class ShenandoahPrepareForCompactionObjectClosure : public ObjectClosure { private: + ShenandoahMarkCompact* const _mark_compact; ShenandoahHeap* const _heap; GrowableArray& _empty_regions; int _empty_regions_pos; @@ -236,7 +236,8 @@ HeapWord* _compact_point; public: - ShenandoahPrepareForCompactionObjectClosure(GrowableArray& empty_regions, ShenandoahHeapRegion* to_region) : + ShenandoahPrepareForCompactionObjectClosure(ShenandoahMarkCompact* mc, GrowableArray& empty_regions, ShenandoahHeapRegion* to_region) : + _mark_compact(mc), _heap(ShenandoahHeap::heap()), _empty_regions(empty_regions), _empty_regions_pos(0), @@ -266,7 +267,7 @@ assert(_heap->complete_marking_context()->is_marked(p), "must be marked"); assert(!_heap->complete_marking_context()->allocated_after_mark_start((HeapWord*) p), "must be truly marked"); - size_t obj_size = p->size() + ShenandoahForwarding::word_size(); + size_t obj_size = p->size(); if (_compact_point + obj_size > _to_region->end()) { finish_region(); @@ -289,13 +290,15 @@ // Object fits into current region, record new location: assert(_compact_point + obj_size <= _to_region->end(), "must fit"); shenandoah_assert_not_forwarded(NULL, p); - ShenandoahForwarding::set_forwardee_raw(p, _compact_point + ShenandoahForwarding::word_size()); + _mark_compact->preserve_mark(p); 
+ p->forward_to(oop(_compact_point)); _compact_point += obj_size; } }; class ShenandoahPrepareForCompactionTask : public AbstractGangTask { private: + ShenandoahMarkCompact* const _mark_compact; ShenandoahHeap* const _heap; ShenandoahHeapRegionSet** const _worker_slices; ShenandoahRegionIterator _heap_regions; @@ -318,8 +321,9 @@ } public: - ShenandoahPrepareForCompactionTask(ShenandoahHeapRegionSet** worker_slices) : + ShenandoahPrepareForCompactionTask(ShenandoahMarkCompact* mc, ShenandoahHeapRegionSet** worker_slices) : AbstractGangTask("Shenandoah Prepare For Compaction Task"), + _mark_compact(mc), _heap(ShenandoahHeap::heap()), _worker_slices(worker_slices) { } @@ -335,7 +339,7 @@ // Remember empty regions and reuse them as needed. ResourceMark rm; GrowableArray empty_regions((int)_heap->num_regions()); - ShenandoahPrepareForCompactionObjectClosure cl(empty_regions, from_region); + ShenandoahPrepareForCompactionObjectClosure cl(_mark_compact, empty_regions, from_region); while (from_region != NULL) { cl.set_from_region(from_region); if (from_region->has_live()) { @@ -375,8 +379,8 @@ size_t to_begin = heap->num_regions(); size_t to_end = heap->num_regions(); - for (size_t c = heap->num_regions() - 1; c > 0; c--) { - ShenandoahHeapRegion *r = heap->get_region(c); + for (size_t c = heap->num_regions(); c > 0; c--) { + ShenandoahHeapRegion *r = heap->get_region(c - 1); if (r->is_humongous_continuation() || (r->new_top() == r->bottom())) { // To-region candidate: record this, and continue scan to_begin = r->region_number(); @@ -385,15 +389,15 @@ if (r->is_humongous_start() && r->is_move_allowed()) { // From-region candidate: movable humongous region - oop old_obj = oop(r->bottom() + ShenandoahForwarding::word_size()); - size_t words_size = old_obj->size() + ShenandoahForwarding::word_size(); + oop old_obj = oop(r->bottom()); + size_t words_size = old_obj->size(); size_t num_regions = ShenandoahHeapRegion::required_regions(words_size * HeapWordSize); size_t start = 
to_end - num_regions; if (start >= to_begin && start != r->region_number()) { // Fits into current window, and the move is non-trivial. Record the move then, and continue scan. - ShenandoahForwarding::set_forwardee_raw(old_obj, heap->get_region(start)->bottom() + ShenandoahForwarding::word_size()); + old_obj->forward_to(oop(heap->get_region(start)->bottom())); to_end = start; continue; } @@ -441,7 +445,7 @@ void heap_region_do(ShenandoahHeapRegion* r) { if (r->is_humongous_start()) { - oop humongous_obj = oop(r->bottom() + ShenandoahForwarding::word_size()); + oop humongous_obj = oop(r->bottom()); if (!_ctx->is_marked(humongous_obj)) { assert(!r->has_live(), "Region " SIZE_FORMAT " is not marked, should not have live", r->region_number()); @@ -482,7 +486,7 @@ // Compute the new addresses for regular objects { ShenandoahGCPhase phase(ShenandoahPhaseTimings::full_gc_calculate_addresses_regular); - ShenandoahPrepareForCompactionTask prepare_task(worker_slices); + ShenandoahPrepareForCompactionTask prepare_task(this, worker_slices); heap->workers()->run_task(&prepare_task); } @@ -504,8 +508,10 @@ if (!CompressedOops::is_null(o)) { oop obj = CompressedOops::decode_not_null(o); assert(_ctx->is_marked(obj), "must be marked"); - oop forw = oop(ShenandoahForwarding::get_forwardee_raw(obj)); - RawAccess::oop_store(p, forw); + if (obj->is_forwarded()) { + oop forw = obj->forwardee(); + RawAccess::oop_store(p, forw); + } } } @@ -529,7 +535,6 @@ } void do_object(oop p) { assert(_heap->complete_marking_context()->is_marked(p), "must be marked"); - HeapWord* forw = ShenandoahForwarding::get_forwardee_raw(p); p->oop_iterate(&_cl); } }; @@ -600,6 +605,8 @@ ShenandoahAdjustPointersTask adjust_pointers_task; workers->run_task(&adjust_pointers_task); + + adjust_marks(); } class ShenandoahCompactObjectsClosure : public ObjectClosure { @@ -614,13 +621,13 @@ void do_object(oop p) { assert(_heap->complete_marking_context()->is_marked(p), "must be marked"); size_t size = (size_t)p->size(); 
- HeapWord* compact_to = ShenandoahForwarding::get_forwardee_raw(p); - HeapWord* compact_from = (HeapWord*) p; - if (compact_from != compact_to) { + if (p->is_forwarded()) { + HeapWord* compact_from = (HeapWord*) p; + HeapWord* compact_to = (HeapWord*) p->forwardee(); Copy::aligned_conjoint_words(compact_from, compact_to, size); + oop new_obj = oop(compact_to); + new_obj->init_mark(); } - oop new_obj = oop(compact_to); - ShenandoahForwarding::initialize(new_obj); } }; @@ -711,31 +718,30 @@ ShenandoahHeap* heap = ShenandoahHeap::heap(); - for (size_t c = heap->num_regions() - 1; c > 0; c--) { - ShenandoahHeapRegion* r = heap->get_region(c); + for (size_t c = heap->num_regions(); c > 0; c--) { + ShenandoahHeapRegion* r = heap->get_region(c - 1); if (r->is_humongous_start()) { - oop old_obj = oop(r->bottom() + ShenandoahForwarding::word_size()); - size_t words_size = old_obj->size() + ShenandoahForwarding::word_size(); + oop old_obj = oop(r->bottom()); + if (!old_obj->is_forwarded()) { + // No need to move the object, it stays at the same slot + continue; + } + size_t words_size = old_obj->size(); size_t num_regions = ShenandoahHeapRegion::required_regions(words_size * HeapWordSize); size_t old_start = r->region_number(); size_t old_end = old_start + num_regions - 1; - size_t new_start = heap->heap_region_index_containing(ShenandoahForwarding::get_forwardee_raw(old_obj)); + size_t new_start = heap->heap_region_index_containing(old_obj->forwardee()); size_t new_end = new_start + num_regions - 1; - - if (old_start == new_start) { - // No need to move the object, it stays at the same slot - continue; - } - + assert(old_start != new_start, "must be real move"); assert (r->is_move_allowed(), "should be movable"); Copy::aligned_conjoint_words(heap->get_region(old_start)->bottom(), heap->get_region(new_start)->bottom(), ShenandoahHeapRegion::region_size_words()*num_regions); - oop new_obj = oop(heap->get_region(new_start)->bottom() + ShenandoahForwarding::word_size()); - 
ShenandoahForwarding::initialize(new_obj); + oop new_obj = oop(heap->get_region(new_start)->bottom()); + new_obj->init_mark(); { for (size_t c = old_start; c <= old_end; c++) { @@ -815,6 +821,8 @@ compact_humongous_objects(); } + restore_marks(); + // Reset complete bitmap. We're about to reset the complete-top-at-mark-start pointer // and must ensure the bitmap is in sync. { @@ -837,3 +845,38 @@ heap->clear_cancelled_gc(); } + +void ShenandoahMarkCompact::preserve_mark(oop obj) { + markOop mark = obj->mark_raw(); + if (mark->must_be_preserved(obj)) { + MutexLocker ml(&_preserved_mark_lock, Mutex::_no_safepoint_check_flag); + assert(_preserved_oop_stack.size() == _preserved_mark_stack.size(), + "inconsistent preserved oop stacks: oop_stack size: " SIZE_FORMAT ", mark_stack size: " SIZE_FORMAT, _preserved_oop_stack.size(), _preserved_mark_stack.size()); + _preserved_mark_stack.push(mark); + _preserved_oop_stack.push(obj); + } +} + +void ShenandoahMarkCompact::restore_marks() { + MutexLocker ml(&_preserved_mark_lock, Mutex::_no_safepoint_check_flag); + assert(_preserved_oop_stack.size() == _preserved_mark_stack.size(), + "inconsistent preserved oop stacks: oop_stack size: " SIZE_FORMAT ", mark_stack size: " SIZE_FORMAT, _preserved_oop_stack.size(), _preserved_mark_stack.size()); + while (!_preserved_oop_stack.is_empty()) { + oop obj = _preserved_oop_stack.pop(); + markOop mark = _preserved_mark_stack.pop(); + obj->set_mark_raw(mark); + } +} + +void ShenandoahMarkCompact::adjust_marks() { + MutexLocker ml(&_preserved_mark_lock, Mutex::_no_safepoint_check_flag); + assert(_preserved_oop_stack.size() == _preserved_mark_stack.size(), + "inconsistent preserved oop stacks: oop_stack size: " SIZE_FORMAT ", mark_stack size: " SIZE_FORMAT, _preserved_oop_stack.size(), _preserved_mark_stack.size()); + StackIterator iter(_preserved_oop_stack); + while (!iter.is_empty()) { + oop* p = iter.next_addr(); + oop obj = *p; + oop fwd = obj->forwardee(); + *p = fwd; + } +} --- 
old/src/hotspot/share/gc/shenandoah/shenandoahMarkCompact.hpp 2019-05-08 16:25:34.040740463 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahMarkCompact.hpp 2019-05-08 16:25:33.827743561 +0200 @@ -50,11 +50,19 @@ */ class ShenandoahMarkCompact : public CHeapObj { + friend class ShenandoahPrepareForCompactionObjectClosure; private: GCTimer* _gc_timer; + Stack _preserved_mark_stack; + Stack _preserved_oop_stack; + Mutex _preserved_mark_lock; + public: + ShenandoahMarkCompact() : _gc_timer(NULL), _preserved_mark_stack(), _preserved_oop_stack(), + _preserved_mark_lock(Mutex::leaf, "Shenandoah Preserved Marks Lock", false, Mutex::_safepoint_check_never) {} void initialize(GCTimer* gc_timer); + void do_it(GCCause::Cause gc_cause); private: @@ -66,6 +74,9 @@ void calculate_target_humongous_objects(); void compact_humongous_objects(); + void preserve_mark(oop obj); + void restore_marks(); + void adjust_marks(); }; #endif // SHARE_GC_SHENANDOAH_SHENANDOAHMARKCOMPACT_HPP --- old/src/hotspot/share/gc/shenandoah/shenandoahTraversalGC.cpp 2019-05-08 16:25:34.551733030 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahTraversalGC.cpp 2019-05-08 16:25:34.338736128 +0200 @@ -35,7 +35,6 @@ #include "gc/shenandoah/shenandoahCollectionSet.hpp" #include "gc/shenandoah/shenandoahCollectorPolicy.hpp" #include "gc/shenandoah/shenandoahFreeSet.hpp" -#include "gc/shenandoah/shenandoahForwarding.hpp" #include "gc/shenandoah/shenandoahPhaseTimings.hpp" #include "gc/shenandoah/shenandoahHeap.inline.hpp" #include "gc/shenandoah/shenandoahHeapRegionSet.inline.hpp" @@ -638,7 +637,7 @@ bool candidate = traversal_regions->is_in(r) && !r->has_live() && not_allocated; if (r->is_humongous_start() && candidate) { // Trash humongous. 
- HeapWord* humongous_obj = r->bottom() + ShenandoahForwarding::word_size(); + HeapWord* humongous_obj = r->bottom(); assert(!ctx->is_marked(oop(humongous_obj)), "must not be marked"); r->make_trash_immediate(); while (i + 1 < num_regions && _heap->get_region(i + 1)->is_humongous_continuation()) { --- old/src/hotspot/share/gc/shenandoah/shenandoahVerifier.cpp 2019-05-08 16:25:35.132724579 +0200 +++ new/src/hotspot/share/gc/shenandoah/shenandoahVerifier.cpp 2019-05-08 16:25:34.907727852 +0200 @@ -137,7 +137,7 @@ // skip break; case ShenandoahVerifier::_verify_liveness_complete: - Atomic::add(obj->size() + ShenandoahForwarding::word_size(), &_ld[obj_reg->region_number()]); + Atomic::add((uint) obj->size(), &_ld[obj_reg->region_number()]); // fallthrough for fast failure for un-live regions: case ShenandoahVerifier::_verify_liveness_conservative: check(ShenandoahAsserts::_safe_oop, obj, obj_reg->has_live(), @@ -277,6 +277,7 @@ */ void verify_oops_from(oop obj) { _loc = obj; + obj = ShenandoahBarrierSet::resolve_forwarded_not_null(obj); obj->oop_iterate(this); _loc = NULL; } @@ -528,7 +529,7 @@ virtual void work_humongous(ShenandoahHeapRegion *r, ShenandoahVerifierStack& stack, ShenandoahVerifyOopClosure& cl) { size_t processed = 0; - HeapWord* obj = r->bottom() + ShenandoahForwarding::word_size(); + HeapWord* obj = r->bottom(); if (_heap->complete_marking_context()->is_marked((oop)obj)) { verify_and_follow(obj, stack, cl, &processed); } @@ -542,12 +543,12 @@ // Bitmaps, before TAMS if (tams > r->bottom()) { - HeapWord* start = r->bottom() + ShenandoahForwarding::word_size(); + HeapWord* start = r->bottom(); HeapWord* addr = mark_bit_map->get_next_marked_addr(start, tams); while (addr < tams) { verify_and_follow(addr, stack, cl, &processed); - addr += ShenandoahForwarding::word_size(); + addr += 1; if (addr < tams) { addr = mark_bit_map->get_next_marked_addr(addr, tams); } @@ -557,11 +558,11 @@ // Size-based, after TAMS { HeapWord* limit = r->top(); - HeapWord* addr = 
tams + ShenandoahForwarding::word_size(); + HeapWord* addr = tams; while (addr < limit) { verify_and_follow(addr, stack, cl, &processed); - addr += oop(addr)->size() + ShenandoahForwarding::word_size(); + addr += oop(addr)->size(); } }