# HG changeset patch
# User rkennke
# Date 1585165099 -3600
#      Wed Mar 25 20:38:19 2020 +0100
# Node ID 285e32a96b6e2cced545473cc795ada2f88b3c18
# Parent  9cff5368d25a95e39cc82d02e6db2a839097e303
8241605: Shenandoah: More aggressive reference discovery

diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahBarrierSetC2.cpp
@@ -589,9 +589,14 @@
   if (on_weak_ref) {
     // Use the pre-barrier to record the value in the referent field
-    satb_write_barrier_pre(kit, false /* do_load */,
-                           NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
-                           load /* pre_val */, T_OBJECT);
+    if (ShenandoahAggressiveReferenceDiscovery) {
+      load = shenandoah_enqueue_barrier(kit, load);
+    } else {
+      satb_write_barrier_pre(kit, false /* do_load */,
+                             NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */,
+                             NULL /* val_type */,
+                             load /* pre_val */, T_OBJECT);
+    }
     // Add memory barrier to prevent commoning reads from this field
     // across safepoint since GC can change its value.
     kit->insert_mem_bar(Op_MemBarCPUOrder);
   }
diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.cpp
@@ -860,8 +860,8 @@
     inner->clear_strip_mined();
 }
 
-void ShenandoahBarrierC2Support::test_heap_stable(Node*& ctrl, Node* raw_mem, Node*& heap_stable_ctrl,
-                                                  PhaseIdealLoop* phase) {
+void ShenandoahBarrierC2Support::test_heap_state(Node*& ctrl, Node* raw_mem, Node*& heap_stable_ctrl,
+                                                 PhaseIdealLoop* phase, int flags) {
   IdealLoopTree* loop = phase->get_loop(ctrl);
   Node* thread = new ThreadLocalNode();
   phase->register_new_node(thread, ctrl);
@@ -875,7 +875,7 @@
   Node* gc_state = new LoadBNode(ctrl, raw_mem, gc_state_addr, gc_state_adr_type, TypeInt::BYTE, MemNode::unordered);
   phase->register_new_node(gc_state, ctrl);
 
-  Node* heap_stable_and = new AndINode(gc_state, phase->igvn().intcon(ShenandoahHeap::HAS_FORWARDED));
+  Node* heap_stable_and = new AndINode(gc_state, phase->igvn().intcon(flags));
   phase->register_new_node(heap_stable_and, ctrl);
   Node* heap_stable_cmp = new CmpINode(heap_stable_and, phase->igvn().zerocon(T_INT));
   phase->register_new_node(heap_stable_cmp, ctrl);
@@ -889,7 +889,7 @@
   ctrl = new IfTrueNode(heap_stable_iff);
   phase->register_control(ctrl, loop, heap_stable_iff);
 
-  assert(is_heap_stable_test(heap_stable_iff), "Should match the shape");
+  assert(is_heap_state_test(heap_stable_iff, flags), "Should match the shape");
 }
 
 void ShenandoahBarrierC2Support::test_null(Node*& ctrl, Node* val, Node*& null_ctrl, PhaseIdealLoop* phase) {
@@ -1437,7 +1437,7 @@
   Node* raw_mem_phi = PhiNode::make(region, raw_mem, Type::MEMORY, TypeRawPtr::BOTTOM);
 
   // Stable path.
-  test_heap_stable(ctrl, raw_mem, heap_stable_ctrl, phase);
+  test_heap_state(ctrl, raw_mem, heap_stable_ctrl, phase, ShenandoahHeap::HAS_FORWARDED);
   IfNode* heap_stable_iff = heap_stable_ctrl->in(0)->as_If();
 
   // Heap stable case
@@ -1570,11 +1570,13 @@
   assert(ShenandoahBarrierSetC2::bsc2()->state()->load_reference_barriers_count() == 0, "all load reference barrier nodes should have been replaced");
 
   for (int i = state->enqueue_barriers_count() - 1; i >= 0; i--) {
-    Node* barrier = state->enqueue_barrier(i);
+    ShenandoahEnqueueBarrierNode* barrier = state->enqueue_barrier(i);
     Node* pre_val = barrier->in(1);
 
-    if (phase->igvn().type(pre_val)->higher_equal(TypePtr::NULL_PTR)) {
-      ShouldNotReachHere();
+    assert(!phase->igvn().type(pre_val)->higher_equal(TypePtr::NULL_PTR), "no known-NULLs here");
+
+    if (barrier->can_eliminate(phase)) {
+      phase->igvn().replace_node(barrier, pre_val);
       continue;
     }
 
@@ -1608,7 +1610,7 @@
   Node* phi2 = PhiNode::make(region2, raw_mem, Type::MEMORY, TypeRawPtr::BOTTOM);
 
   // Stable path.
-  test_heap_stable(ctrl, raw_mem, heap_stable_ctrl, phase);
+  test_heap_state(ctrl, raw_mem, heap_stable_ctrl, phase, ShenandoahHeap::TRAVERSAL | ShenandoahHeap::MARKING);
   region->init_req(_heap_stable, heap_stable_ctrl);
   phi->init_req(_heap_stable, raw_mem);
 
@@ -2171,6 +2173,87 @@
   return NULL;
 }
 
+bool ShenandoahEnqueueBarrierNode::can_eliminate(PhaseIdealLoop* phase) {
+  return ShenandoahHeap::heap()->traversal_gc() == NULL &&
+         is_redundant() && ShenandoahAggressiveReferenceDiscovery;
+}
+
+bool ShenandoahEnqueueBarrierNode::is_redundant() {
+  Unique_Node_List visited;
+  Node_Stack stack(0);
+  stack.push(this, 0);
+
+  while (stack.size() > 0) {
+    Node* n = stack.node();
+    if (visited.member(n)) {
+      stack.pop();
+      continue;
+    }
+    visited.push(n);
+    bool visit_users = false;
+    switch (n->Opcode()) {
+      case Op_CallStaticJava:
+        if (n->as_CallStaticJava()->uncommon_trap_request() == 0) {
+          return false;
+        }
+        break;
+      case Op_CallDynamicJava:
+      case Op_CompareAndExchangeN:
+      case Op_CompareAndExchangeP:
+      case Op_CompareAndSwapN:
+      case Op_CompareAndSwapP:
+      case Op_ShenandoahCompareAndSwapN:
+      case Op_ShenandoahCompareAndSwapP:
+      case Op_GetAndSetN:
+      case Op_GetAndSetP:
+      case Op_Return:
+      case Op_StoreN:
+      case Op_StoreP:
+        return false;
+        break;
+      case Op_AddP:
+      case Op_Allocate:
+      case Op_AllocateArray:
+      case Op_ArrayCopy:
+      case Op_CmpP:
+      case Op_LoadL:
+      case Op_SafePoint:
+      case Op_SubTypeCheck:
+      case Op_StoreLConditional:
+      case Op_StoreIConditional:
+      case Op_FastUnlock:
+        break;
+      case Op_CastPP:
+      case Op_CheckCastPP:
+      case Op_CMoveN:
+      case Op_CMoveP:
+      case Op_EncodeP:
+      case Op_Phi:
+      case Op_ShenandoahEnqueueBarrier:
+        visit_users = true;
+        break;
+      default: {
+#ifdef ASSERT
+        fatal("Unknown node in is_redundant: %s", NodeClassNames[n->Opcode()]);
+#endif
+        // Default to useful: better to have excess barriers, rather than miss some.
+        return false;
+      }
+    }
+
+    stack.pop();
+    if (visit_users) {
+      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+        Node* user = n->fast_out(i);
+        if (user != NULL) {
+          stack.push(user, 0);
+        }
+      }
+    }
+  }
+  return true;
+}
+
 Node* ShenandoahEnqueueBarrierNode::Identity(PhaseGVN* phase) {
   PhaseIterGVN* igvn = phase->is_IterGVN();
 
@@ -3245,7 +3328,6 @@
     case Op_GetAndAddI:
     case Op_GetAndAddB:
     case Op_GetAndAddS:
-    case Op_ShenandoahEnqueueBarrier:
    case Op_FastLock:
     case Op_FastUnlock:
     case Op_Rethrow:
@@ -3322,6 +3404,7 @@
     case Op_CMoveP:
     case Op_Phi:
     case Op_ShenandoahLoadReferenceBarrier:
+    case Op_ShenandoahEnqueueBarrier:
       // Whether or not these need the barriers depends on their users
       visit_users = true;
       break;
diff --git a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp
--- a/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp
+++ b/src/hotspot/share/gc/shenandoah/c2/shenandoahSupport.hpp
@@ -58,8 +58,8 @@
   static Node* find_bottom_mem(Node* ctrl, PhaseIdealLoop* phase);
   static void follow_barrier_uses(Node* n, Node* ctrl, Unique_Node_List& uses, PhaseIdealLoop* phase);
   static void test_null(Node*& ctrl, Node* val, Node*& null_ctrl, PhaseIdealLoop* phase);
-  static void test_heap_stable(Node*& ctrl, Node* raw_mem, Node*& heap_stable_ctrl,
-                               PhaseIdealLoop* phase);
+  static void test_heap_state(Node*& ctrl, Node* raw_mem, Node*& heap_stable_ctrl,
+                              PhaseIdealLoop* phase, int flags);
   static void call_lrb_stub(Node*& ctrl, Node*& val, Node* load_addr, Node*& result_mem, Node* raw_mem, bool is_native, PhaseIdealLoop* phase);
   static Node* clone_null_check(Node*& c, Node* val, Node* unc_ctrl, PhaseIdealLoop* phase);
   static void fix_null_check(Node* unc, Node* unc_ctrl, Node* new_unc_ctrl, Unique_Node_List& uses,
@@ -97,6 +97,8 @@
   Node* Identity(PhaseGVN* phase);
 
   int Opcode() const;
 
+  bool can_eliminate(PhaseIdealLoop* phase);
+  bool is_redundant();
 private:
   enum { Needed, NotNeeded, MaybeNeeded };
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahConcurrentMark.cpp
@@ -182,19 +182,27 @@
 
 class ShenandoahSATBAndRemarkCodeRootsThreadsClosure : public ThreadClosure {
 private:
-  ShenandoahSATBBufferClosure* _satb_cl;
-  MarkingCodeBlobClosure* _code_cl;
+  ShenandoahSATBBufferClosure* const _satb_cl;
+  OopClosure* const _cl;
+  MarkingCodeBlobClosure* const _code_cl;
   uintx _claim_token;
 
  public:
-  ShenandoahSATBAndRemarkCodeRootsThreadsClosure(ShenandoahSATBBufferClosure* satb_cl, MarkingCodeBlobClosure* code_cl) :
-    _satb_cl(satb_cl), _code_cl(code_cl),
+  ShenandoahSATBAndRemarkCodeRootsThreadsClosure(ShenandoahSATBBufferClosure* satb_cl,
+                                                 OopClosure* cl, MarkingCodeBlobClosure* code_cl) :
+    _satb_cl(satb_cl), _cl(cl), _code_cl(code_cl),
     _claim_token(Threads::thread_claim_token()) {}
 
   void do_thread(Thread* thread) {
     if (thread->claim_threads_do(true, _claim_token)) {
       ShenandoahThreadLocalData::satb_mark_queue(thread).apply_closure_and_empty(_satb_cl);
-      if (_code_cl != NULL && thread->is_Java_thread()) {
+      if (_cl != NULL) {
+        // This doesn't appear to add very much to final-mark latency. If that ever becomes a problem, we can attempt
+        // to trim it to only scan actual thread-stacks (and avoid stuff like handles, monitors, etc) and, within those, only compiled frames.
+        // We can also make thread-scans templatized to avoid virtual calls and instead inline the closures.
+        ResourceMark rm;
+        thread->oops_do(_cl, _code_cl);
+      } else if (_code_cl != NULL && thread->is_Java_thread()) {
         // In theory it should not be neccessary to explicitly walk the nmethods to find roots for concurrent marking
         // however the liveness of oops reachable from nmethods have very complex lifecycles:
         // * Alive if on the stack of an executing method
@@ -243,20 +251,20 @@
       SATBMarkQueueSet& satb_mq_set = ShenandoahBarrierSet::satb_mark_queue_set();
       while (satb_mq_set.apply_closure_to_completed_buffer(&cl));
 
-      if (heap->unload_classes() && !ShenandoahConcurrentRoots::can_do_concurrent_class_unloading()) {
-        if (heap->has_forwarded_objects()) {
-          ShenandoahMarkResolveRefsClosure resolve_mark_cl(q, rp);
-          MarkingCodeBlobClosure blobsCl(&resolve_mark_cl, !CodeBlobToOopClosure::FixRelocations);
-          ShenandoahSATBAndRemarkCodeRootsThreadsClosure tc(&cl, &blobsCl);
-          Threads::threads_do(&tc);
-        } else {
-          ShenandoahMarkRefsClosure mark_cl(q, rp);
-          MarkingCodeBlobClosure blobsCl(&mark_cl, !CodeBlobToOopClosure::FixRelocations);
-          ShenandoahSATBAndRemarkCodeRootsThreadsClosure tc(&cl, &blobsCl);
-          Threads::threads_do(&tc);
-        }
+      bool do_nmethods = heap->unload_classes() && !ShenandoahConcurrentRoots::can_do_concurrent_class_unloading();
+      if (heap->has_forwarded_objects()) {
+        ShenandoahMarkResolveRefsClosure resolve_mark_cl(q, rp);
+        MarkingCodeBlobClosure blobsCl(&resolve_mark_cl, !CodeBlobToOopClosure::FixRelocations);
+        ShenandoahSATBAndRemarkCodeRootsThreadsClosure tc(&cl,
+                                                          ShenandoahAggressiveReferenceDiscovery ? &resolve_mark_cl : NULL,
+                                                          do_nmethods ? &blobsCl : NULL);
+        Threads::threads_do(&tc);
       } else {
-        ShenandoahSATBAndRemarkCodeRootsThreadsClosure tc(&cl, NULL);
+        ShenandoahMarkRefsClosure mark_cl(q, rp);
+        MarkingCodeBlobClosure blobsCl(&mark_cl, !CodeBlobToOopClosure::FixRelocations);
+        ShenandoahSATBAndRemarkCodeRootsThreadsClosure tc(&cl,
+                                                          ShenandoahAggressiveReferenceDiscovery ? &mark_cl : NULL,
+                                                          do_nmethods ? &blobsCl : NULL);
         Threads::threads_do(&tc);
       }
     }
diff --git a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp
--- a/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoah_globals.hpp
@@ -151,6 +151,11 @@
           "other cleanup policy. This minimizes footprint at expense of"  \
           "more soft reference churn in applications.")                   \
                                                                           \
+  diagnostic(bool, ShenandoahAggressiveReferenceDiscovery, true,          \
+          "Aggressively avoid keeping references alive upon "             \
+          "Reference.get(), at the expense of some extra latency "        \
+          "caused by an extra thread-stack scan at final-mark")           \
+                                                                          \
   experimental(bool, ShenandoahUncommit, true,                            \
           "Allow to uncommit memory under unused regions and metadata. "  \
           "This optimizes footprint at expense of allocation latency in " \
diff --git a/test/hotspot/jtreg/gc/shenandoah/TestWeakReferenceChurn.java b/test/hotspot/jtreg/gc/shenandoah/TestWeakReferenceChurn.java
new file mode 100644
--- /dev/null
+++ b/test/hotspot/jtreg/gc/shenandoah/TestWeakReferenceChurn.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2020, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test TestWeakReferenceChurn
+ * @summary Check that -XX:+ShenandoahAggressiveReferenceDiscovery reclaims references
+ *          even when accessed during marking
+ * @key gc
+ * @requires vm.gc.Shenandoah & !vm.graal.enabled
+ *
+ * @run main/othervm -XX:+UnlockDiagnosticVMOptions -XX:+UnlockExperimentalVMOptions -Xmx1g -Xms1g
+ *      -XX:+UseShenandoahGC -XX:ShenandoahGuaranteedGCInterval=1
+ *      -XX:+ShenandoahAggressiveReferenceDiscovery
+ *      TestWeakReferenceChurn
+ */
+
+import java.lang.ref.WeakReference;
+import java.util.ArrayList;
+import java.util.Random;
+
+public class TestWeakReferenceChurn {
+
+    public static final int NUM_REFS = 1000000;
+    public static final int NUM_ITERS = 1000;
+    public static class Payload {
+        public int foo;
+        public Payload() {
+            foo = 0;
+        }
+    }
+
+    private static ArrayList<WeakReference<Payload>> refs;
+    private static Random random;
+
+    public static void main(String[] args) {
+        refs = new ArrayList<>(NUM_REFS);
+        random = new Random();
+        for (int i = 0; i < NUM_REFS; i++) {
+            refs.add(new WeakReference<>(new Payload()));
+        }
+        int count = 0;
+        for (int j = 0; j < NUM_ITERS; j++) {
+            count = 0;
+            for (int i = 0; i < NUM_REFS; i++) {
+                if (accessRef(i)) {
+                    count++;
+                }
+            }
+            if (count == 0) {
+                break;
+            }
+        }
+        if (count != 0) {
+            throw new RuntimeException("References not reclaimed");
+        }
+    }
+
+    private static boolean accessRef(int idx) {
+        Payload item = refs.get(idx).get();
+        if (item != null) {
+            item.foo = random.nextInt();
+            return true;
+        } else {
+            return false;
+        }
+    }
+}