--- old/src/hotspot/share/gc/g1/g1CollectedHeap.cpp 2020-01-30 17:10:12.433740915 +0100 +++ new/src/hotspot/share/gc/g1/g1CollectedHeap.cpp 2020-01-30 17:10:11.613729169 +0100 @@ -76,6 +76,7 @@ #include "gc/shared/isGCActiveMark.hpp" #include "gc/shared/locationPrinter.inline.hpp" #include "gc/shared/oopStorageParState.hpp" +#include "gc/shared/owstTaskTerminator.hpp" #include "gc/shared/preservedMarks.inline.hpp" #include "gc/shared/suspendibleThreadSet.hpp" #include "gc/shared/referenceProcessor.inline.hpp" @@ -1132,7 +1133,7 @@ print_heap_after_gc(); print_heap_regions(); #ifdef TRACESPINNING - ParallelTaskTerminator::print_termination_counts(); + OWSTTaskTerminator::print_termination_counts(); #endif } @@ -3140,7 +3141,7 @@ verify_after_young_collection(verify_type); #ifdef TRACESPINNING - ParallelTaskTerminator::print_termination_counts(); + OWSTTaskTerminator::print_termination_counts(); #endif gc_epilogue(false); @@ -3476,14 +3477,14 @@ G1CollectedHeap* _g1h; G1ParScanThreadStateSet* _pss; RefToScanQueueSet* _task_queues; - ParallelTaskTerminator* _terminator; + OWSTTaskTerminator* _terminator; public: G1STWRefProcTaskProxy(ProcessTask& proc_task, G1CollectedHeap* g1h, G1ParScanThreadStateSet* per_thread_states, RefToScanQueueSet *task_queues, - ParallelTaskTerminator* terminator) : + OWSTTaskTerminator* terminator) : AbstractGangTask("Process reference objects in parallel"), _proc_task(proc_task), _g1h(g1h), @@ -3527,8 +3528,8 @@ assert(_workers->active_workers() >= ergo_workers, "Ergonomically chosen workers (%u) should be less than or equal to active workers (%u)", ergo_workers, _workers->active_workers()); - TaskTerminator terminator(ergo_workers, _queues); - G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _pss, _queues, terminator.terminator()); + OWSTTaskTerminator terminator(ergo_workers, _queues); + G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _pss, _queues, &terminator); _workers->run_task(&proc_task_proxy, ergo_workers); } @@ -3814,7 +3815,7 @@ G1CollectedHeap* _g1h; G1ParScanThreadStateSet* _per_thread_states; RefToScanQueueSet* _task_queues; - TaskTerminator _terminator; + OWSTTaskTerminator _terminator; uint _num_workers; void evacuate_live_objects(G1ParScanThreadState* pss, @@ -3824,7 +3825,7 @@ G1GCPhaseTimes* p = _g1h->phase_times(); Ticks start = Ticks::now(); - G1ParEvacuateFollowersClosure cl(_g1h, pss, _task_queues, _terminator.terminator(), objcopy_phase); + G1ParEvacuateFollowersClosure cl(_g1h, pss, _task_queues, &_terminator, objcopy_phase); cl.do_void(); assert(pss->queue_is_empty(), "should be empty"); --- old/src/hotspot/share/gc/g1/g1CollectedHeap.hpp 2020-01-30 17:10:15.721788009 +0100 +++ new/src/hotspot/share/gc/g1/g1CollectedHeap.hpp 2020-01-30 17:10:15.113779302 +0100 @@ -1482,18 +1482,18 @@ G1CollectedHeap* _g1h; G1ParScanThreadState* _par_scan_state; RefToScanQueueSet* _queues; - ParallelTaskTerminator* _terminator; + OWSTTaskTerminator* _terminator; G1GCPhaseTimes::GCParPhases _phase; G1ParScanThreadState* par_scan_state() { return _par_scan_state; } RefToScanQueueSet* queues() { return _queues; } - ParallelTaskTerminator* terminator() { return _terminator; } + OWSTTaskTerminator* terminator() { return _terminator; } public: G1ParEvacuateFollowersClosure(G1CollectedHeap* g1h, G1ParScanThreadState* par_scan_state, RefToScanQueueSet* queues, - ParallelTaskTerminator* terminator, + OWSTTaskTerminator* terminator, G1GCPhaseTimes::GCParPhases phase) : _start_term(0.0), _term_time(0.0), _term_attempts(0), _g1h(g1h), _par_scan_state(par_scan_state), --- old/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp 2020-01-30 17:10:19.565843065 +0100 +++ new/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp 2020-01-30 17:10:18.621829545 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -46,6 +46,7 @@ #include "gc/shared/gcTraceTime.inline.hpp" #include "gc/shared/gcVMOperations.hpp" #include "gc/shared/genOopClosures.inline.hpp" +#include "gc/shared/owstTaskTerminator.hpp" #include "gc/shared/referencePolicy.hpp" #include "gc/shared/strongRootsScope.hpp" #include "gc/shared/suspendibleThreadSet.hpp" @@ -600,7 +601,7 @@ _num_active_tasks = active_tasks; // Need to update the three data structures below according to the // number of active threads for this phase. - _terminator.terminator()->reset_for_reuse((int) active_tasks); + _terminator.reset_for_reuse(active_tasks); _first_overflow_barrier_sync.set_n_workers((int) active_tasks); _second_overflow_barrier_sync.set_n_workers((int) active_tasks); } --- old/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp 2020-01-30 17:10:23.353897317 +0100 +++ new/src/hotspot/share/gc/g1/g1ConcurrentMark.hpp 2020-01-30 17:10:22.385883454 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -30,6 +30,7 @@ #include "gc/g1/g1HeapVerifier.hpp" #include "gc/g1/g1RegionMarkStatsCache.hpp" #include "gc/g1/heapRegionSet.hpp" +#include "gc/shared/owstTaskTerminator.hpp" #include "gc/shared/taskqueue.hpp" #include "gc/shared/verifyOption.hpp" #include "gc/shared/workgroup.hpp" @@ -328,7 +329,7 @@ G1CMTask** _tasks; // Task queue array (max_worker_id length) G1CMTaskQueueSet* _task_queues; // Task queue set - TaskTerminator _terminator; // For termination + OWSTTaskTerminator _terminator; // For termination // Two sync barriers that are used to synchronize tasks when an // overflow occurs. The algorithm is the following. All tasks enter @@ -414,10 +415,10 @@ // Prints all gathered CM-related statistics void print_stats(); - HeapWord* finger() { return _finger; } - bool concurrent() { return _concurrent; } - uint active_tasks() { return _num_active_tasks; } - ParallelTaskTerminator* terminator() const { return _terminator.terminator(); } + HeapWord* finger() { return _finger; } + bool concurrent() { return _concurrent; } + uint active_tasks() { return _num_active_tasks; } + OWSTTaskTerminator* terminator() { return &_terminator; } // Claims the next available region to be scanned by a marking // task/thread. It might return NULL if the next region is empty or --- old/src/hotspot/share/gc/g1/g1FullGCMarkTask.cpp 2020-01-30 17:10:26.753946012 +0100 +++ new/src/hotspot/share/gc/g1/g1FullGCMarkTask.cpp 2020-01-30 17:10:26.061936101 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -59,7 +59,7 @@ } // Mark stack is populated, now process and drain it. - marker->complete_marking(collector()->oop_queue_set(), collector()->array_queue_set(), _terminator.terminator()); + marker->complete_marking(collector()->oop_queue_set(), collector()->array_queue_set(), &_terminator); // This is the point where the entire marking should have completed. assert(marker->oop_stack()->is_empty(), "Marking should have completed"); --- old/src/hotspot/share/gc/g1/g1FullGCMarkTask.hpp 2020-01-30 17:10:30.405998313 +0100 +++ new/src/hotspot/share/gc/g1/g1FullGCMarkTask.hpp 2020-01-30 17:10:29.665987714 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,7 +36,7 @@ class G1FullGCMarkTask : public G1FullGCTask { G1RootProcessor _root_processor; - TaskTerminator _terminator; + OWSTTaskTerminator _terminator; public: G1FullGCMarkTask(G1FullCollector* collector); --- old/src/hotspot/share/gc/g1/g1FullGCMarker.cpp 2020-01-30 17:10:34.850061948 +0100 +++ new/src/hotspot/share/gc/g1/g1FullGCMarker.cpp 2020-01-30 17:10:33.834047401 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "classfile/classLoaderData.hpp" #include "gc/g1/g1FullGCMarker.inline.hpp" +#include "gc/shared/owstTaskTerminator.hpp" #include "gc/shared/referenceProcessor.hpp" #include "gc/shared/verifyOption.hpp" #include "memory/iterator.inline.hpp" @@ -49,7 +50,7 @@ void G1FullGCMarker::complete_marking(OopQueueSet* oop_stacks, ObjArrayTaskQueueSet* array_stacks, - ParallelTaskTerminator* terminator) { + OWSTTaskTerminator* terminator) { do { drain_stack(); ObjArrayTask steal_array; --- old/src/hotspot/share/gc/g1/g1FullGCMarker.hpp 2020-01-30 17:10:38.458113616 +0100 +++ new/src/hotspot/share/gc/g1/g1FullGCMarker.hpp 2020-01-30 17:10:37.598101301 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -87,7 +87,7 @@ inline void drain_stack(); void complete_marking(OopQueueSet* oop_stacks, ObjArrayTaskQueueSet* array_stacks, - ParallelTaskTerminator* terminator); + OWSTTaskTerminator* terminator); // Closure getters CLDToOopClosure* cld_closure() { return &_cld_closure; } --- old/src/hotspot/share/gc/g1/g1FullGCReferenceProcessorExecutor.hpp 2020-01-30 17:10:41.550157892 +0100 +++ new/src/hotspot/share/gc/g1/g1FullGCReferenceProcessorExecutor.hpp 2020-01-30 17:10:40.882148326 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -61,7 +61,7 @@ typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; ProcessTask& _proc_task; G1FullCollector* _collector; - TaskTerminator _terminator; + OWSTTaskTerminator _terminator; public: G1RefProcTaskProxy(ProcessTask& proc_task, --- old/src/hotspot/share/gc/parallel/psCompactionManager.hpp 2020-01-30 17:10:44.774204059 +0100 +++ new/src/hotspot/share/gc/parallel/psCompactionManager.hpp 2020-01-30 17:10:44.198195809 +0100 @@ -38,7 +38,6 @@ class ParMarkBitMap; class ParCompactionManager : public CHeapObj { - friend class ParallelTaskTerminator; friend class ParMarkBitMap; friend class PSParallelCompact; friend class CompactionWithStealingTask; @@ -96,7 +95,7 @@ static void initialize(ParMarkBitMap* mbm); protected: - // Array of tasks. Needed by the ParallelTaskTerminator. + // Array of task queues. Needed by the task terminator. static RegionTaskQueueSet* region_array() { return _region_array; } OverflowTaskQueue* marking_stack() { return &_marking_stack; } --- old/src/hotspot/share/gc/parallel/psParallelCompact.cpp 2020-01-30 17:10:48.450256695 +0100 +++ new/src/hotspot/share/gc/parallel/psParallelCompact.cpp 2020-01-30 17:10:47.458242489 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -49,6 +49,7 @@ #include "gc/shared/gcTrace.hpp" #include "gc/shared/gcTraceTime.inline.hpp" #include "gc/shared/isGCActiveMark.hpp" +#include "gc/shared/owstTaskTerminator.hpp" #include "gc/shared/referencePolicy.hpp" #include "gc/shared/referenceProcessor.hpp" #include "gc/shared/referenceProcessorPhaseTimes.hpp" @@ -1969,7 +1970,7 @@ collection_exit.ticks()); #ifdef TRACESPINNING - ParallelTaskTerminator::print_termination_counts(); + OWSTTaskTerminator::print_termination_counts(); #endif AdaptiveSizePolicyOutput::print(size_policy, heap->total_collections()); @@ -2149,7 +2150,7 @@ cm->follow_marking_stacks(); } -static void steal_marking_work(ParallelTaskTerminator& terminator, uint worker_id) { +static void steal_marking_work(OWSTTaskTerminator& terminator, uint worker_id) { assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc"); ParCompactionManager* cm = @@ -2173,7 +2174,7 @@ typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; StrongRootsScope _strong_roots_scope; // needed for Threads::possibly_parallel_threads_do SequentialSubTasksDone _subtasks; - TaskTerminator _terminator; + OWSTTaskTerminator _terminator; uint _active_workers; public: @@ -2197,7 +2198,7 @@ Threads::possibly_parallel_threads_do(true /*parallel */, &closure); if (_active_workers > 1) { - steal_marking_work(*_terminator.terminator(), worker_id); + steal_marking_work(_terminator, worker_id); } } }; @@ -2206,7 +2207,7 @@ typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; ProcessTask& _task; uint _ergo_workers; - TaskTerminator _terminator; + OWSTTaskTerminator _terminator; public: PCRefProcTask(ProcessTask& task, uint ergo_workers) : @@ -2227,7 +2228,7 @@ _task.work(worker_id, *PSParallelCompact::is_alive_closure(), mark_and_push_closure, follow_stack_closure); - steal_marking_work(*_terminator.terminator(), worker_id); + steal_marking_work(_terminator, worker_id); } }; @@ -2586,7 +2587,7 @@ } #endif // #ifdef ASSERT -static void compaction_with_stealing_work(ParallelTaskTerminator* terminator, uint worker_id) { +static void compaction_with_stealing_work(OWSTTaskTerminator* terminator, uint worker_id) { assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc"); ParCompactionManager* cm = @@ -2622,7 +2623,7 @@ class UpdateDensePrefixAndCompactionTask: public AbstractGangTask { typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; TaskQueue& _tq; - TaskTerminator _terminator; + OWSTTaskTerminator _terminator; uint _active_workers; public: @@ -2644,7 +2645,7 @@ // Once a thread has drained it's stack, it should try to steal regions from // other threads. - compaction_with_stealing_work(_terminator.terminator(), worker_id); + compaction_with_stealing_work(&_terminator, worker_id); } }; --- old/src/hotspot/share/gc/parallel/psParallelCompact.hpp 2020-01-30 17:10:51.978307210 +0100 +++ new/src/hotspot/share/gc/parallel/psParallelCompact.hpp 2020-01-30 17:10:51.062294091 +0100 @@ -40,9 +40,7 @@ class PSYoungGen; class PSOldGen; class ParCompactionManager; -class ParallelTaskTerminator; class PSParallelCompact; -class PreGCValues; class MoveAndUpdateClosure; class RefProcTaskExecutor; class ParallelOldTracer; --- old/src/hotspot/share/gc/parallel/psScavenge.cpp 2020-01-30 17:10:55.190353198 +0100 +++ new/src/hotspot/share/gc/parallel/psScavenge.cpp 2020-01-30 17:10:54.198338993 +0100 @@ -43,6 +43,7 @@ #include "gc/shared/gcTrace.hpp" #include "gc/shared/gcTraceTime.inline.hpp" #include "gc/shared/isGCActiveMark.hpp" +#include "gc/shared/owstTaskTerminator.hpp" #include "gc/shared/referencePolicy.hpp" #include "gc/shared/referenceProcessor.hpp" #include "gc/shared/referenceProcessorPhaseTimes.hpp" @@ -138,7 +139,7 @@ pm->drain_stacks(false); } -static void steal_work(ParallelTaskTerminator& terminator, uint worker_id) { +static void steal_work(OWSTTaskTerminator& terminator, uint worker_id) { assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc"); PSPromotionManager* pm = @@ -218,7 +219,7 @@ class PSRefProcTask : public AbstractGangTask { typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; - TaskTerminator _terminator; + OWSTTaskTerminator _terminator; ProcessTask& _task; uint _active_workers; @@ -240,7 +241,7 @@ _task.work(worker_id, is_alive, keep_alive, evac_followers); if (_task.marks_oops_alive() && _active_workers > 1) { - steal_work(*_terminator.terminator(), worker_id); + steal_work(_terminator, worker_id); } } }; @@ -314,7 +315,7 @@ HeapWord* _gen_top; uint _active_workers; bool _is_empty; - TaskTerminator _terminator; + OWSTTaskTerminator _terminator; public: ScavengeRootsTask(PSOldGen* old_gen, @@ -377,7 +378,7 @@ // ParallelGCThreads is > 1. if (_active_workers > 1) { - steal_work(*_terminator.terminator() , worker_id); + steal_work(_terminator, worker_id); } } }; @@ -731,7 +732,7 @@ scavenge_exit.ticks()); #ifdef TRACESPINNING - ParallelTaskTerminator::print_termination_counts(); + OWSTTaskTerminator::print_termination_counts(); #endif AdaptiveSizePolicyOutput::print(size_policy, heap->total_collections()); --- old/src/hotspot/share/gc/shared/gc_globals.hpp 2020-01-30 17:10:58.802404913 +0100 +++ new/src/hotspot/share/gc/shared/gc_globals.hpp 2020-01-30 17:10:57.826390936 +0100 @@ -315,10 +315,6 @@ develop(uintx, PromotionFailureALotInterval, 5, \ "Total collections between promotion failures a lot") \ \ - diagnostic(bool, UseOWSTTaskTerminator, true, \ - "Use Optimized Work Stealing Threads task termination " \ - "protocol") \ - \ experimental(uintx, WorkStealingSleepMillis, 1, \ "Sleep time when sleep is used for yields") \ \ --- old/src/hotspot/share/gc/shared/genCollectedHeap.cpp 2020-01-30 17:11:02.314455191 +0100 +++ new/src/hotspot/share/gc/shared/genCollectedHeap.cpp 2020-01-30 17:11:01.170438812 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -678,7 +678,7 @@ } #ifdef TRACESPINNING - ParallelTaskTerminator::print_termination_counts(); + OWSTTaskTerminator::print_termination_counts(); #endif } --- old/src/hotspot/share/gc/shared/owstTaskTerminator.cpp 2020-01-30 17:11:05.230496940 +0100 +++ new/src/hotspot/share/gc/shared/owstTaskTerminator.cpp 2020-01-30 17:11:04.566487434 +0100 @@ -1,5 +1,6 @@ /* * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,12 +26,72 @@ #include "precompiled.hpp" #include "gc/shared/owstTaskTerminator.hpp" +#include "gc/shared/taskqueue.hpp" #include "logging/log.hpp" +#ifdef TRACESPINNING +uint OWSTTaskTerminator::_total_yields = 0; +uint OWSTTaskTerminator::_total_spins = 0; +uint OWSTTaskTerminator::_total_peeks = 0; +#endif + +OWSTTaskTerminator::OWSTTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) : + _n_threads(n_threads), + _queue_set(queue_set), + _offered_termination(0), + _spin_master(NULL) { + + _blocker = new Monitor(Mutex::leaf, "OWSTTaskTerminator", false, Monitor::_safepoint_check_never); +} + +OWSTTaskTerminator::~OWSTTaskTerminator() { + assert(_offered_termination == 0 || !peek_in_queue_set(), "Precondition"); + assert(_offered_termination == 0 || _offered_termination == _n_threads, "Terminated or aborted" ); + + assert(_spin_master == NULL, "Should have been reset"); + assert(_blocker != NULL, "Can not be NULL"); + delete _blocker; +} + +#ifdef ASSERT +bool OWSTTaskTerminator::peek_in_queue_set() { + return _queue_set->peek(); +} +#endif + +void OWSTTaskTerminator::yield() { + assert(_offered_termination <= _n_threads, "Invariant"); + os::naked_yield(); +} + +#ifdef TRACESPINNING +void OWSTTaskTerminator::print_termination_counts() { + log_trace(gc, task)("TaskTerminator Yields: %u Spins: %u Peeks: %u", + total_yields(), total_spins(), total_peeks()); +} +#endif + +void OWSTTaskTerminator::reset_for_reuse() { + if (_offered_termination != 0) { + assert(_offered_termination == _n_threads, + "Terminator may still be in use"); + _offered_termination = 0; + } +} + +void OWSTTaskTerminator::reset_for_reuse(uint n_threads) { + reset_for_reuse(); + _n_threads = n_threads; +} + bool OWSTTaskTerminator::exit_termination(size_t tasks, TerminatorTerminator* terminator) { return tasks > 0 || (terminator != NULL && terminator->should_exit_termination()); } +size_t OWSTTaskTerminator::tasks_in_queue_set() const { + return _queue_set->tasks(); +} + bool OWSTTaskTerminator::offer_termination(TerminatorTerminator* terminator) { assert(_n_threads > 0, "Initialization is incorrect"); assert(_offered_termination < _n_threads, "Invariant"); @@ -163,7 +224,7 @@ } #ifdef TRACESPINNING - _total_peeks++; + _total_peeks++; #endif size_t tasks = tasks_in_queue_set(); bool exit = exit_termination(tasks, terminator); --- old/src/hotspot/share/gc/shared/owstTaskTerminator.hpp 2020-01-30 17:11:07.978536279 +0100 +++ new/src/hotspot/share/gc/shared/owstTaskTerminator.hpp 2020-01-30 17:11:07.274526200 +0100 @@ -1,5 +1,6 @@ /* * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,14 +25,20 @@ #ifndef SHARE_GC_SHARED_OWSTTASKTERMINATOR_HPP #define SHARE_GC_SHARED_OWSTTASKTERMINATOR_HPP -#include "gc/shared/taskqueue.hpp" +#include "memory/allocation.hpp" #include "runtime/mutex.hpp" #include "runtime/thread.hpp" +// Define this to enable additional tracing probes. +#undef TRACESPINNING + +class TaskQueueSetSuper; +class TerminatorTerminator; + /* - * OWST stands for Optimized Work Stealing Threads + * Provides a task termination protocol. OWST stands for Optimized Work Stealing Threads * - * This is an enhanced implementation of Google's work stealing + * This is an enhanced implementation of Google's work stealing task termination * protocol, which is described in the paper: * "Wessam Hassanein. 2016. Understanding and improving JVM GC work * stealing at the data center scale. In Proceedings of the 2016 ACM @@ -43,38 +50,71 @@ * The intention of above enhancement is to reduce spin-master's latency on detecting new tasks * for stealing and termination condition. */ +class OWSTTaskTerminator : public CHeapObj { + uint _n_threads; + TaskQueueSetSuper* _queue_set; + + DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE, 0); + volatile uint _offered_termination; + DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile uint)); + +#ifdef ASSERT + bool peek_in_queue_set(); +#endif + void yield(); -class OWSTTaskTerminator: public ParallelTaskTerminator { -private: Monitor* _blocker; Thread* _spin_master; +#ifdef TRACESPINNING + static uint _total_yields; + static uint _total_spins; + static uint _total_peeks; +#endif + + // If we should exit current termination protocol + bool exit_termination(size_t tasks, TerminatorTerminator* terminator); + + size_t tasks_in_queue_set() const; + + // Perform spin-master task. + // Return true if termination condition is detected, otherwise return false + bool do_spin_master_work(TerminatorTerminator* terminator); + + NONCOPYABLE(OWSTTaskTerminator); + public: - OWSTTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) : - ParallelTaskTerminator(n_threads, queue_set), _spin_master(NULL) { - _blocker = new Monitor(Mutex::leaf, "OWSTTaskTerminator", false, Monitor::_safepoint_check_never); - } + OWSTTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set); + ~OWSTTaskTerminator(); - virtual ~OWSTTaskTerminator() { - assert(_spin_master == NULL, "Should have been reset"); - assert(_blocker != NULL, "Can not be NULL"); - delete _blocker; + // The current thread has no work, and is ready to terminate if everyone + // else is. If returns "true", all threads are terminated. If returns + // "false", available work has been observed in one of the task queues, + // so the global task is not complete. + bool offer_termination() { + return offer_termination(NULL); } + // As above, but it also terminates if the should_exit_termination() + // method of the terminator parameter returns true. If terminator is + // NULL, then it is ignored. bool offer_termination(TerminatorTerminator* terminator); -protected: - // If should exit current termination protocol - virtual bool exit_termination(size_t tasks, TerminatorTerminator* terminator); - -private: - size_t tasks_in_queue_set() { return _queue_set->tasks(); } - - /* - * Perform spin-master task. - * Return true if termination condition is detected, otherwise return false - */ - bool do_spin_master_work(TerminatorTerminator* terminator); + // Reset the terminator, so that it may be reused again. + // The caller is responsible for ensuring that this is done + // in an MT-safe manner, once the previous round of use of + // the terminator is finished. + void reset_for_reuse(); + // Same as above but the number of parallel threads is set to the + // given number. + void reset_for_reuse(uint n_threads); + +#ifdef TRACESPINNING + static uint total_yields() { return _total_yields; } + static uint total_spins() { return _total_spins; } + static uint total_peeks() { return _total_peeks; } + static void print_termination_counts(); +#endif }; --- old/src/hotspot/share/gc/shared/taskqueue.cpp 2020-01-30 17:11:11.050580257 +0100 +++ new/src/hotspot/share/gc/shared/taskqueue.cpp 2020-01-30 17:11:10.286569322 +0100 @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,12 +33,6 @@ #include "utilities/debug.hpp" #include "utilities/stack.inline.hpp" -#ifdef TRACESPINNING -uint ParallelTaskTerminator::_total_yields = 0; -uint ParallelTaskTerminator::_total_spins = 0; -uint ParallelTaskTerminator::_total_peeks = 0; -#endif - #if TASKQUEUE_STATS const char * const TaskQueueStats::_names[last_stat_id] = { "qpush", "qpop", "qpop-s", "qattempt", "qsteal", "opush", "omax" @@ -112,171 +106,9 @@ #endif // ASSERT #endif // TASKQUEUE_STATS -ParallelTaskTerminator:: -ParallelTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) : - _n_threads(n_threads), - _queue_set(queue_set), - _offered_termination(0) {} - -ParallelTaskTerminator::~ParallelTaskTerminator() { - assert(_offered_termination == 0 || !peek_in_queue_set(), "Precondition"); - assert(_offered_termination == 0 || _offered_termination == _n_threads, "Terminated or aborted" ); -} - -bool ParallelTaskTerminator::peek_in_queue_set() { - return _queue_set->peek(); -} - -void ParallelTaskTerminator::yield() { - assert(_offered_termination <= _n_threads, "Invariant"); - os::naked_yield(); -} - -void ParallelTaskTerminator::sleep(uint millis) { - assert(_offered_termination <= _n_threads, "Invariant"); - os::naked_sleep(millis); -} - -bool -ParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) { - assert(_n_threads > 0, "Initialization is incorrect"); - assert(_offered_termination < _n_threads, "Invariant"); - Atomic::inc(&_offered_termination); - - uint yield_count = 0; - // Number of hard spin loops done since last yield - uint hard_spin_count = 0; - // Number of iterations in the hard spin loop. - uint hard_spin_limit = WorkStealingHardSpins; - - // If WorkStealingSpinToYieldRatio is 0, no hard spinning is done. - // If it is greater than 0, then start with a small number - // of spins and increase number with each turn at spinning until - // the count of hard spins exceeds WorkStealingSpinToYieldRatio. - // Then do a yield() call and start spinning afresh. - if (WorkStealingSpinToYieldRatio > 0) { - hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio; - hard_spin_limit = MAX2(hard_spin_limit, 1U); - } - // Remember the initial spin limit. - uint hard_spin_start = hard_spin_limit; - - // Loop waiting for all threads to offer termination or - // more work. - while (true) { - assert(_offered_termination <= _n_threads, "Invariant"); - // Are all threads offering termination? - if (_offered_termination == _n_threads) { - assert(!peek_in_queue_set(), "Precondition"); - return true; - } else { - // Look for more work. - // Periodically sleep() instead of yield() to give threads - // waiting on the cores the chance to grab this code - if (yield_count <= WorkStealingYieldsBeforeSleep) { - // Do a yield or hardspin. For purposes of deciding whether - // to sleep, count this as a yield. - yield_count++; - - // Periodically call yield() instead spinning - // After WorkStealingSpinToYieldRatio spins, do a yield() call - // and reset the counts and starting limit. - if (hard_spin_count > WorkStealingSpinToYieldRatio) { - yield(); - hard_spin_count = 0; - hard_spin_limit = hard_spin_start; -#ifdef TRACESPINNING - _total_yields++; -#endif - } else { - // Hard spin this time - // Increase the hard spinning period but only up to a limit. - hard_spin_limit = MIN2(2*hard_spin_limit, - (uint) WorkStealingHardSpins); - for (uint j = 0; j < hard_spin_limit; j++) { - SpinPause(); - } - hard_spin_count++; -#ifdef TRACESPINNING - _total_spins++; -#endif - } - } else { - log_develop_trace(gc, task)("ParallelTaskTerminator::offer_termination() thread " PTR_FORMAT " sleeps after %u yields", - p2i(Thread::current()), yield_count); - yield_count = 0; - // A sleep will cause this processor to seek work on another processor's - // runqueue, if it has nothing else to run (as opposed to the yield - // which may only move the thread to the end of the this processor's - // runqueue). - sleep(WorkStealingSleepMillis); - } - -#ifdef TRACESPINNING - _total_peeks++; -#endif - if (peek_in_queue_set() || - (terminator != NULL && terminator->should_exit_termination())) { - return complete_or_exit_termination(); - } - } - } -} - -#ifdef TRACESPINNING -void ParallelTaskTerminator::print_termination_counts() { - log_trace(gc, task)("ParallelTaskTerminator Total yields: %u" - " Total spins: %u Total peeks: %u", - total_yields(), - total_spins(), - total_peeks()); -} -#endif - -bool ParallelTaskTerminator::complete_or_exit_termination() { - // If termination is ever reached, terminator should stay in such state, - // so that all threads see the same state - uint current_offered = _offered_termination; - uint expected_value; - do { - if (current_offered == _n_threads) { - assert(!peek_in_queue_set(), "Precondition"); - return true; - } - expected_value = current_offered; - } while ((current_offered = Atomic::cmpxchg(&_offered_termination, current_offered, current_offered - 1)) != expected_value); - - assert(_offered_termination < _n_threads, "Invariant"); - return false; -} - -void ParallelTaskTerminator::reset_for_reuse() { - if (_offered_termination != 0) { - assert(_offered_termination == _n_threads, - "Terminator may still be in use"); - _offered_termination = 0; - } -} - #ifdef ASSERT bool ObjArrayTask::is_valid() const { return _obj != NULL && _obj->is_objArray() && _index >= 0 && _index < objArrayOop(_obj)->length(); } #endif // ASSERT - -void ParallelTaskTerminator::reset_for_reuse(uint n_threads) { - reset_for_reuse(); - _n_threads = n_threads; -} - -TaskTerminator::TaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) : - _terminator(UseOWSTTaskTerminator ? new OWSTTaskTerminator(n_threads, queue_set) - : new ParallelTaskTerminator(n_threads, queue_set)) { -} - -TaskTerminator::~TaskTerminator() { - if (_terminator != NULL) { - delete _terminator; - } -} --- old/src/hotspot/share/gc/shared/taskqueue.hpp 2020-01-30 17:11:14.242625951 +0100 +++ new/src/hotspot/share/gc/shared/taskqueue.hpp 2020-01-30 17:11:13.450614613 +0100 @@ -443,89 +443,6 @@ virtual bool should_exit_termination() = 0; }; -// A class to aid in the termination of a set of parallel tasks using -// TaskQueueSet's for work stealing. - -#undef TRACESPINNING - -class ParallelTaskTerminator: public CHeapObj { -protected: - uint _n_threads; - TaskQueueSetSuper* _queue_set; - - DEFINE_PAD_MINUS_SIZE(0, DEFAULT_CACHE_LINE_SIZE, 0); - volatile uint _offered_termination; - DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(volatile uint)); - -#ifdef TRACESPINNING - static uint _total_yields; - static uint _total_spins; - static uint _total_peeks; -#endif - - bool peek_in_queue_set(); -protected: - virtual void yield(); - void sleep(uint millis); - - // Called when exiting termination is requested. - // When the request is made, terminator may have already terminated - // (e.g. all threads are arrived and offered termination). In this case, - // it should ignore the request and complete the termination. - // Return true if termination is completed. Otherwise, return false. - bool complete_or_exit_termination(); -public: - - // "n_threads" is the number of threads to be terminated. "queue_set" is a - // queue sets of work queues of other threads. - ParallelTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set); - virtual ~ParallelTaskTerminator(); - - // The current thread has no work, and is ready to terminate if everyone - // else is. If returns "true", all threads are terminated. If returns - // "false", available work has been observed in one of the task queues, - // so the global task is not complete. - bool offer_termination() { - return offer_termination(NULL); - } - - // As above, but it also terminates if the should_exit_termination() - // method of the terminator parameter returns true. If terminator is - // NULL, then it is ignored. - virtual bool offer_termination(TerminatorTerminator* terminator); - - // Reset the terminator, so that it may be reused again. - // The caller is responsible for ensuring that this is done - // in an MT-safe manner, once the previous round of use of - // the terminator is finished. - void reset_for_reuse(); - // Same as above but the number of parallel threads is set to the - // given number. - void reset_for_reuse(uint n_threads); - -#ifdef TRACESPINNING - static uint total_yields() { return _total_yields; } - static uint total_spins() { return _total_spins; } - static uint total_peeks() { return _total_peeks; } - static void print_termination_counts(); -#endif -}; - -class TaskTerminator : public StackObj { -private: - ParallelTaskTerminator* _terminator; - - NONCOPYABLE(TaskTerminator); - -public: - TaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set); - ~TaskTerminator(); - - ParallelTaskTerminator* terminator() const { - return _terminator; - } -}; - typedef GenericTaskQueue OopTaskQueue; typedef GenericTaskQueueSet OopTaskQueueSet; --- old/src/hotspot/share/gc/shenandoah/shenandoahTaskqueue.cpp 2020-01-30 17:11:17.422671475 +0100 +++ new/src/hotspot/share/gc/shenandoah/shenandoahTaskqueue.cpp 2020-01-30 17:11:16.806662657 +0100 @@ -51,12 +51,7 @@ } ShenandoahTaskTerminator::ShenandoahTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set) : - _terminator(new OWSTTaskTerminator(n_threads, queue_set)) { } - -ShenandoahTaskTerminator::~ShenandoahTaskTerminator() { - assert(_terminator != NULL, "Invariant"); - delete _terminator; -} + _terminator(n_threads, queue_set) { } #if TASKQUEUE_STATS void ShenandoahObjToScanQueueSet::print_taskqueue_stats_hdr(outputStream* const st) { --- old/src/hotspot/share/gc/shenandoah/shenandoahTaskqueue.hpp 2020-01-30 17:11:20.254712011 +0100 +++ new/src/hotspot/share/gc/shenandoah/shenandoahTaskqueue.hpp 2020-01-30 17:11:19.378699472 +0100 @@ -340,16 +340,15 @@ class ShenandoahTaskTerminator : public StackObj { private: - OWSTTaskTerminator* const _terminator; + OWSTTaskTerminator _terminator; public: ShenandoahTaskTerminator(uint n_threads, TaskQueueSetSuper* queue_set); - ~ShenandoahTaskTerminator(); bool offer_termination(ShenandoahTerminatorTerminator* terminator) { - return _terminator->offer_termination(terminator); + return _terminator.offer_termination(terminator); } - void reset_for_reuse() { _terminator->reset_for_reuse(); } + void reset_for_reuse() { _terminator.reset_for_reuse(); } bool offer_termination() { return offer_termination((ShenandoahTerminatorTerminator*)NULL); } };