--- old/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp 2017-11-20 16:09:27.328414522 +0100 +++ new/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp 2017-11-20 16:09:26.912401892 +0100 @@ -33,6 +33,107 @@ #include "utilities/pair.hpp" #include +G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread(uint worker_id, bool initializing) { + G1ConcurrentRefineThread* result = NULL; + if (initializing || !InjectGCWorkerCreationFailure) { + result = new G1ConcurrentRefineThread(_cr, worker_id); + } + if (result == NULL || result->osthread() == NULL) { + log_warning(gc)("Failed to create refinement thread %u, no more %s", + worker_id, + result == NULL ? "memory" : "OS threads"); + } + return result; +} + +G1ConcurrentRefineThreadControl::G1ConcurrentRefineThreadControl() : + _cr(NULL), + _threads(NULL), + _num_max_threads(0) +{ +} + +G1ConcurrentRefineThreadControl::~G1ConcurrentRefineThreadControl() { + for (uint i = 0; i < _num_max_threads; i++) { + G1ConcurrentRefineThread* t = _threads[i]; + if (t != NULL) { + delete t; + } + } + FREE_C_HEAP_ARRAY(G1ConcurrentRefineThread*, _threads); +} + +jint G1ConcurrentRefineThreadControl::initialize(G1ConcurrentRefine* cr, uint num_max_threads) { + assert(cr != NULL, "G1ConcurrentRefine must not be NULL"); + _cr = cr; + _num_max_threads = num_max_threads; + + _threads = NEW_C_HEAP_ARRAY_RETURN_NULL(G1ConcurrentRefineThread*, num_max_threads, mtGC); + if (_threads == NULL) { + vm_shutdown_during_initialization("Could not allocate thread holder array."); + return JNI_ENOMEM; + } + + for (uint i = 0; i < num_max_threads; i++) { + if (UseDynamicNumberOfGCThreads && i != 0 /* Always start first thread. */) { + _threads[i] = NULL; + } else { + _threads[i] = create_refinement_thread(i, true); + if (_threads[i] == NULL) { + vm_shutdown_during_initialization("Could not allocate refinement threads."); + return JNI_ENOMEM; + } + } + } + return JNI_OK; +} + +void G1ConcurrentRefineThreadControl::maybe_activate_next(uint cur_worker_id) { + assert(cur_worker_id < _num_max_threads, + "Activating another thread from %u not allowed since there can be at most %u", + cur_worker_id, _num_max_threads); + if (cur_worker_id == (_num_max_threads - 1)) { + // Already the last thread, there is no more thread to activate. + return; + } + + uint worker_id = cur_worker_id + 1; + G1ConcurrentRefineThread* thread_to_activate = _threads[worker_id]; + if (thread_to_activate == NULL) { + // Still need to create the thread... + _threads[worker_id] = create_refinement_thread(worker_id, false); + thread_to_activate = _threads[worker_id]; + } + if (thread_to_activate != NULL && !thread_to_activate->is_active()) { + thread_to_activate->activate(); + } +} + +void G1ConcurrentRefineThreadControl::print_on(outputStream* st) const { + for (uint i = 0; i < _num_max_threads; ++i) { + if (_threads[i] != NULL) { + _threads[i]->print_on(st); + st->cr(); + } + } +} + +void G1ConcurrentRefineThreadControl::worker_threads_do(ThreadClosure* tc) { + for (uint i = 0; i < _num_max_threads; i++) { + if (_threads[i] != NULL) { + tc->do_thread(_threads[i]); + } + } +} + +void G1ConcurrentRefineThreadControl::stop() { + for (uint i = 0; i < _num_max_threads; i++) { + if (_threads[i] != NULL) { + _threads[i]->stop(); + } + } +} + // Arbitrary but large limits, to simplify some of the zone calculations. // The general idea is to allow expressions like // MIN2(x OP y, max_XXX_zone) @@ -96,7 +197,7 @@ size_t yellow_zone, uint worker_i) { double yellow_size = yellow_zone - green_zone; - double step = yellow_size / G1ConcurrentRefine::thread_num(); + double step = yellow_size / G1ConcurrentRefine::max_num_threads(); if (worker_i == 0) { // Potentially activate worker 0 more aggressively, to keep // available buffers near green_zone value. When yellow_size is @@ -115,8 +216,7 @@ size_t yellow_zone, size_t red_zone, size_t min_yellow_zone_size) : - _threads(NULL), - _n_worker_threads(thread_num()), + _thread_control(), _green_zone(green_zone), _yellow_zone(yellow_zone), _red_zone(red_zone), @@ -125,9 +225,13 @@ assert_zone_constraints_gyr(green_zone, yellow_zone, red_zone); } +jint G1ConcurrentRefine::initialize() { + return _thread_control.initialize(this, max_num_threads()); +} + static size_t calc_min_yellow_zone_size() { size_t step = G1ConcRefinementThresholdStep; - uint n_workers = G1ConcurrentRefine::thread_num(); + uint n_workers = G1ConcurrentRefine::max_num_threads(); if ((max_yellow_zone / step) < n_workers) { return max_yellow_zone; } else { @@ -191,77 +295,27 @@ return NULL; } - cr->_threads = NEW_C_HEAP_ARRAY_RETURN_NULL(G1ConcurrentRefineThread*, cr->_n_worker_threads, mtGC); - if (cr->_threads == NULL) { - *ecode = JNI_ENOMEM; - vm_shutdown_during_initialization("Could not allocate an array for G1ConcurrentRefineThread"); - return NULL; - } - - uint worker_id_offset = DirtyCardQueueSet::num_par_ids(); - - G1ConcurrentRefineThread *next = NULL; - for (uint i = cr->_n_worker_threads - 1; i != UINT_MAX; i--) { - Thresholds thresholds = calc_thresholds(green_zone, yellow_zone, i); - G1ConcurrentRefineThread* t = - new G1ConcurrentRefineThread(cr, - next, - worker_id_offset, - i, - activation_level(thresholds), - deactivation_level(thresholds)); - assert(t != NULL, "Conc refine should have been created"); - if (t->osthread() == NULL) { - *ecode = JNI_ENOMEM; - vm_shutdown_during_initialization("Could not create G1ConcurrentRefineThread"); - return NULL; - } - - assert(t->cr() == cr, "Conc refine thread should refer to this"); - cr->_threads[i] = t; - next = t; - } - - *ecode = JNI_OK; + *ecode = cr->initialize(); return cr; } void G1ConcurrentRefine::stop() { - for (uint i = 0; i < _n_worker_threads; i++) { - _threads[i]->stop(); - } -} - -void G1ConcurrentRefine::update_thread_thresholds() { - for (uint i = 0; i < _n_worker_threads; i++) { - Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, i); - _threads[i]->update_thresholds(activation_level(thresholds), - deactivation_level(thresholds)); - } + _thread_control.stop(); } G1ConcurrentRefine::~G1ConcurrentRefine() { - for (uint i = 0; i < _n_worker_threads; i++) { - delete _threads[i]; - } - FREE_C_HEAP_ARRAY(G1ConcurrentRefineThread*, _threads); } void G1ConcurrentRefine::threads_do(ThreadClosure *tc) { - for (uint i = 0; i < _n_worker_threads; i++) { - tc->do_thread(_threads[i]); - } + _thread_control.worker_threads_do(tc); } -uint G1ConcurrentRefine::thread_num() { +uint G1ConcurrentRefine::max_num_threads() { return G1ConcRefinementThreads; } void G1ConcurrentRefine::print_threads_on(outputStream* st) const { - for (uint i = 0; i < _n_worker_threads; ++i) { - _threads[i]->print_on(st); - st->cr(); - } + _thread_control.print_on(st); } static size_t calc_new_green_zone(size_t green, @@ -326,16 +380,15 @@ if (G1UseAdaptiveConcRefinement) { update_zones(update_rs_time, update_rs_processed_buffers, goal_ms); - update_thread_thresholds(); // Change the barrier params - if (_n_worker_threads == 0) { + if (max_num_threads() == 0) { // Disable dcqs notification when there are no threads to notify. dcqs.set_process_completed_threshold(INT_MAX); } else { // Worker 0 is the primary; wakeup is via dcqs notification. STATIC_ASSERT(max_yellow_zone <= INT_MAX); - size_t activate = _threads[0]->activation_threshold(); + size_t activate = activation_threshold(0); dcqs.set_process_completed_threshold((int)activate); } dcqs.set_max_completed_queue((int)red_zone()); @@ -349,3 +402,42 @@ } dcqs.notify_if_necessary(); } + +size_t G1ConcurrentRefine::activation_threshold(uint worker_id) const { + Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id); + return activation_level(thresholds); +} + +size_t G1ConcurrentRefine::deactivation_threshold(uint worker_id) const { + Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id); + return deactivation_level(thresholds); +} + +uint G1ConcurrentRefine::worker_id_offset() { + return DirtyCardQueueSet::num_par_ids(); +} + +void G1ConcurrentRefine::maybe_activate_more_threads(uint worker_id, size_t num_cur_buffers) { + if (num_cur_buffers > activation_threshold(worker_id + 1)) { + _thread_control.maybe_activate_next(worker_id); + } +} + +bool G1ConcurrentRefine::do_refinement_step(uint worker_id) { + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); + + size_t curr_buffer_num = dcqs.completed_buffers_num(); + // If the number of the buffers falls down into the yellow zone, + // that means that the transition period after the evacuation pause has ended. + // Since the value written to the DCQS is the same for all threads, there is no + // need to synchronize. + if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= yellow_zone()) { + dcqs.set_completed_queue_padding(0); + } + + maybe_activate_more_threads(worker_id, curr_buffer_num); + + // Process the next buffer, if there are enough left. + return dcqs.refine_completed_buffer_concurrently(worker_id + worker_id_offset(), + deactivation_threshold(worker_id)); +} --- old/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp 2017-11-20 16:09:29.208471600 +0100 +++ new/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp 2017-11-20 16:09:28.798459152 +0100 @@ -30,30 +30,63 @@ // Forward decl class CardTableEntryClosure; +class G1ConcurrentRefine; class G1ConcurrentRefineThread; class outputStream; class ThreadClosure; -class G1ConcurrentRefine : public CHeapObj { +// Helper class for refinement thread management. Used to start, stop and +// iterate over them. +class G1ConcurrentRefineThreadControl VALUE_OBJ_CLASS_SPEC { + G1ConcurrentRefine* _cr; + G1ConcurrentRefineThread** _threads; - uint _n_worker_threads; - /* - * The value of the update buffer queue length falls into one of 3 zones: - * green, yellow, red. If the value is in [0, green) nothing is - * done, the buffers are left unprocessed to enable the caching effect of the - * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement - * threads are gradually activated. In [yellow, red) all threads are - * running. If the length becomes red (max queue length) the mutators start - * processing the buffers. - * - * There are some interesting cases (when G1UseAdaptiveConcRefinement - * is turned off): - * 1) green = yellow = red = 0. In this case the mutator will process all - * buffers. Except for those that are created by the deferred updates - * machinery during a collection. - * 2) green = 0. Means no caching. Can be a good way to minimize the - * amount of time spent updating rsets during a collection. - */ + uint _num_max_threads; + + // Create the refinement thread for the given worker id. + // If initializing is true, ignore InjectGCWorkerCreationFailure. + G1ConcurrentRefineThread* create_refinement_thread(uint worker_id, bool initializing); +public: + G1ConcurrentRefineThreadControl(); + ~G1ConcurrentRefineThreadControl(); + + jint initialize(G1ConcurrentRefine* cr, uint num_max_threads); + + // If there is a "successor" thread that can be activated given the current id, + // activate it. + void maybe_activate_next(uint cur_worker_id); + + void print_on(outputStream* st) const; + void worker_threads_do(ThreadClosure* tc); + void stop(); +}; + +// Controls refinement threads and their activation based on the number of completed +// buffers currently available in the global dirty card queue. +// Refinement threads pick work from the queue based on these thresholds. They are activated +// gradually based on the amount of work to do. +// Refinement thread n activates thread n+1 if the instance of this class determines there +// is enough work available. Threads deactivate themselves if the current amount of +// completed buffers falls below their individual threshold. +class G1ConcurrentRefine : public CHeapObj { + G1ConcurrentRefineThreadControl _thread_control; + /* + * The value of the completed dirty card queue length falls into one of 3 zones: + * green, yellow, red. If the value is in [0, green) nothing is + * done, the buffers are left unprocessed to enable the caching effect of the + * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement + * threads are gradually activated. In [yellow, red) all threads are + * running. If the length becomes red (max queue length) the mutators start + * processing the buffers. + * + * There are some interesting cases (when G1UseAdaptiveConcRefinement + * is turned off): + * 1) green = yellow = red = 0. In this case the mutator will process all + * buffers. Except for those that are created by the deferred updates + * machinery during a collection. + * 2) green = 0. Means no caching. Can be a good way to minimize the + * amount of time spent updating remembered sets during a collection. + */ size_t _green_zone; size_t _yellow_zone; size_t _red_zone; @@ -69,24 +102,32 @@ size_t update_rs_processed_buffers, double goal_ms); - // Update thread thresholds to account for updated zone values. - void update_thread_thresholds(); + static uint worker_id_offset(); + void maybe_activate_more_threads(uint worker_id, size_t num_cur_buffers); - public: + jint initialize(); +public: ~G1ConcurrentRefine(); - // Returns a G1ConcurrentRefine instance if succeeded to create/initialize G1ConcurrentRefine and G1ConcurrentRefineThreads. - // Otherwise, returns NULL with error code. + // Returns a G1ConcurrentRefine instance if succeeded to create/initialize the + // G1ConcurrentRefine instance. Otherwise, returns NULL with error code. static G1ConcurrentRefine* create(jint* ecode); void stop(); + // Adjust refinement thresholds based on work done during the pause and the goal time. void adjust(double update_rs_time, size_t update_rs_processed_buffers, double goal_ms); + size_t activation_threshold(uint worker_id) const; + size_t deactivation_threshold(uint worker_id) const; + // Perform a single refinement step. Called by the refinement threads when woken up. + bool do_refinement_step(uint worker_id); + // Iterate over all concurrent refinement threads applying the given closure. void threads_do(ThreadClosure *tc); - static uint thread_num(); + // Maximum number of refinement threads. + static uint max_num_threads(); void print_threads_on(outputStream* st) const; --- old/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp 2017-11-20 16:09:30.725517657 +0100 +++ new/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp 2017-11-20 16:09:30.318505300 +0100 @@ -25,32 +25,20 @@ #include "precompiled.hpp" #include "gc/g1/g1ConcurrentRefine.hpp" #include "gc/g1/g1ConcurrentRefineThread.hpp" -#include "gc/g1/g1CollectedHeap.inline.hpp" -#include "gc/g1/g1RemSet.hpp" #include "gc/shared/suspendibleThreadSet.hpp" #include "logging/log.hpp" #include "memory/resourceArea.hpp" #include "runtime/handles.inline.hpp" #include "runtime/mutexLocker.hpp" -G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr, - G1ConcurrentRefineThread *next, - uint worker_id_offset, - uint worker_id, - size_t activate, - size_t deactivate) : +G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr, uint worker_id) : ConcurrentGCThread(), - _worker_id_offset(worker_id_offset), _worker_id(worker_id), _active(false), - _next(next), _monitor(NULL), _cr(cr), - _vtime_accum(0.0), - _activation_threshold(activate), - _deactivation_threshold(deactivate) + _vtime_accum(0.0) { - // Each thread has its own monitor. The i-th thread is responsible for signaling // to thread i+1 if the number of buffers in the queue exceeds a threshold for this // thread. Monitors are also used to wake up the threads during termination. @@ -67,13 +55,6 @@ create_and_start(); } -void G1ConcurrentRefineThread::update_thresholds(size_t activate, - size_t deactivate) { - assert(deactivate < activate, "precondition"); - _activation_threshold = activate; - _deactivation_threshold = deactivate; -} - void G1ConcurrentRefineThread::wait_for_completed_buffers() { MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); while (!should_terminate() && !is_active()) { @@ -118,9 +99,9 @@ } size_t buffers_processed = 0; - DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); - log_debug(gc, refine)("Activated %d, on threshold: " SIZE_FORMAT ", current: " SIZE_FORMAT, - _worker_id, _activation_threshold, dcqs.completed_buffers_num()); + log_debug(gc, refine)("Activated worker %d, on threshold: " SIZE_FORMAT ", current: " SIZE_FORMAT, + _worker_id, _cr->activation_threshold(_worker_id), + JavaThread::dirty_card_queue_set().completed_buffers_num()); { SuspendibleThreadSetJoiner sts_join; @@ -131,33 +112,18 @@ continue; // Re-check for termination after yield delay. } - size_t curr_buffer_num = dcqs.completed_buffers_num(); - // If the number of the buffers falls down into the yellow zone, - // that means that the transition period after the evacuation pause has ended. - if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= cr()->yellow_zone()) { - dcqs.set_completed_queue_padding(0); - } - - // Check if we need to activate the next thread. - if ((_next != NULL) && - !_next->is_active() && - (curr_buffer_num > _next->_activation_threshold)) { - _next->activate(); - } - - // Process the next buffer, if there are enough left. - if (!dcqs.refine_completed_buffer_concurrently(_worker_id + _worker_id_offset, _deactivation_threshold)) { - break; // Deactivate, number of buffers fell below threshold. + if (!_cr->do_refinement_step(_worker_id)) { + break; } ++buffers_processed; } } deactivate(); - log_debug(gc, refine)("Deactivated %d, off threshold: " SIZE_FORMAT + log_debug(gc, refine)("Deactivated worker %d, off threshold: " SIZE_FORMAT ", current: " SIZE_FORMAT ", processed: " SIZE_FORMAT, - _worker_id, _deactivation_threshold, - dcqs.completed_buffers_num(), + _worker_id, _cr->deactivation_threshold(_worker_id), + JavaThread::dirty_card_queue_set().completed_buffers_num(), buffers_processed); if (os::supports_vtime()) { --- old/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp 2017-11-20 16:09:32.278564807 +0100 +++ new/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp 2017-11-20 16:09:31.851551843 +0100 @@ -43,43 +43,29 @@ uint _worker_id; uint _worker_id_offset; - // The refinement threads collection is linked list. A predecessor can activate a successor - // when the number of the rset update buffer crosses a certain threshold. A successor - // would self-deactivate when the number of the buffers falls below the threshold. bool _active; - G1ConcurrentRefineThread* _next; Monitor* _monitor; G1ConcurrentRefine* _cr; - // This thread's activation/deactivation thresholds - size_t _activation_threshold; - size_t _deactivation_threshold; - void wait_for_completed_buffers(); void set_active(bool x) { _active = x; } - bool is_active(); - void activate(); + // Deactivate this thread. void deactivate(); bool is_primary() { return (_worker_id == 0); } void run_service(); void stop_service(); - public: - // Constructor - G1ConcurrentRefineThread(G1ConcurrentRefine* cr, G1ConcurrentRefineThread* next, - uint worker_id_offset, uint worker_id, - size_t activate, size_t deactivate); + G1ConcurrentRefineThread(G1ConcurrentRefine* cg1r, uint worker_id); - void update_thresholds(size_t activate, size_t deactivate); - size_t activation_threshold() const { return _activation_threshold; } + bool is_active(); + // Activate this thread. + void activate(); // Total virtual time so far. double vtime_accum() { return _vtime_accum; } - - G1ConcurrentRefine* cr() { return _cr; } }; #endif // SHARE_VM_GC_G1_G1CONCURRENTREFINETHREAD_HPP --- old/src/hotspot/share/gc/g1/g1RemSet.cpp 2017-11-20 16:09:33.795610865 +0100 +++ new/src/hotspot/share/gc/g1/g1RemSet.cpp 2017-11-20 16:09:33.392598629 +0100 @@ -298,7 +298,7 @@ } uint G1RemSet::num_par_rem_sets() { - return MAX2(DirtyCardQueueSet::num_par_ids() + G1ConcurrentRefine::thread_num(), ParallelGCThreads); + return MAX2(DirtyCardQueueSet::num_par_ids() + G1ConcurrentRefine::max_num_threads(), ParallelGCThreads); } void G1RemSet::initialize(size_t capacity, uint max_regions) { --- old/src/hotspot/share/gc/g1/g1RemSetSummary.cpp 2017-11-20 16:09:35.295656406 +0100 +++ new/src/hotspot/share/gc/g1/g1RemSetSummary.cpp 2017-11-20 16:09:34.892644171 +0100 @@ -86,7 +86,7 @@ _num_processed_buf_mutator(0), _num_processed_buf_rs_threads(0), _num_coarsenings(0), - _num_vtimes(G1ConcurrentRefine::thread_num()), + _num_vtimes(G1ConcurrentRefine::max_num_threads()), _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)), _sampling_thread_vtime(0.0f) { @@ -99,7 +99,7 @@ _num_processed_buf_mutator(0), _num_processed_buf_rs_threads(0), _num_coarsenings(0), - _num_vtimes(G1ConcurrentRefine::thread_num()), + _num_vtimes(G1ConcurrentRefine::max_num_threads()), _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)), _sampling_thread_vtime(0.0f) { update();