--- old/src/hotspot/share/gc/parallel/psCompactionManager.cpp	2019-11-15 10:18:11.594961293 +0100
+++ new/src/hotspot/share/gc/parallel/psCompactionManager.cpp	2019-11-15 10:18:11.218959429 +0100
@@ -50,6 +50,8 @@
 ObjectStartArray*    ParCompactionManager::_start_array = NULL;
 ParMarkBitMap*       ParCompactionManager::_mark_bitmap = NULL;
 RegionTaskQueueSet*  ParCompactionManager::_region_array = NULL;
+GrowableArray<size_t>* ParCompactionManager::_shadow_region_array = NULL;
+Monitor*             ParCompactionManager::_shadow_region_monitor = NULL;
 
 ParCompactionManager::ParCompactionManager()
     : _action(CopyAndUpdate) {
@@ -100,6 +102,13 @@
     "Could not create ParCompactionManager");
   assert(ParallelScavengeHeap::heap()->workers().total_workers() != 0,
     "Not initialized?");
+
+  _shadow_region_array = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<size_t>(10, true);
+  guarantee(_shadow_region_array != NULL, "Could not allocate shadow_region_array");
+
+  _shadow_region_monitor = new Monitor(Mutex::barrier, "CompactionManager monitor",
+                                       Mutex::_allow_vm_block_flag, Monitor::_safepoint_check_never);
+  guarantee(_shadow_region_monitor != NULL, "Could not allocate shadow_region_monitor");
 }
 
 void ParCompactionManager::reset_all_bitmap_query_caches() {
@@ -164,3 +173,32 @@
     }
   } while (!region_stack()->is_empty());
 }
+
+size_t ParCompactionManager::acquire_shadow_region(PSParallelCompact::RegionData* region_ptr) {
+  while (true) {
+    MutexLocker ml(_shadow_region_monitor, Mutex::_no_safepoint_check_flag);
+    if (_shadow_region_array->is_empty()) {
+      // The corresponding heap region may be available now,
+      // so we do not need to acquire a shadow region anymore;
+      // return 0 to indicate this case.
+      if (region_ptr->claimed()) {
+        return 0;
+      }
+    } else {
+      return _shadow_region_array->pop();
+    }
+  }
+}
+
+void ParCompactionManager::release_shadow_region(size_t shadow_region) {
+  MutexLocker ml(_shadow_region_monitor, Mutex::_no_safepoint_check_flag);
+  _shadow_region_array->append(shadow_region);
+}
+
+void ParCompactionManager::enqueue_shadow_region(size_t shadow_region) {
+  // No locking: only called while the shadow region set is built up
+  // single-threaded (see PSParallelCompact::enqueue_shadow_region).
+  _shadow_region_array->append(shadow_region);
+}
+
+void ParCompactionManager::dequeue_shadow_region() {
+  // Not a single-element dequeue: this drops all remaining shadow regions.
+  // It is called once per GC from PSParallelCompact::post_compact.
+  _shadow_region_array->clear();
+}
\ No newline at end of file
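Note: acquire_shadow_region() and release_shadow_region() above form a
mutex-guarded LIFO free list: release appends to _shadow_region_array and
acquire pops from its end, so the most recently returned shadow region (the
one most likely still cache-warm) is handed out first. The standalone C++
sketch below restates just that protocol; the class and all names in it are
hypothetical, not HotSpot code:

    #include <cstddef>
    #include <functional>
    #include <mutex>
    #include <vector>

    class ShadowRegionPool {
      std::mutex _lock;
      std::vector<std::size_t> _free;   // LIFO: last released, first reused
    public:
      // Like release_shadow_region(): return a shadow region to the pool.
      void release(std::size_t idx) {
        std::lock_guard<std::mutex> g(_lock);
        _free.push_back(idx);
      }
      // Like acquire_shadow_region(): loop until a shadow region is free, or
      // until the heap region itself has been claimed, in which case 0 is
      // returned and the caller fills the heap region directly.
      std::size_t acquire(const std::function<bool()>& heap_region_claimed) {
        while (true) {
          std::lock_guard<std::mutex> g(_lock);
          if (!_free.empty()) {
            std::size_t idx = _free.back();
            _free.pop_back();
            return idx;
          }
          if (heap_region_claimed()) {
            return 0;
          }
        }
      }
    };

The zero sentinel appears safe because shadow regions are taken from the
empty tail of a space, so a real shadow region never has index 0.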
--- old/src/hotspot/share/gc/parallel/psCompactionManager.hpp	2019-11-15 10:18:12.150964050 +0100
+++ new/src/hotspot/share/gc/parallel/psCompactionManager.hpp	2019-11-15 10:18:11.718961908 +0100
@@ -28,6 +28,7 @@
 #include "gc/shared/taskqueue.hpp"
 #include "memory/allocation.hpp"
 #include "utilities/stack.hpp"
+#include "gc/parallel/psParallelCompact.hpp"
 
 class MutableSpace;
 class PSOldGen;
@@ -77,6 +78,7 @@
  private:
   OverflowTaskQueue<oop, mtGC>        _marking_stack;
   ObjArrayTaskQueue                   _objarray_stack;
+  size_t                              _shadow_record;
 
   // Is there a way to reuse the _marking_stack for the
   // saving empty regions?  For now just create a different
@@ -85,6 +87,14 @@
 
   static ParMarkBitMap* _mark_bitmap;
 
+  // The shadow region array; it is used in a LIFO fashion so that
+  // shadow regions can be reused, for better data locality and
+  // utilization.
+  static GrowableArray<size_t>* _shadow_region_array;
+
+  // This Monitor provides mutually exclusive access to _shadow_region_array.
+  static Monitor* _shadow_region_monitor;
+
   Action _action;
 
   HeapWord* _last_query_beg;
@@ -109,6 +119,14 @@
   // marking stack and overflow stack directly.
 
  public:
+  static size_t acquire_shadow_region(PSParallelCompact::RegionData* region_ptr);
+  static void release_shadow_region(size_t shadow_region);
+  static void enqueue_shadow_region(size_t shadow_region);
+  static void dequeue_shadow_region();
+  inline size_t shadow_record() { return _shadow_record; }
+  inline void set_shadow_record(size_t record) { _shadow_record = record; }
+  inline size_t next_shadow_record(size_t workers) { _shadow_record += workers; return shadow_record(); }
+
   void reset_bitmap_query_cache() {
     _last_query_beg = NULL;
     _last_query_obj = NULL;
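Note: shadow_record(), set_shadow_record() and next_shadow_record(workers)
give each GC worker an arithmetic cursor over the shadow-region candidates:
worker i starts at first_candidate + i and strides by the number of active
workers, so the workers scan disjoint region indices with no shared state
(see initialize_steal_record() and steal_shadow_region() in
psParallelCompact.cpp below). A minimal illustration of that partition,
with made-up numbers:

    #include <cstddef>
    #include <cstdio>

    int main() {
      const std::size_t first   = 100; // e.g. first region past the dense prefix
      const std::size_t limit   = 110; // e.g. region index of old-space new_top
      const unsigned    workers = 4;
      for (unsigned id = 0; id < workers; ++id) {
        std::printf("worker %u scans:", id);
        // Same stride as next_shadow_record(): disjoint, round-robin coverage.
        for (std::size_t r = first + id; r < limit; r += workers) {
          std::printf(" %zu", r);
        }
        std::printf("\n");
      }
      return 0;
    }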
--- old/src/hotspot/share/gc/parallel/psParallelCompact.cpp	2019-11-15 10:18:12.718966867 +0100
+++ new/src/hotspot/share/gc/parallel/psParallelCompact.cpp	2019-11-15 10:18:12.282964705 +0100
@@ -125,6 +125,11 @@
 const ParallelCompactData::RegionData::region_sz_t
 ParallelCompactData::RegionData::dc_completed = 0xcU << dc_shift;
 
+const int ParallelCompactData::RegionData::UNUSED = 0;
+const int ParallelCompactData::RegionData::SHADOW = 1;
+const int ParallelCompactData::RegionData::FILLED = 2;
+const int ParallelCompactData::RegionData::FINISH = 3;
+
 SpaceInfo PSParallelCompact::_space_info[PSParallelCompact::last_space_id];
 
 SpanSubjectToDiscoveryClosure PSParallelCompact::_span_based_discoverer;
@@ -1023,6 +1028,7 @@
 void PSParallelCompact::post_compact()
 {
   GCTraceTime(Info, gc, phases) tm("Post Compact", &_gc_timer);
+  ParCompactionManager::dequeue_shadow_region();
 
   for (unsigned int id = old_space_id; id < last_space_id; ++id) {
     // Clear the marking bitmap, summary data and split info.
@@ -2417,8 +2423,10 @@
   for (size_t cur = end_region - 1; cur + 1 > beg_region; --cur) {
     if (sd.region(cur)->claim_unsafe()) {
       ParCompactionManager* cm = ParCompactionManager::manager_array(worker_id);
-      cm->region_stack()->push(cur);
-      region_logger.handle(cur);
+      if (sd.region(cur)->try_push()) {
+        cm->region_stack()->push(cur);
+        region_logger.handle(cur);
+      }
       // Assign regions to tasks in round-robin fashion.
       if (++worker_id == parallel_gc_threads) {
         worker_id = 0;
@@ -2598,10 +2606,15 @@
 
   size_t region_index = 0;
 
+  PSParallelCompact::initialize_steal_record(worker_id);
   while (true) {
     if (ParCompactionManager::steal(worker_id, region_index)) {
       PSParallelCompact::fill_and_update_region(cm, region_index);
       cm->drain_region_stacks();
+    } else if (PSParallelCompact::steal_shadow_region(cm, region_index)) {
+      // Keep working with the help of shadow regions
+      PSParallelCompact::fill_and_update_shadow_region(cm, region_index);
+      cm->drain_region_stacks();
     } else {
       if (terminator->offer_termination()) {
         break;
@@ -2656,6 +2669,7 @@
   //
   // max push count is thus: last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1)
   TaskQueue task_queue(last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1));
+  enqueue_shadow_region();
   prepare_region_draining_tasks(active_gc_threads);
   enqueue_dense_prefix_tasks(task_queue, active_gc_threads);
 
@@ -2962,7 +2976,15 @@
     assert(cur->data_size() > 0, "region must have live data");
     cur->decrement_destination_count();
     if (cur < enqueue_end && cur->available() && cur->claim()) {
-      cm->push_region(sd.region(cur));
+      if (cur->try_push()) {
+        cm->push_region(sd.region(cur));
+      } else if (cur->try_copy()) {
+        // A stealing thread has already filled the shadow region for this
+        // heap region, so copy its content back and finish the region here.
+        copy_back(sd.region_to_addr(cur->shadow_region()), sd.region_to_addr(cur));
+        cm->release_shadow_region(cur->shadow_region());
+        cur->set_completed();
+      }
     }
   }
 }
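Note: the try_push()/try_copy() pair above resolves the race between the
thread that claims a destination region here and an idle thread that may
have stolen the same region and filled a shadow region for it: exactly one
side must perform the copy-back. The sketch below compresses that handshake
into plain std::atomic C++ (hypothetical helpers; the real code routes the
"claimed" test through RegionData::claim()/claimed()):

    #include <atomic>

    enum State { UNUSED = 0, SHADOW, FILLED, FINISH };

    static bool cas(std::atomic<int>& s, int expected, int desired) {
      return s.compare_exchange_strong(expected, desired);
    }

    // Claiming thread (the decrement_destination_counts() path above);
    // assumes it has already claimed the heap region itself.
    void claimer(std::atomic<int>& state) {
      if (cas(state, UNUSED, FINISH)) {        // try_push()
        /* fill the heap region in place */
      } else if (cas(state, FILLED, FINISH)) { // try_copy()
        /* copy the shadow region back, release it, set_completed() */
      }
      // Otherwise the stealer is still filling; it will copy back itself.
    }

    // Stealing thread (the ShadowClosure::complete_region() path below).
    void stealer(std::atomic<int>& state, bool heap_region_claimed) {
      if (!cas(state, UNUSED, SHADOW)) return; // try_steal()
      /* fill the shadow region */
      state.store(FILLED);                     // mark_filled()
      if (heap_region_claimed && cas(state, FILLED, FINISH)) {
        /* copy back and release the shadow region */
      }
    }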
@@ -3040,7 +3062,7 @@
   return 0;
 }
 
-void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx)
+void PSParallelCompact::fill_region(ParCompactionManager* cm, MoveAndUpdateClosure& closure, size_t region_idx)
 {
   typedef ParMarkBitMap::IterationStatus IterationStatus;
   const size_t RegionSize = ParallelCompactData::RegionSize;
@@ -3048,20 +3070,12 @@
   ParallelCompactData& sd = summary_data();
   RegionData* const region_ptr = sd.region(region_idx);
 
-  // Get the items needed to construct the closure.
-  HeapWord* dest_addr = sd.region_to_addr(region_idx);
-  SpaceId dest_space_id = space_id(dest_addr);
-  ObjectStartArray* start_array = _space_info[dest_space_id].start_array();
-  HeapWord* new_top = _space_info[dest_space_id].new_top();
-  assert(dest_addr < new_top, "sanity");
-  const size_t words = MIN2(pointer_delta(new_top, dest_addr), RegionSize);
-
   // Get the source region and related info.
   size_t src_region_idx = region_ptr->source_region();
   SpaceId src_space_id = space_id(sd.region_to_addr(src_region_idx));
   HeapWord* src_space_top = _space_info[src_space_id].space()->top();
+  HeapWord* dest_addr = sd.region_to_addr(region_idx);
 
-  MoveAndUpdateClosure closure(bitmap, cm, start_array, dest_addr, words);
   closure.set_source(first_src_addr(dest_addr, src_space_id, src_region_idx));
 
   // Adjust src_region_idx to prepare for decrementing destination counts (the
@@ -3080,7 +3094,7 @@
     decrement_destination_counts(cm, src_space_id, src_region_idx,
                                  closure.source());
     region_ptr->set_deferred_obj_addr(NULL);
-    region_ptr->set_completed();
+    closure.complete_region(cm, dest_addr, region_ptr);
     return;
   }
 
@@ -3125,11 +3139,12 @@
       // The last object did not fit.  Note that interior oop updates were
      // deferred, then copy enough of the object to fill the region.
       region_ptr->set_deferred_obj_addr(closure.destination());
+      status = closure.copy_until_full(); // copies from closure.source()
 
       decrement_destination_counts(cm, src_space_id, src_region_idx,
                                    closure.source());
-      region_ptr->set_completed();
+      closure.complete_region(cm, dest_addr, region_ptr);
       return;
     }
 
@@ -3137,7 +3152,7 @@
       decrement_destination_counts(cm, src_space_id, src_region_idx,
                                    closure.source());
       region_ptr->set_deferred_obj_addr(NULL);
-      region_ptr->set_completed();
+      closure.complete_region(cm, dest_addr, region_ptr);
       return;
     }
 
@@ -3150,6 +3165,76 @@
   } while (true);
 }
 
+void PSParallelCompact::fill_and_update_region(ParCompactionManager* cm, size_t region_idx) {
+  MoveAndUpdateClosure cl(mark_bitmap(), cm, region_idx);
+  fill_region(cm, cl, region_idx);
+}
+
+void PSParallelCompact::fill_shadow_region(ParCompactionManager* cm, size_t region_idx)
+{
+  // First acquire a shadow region.
+  ParallelCompactData& sd = summary_data();
+  RegionData* const region_ptr = sd.region(region_idx);
+  size_t shadow_region = cm->acquire_shadow_region(region_ptr);
+  // A zero return value indicates that the corresponding heap region is
+  // available, so fill it directly with MoveAndUpdateClosure; otherwise fill
+  // the acquired shadow region with ShadowClosure.
+  if (shadow_region == 0) {
+    MoveAndUpdateClosure cl(mark_bitmap(), cm, region_idx);
+    region_ptr->mark_normal();
+    return fill_region(cm, cl, region_idx);
+  } else {
+    ShadowClosure cl(mark_bitmap(), cm, region_idx, shadow_region);
+    return fill_region(cm, cl, region_idx);
+  }
+}
+
+void PSParallelCompact::copy_back(HeapWord* shadow_addr, HeapWord* region_addr) {
+  Copy::aligned_conjoint_words(shadow_addr, region_addr, _summary_data.RegionSize);
+}
+
+bool PSParallelCompact::steal_shadow_region(ParCompactionManager* cm, size_t& region_idx) {
+  size_t record = cm->shadow_record();
+  ParallelCompactData& sd = _summary_data;
+  size_t old_new_top = sd.addr_to_region_idx(_space_info[old_space_id].new_top());
+  uint active_gc_threads = ParallelScavengeHeap::heap()->workers().active_workers();
+
+  while (record < old_new_top) {
+    if (sd.region(record)->try_steal()) {
+      region_idx = record;
+      return true;
+    }
+    record = cm->next_shadow_record(active_gc_threads);
+  }
+
+  return false;
+}
+
+void PSParallelCompact::enqueue_shadow_region() {
+  const ParallelCompactData& sd = PSParallelCompact::summary_data();
+
+  for (unsigned int id = old_space_id; id < last_space_id; ++id) {
+    SpaceInfo* const space_info = _space_info + id;
+    MutableSpace* const space = space_info->space();
+
+    const size_t beg_region =
+      sd.addr_to_region_idx(sd.region_align_up(MAX2(space_info->new_top(), space->top())));
+    const size_t end_region =
+      sd.addr_to_region_idx(sd.region_align_down(space->end()));
+
+    for (size_t cur = beg_region + 1; cur < end_region; ++cur) {
+      ParCompactionManager::enqueue_shadow_region(cur);
+    }
+  }
+}
+
+void PSParallelCompact::initialize_steal_record(uint which) {
+  ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(which);
+
+  size_t record = _summary_data.addr_to_region_idx(_space_info[old_space_id].dense_prefix());
+  cm->set_shadow_record(record + which);
+}
+
 void PSParallelCompact::fill_blocks(size_t region_idx)
 {
   // Fill in the block table elements for the specified region.  Each block
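Note: fill_region() is now parameterized over the closure, so one filling
loop serves both the normal and the shadow path; the paths differ only in
how a finished region is completed, which is what the virtual
complete_region() hook expresses. A stripped-down sketch of that dispatch
(hypothetical types, not the HotSpot classes):

    #include <cstdio>

    struct Closure {
      virtual ~Closure() {}
      virtual void complete_region() = 0;
    };

    struct MoveAndUpdate : Closure {   // normal path: wrote into the heap region
      void complete_region() override { std::puts("mark region completed"); }
    };

    struct Shadow : Closure {          // steal path: wrote into a shadow region
      void complete_region() override {
        std::puts("mark filled; copy back now if the heap region is claimed");
      }
    };

    void fill_region(Closure& c) {
      /* ... copy objects and update oops, identically for both paths ... */
      c.complete_region();             // only the ending differs
    }

    int main() {
      MoveAndUpdate m;
      Shadow s;
      fill_region(m);
      fill_region(s);
      return 0;
    }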
@@ -3222,9 +3307,9 @@
 
 ParMarkBitMap::IterationStatus MoveAndUpdateClosure::copy_until_full()
 {
-  if (source() != destination()) {
+  if (source() != copy_destination()) {
     DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());)
-    Copy::aligned_conjoint_words(source(), destination(), words_remaining());
+    Copy::aligned_conjoint_words(source(), copy_destination(), words_remaining());
   }
   update_state(words_remaining());
   assert(is_full(), "sanity");
@@ -3243,13 +3328,19 @@
 
   // This test is necessary; if omitted, the pointer updates to a partial object
   // that crosses the dense prefix boundary could be overwritten.
-  if (source() != destination()) {
+  if (source() != copy_destination()) {
     DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());)
-    Copy::aligned_conjoint_words(source(), destination(), words);
+    Copy::aligned_conjoint_words(source(), copy_destination(), words);
   }
   update_state(words);
 }
 
+void MoveAndUpdateClosure::complete_region(ParCompactionManager* cm, HeapWord* dest_addr,
+                                           PSParallelCompact::RegionData* region_ptr) {
+  assert(region_ptr->shadow_state() == ParallelCompactData::RegionData::FINISH, "Region should be finished");
+  region_ptr->set_completed();
+}
+
 ParMarkBitMapClosure::IterationStatus
 MoveAndUpdateClosure::do_addr(HeapWord* addr, size_t words) {
   assert(destination() != NULL, "sanity");
@@ -3268,20 +3359,36 @@
     _start_array->allocate_block(destination());
   }
 
-  if (destination() != source()) {
+  if (copy_destination() != source()) {
     DEBUG_ONLY(PSParallelCompact::check_new_location(source(), destination());)
-    Copy::aligned_conjoint_words(source(), destination(), words);
+    Copy::aligned_conjoint_words(source(), copy_destination(), words);
   }
 
-  oop moved_oop = (oop) destination();
+  oop moved_oop = (oop) copy_destination();
   compaction_manager()->update_contents(moved_oop);
   assert(oopDesc::is_oop_or_null(moved_oop), "Expected an oop or NULL at " PTR_FORMAT, p2i(moved_oop));
 
   update_state(words);
-  assert(destination() == (HeapWord*)moved_oop + moved_oop->size(), "sanity");
+  assert(copy_destination() == (HeapWord*)moved_oop + moved_oop->size(), "sanity");
   return is_full() ? ParMarkBitMap::full : ParMarkBitMap::incomplete;
 }
 
+void ShadowClosure::complete_region(ParCompactionManager* cm, HeapWord* dest_addr,
+                                    PSParallelCompact::RegionData* region_ptr) {
+  assert(region_ptr->shadow_state() == ParallelCompactData::RegionData::SHADOW, "Region should be shadow");
+  // Record the shadow region index
+  region_ptr->set_shadow_region(_shadow);
+  // Mark the shadow region as filled
+  region_ptr->mark_filled();
+  // Try to copy the content of the shadow region back to its corresponding
+  // heap region if that region is available
+  if (((region_ptr->available() && region_ptr->claim()) || region_ptr->claimed()) && region_ptr->try_copy()) {
+    region_ptr->set_completed();
+    PSParallelCompact::copy_back(PSParallelCompact::summary_data().region_to_addr(_shadow), dest_addr);
+    cm->release_shadow_region(_shadow);
+  }
+}
+
 UpdateOnlyClosure::UpdateOnlyClosure(ParMarkBitMap* mbm,
                                      ParCompactionManager* cm,
                                      PSParallelCompact::SpaceId space_id) :
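Note: after this change every physical copy in MoveAndUpdateClosure goes to
copy_destination() == destination() + _offset, while destination() keeps
denoting the logical address in the heap region (check_new_location(), the
start array and the deferred-object bookkeeping still use it). On the normal
path _offset is 0, so behavior is unchanged; a ShadowClosure redirects the
writes into its shadow region. A numeric sketch of the arithmetic, with
made-up word indices (not HotSpot code):

    #include <cassert>
    #include <cstddef>

    int main() {
      std::size_t region_base = 1024;  // heap region start (word index)
      std::size_t shadow_base = 4096;  // shadow region start (word index)
      std::size_t offset = shadow_base - region_base; // calculate_shadow_offset()

      std::size_t destination = region_base + 10;     // logical cursor
      std::size_t copy_dest   = destination + offset; // copy_destination()
      assert(copy_dest == shadow_base + 10);          // writes land in the shadow region
      return 0;
    }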
--- old/src/hotspot/share/gc/parallel/psParallelCompact.hpp	2019-11-15 10:18:13.358970040 +0100
+++ new/src/hotspot/share/gc/parallel/psParallelCompact.hpp	2019-11-15 10:18:12.918967858 +0100
@@ -239,6 +239,9 @@
     // The first region containing data destined for this region.
     size_t source_region() const { return _source_region; }
 
+    // Reuse _source_region to store the corresponding shadow region index
+    size_t shadow_region() const { return _source_region; }
+
     // The object (if any) starting in this region and ending in a different
     // region that could not be updated during the main (parallel) compaction
     // phase.  This is different from _partial_obj_addr, which is an object that
@@ -307,6 +310,7 @@
     // These are not atomic.
     void set_destination(HeapWord* addr)       { _destination = addr; }
     void set_source_region(size_t region)      { _source_region = region; }
+    void set_shadow_region(size_t region)      { _source_region = region; }
     void set_deferred_obj_addr(HeapWord* addr) { _partial_obj_addr = addr; }
     void set_partial_obj_addr(HeapWord* addr)  { _partial_obj_addr = addr; }
     void set_partial_obj_size(size_t words)    {
@@ -326,6 +330,30 @@
     inline void decrement_destination_count();
     inline bool claim();
 
+    // Possible values of _shadow_state, with the following transitions:
+    // Normal path:
+    //   UNUSED -> try_push() -> FINISH
+    // Steal path:
+    //   UNUSED -> try_steal() -> SHADOW -> mark_filled() -> FILLED -> try_copy() -> FINISH
+    static const int UNUSED; // Original state
+    static const int SHADOW; // Stolen by an idle thread, and a shadow region is created for it
+    static const int FILLED; // Its shadow region has been filled and is ready to be copied back
+    static const int FINISH; // Work has been done
+
+    // Preempt the region to avoid double processing
+    inline bool try_push();
+    inline bool try_steal();
+    // Mark the region as filled and ready to be copied back
+    inline void mark_filled();
+    // Preempt the region to copy the shadow region content back
+    inline bool try_copy();
+    // Special case: see the comment in PSParallelCompact::fill_shadow_region.
+    // Return to the normal path here
+    inline void mark_normal();
+
+    int shadow_state() { return _shadow_state; }
+
   private:
     // The type used to represent object sizes within a region.
     typedef uint region_sz_t;
@@ -346,6 +374,7 @@
     region_sz_t          _partial_obj_size;
     region_sz_t volatile _dc_and_los;
     bool        volatile _blocks_filled;
+    int         volatile _shadow_state;
 
 #ifdef ASSERT
     size_t               _blocks_filled_count;   // Number of block table fills.
@@ -596,6 +625,28 @@
   return old == los;
 }
 
+inline bool ParallelCompactData::RegionData::try_push() {
+  return Atomic::cmpxchg(FINISH, &_shadow_state, UNUSED) == UNUSED;
+}
+
+inline bool ParallelCompactData::RegionData::try_steal() {
+  return Atomic::cmpxchg(SHADOW, &_shadow_state, UNUSED) == UNUSED;
+}
+
+inline void ParallelCompactData::RegionData::mark_filled() {
+  int old = Atomic::cmpxchg(FILLED, &_shadow_state, SHADOW);
+  assert(old == SHADOW, "Failed to mark the region as filled");
+}
+
+inline bool ParallelCompactData::RegionData::try_copy() {
+  return Atomic::cmpxchg(FINISH, &_shadow_state, FILLED) == FILLED;
+}
+
+inline void ParallelCompactData::RegionData::mark_normal() {
+  int old = Atomic::cmpxchg(FINISH, &_shadow_state, SHADOW);
+  assert(old == SHADOW, "Failed to mark the region as finished");
+}
+
 inline ParallelCompactData::RegionData*
 ParallelCompactData::region(size_t region_idx) const
 {
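Note: the cmpxchg helpers above implement the state machine documented in
the class comment: every transition out of UNUSED and into FINISH is a CAS,
so a region can be pushed, stolen and copied back at most once each. A
standalone test-style sketch of the steal path, using std::atomic in place
of HotSpot's Atomic::cmpxchg:

    #include <atomic>
    #include <cassert>

    enum { UNUSED = 0, SHADOW = 1, FILLED = 2, FINISH = 3 };

    int main() {
      std::atomic<int> state(UNUSED);
      int exp;

      exp = UNUSED;  // try_steal(): an idle thread diverts the region
      assert(state.compare_exchange_strong(exp, SHADOW));

      exp = UNUSED;  // try_push() must now fail for everyone else
      assert(!state.compare_exchange_strong(exp, FINISH));

      state.store(FILLED);  // mark_filled(): shadow region fully written

      exp = FILLED;  // try_copy(): exactly one thread wins the copy-back
      assert(state.compare_exchange_strong(exp, FINISH));

      exp = FILLED;  // a second try_copy() loses
      assert(!state.compare_exchange_strong(exp, FINISH));
      return 0;
    }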
@@ -1179,11 +1230,20 @@
                                           size_t beg_region,
                                           HeapWord* end_addr);
 
-  // Fill a region, copying objects from one or more source regions.
-  static void fill_region(ParCompactionManager* cm, size_t region_idx);
-  static void fill_and_update_region(ParCompactionManager* cm, size_t region) {
-    fill_region(cm, region);
+  static void fill_region(ParCompactionManager* cm, MoveAndUpdateClosure& closure, size_t region);
+  static void fill_and_update_region(ParCompactionManager* cm, size_t region);
+
+  static bool steal_shadow_region(ParCompactionManager* cm, size_t& region_idx);
+  static void fill_shadow_region(ParCompactionManager* cm, size_t region_idx);
+  static void fill_and_update_shadow_region(ParCompactionManager* cm, size_t region) {
+    fill_shadow_region(cm, region);
   }
+  // Copy the content of a shadow region back to its corresponding heap region
+  static void copy_back(HeapWord* shadow_addr, HeapWord* region_addr);
+  // Initialize the steal record of a GC thread
+  static void initialize_steal_record(uint which);
+  // Reuse the empty heap regions as shadow regions, similar to to-space regions
+  static void enqueue_shadow_region();
 
   // Fill in the block table for the specified region.
   static void fill_blocks(size_t region_idx);
@@ -1230,19 +1290,20 @@
 };
 
 class MoveAndUpdateClosure: public ParMarkBitMapClosure {
+  static inline size_t calculate_words_remaining(size_t region);
 public:
   inline MoveAndUpdateClosure(ParMarkBitMap* bitmap, ParCompactionManager* cm,
-                              ObjectStartArray* start_array,
-                              HeapWord* destination, size_t words);
+                              size_t region);
 
   // Accessors.
   HeapWord* destination() const         { return _destination; }
+  HeapWord* copy_destination() const    { return _destination + _offset; }
 
   // If the object will fit (size <= words_remaining()), copy it to the current
   // destination, update the interior oops and the start array and return either
   // full (if the closure is full) or incomplete.  If the object will not fit,
   // return would_overflow.
-  virtual IterationStatus do_addr(HeapWord* addr, size_t size);
+  IterationStatus do_addr(HeapWord* addr, size_t size);
 
   // Copy enough words to fill this closure, starting at source().  Interior
   // oops and the start array are not updated.  Return full.
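Note: two details in the hunks below are easy to miss. First, _destination
is moved above _start_array because members are initialized in declaration
order and the new constructor derives _start_array from _destination.
Second, calculate_words_remaining() reproduces the computation that used to
live in fill_region(): a full interior region processes RegionSize words,
the last region of a space only what lies below new_top. A numeric sketch
with made-up values (not HotSpot code):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>

    int main() {
      const std::size_t RegionSize = 64; // words per region (illustrative)
      std::size_t dest_addr = 1000;      // region start, in words
      std::size_t new_top   = 1040;      // end of live data in this space
      // MIN2(pointer_delta(new_top, dest_addr), RegionSize) in the patch:
      std::size_t words = std::min(new_top - dest_addr, RegionSize);
      assert(words == 40);               // partial last region
      return 0;
    }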
@@ -1253,25 +1314,37 @@
   // array are not updated.
   void copy_partial_obj();
 
- protected:
+  virtual void complete_region(ParCompactionManager* cm, HeapWord* dest_addr,
+                               PSParallelCompact::RegionData* region_ptr);
+
+protected:
   // Update variables to indicate that word_count words were processed.
   inline void update_state(size_t word_count);
 
 protected:
-  ObjectStartArray* const _start_array;
   HeapWord*               _destination;         // Next addr to be written.
+  ObjectStartArray* const _start_array;
+  size_t                  _offset;
 };
 
+inline size_t MoveAndUpdateClosure::calculate_words_remaining(size_t region) {
+  HeapWord* dest_addr = PSParallelCompact::summary_data().region_to_addr(region);
+  PSParallelCompact::SpaceId dest_space_id = PSParallelCompact::space_id(dest_addr);
+  HeapWord* new_top = PSParallelCompact::new_top(dest_space_id);
+  assert(dest_addr < new_top, "sanity");
+
+  return MIN2(pointer_delta(new_top, dest_addr), ParallelCompactData::RegionSize);
+}
+
 inline
 MoveAndUpdateClosure::MoveAndUpdateClosure(ParMarkBitMap* bitmap,
                                            ParCompactionManager* cm,
-                                           ObjectStartArray* start_array,
-                                           HeapWord* destination,
-                                           size_t words) :
-  ParMarkBitMapClosure(bitmap, cm, words), _start_array(start_array)
-{
-  _destination = destination;
-}
+                                           size_t region_idx) :
+  ParMarkBitMapClosure(bitmap, cm, calculate_words_remaining(region_idx)),
+  _destination(PSParallelCompact::summary_data().region_to_addr(region_idx)),
+  _start_array(PSParallelCompact::start_array(PSParallelCompact::space_id(_destination))),
+  _offset(0) { }
+
 inline void MoveAndUpdateClosure::update_state(size_t words)
 {
@@ -1280,6 +1353,36 @@
   _destination += words;
 }
 
+class ShadowClosure: public MoveAndUpdateClosure {
+  inline size_t calculate_shadow_offset(size_t region_idx, size_t shadow_idx);
+public:
+  inline ShadowClosure(ParMarkBitMap* bitmap, ParCompactionManager* cm,
+                       size_t region, size_t shadow);
+
+  virtual void complete_region(ParCompactionManager* cm, HeapWord* dest_addr,
+                               PSParallelCompact::RegionData* region_ptr);
+
+private:
+  size_t _shadow;
+};
+
+inline size_t ShadowClosure::calculate_shadow_offset(size_t region_idx, size_t shadow_idx) {
+  ParallelCompactData& sd = PSParallelCompact::summary_data();
+  HeapWord* dest_addr = sd.region_to_addr(region_idx);
+  HeapWord* shadow_addr = sd.region_to_addr(shadow_idx);
+  return pointer_delta(shadow_addr, dest_addr);
+}
+
+inline
+ShadowClosure::ShadowClosure(ParMarkBitMap* bitmap,
+                             ParCompactionManager* cm,
+                             size_t region,
+                             size_t shadow) :
+  MoveAndUpdateClosure(bitmap, cm, region),
+  _shadow(shadow) {
+  _offset = calculate_shadow_offset(region, shadow);
+}
+
 class UpdateOnlyClosure: public ParMarkBitMapClosure {
  private:
   const PSParallelCompact::SpaceId _space_id;