--- old/src/hotspot/share/gc/g1/g1Analytics.cpp 2019-10-22 14:06:30.684933324 +0200 +++ new/src/hotspot/share/gc/g1/g1Analytics.cpp 2019-10-22 14:06:30.348922778 +0200 @@ -44,11 +44,11 @@ }; // all the same -static double young_cards_per_entry_ratio_defaults[] = { +static double young_card_merge_to_scan_ratio_defaults[] = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; -static double young_only_cost_per_remset_card_ms_defaults[] = { +static double young_only_cost_per_card_scan_ms_defaults[] = { 0.015, 0.01, 0.01, 0.008, 0.008, 0.0055, 0.0055, 0.005 }; @@ -61,7 +61,6 @@ 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0 }; - static double young_other_cost_per_region_ms_defaults[] = { 0.3, 0.2, 0.2, 0.15, 0.15, 0.12, 0.12, 0.1 }; @@ -80,13 +79,13 @@ _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)), _concurrent_refine_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _logged_cards_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), - _cost_per_logged_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), - _cost_scan_hcc_seq(new TruncatedSeq(TruncatedSeqLength)), - _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), - _mixed_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), - _young_only_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), - _mixed_cost_per_remset_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)), - _cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _young_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), + _mixed_card_merge_to_scan_ratio_seq(new TruncatedSeq(TruncatedSeqLength)), + _young_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _mixed_cost_per_card_scan_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _young_cost_per_card_merge_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _mixed_cost_per_card_merge_ms_seq(new TruncatedSeq(TruncatedSeqLength)), + _copy_cost_per_byte_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _constant_other_time_ms_seq(new 
TruncatedSeq(TruncatedSeqLength)), _young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)), _non_young_other_cost_per_region_ms_seq(new TruncatedSeq(TruncatedSeqLength)), @@ -108,11 +107,10 @@ _concurrent_refine_rate_ms_seq->add(1/cost_per_logged_card_ms_defaults[0]); // Some applications have very low rates for logging cards. _logged_cards_rate_ms_seq->add(0.0); - _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms_defaults[index]); - _cost_scan_hcc_seq->add(0.0); - _young_cards_per_entry_ratio_seq->add(young_cards_per_entry_ratio_defaults[index]); - _young_only_cost_per_remset_card_ms_seq->add(young_only_cost_per_remset_card_ms_defaults[index]); - _cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]); + _young_card_merge_to_scan_ratio_seq->add(young_card_merge_to_scan_ratio_defaults[index]); + _young_cost_per_card_scan_ms_seq->add(young_only_cost_per_card_scan_ms_defaults[index]); + + _copy_cost_per_byte_ms_seq->add(cost_per_byte_ms_defaults[index]); _constant_other_time_ms_seq->add(constant_other_time_ms_defaults[index]); _young_other_cost_per_region_ms_seq->add(young_other_cost_per_region_ms_defaults[index]); _non_young_other_cost_per_region_ms_seq->add(non_young_other_cost_per_region_ms_defaults[index]); @@ -173,27 +171,27 @@ _logged_cards_rate_ms_seq->add(cards_per_ms); } -void G1Analytics::report_cost_per_logged_card_ms(double cost_per_logged_card_ms) { - _cost_per_logged_card_ms_seq->add(cost_per_logged_card_ms); -} - -void G1Analytics::report_cost_scan_hcc(double cost_scan_hcc) { - _cost_scan_hcc_seq->add(cost_scan_hcc); +void G1Analytics::report_cost_per_card_scan_ms(double cost_per_card_ms, bool for_young_gc) { + if (for_young_gc) { + _young_cost_per_card_scan_ms_seq->add(cost_per_card_ms); + } else { + _mixed_cost_per_card_scan_ms_seq->add(cost_per_card_ms); + } } -void G1Analytics::report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc) { +void G1Analytics::report_cost_per_card_merge_ms(double 
cost_per_card_ms, bool for_young_gc) { if (for_young_gc) { - _young_only_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms); + _young_cost_per_card_merge_ms_seq->add(cost_per_card_ms); } else { - _mixed_cost_per_remset_card_ms_seq->add(cost_per_remset_card_ms); + _mixed_cost_per_card_merge_ms_seq->add(cost_per_card_ms); } } -void G1Analytics::report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc) { +void G1Analytics::report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bool for_young_gc) { if (for_young_gc) { - _young_cards_per_entry_ratio_seq->add(cards_per_entry_ratio); + _young_card_merge_to_scan_ratio_seq->add(merge_to_scan_ratio); } else { - _mixed_cards_per_entry_ratio_seq->add(cards_per_entry_ratio); + _mixed_card_merge_to_scan_ratio_seq->add(merge_to_scan_ratio); } } @@ -205,7 +203,7 @@ if (mark_or_rebuild_in_progress) { _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms); } else { - _cost_per_byte_ms_seq->add(cost_per_byte_ms); + _copy_cost_per_byte_ms_seq->add(cost_per_byte_ms); } } @@ -241,57 +239,37 @@ return get_new_prediction(_logged_cards_rate_ms_seq); } -double G1Analytics::predict_cost_per_logged_card_ms() const { - return get_new_prediction(_cost_per_logged_card_ms_seq); -} - -double G1Analytics::predict_scan_hcc_ms() const { - return get_new_prediction(_cost_scan_hcc_seq); -} - -double G1Analytics::predict_rs_update_time_ms(size_t pending_cards) const { - return pending_cards * predict_cost_per_logged_card_ms() + predict_scan_hcc_ms(); +double G1Analytics::predict_young_card_merge_to_scan_ratio() const { + return get_new_prediction(_young_card_merge_to_scan_ratio_seq); } -double G1Analytics::predict_young_cards_per_entry_ratio() const { - return get_new_prediction(_young_cards_per_entry_ratio_seq); -} - -double G1Analytics::predict_mixed_cards_per_entry_ratio() const { - if (_mixed_cards_per_entry_ratio_seq->num() < 2) { - return predict_young_cards_per_entry_ratio(); - } else { - return 
get_new_prediction(_mixed_cards_per_entry_ratio_seq); - } -} - -size_t G1Analytics::predict_card_num(size_t rs_length, bool for_young_gc) const { - if (for_young_gc) { - return (size_t) (rs_length * predict_young_cards_per_entry_ratio()); +size_t G1Analytics::predict_scan_card_num(size_t rs_length, bool for_young_gc) const { + if (for_young_gc || _mixed_card_merge_to_scan_ratio_seq->num() < 3) { + return (size_t) (rs_length * predict_young_card_merge_to_scan_ratio()); } else { - return (size_t) (rs_length * predict_mixed_cards_per_entry_ratio()); + return (size_t) (rs_length * get_new_prediction(_mixed_card_merge_to_scan_ratio_seq)); } } -double G1Analytics::predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const { - if (for_young_gc) { - return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq); +double G1Analytics::predict_card_merge_time_ms(size_t card_num, bool for_young_gc) const { + if (for_young_gc || _mixed_cost_per_card_merge_ms_seq->num() < 3) { + return card_num * get_new_prediction(_young_cost_per_card_merge_ms_seq); } else { - return predict_mixed_rs_scan_time_ms(card_num); + return card_num * get_new_prediction(_mixed_cost_per_card_merge_ms_seq); } } -double G1Analytics::predict_mixed_rs_scan_time_ms(size_t card_num) const { - if (_mixed_cost_per_remset_card_ms_seq->num() < 3) { - return card_num * get_new_prediction(_young_only_cost_per_remset_card_ms_seq); +double G1Analytics::predict_card_scan_time_ms(size_t card_num, bool for_young_gc) const { + if (for_young_gc || _mixed_cost_per_card_scan_ms_seq->num() < 3) { + return card_num * get_new_prediction(_young_cost_per_card_scan_ms_seq); } else { - return card_num * get_new_prediction(_mixed_cost_per_remset_card_ms_seq); + return card_num * get_new_prediction(_mixed_cost_per_card_scan_ms_seq); } } double G1Analytics::predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) const { if (_cost_per_byte_ms_during_cm_seq->num() < 3) { - return (1.1 * bytes_to_copy) * 
get_new_prediction(_cost_per_byte_ms_seq); + return (1.1 * bytes_to_copy) * get_new_prediction(_copy_cost_per_byte_ms_seq); } else { return bytes_to_copy * get_new_prediction(_cost_per_byte_ms_during_cm_seq); } @@ -301,14 +279,10 @@ if (during_concurrent_mark) { return predict_object_copy_time_ms_during_cm(bytes_to_copy); } else { - return bytes_to_copy * get_new_prediction(_cost_per_byte_ms_seq); + return bytes_to_copy * get_new_prediction(_copy_cost_per_byte_ms_seq); } } -double G1Analytics::predict_cost_per_byte_ms() const { - return get_new_prediction(_cost_per_byte_ms_seq); -} - double G1Analytics::predict_constant_other_time_ms() const { return get_new_prediction(_constant_other_time_ms_seq); } --- old/src/hotspot/share/gc/g1/g1Analytics.hpp 2019-10-22 14:06:32.276983294 +0200 +++ new/src/hotspot/share/gc/g1/g1Analytics.hpp 2019-10-22 14:06:31.935972591 +0200 @@ -48,13 +48,21 @@ TruncatedSeq* _rs_length_diff_seq; TruncatedSeq* _concurrent_refine_rate_ms_seq; TruncatedSeq* _logged_cards_rate_ms_seq; - TruncatedSeq* _cost_per_logged_card_ms_seq; - TruncatedSeq* _cost_scan_hcc_seq; - TruncatedSeq* _young_cards_per_entry_ratio_seq; - TruncatedSeq* _mixed_cards_per_entry_ratio_seq; - TruncatedSeq* _young_only_cost_per_remset_card_ms_seq; - TruncatedSeq* _mixed_cost_per_remset_card_ms_seq; - TruncatedSeq* _cost_per_byte_ms_seq; + // The ratio between the number of merged cards and actually scanned cards, for + // young-only and mixed gcs. + TruncatedSeq* _young_card_merge_to_scan_ratio_seq; + TruncatedSeq* _mixed_card_merge_to_scan_ratio_seq; + + // The cost to scan a card during young-only and mixed gcs in ms. + TruncatedSeq* _young_cost_per_card_scan_ms_seq; + TruncatedSeq* _mixed_cost_per_card_scan_ms_seq; + + // The cost to merge a card during young-only and mixed gcs in ms. + TruncatedSeq* _young_cost_per_card_merge_ms_seq; + TruncatedSeq* _mixed_cost_per_card_merge_ms_seq; + + // The cost to copy a byte in ms. 
+ TruncatedSeq* _copy_cost_per_byte_ms_seq; TruncatedSeq* _constant_other_time_ms_seq; TruncatedSeq* _young_other_cost_per_region_ms_seq; TruncatedSeq* _non_young_other_cost_per_region_ms_seq; @@ -103,10 +111,9 @@ void report_alloc_rate_ms(double alloc_rate); void report_concurrent_refine_rate_ms(double cards_per_ms); void report_logged_cards_rate_ms(double cards_per_ms); - void report_cost_per_logged_card_ms(double cost_per_logged_card_ms); - void report_cost_scan_hcc(double cost_scan_hcc); - void report_cost_per_remset_card_ms(double cost_per_remset_card_ms, bool for_young_gc); - void report_cards_per_entry_ratio(double cards_per_entry_ratio, bool for_young_gc); + void report_cost_per_card_scan_ms(double cost_per_card_ms, bool for_young_gc); + void report_cost_per_card_merge_ms(double cost_per_card_ms, bool for_young_gc); + void report_card_merge_to_scan_ratio(double merge_to_scan_ratio, bool for_young_gc); void report_rs_length_diff(double rs_length_diff); void report_cost_per_byte_ms(double cost_per_byte_ms, bool mark_or_rebuild_in_progress); void report_young_other_cost_per_region_ms(double other_cost_per_region_ms); @@ -120,21 +127,14 @@ double predict_concurrent_refine_rate_ms() const; double predict_logged_cards_rate_ms() const; - double predict_cost_per_logged_card_ms() const; - - double predict_scan_hcc_ms() const; - - double predict_rs_update_time_ms(size_t pending_cards) const; + double predict_young_card_merge_to_scan_ratio() const; - double predict_young_cards_per_entry_ratio() const; + double predict_mixed_card_merge_to_scan_ratio() const; - double predict_mixed_cards_per_entry_ratio() const; + size_t predict_scan_card_num(size_t rs_length, bool for_young_gc) const; - size_t predict_card_num(size_t rs_length, bool for_young_gc) const; - - double predict_rs_scan_time_ms(size_t card_num, bool for_young_gc) const; - - double predict_mixed_rs_scan_time_ms(size_t card_num) const; + double predict_card_merge_time_ms(size_t card_num, bool 
for_young_gc) const; + double predict_card_scan_time_ms(size_t card_num, bool for_young_gc) const; double predict_object_copy_time_ms_during_cm(size_t bytes_to_copy) const; @@ -153,8 +153,6 @@ size_t predict_rs_length() const; size_t predict_pending_cards() const; - double predict_cost_per_byte_ms() const; - // Add a new GC of the given duration and end time to the record. void update_recent_gc_times(double end_time_sec, double elapsed_ms); void compute_pause_time_ratio(double interval_ms, double pause_time_ms); --- old/src/hotspot/share/gc/g1/g1CardTable.hpp 2019-10-22 14:06:33.811031443 +0200 +++ new/src/hotspot/share/gc/g1/g1CardTable.hpp 2019-10-22 14:06:33.467020646 +0200 @@ -92,12 +92,16 @@ return pointer_delta(p, _byte_map, sizeof(CardValue)); } - // Mark the given card as Dirty if it is Clean. - inline void mark_clean_as_dirty(size_t card_index); + // Mark the given card as Dirty if it is Clean. Returns the number of dirtied + // cards that were not yet dirty. This result may be inaccurate as it does not + // perform the dirtying atomically. + inline size_t mark_clean_as_dirty(size_t card_index); // Change Clean cards in a (large) area on the card table as Dirty, preserving // already scanned cards. Assumes that most cards in that area are Clean. - inline void mark_region_dirty(size_t start_card_index, size_t num_cards); + // Returns the number of dirtied cards that were not yet dirty. This result may + // be inaccurate as it does not perform the dirtying atomically. + inline size_t mark_region_dirty(size_t start_card_index, size_t num_cards); // Mark the given range of cards as Scanned. All of these cards must be Dirty. 
inline void mark_as_scanned(size_t start_card_index, size_t num_cards); --- old/src/hotspot/share/gc/g1/g1CardTable.inline.hpp 2019-10-22 14:06:35.337079341 +0200 +++ new/src/hotspot/share/gc/g1/g1CardTable.inline.hpp 2019-10-22 14:06:34.999068732 +0200 @@ -33,17 +33,21 @@ return (uint)(card_idx >> (HeapRegion::LogOfHRGrainBytes - card_shift)); } -inline void G1CardTable::mark_clean_as_dirty(size_t card_index) { +inline size_t G1CardTable::mark_clean_as_dirty(size_t card_index) { CardValue value = _byte_map[card_index]; if (value == clean_card_val()) { _byte_map[card_index] = dirty_card_val(); + return 1; } + return 0; } -inline void G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) { +inline size_t G1CardTable::mark_region_dirty(size_t start_card_index, size_t num_cards) { assert(is_aligned(start_card_index, sizeof(size_t)), "Start card index must be aligned."); assert(is_aligned(num_cards, sizeof(size_t)), "Number of cards to change must be evenly divisible."); + size_t result = 0; + size_t const num_chunks = num_cards / sizeof(size_t); size_t* cur_word = (size_t*)&_byte_map[start_card_index]; @@ -52,6 +56,7 @@ size_t value = *cur_word; if (value == WordAllClean) { *cur_word = WordAllDirty; + result += sizeof(value); } else if (value == WordAllDirty) { // do nothing. 
} else { @@ -61,12 +66,15 @@ CardValue value = *cur; if (value == clean_card_val()) { *cur = dirty_card_val(); + result++; } cur++; } } cur_word++; } + + return result; } inline void G1CardTable::mark_as_scanned(size_t start_card_index, size_t num_cards) { --- old/src/hotspot/share/gc/g1/g1CollectionSet.cpp 2019-10-22 14:06:36.857127051 +0200 +++ new/src/hotspot/share/gc/g1/g1CollectionSet.cpp 2019-10-22 14:06:36.522116536 +0200 @@ -27,6 +27,7 @@ #include "gc/g1/g1CollectionSet.hpp" #include "gc/g1/g1CollectionSetCandidates.hpp" #include "gc/g1/g1CollectorState.hpp" +#include "gc/g1/g1HotCardCache.hpp" #include "gc/g1/g1ParScanThreadState.hpp" #include "gc/g1/g1Policy.hpp" #include "gc/g1/heapRegion.inline.hpp" @@ -409,7 +410,7 @@ guarantee(target_pause_time_ms > 0.0, "target_pause_time_ms = %1.6lf should be positive", target_pause_time_ms); - size_t pending_cards = _policy->pending_cards_at_gc_start(); + size_t pending_cards = _policy->pending_cards_at_gc_start() + _g1h->hot_card_cache()->num_entries(); double base_time_ms = _policy->predict_base_elapsed_time_ms(pending_cards); double time_remaining_ms = MAX2(target_pause_time_ms - base_time_ms, 0.0); --- old/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp 2019-10-22 14:06:38.510178936 +0200 +++ new/src/hotspot/share/gc/g1/g1GCPhaseTimes.cpp 2019-10-22 14:06:38.136167197 +0200 @@ -72,6 +72,8 @@ _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_fine, MergeRSMergedFine); _merge_rs_merged_coarse = new WorkerDataArray(max_gc_threads, "Merged Coarse:"); _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_merged_coarse, MergeRSMergedCoarse); + _merge_rs_dirty_cards = new WorkerDataArray(max_gc_threads, "Dirty Cards:"); + _gc_par_phases[MergeRS]->link_thread_work_items(_merge_rs_dirty_cards, MergeRSDirtyCards); _gc_par_phases[OptMergeRS] = new WorkerDataArray(max_gc_threads, "Optional Remembered Sets (ms):"); _opt_merge_rs_merged_sparse = new WorkerDataArray(max_gc_threads, "Merged Sparse:"); @@ -80,6 
+82,8 @@ _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_fine, MergeRSMergedFine); _opt_merge_rs_merged_coarse = new WorkerDataArray(max_gc_threads, "Merged Coarse:"); _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_merged_coarse, MergeRSMergedCoarse); + _opt_merge_rs_dirty_cards = new WorkerDataArray(max_gc_threads, "Dirty Cards:"); + _gc_par_phases[OptMergeRS]->link_thread_work_items(_opt_merge_rs_dirty_cards, MergeRSDirtyCards); _gc_par_phases[MergeLB] = new WorkerDataArray(max_gc_threads, "Log Buffers (ms):"); if (G1HotCardCache::default_use_cache()) { @@ -310,10 +314,16 @@ // return the average time for a phase in milliseconds double G1GCPhaseTimes::average_time_ms(GCParPhases phase) { + if (_gc_par_phases[phase] == NULL) { + return 0.0; + } return _gc_par_phases[phase]->average() * 1000.0; } size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase, uint index) { + if (_gc_par_phases[phase] == NULL) { + return 0; + } assert(_gc_par_phases[phase]->thread_work_items(index) != NULL, "No sub count"); return _gc_par_phases[phase]->thread_work_items(index)->sum(); } --- old/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp 2019-10-22 14:06:40.046227148 +0200 +++ new/src/hotspot/share/gc/g1/g1GCPhaseTimes.hpp 2019-10-22 14:06:39.694216099 +0200 @@ -87,7 +87,8 @@ enum GCMergeRSWorkTimes { MergeRSMergedSparse, MergeRSMergedFine, - MergeRSMergedCoarse + MergeRSMergedCoarse, + MergeRSDirtyCards }; enum GCScanHRWorkItems { @@ -123,6 +124,7 @@ WorkerDataArray* _merge_rs_merged_sparse; WorkerDataArray* _merge_rs_merged_fine; WorkerDataArray* _merge_rs_merged_coarse; + WorkerDataArray* _merge_rs_dirty_cards; WorkerDataArray* _merge_hcc_dirty_cards; WorkerDataArray* _merge_hcc_skipped_cards; @@ -137,6 +139,7 @@ WorkerDataArray* _opt_merge_rs_merged_sparse; WorkerDataArray* _opt_merge_rs_merged_fine; WorkerDataArray* _opt_merge_rs_merged_coarse; + WorkerDataArray* _opt_merge_rs_dirty_cards; WorkerDataArray* _opt_scan_hr_scanned_cards; 
WorkerDataArray* _opt_scan_hr_scanned_blocks; --- old/src/hotspot/share/gc/g1/g1HotCardCache.cpp 2019-10-22 14:06:41.581275329 +0200 +++ new/src/hotspot/share/gc/g1/g1HotCardCache.cpp 2019-10-22 14:06:41.241264657 +0200 @@ -32,7 +32,7 @@ G1HotCardCache::G1HotCardCache(G1CollectedHeap *g1h): _g1h(g1h), _use_cache(false), _card_counts(g1h), _hot_cache(NULL), _hot_cache_size(0), _hot_cache_par_chunk_size(0), - _hot_cache_idx(0), _hot_cache_par_claimed_idx(0) + _hot_cache_idx(0), _hot_cache_par_claimed_idx(0), _cache_wrapped_around(false) {} void G1HotCardCache::initialize(G1RegionToSpaceMapper* card_counts_storage) { @@ -48,6 +48,8 @@ _hot_cache_par_chunk_size = ClaimChunkSize; _hot_cache_par_claimed_idx = 0; + _cache_wrapped_around = false; + _card_counts.initialize(card_counts_storage); } } @@ -69,6 +71,11 @@ } // Otherwise, the card is hot. size_t index = Atomic::add(1u, &_hot_cache_idx) - 1; + // This does not need an atomic update. Racing threads may at most write the + // same value. + if (index == _hot_cache_size) { + _cache_wrapped_around = true; + } size_t masked_index = index & (_hot_cache_size - 1); CardValue* current_ptr = _hot_cache[masked_index]; --- old/src/hotspot/share/gc/g1/g1HotCardCache.hpp 2019-10-22 14:06:43.125323792 +0200 +++ new/src/hotspot/share/gc/g1/g1HotCardCache.hpp 2019-10-22 14:06:42.775312806 +0200 @@ -81,6 +81,11 @@ char _pad_after[DEFAULT_CACHE_LINE_SIZE]; + // Records whether insertion overflowed the hot card cache at least once. This + // avoids the need for a separate atomic counter of how many valid entries are + // in the HCC. 
+ bool _cache_wrapped_around; + // The number of cached cards a thread claims when flushing the cache static const int ClaimChunkSize = 32; @@ -125,13 +130,17 @@ assert(SafepointSynchronize::is_at_safepoint(), "Should be at a safepoint"); assert(Thread::current()->is_VM_thread(), "Current thread should be the VMthread"); if (default_use_cache()) { - reset_hot_cache_internal(); + reset_hot_cache_internal(); } } // Zeros the values in the card counts table for the given region void reset_card_counts(HeapRegion* hr); + // Number of entries in the HCC: the full capacity once the cache has wrapped around, otherwise _hot_cache_idx, which insert() post-increments and which therefore already equals the number of inserted cards (no +1). + size_t num_entries() const { + return _cache_wrapped_around ? _hot_cache_size : _hot_cache_idx; + } private: void reset_hot_cache_internal() { assert(_hot_cache != NULL, "Logic"); @@ -139,6 +148,7 @@ for (size_t i = 0; i < _hot_cache_size; i++) { _hot_cache[i] = NULL; } + _cache_wrapped_around = false; } }; --- old/src/hotspot/share/gc/g1/g1Policy.cpp 2019-10-22 14:06:44.656371847 +0200 +++ new/src/hotspot/share/gc/g1/g1Policy.cpp 2019-10-22 14:06:44.313361081 +0200 @@ -329,9 +329,8 @@ const double target_pause_time_ms = _mmu_tracker->max_gc_time() * 1000.0; const double survivor_regions_evac_time = predict_survivor_regions_evac_time(); const size_t pending_cards = _analytics->predict_pending_cards(); - const size_t scanned_cards = _analytics->predict_card_num(rs_length, true /* for_young_gc */); const double base_time_ms = - predict_base_elapsed_time_ms(pending_cards, scanned_cards) + + predict_base_elapsed_time_ms(pending_cards, rs_length) + survivor_regions_evac_time; const uint available_free_regions = _free_regions_at_end_of_collection; const uint base_free_regions = @@ -713,67 +712,54 @@ } _short_lived_surv_rate_group->start_adding_regions(); - // Do that for any other surv rate groups - - double scan_hcc_time_ms = G1HotCardCache::default_use_cache() ? 
average_time_ms(G1GCPhaseTimes::MergeHCC) : 0.0; + double merge_hcc_time_ms = average_time_ms(G1GCPhaseTimes::MergeHCC); if (update_stats) { - double cost_per_logged_card = 0.0; - size_t const pending_logged_cards = p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards); - if (pending_logged_cards > 0) { - cost_per_logged_card = logged_cards_processing_time() / pending_logged_cards; - _analytics->report_cost_per_logged_card_ms(cost_per_logged_card); + size_t const total_log_buffer_cards = p->sum_thread_work_items(G1GCPhaseTimes::MergeHCC, G1GCPhaseTimes::MergeHCCDirtyCards) + + p->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards); + // Update prediction for card merge; MergeRSDirtyCards includes the cards from the Eager Reclaim phase. + size_t const total_cards_merged = p->sum_thread_work_items(G1GCPhaseTimes::MergeRS, G1GCPhaseTimes::MergeRSDirtyCards) + + p->sum_thread_work_items(G1GCPhaseTimes::OptMergeRS, G1GCPhaseTimes::MergeRSDirtyCards) + + total_log_buffer_cards; + + if (total_cards_merged > 10) { + double avg_time_merge_cards = average_time_ms(G1GCPhaseTimes::MergeER) + + average_time_ms(G1GCPhaseTimes::MergeRS) + + average_time_ms(G1GCPhaseTimes::MergeHCC) + + average_time_ms(G1GCPhaseTimes::MergeLB) + + average_time_ms(G1GCPhaseTimes::OptMergeRS); + _analytics->report_cost_per_card_merge_ms(avg_time_merge_cards / total_cards_merged, this_pause_was_young_only); } - _analytics->report_cost_scan_hcc(scan_hcc_time_ms); + // Update prediction for card scan size_t const total_cards_scanned = p->sum_thread_work_items(G1GCPhaseTimes::ScanHR, G1GCPhaseTimes::ScanHRScannedCards) + p->sum_thread_work_items(G1GCPhaseTimes::OptScanHR, G1GCPhaseTimes::ScanHRScannedCards); - size_t remset_cards_scanned = 0; - // There might have been duplicate log buffer entries in the queues which could - // increase this value beyond the cards scanned. In this case attribute all cards - // to the log buffers. 
- if (pending_logged_cards <= total_cards_scanned) { - remset_cards_scanned = total_cards_scanned - pending_logged_cards; - } - - double cost_per_remset_card_ms = 0.0; - if (remset_cards_scanned > 10) { - double avg_time_remset_scan = ((average_time_ms(G1GCPhaseTimes::ScanHR) + average_time_ms(G1GCPhaseTimes::OptScanHR)) * - remset_cards_scanned / total_cards_scanned) + - average_time_ms(G1GCPhaseTimes::MergeER) + - average_time_ms(G1GCPhaseTimes::MergeRS) + - average_time_ms(G1GCPhaseTimes::OptMergeRS); - - cost_per_remset_card_ms = avg_time_remset_scan / remset_cards_scanned; - _analytics->report_cost_per_remset_card_ms(cost_per_remset_card_ms, this_pause_was_young_only); - } - - if (_rs_length > 0) { - double cards_per_entry_ratio = - (double) remset_cards_scanned / (double) _rs_length; - _analytics->report_cards_per_entry_ratio(cards_per_entry_ratio, this_pause_was_young_only); - } - - // This is defensive. For a while _max_rs_length could get - // smaller than _recorded_rs_length which was causing - // rs_length_diff to get very large and mess up the RSet length - // predictions. The reason was unsafe concurrent updates to the - // _inc_cset_recorded_rs_length field which the code below guards - // against (see CR 7118202). This bug has now been fixed (see CR - // 7119027). However, I'm still worried that - // _inc_cset_recorded_rs_length might still end up somewhat - // inaccurate. The concurrent refinement thread calculates an - // RSet's length concurrently with other CR threads updating it - // which might cause it to calculate the length incorrectly (if, - // say, it's in mid-coarsening). So I'll leave in the defensive - // conditional below just in case. 
- size_t rs_length_diff = 0; - size_t recorded_rs_length = _collection_set->recorded_rs_length(); - if (_rs_length > recorded_rs_length) { - rs_length_diff = _rs_length - recorded_rs_length; - } - _analytics->report_rs_length_diff((double) rs_length_diff); + if (total_cards_scanned > 10) { + double avg_time_dirty_card_scan = average_time_ms(G1GCPhaseTimes::ScanHR) + + average_time_ms(G1GCPhaseTimes::OptScanHR); + + _analytics->report_cost_per_card_scan_ms(avg_time_dirty_card_scan / total_cards_scanned, this_pause_was_young_only); + } + + // Update prediction for the ratio between cards from the remembered + // sets and actually scanned cards from the remembered sets. + // Cards from the remembered sets are all cards not duplicated by cards from + // the logs. + // Due to duplicates in the log buffers, the number of actually scanned cards + // can be smaller than the cards in the log buffers. + const size_t from_rs_length_cards = (total_cards_scanned > total_log_buffer_cards) ? total_cards_scanned - total_log_buffer_cards : 0; + double merge_to_scan_ratio = 0.0; + if (total_cards_scanned > 0) { + merge_to_scan_ratio = (double) from_rs_length_cards / total_cards_scanned; + } + _analytics->report_card_merge_to_scan_ratio(merge_to_scan_ratio, this_pause_was_young_only); + + const size_t recorded_rs_length = _collection_set->recorded_rs_length(); + const size_t rs_length_diff = _rs_length > recorded_rs_length ? _rs_length - recorded_rs_length : 0; + _analytics->report_rs_length_diff(rs_length_diff); + + // Update prediction for copy cost per byte size_t copied_bytes = p->sum_thread_work_items(G1GCPhaseTimes::ObjCopy, G1GCPhaseTimes::ObjCopyCopiedBytes) + p->sum_thread_work_items(G1GCPhaseTimes::OptObjCopy, G1GCPhaseTimes::ObjCopyCopiedBytes); @@ -843,21 +829,21 @@ // Note that _mmu_tracker->max_gc_time() returns the time in seconds. 
double scan_logged_cards_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0; - if (scan_logged_cards_time_goal_ms < scan_hcc_time_ms) { + if (scan_logged_cards_time_goal_ms < merge_hcc_time_ms) { log_debug(gc, ergo, refine)("Adjust concurrent refinement thresholds (scanning the HCC expected to take longer than Update RS time goal)." "Logged Cards Scan time goal: %1.2fms Scan HCC time: %1.2fms", - scan_logged_cards_time_goal_ms, scan_hcc_time_ms); + scan_logged_cards_time_goal_ms, merge_hcc_time_ms); scan_logged_cards_time_goal_ms = 0; } else { - scan_logged_cards_time_goal_ms -= scan_hcc_time_ms; + scan_logged_cards_time_goal_ms -= merge_hcc_time_ms; } _pending_cards_at_prev_gc_end = _g1h->pending_card_num(); double const logged_cards_time = logged_cards_processing_time(); log_debug(gc, ergo, refine)("Concurrent refinement times: Logged Cards Scan time goal: %1.2fms Logged Cards Scan time: %1.2fms HCC time: %1.2fms", - scan_logged_cards_time_goal_ms, logged_cards_time, scan_hcc_time_ms); + scan_logged_cards_time_goal_ms, logged_cards_time, merge_hcc_time_ms); _g1h->concurrent_refine()->adjust(logged_cards_time, phase_times()->sum_thread_work_items(G1GCPhaseTimes::MergeLB, G1GCPhaseTimes::MergeLBDirtyCards), @@ -937,17 +923,17 @@ } double G1Policy::predict_base_elapsed_time_ms(size_t pending_cards, - size_t scanned_cards) const { + size_t rs_length) const { + size_t effective_scanned_cards = _analytics->predict_scan_card_num(rs_length, collector_state()->in_young_only_phase()); return - _analytics->predict_rs_update_time_ms(pending_cards) + - _analytics->predict_rs_scan_time_ms(scanned_cards, collector_state()->in_young_only_phase()) + + _analytics->predict_card_merge_time_ms(pending_cards + rs_length, collector_state()->in_young_only_phase()) + + _analytics->predict_card_scan_time_ms(effective_scanned_cards, collector_state()->in_young_only_phase()) + _analytics->predict_constant_other_time_ms(); } double 
G1Policy::predict_base_elapsed_time_ms(size_t pending_cards) const { size_t rs_length = _analytics->predict_rs_length(); - size_t card_num = _analytics->predict_card_num(rs_length, collector_state()->in_young_only_phase()); - return predict_base_elapsed_time_ms(pending_cards, card_num); + return predict_base_elapsed_time_ms(pending_cards, rs_length); } size_t G1Policy::predict_bytes_to_copy(HeapRegion* hr) const { @@ -966,13 +952,13 @@ double G1Policy::predict_region_elapsed_time_ms(HeapRegion* hr, bool for_young_gc) const { size_t rs_length = hr->rem_set()->occupied(); - // Predicting the number of cards is based on which type of GC - // we're predicting for. - size_t card_num = _analytics->predict_card_num(rs_length, for_young_gc); + size_t scan_card_num = _analytics->predict_scan_card_num(rs_length, for_young_gc); + size_t bytes_to_copy = predict_bytes_to_copy(hr); double region_elapsed_time_ms = - _analytics->predict_rs_scan_time_ms(card_num, collector_state()->in_young_only_phase()) + + _analytics->predict_card_merge_time_ms(rs_length, collector_state()->in_young_only_phase()) + + _analytics->predict_card_scan_time_ms(scan_card_num, collector_state()->in_young_only_phase()) + _analytics->predict_object_copy_time_ms(bytes_to_copy, collector_state()->mark_or_rebuild_in_progress()); // The prediction of the "other" time for this region is based --- old/src/hotspot/share/gc/g1/g1Policy.hpp 2019-10-22 14:06:46.217420844 +0200 +++ new/src/hotspot/share/gc/g1/g1Policy.hpp 2019-10-22 14:06:45.875410109 +0200 @@ -140,9 +140,9 @@ _rs_length = rs_length; } - double predict_base_elapsed_time_ms(size_t pending_cards) const; - double predict_base_elapsed_time_ms(size_t pending_cards, - size_t scanned_cards) const; + double predict_base_elapsed_time_ms(size_t num_pending_cards) const; + double predict_base_elapsed_time_ms(size_t num_pending_cards, + size_t rs_length) const; size_t predict_bytes_to_copy(HeapRegion* hr) const; double predict_region_elapsed_time_ms(HeapRegion* 
hr, bool for_young_gc) const; --- old/src/hotspot/share/gc/g1/g1RemSet.cpp 2019-10-22 14:06:47.751468993 +0200 +++ new/src/hotspot/share/gc/g1/g1RemSet.cpp 2019-10-22 14:06:47.411458321 +0200 @@ -927,6 +927,8 @@ uint _merged_fine; uint _merged_coarse; + size_t _cards_dirty; + // Returns if the region contains cards we need to scan. If so, remember that // region in the current set of dirty regions. bool remember_if_interesting(uint const region_idx) { @@ -942,7 +944,8 @@ _ct(G1CollectedHeap::heap()->card_table()), _merged_sparse(0), _merged_fine(0), - _merged_coarse(0) { } + _merged_coarse(0), + _cards_dirty(0) { } void next_coarse_prt(uint const region_idx) { if (!remember_if_interesting(region_idx)) { @@ -952,7 +955,7 @@ _merged_coarse++; size_t region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion; - _ct->mark_region_dirty(region_base_idx, HeapRegion::CardsPerRegion); + _cards_dirty += _ct->mark_region_dirty(region_base_idx, HeapRegion::CardsPerRegion); _scan_state->set_chunk_region_dirty(region_base_idx); } @@ -966,7 +969,7 @@ size_t const region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion; BitMap::idx_t cur = bm->get_next_one_offset(0); while (cur != bm->size()) { - _ct->mark_clean_as_dirty(region_base_idx + cur); + _cards_dirty += _ct->mark_clean_as_dirty(region_base_idx + cur); _scan_state->set_chunk_dirty(region_base_idx + cur); cur = bm->get_next_one_offset(cur + 1); } @@ -982,7 +985,7 @@ size_t const region_base_idx = (size_t)region_idx << HeapRegion::LogCardsPerRegion; for (uint i = 0; i < num_cards; i++) { size_t card_idx = region_base_idx + cards[i]; - _ct->mark_clean_as_dirty(card_idx); + _cards_dirty += _ct->mark_clean_as_dirty(card_idx); _scan_state->set_chunk_dirty(card_idx); } } @@ -1001,6 +1004,8 @@ size_t merged_sparse() const { return _merged_sparse; } size_t merged_fine() const { return _merged_fine; } size_t merged_coarse() const { return _merged_coarse; } + + size_t cards_dirty() const { return 
_cards_dirty; } }; // Visitor for the remembered sets of humongous candidate regions to merge their @@ -1046,6 +1051,8 @@ size_t merged_sparse() const { return _cl.merged_sparse(); } size_t merged_fine() const { return _cl.merged_fine(); } size_t merged_coarse() const { return _cl.merged_coarse(); } + + size_t cards_dirty() const { return _cl.cards_dirty(); } }; // Visitor for the log buffer entries to merge them into the card table. @@ -1147,6 +1154,7 @@ p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_sparse(), G1GCPhaseTimes::MergeRSMergedSparse); p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_fine(), G1GCPhaseTimes::MergeRSMergedFine); p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_coarse(), G1GCPhaseTimes::MergeRSMergedCoarse); + p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeRSDirtyCards); } // Merge remembered sets of current candidates. @@ -1158,6 +1166,7 @@ p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_sparse(), G1GCPhaseTimes::MergeRSMergedSparse); p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_fine(), G1GCPhaseTimes::MergeRSMergedFine); p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.merged_coarse(), G1GCPhaseTimes::MergeRSMergedCoarse); + p->record_or_add_thread_work_item(merge_remset_phase, worker_id, cl.cards_dirty(), G1GCPhaseTimes::MergeRSDirtyCards); } // Apply closure to log entries in the HCC.