rev 59956 : 8247819: G1: Process strong OopStorage entries in parallel
Reviewed-by:
Contributed-by: Erik Osterlund <erik.osterlund@oracle.com>, Stefan Karlsson <stefan.karlsson@oracle.com>, Thomas Schatzl <thomas.schatzl@oracle.com>

   1 /*
   2  * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef SHARE_GC_G1_G1GCPHASETIMES_HPP
  26 #define SHARE_GC_G1_G1GCPHASETIMES_HPP
  27 
  28 #include "gc/shared/oopStorageSet.hpp"
  29 #include "gc/shared/referenceProcessorPhaseTimes.hpp"
  30 #include "gc/shared/weakProcessorPhaseTimes.hpp"
  31 #include "logging/logLevel.hpp"
  32 #include "memory/allocation.hpp"
  33 #include "utilities/macros.hpp"
  34 
  35 class LineBuffer;
  36 class G1ParScanThreadState;
  37 class STWGCTimer;
  38 
  39 template <class T> class WorkerDataArray;
  40 
  41 class G1GCPhaseTimes : public CHeapObj<mtGC> {
  42   uint _max_gc_threads;
  43   jlong _gc_start_counter;
  44   double _gc_pause_time_ms;
  45 
  46  public:
  47   enum GCParPhases {
  48     GCWorkerStart,
  49     ExtRootScan,
  50     ThreadRoots,
  51     UniverseRoots,

  52     ObjectSynchronizerRoots,
  53     ManagementRoots,

  54     CLDGRoots,
  55     JVMTIRoots,
  56     AOT_ONLY(AOTCodeRoots COMMA)
  57     CMRefRoots,
  58     // For every OopStorage there will be one element in the enum, starting with
  59     // StrongOopStorageSetRoots.
  60     StrongOopStorageSetRoots,
  61     MergeER = StrongOopStorageSetRoots + OopStorageSet::strong_count,
  62     MergeRS,
  63     OptMergeRS,
  64     MergeLB,
  65     MergeHCC,
  66     ScanHR,
  67     OptScanHR,
  68     CodeRoots,
  69     OptCodeRoots,
  70     ObjCopy,
  71     OptObjCopy,
  72     Termination,
  73     OptTermination,
  74     Other,
  75     GCWorkerTotal,
  76     GCWorkerEnd,
  77     StringDedupQueueFixup,
  78     StringDedupTableFixup,
  79     RedirtyCards,
  80     ParFreeCSet,
  81     YoungFreeCSet,
  82     NonYoungFreeCSet,
  83     RebuildFreeList,
  84     MergePSS,
  85     GCParPhasesSentinel
  86   };
  87 
  88   static const GCParPhases ExtRootScanSubPhasesFirst = ThreadRoots;
  89   static const GCParPhases ExtRootScanSubPhasesLast = GCParPhases(MergeER - 1);
  90 
  91   enum GCMergeRSWorkTimes {
  92     MergeRSMergedSparse,
  93     MergeRSMergedFine,
  94     MergeRSMergedCoarse,
  95     MergeRSDirtyCards
  96   };
  97 
  98   enum GCScanHRWorkItems {
  99     ScanHRScannedCards,
 100     ScanHRScannedBlocks,
 101     ScanHRClaimedChunks,
 102     ScanHRScannedOptRefs,
 103     ScanHRUsedMemory
 104   };
 105 
 106   enum GCMergeHCCWorkItems {
 107     MergeHCCDirtyCards,
 108     MergeHCCSkippedCards
 109   };
 110 
 111   enum GCMergeLBWorkItems {
 112     MergeLBDirtyCards,
 113     MergeLBSkippedCards
 114   };
 115 
 116   enum GCMergePSSWorkItems {
 117     MergePSSCopiedBytes,
 118     MergePSSLABWasteBytes,
 119     MergePSSLABUndoWasteBytes
 120   };
 121 
 122  private:
 123   // Markers for grouping the phases in the GCPhases enum above
 124   static const int GCMainParPhasesLast = GCWorkerEnd;
 125 
 126   WorkerDataArray<double>* _gc_par_phases[GCParPhasesSentinel];
 127 
 128   double _cur_collection_initial_evac_time_ms;
 129   double _cur_optional_evac_time_ms;
 130   double _cur_collection_code_root_fixup_time_ms;
 131   double _cur_strong_code_root_purge_time_ms;
 132 
 133   double _cur_evac_fail_recalc_used;
 134   double _cur_evac_fail_remove_self_forwards;
 135 
 136   double _cur_string_deduplication_time_ms;
 137 
 138   double _cur_merge_heap_roots_time_ms;
 139   double _cur_optional_merge_heap_roots_time_ms;
 140 
 141   double _cur_prepare_merge_heap_roots_time_ms;
 142   double _cur_optional_prepare_merge_heap_roots_time_ms;
 143 
 144   double _cur_prepare_tlab_time_ms;
 145   double _cur_resize_tlab_time_ms;
 146 
 147   double _cur_concatenate_dirty_card_logs_time_ms;
 148 
 149   double _cur_derived_pointer_table_update_time_ms;
 150 
 151   double _cur_clear_ct_time_ms;
 152   double _cur_expand_heap_time_ms;
 153   double _cur_ref_proc_time_ms;
 154 
 155   double _cur_collection_start_sec;
 156   double _root_region_scan_wait_time_ms;
 157 
 158   double _external_accounted_time_ms;
 159 
 160   double _recorded_prepare_heap_roots_time_ms;
 161 
 162   double _recorded_clear_claimed_marks_time_ms;
 163 
 164   double _recorded_young_cset_choice_time_ms;
 165   double _recorded_non_young_cset_choice_time_ms;
 166 
 167   double _recorded_redirty_logged_cards_time_ms;
 168 
 169   double _recorded_preserve_cm_referents_time_ms;
 170 
 171   double _recorded_merge_pss_time_ms;
 172 
 173   double _recorded_start_new_cset_time_ms;
 174 
 175   double _recorded_total_free_cset_time_ms;
 176 
 177   double _recorded_serial_free_cset_time_ms;
 178 
 179   double _recorded_total_rebuild_freelist_time_ms;
 180 
 181   double _recorded_serial_rebuild_freelist_time_ms;
 182 
 183   double _cur_region_register_time;
 184 
 185   double _cur_fast_reclaim_humongous_time_ms;
 186   size_t _cur_fast_reclaim_humongous_total;
 187   size_t _cur_fast_reclaim_humongous_candidates;
 188   size_t _cur_fast_reclaim_humongous_reclaimed;
 189 
 190   double _cur_verify_before_time_ms;
 191   double _cur_verify_after_time_ms;
 192 
 193   ReferenceProcessorPhaseTimes _ref_phase_times;
 194   WeakProcessorPhaseTimes _weak_phase_times;
 195 
 196   double worker_time(GCParPhases phase, uint worker);
 197   void note_gc_end();
 198   void reset();
 199 
 200   template <class T>
 201   void details(T* phase, const char* indent_str) const;
 202 
 203   void log_work_items(WorkerDataArray<double>* phase, uint indent, outputStream* out) const;
 204   void log_phase(WorkerDataArray<double>* phase, uint indent_level, outputStream* out, bool print_sum) const;
 205   void debug_serial_phase(WorkerDataArray<double>* phase, uint extra_indent = 0) const;
 206   void debug_phase(WorkerDataArray<double>* phase, uint extra_indent = 0) const;
 207   void trace_phase(WorkerDataArray<double>* phase, bool print_sum = true, uint extra_indent = 0) const;
 208 
 209   void info_time(const char* name, double value) const;
 210   void debug_time(const char* name, double value) const;
 211   // This will print logs for both 'gc+phases' and 'gc+phases+ref'.
 212   void debug_time_for_reference(const char* name, double value) const;
 213   void trace_time(const char* name, double value) const;
 214   void trace_count(const char* name, size_t value) const;
 215 
 216   double print_pre_evacuate_collection_set() const;
 217   double print_merge_heap_roots_time() const;
 218   double print_evacuate_initial_collection_set() const;
 219   double print_evacuate_optional_collection_set() const;
 220   double print_post_evacuate_collection_set() const;
 221   void print_other(double accounted_ms) const;
 222 
 223  public:
 224   G1GCPhaseTimes(STWGCTimer* gc_timer, uint max_gc_threads);
 225   void note_gc_start();
 226   void print();
 227   static const char* phase_name(GCParPhases phase);
 228 
 229   // record the time a phase took in seconds
 230   void record_time_secs(GCParPhases phase, uint worker_id, double secs);
 231 
 232   // add a number of seconds to a phase
 233   void add_time_secs(GCParPhases phase, uint worker_id, double secs);
 234 
 235   void record_or_add_time_secs(GCParPhases phase, uint worker_id, double secs);
 236 
 237   double get_time_secs(GCParPhases phase, uint worker_id);
 238 
 239   void record_thread_work_item(GCParPhases phase, uint worker_id, size_t count, uint index = 0);
 240 
 241   void record_or_add_thread_work_item(GCParPhases phase, uint worker_id, size_t count, uint index = 0);
 242 
 243   size_t get_thread_work_item(GCParPhases phase, uint worker_id, uint index = 0);
 244 
 245   // return the average time for a phase in milliseconds
 246   double average_time_ms(GCParPhases phase);
 247 
 248   size_t sum_thread_work_items(GCParPhases phase, uint index = 0);
 249 
 250   void record_prepare_tlab_time_ms(double ms) {
 251     _cur_prepare_tlab_time_ms = ms;
 252   }
 253 
 254   void record_resize_tlab_time_ms(double ms) {
 255     _cur_resize_tlab_time_ms = ms;
 256   }
 257 
 258   void record_concatenate_dirty_card_logs_time_ms(double ms) {
 259     _cur_concatenate_dirty_card_logs_time_ms = ms;
 260   }
 261 
 262   void record_derived_pointer_table_update_time(double ms) {
 263     _cur_derived_pointer_table_update_time_ms = ms;
 264   }
 265 
 266   void record_clear_ct_time(double ms) {
 267     _cur_clear_ct_time_ms = ms;
 268   }
 269 
 270   void record_expand_heap_time(double ms) {
 271     _cur_expand_heap_time_ms = ms;
 272   }
 273 
 274   void record_initial_evac_time(double ms) {
 275     _cur_collection_initial_evac_time_ms = ms;
 276   }
 277 
 278   void record_or_add_optional_evac_time(double ms) {
 279     _cur_optional_evac_time_ms += ms;
 280   }
 281 
 282   void record_or_add_code_root_fixup_time(double ms) {
 283     _cur_collection_code_root_fixup_time_ms += ms;
 284   }
 285 
 286   void record_strong_code_root_purge_time(double ms) {
 287     _cur_strong_code_root_purge_time_ms = ms;
 288   }
 289 
 290   void record_merge_heap_roots_time(double ms) {
 291     _cur_merge_heap_roots_time_ms += ms;
 292   }
 293 
 294   void record_or_add_optional_merge_heap_roots_time(double ms) {
 295     _cur_optional_merge_heap_roots_time_ms += ms;
 296   }
 297 
 298   void record_prepare_merge_heap_roots_time(double ms) {
 299     _cur_prepare_merge_heap_roots_time_ms += ms;
 300   }
 301 
 302   void record_or_add_optional_prepare_merge_heap_roots_time(double ms) {
 303     _cur_optional_prepare_merge_heap_roots_time_ms += ms;
 304   }
 305 
 306   void record_evac_fail_recalc_used_time(double ms) {
 307     _cur_evac_fail_recalc_used = ms;
 308   }
 309 
 310   void record_evac_fail_remove_self_forwards(double ms) {
 311     _cur_evac_fail_remove_self_forwards = ms;
 312   }
 313 
 314   void record_string_deduplication_time(double ms) {
 315     _cur_string_deduplication_time_ms = ms;
 316   }
 317 
 318   void record_ref_proc_time(double ms) {
 319     _cur_ref_proc_time_ms = ms;
 320   }
 321 
 322   void record_root_region_scan_wait_time(double time_ms) {
 323     _root_region_scan_wait_time_ms = time_ms;
 324   }
 325 
 326   void record_total_free_cset_time_ms(double time_ms) {
 327     _recorded_total_free_cset_time_ms = time_ms;
 328   }
 329 
 330   void record_serial_free_cset_time_ms(double time_ms) {
 331     _recorded_serial_free_cset_time_ms = time_ms;
 332   }
 333 
 334   void record_total_rebuild_freelist_time_ms(double time_ms) {
 335     _recorded_total_rebuild_freelist_time_ms = time_ms;
 336   }
 337 
 338   void record_serial_rebuild_freelist_time_ms(double time_ms) {
 339     _recorded_serial_rebuild_freelist_time_ms = time_ms;
 340   }
 341 
 342   void record_register_regions(double time_ms, size_t total, size_t candidates) {
 343     _cur_region_register_time = time_ms;
 344     _cur_fast_reclaim_humongous_total = total;
 345     _cur_fast_reclaim_humongous_candidates = candidates;
 346   }
 347 
 348   void record_fast_reclaim_humongous_time_ms(double value, size_t reclaimed) {
 349     _cur_fast_reclaim_humongous_time_ms = value;
 350     _cur_fast_reclaim_humongous_reclaimed = reclaimed;
 351   }
 352 
 353   void record_young_cset_choice_time_ms(double time_ms) {
 354     _recorded_young_cset_choice_time_ms = time_ms;
 355   }
 356 
 357   void record_non_young_cset_choice_time_ms(double time_ms) {
 358     _recorded_non_young_cset_choice_time_ms = time_ms;
 359   }
 360 
 361   void record_redirty_logged_cards_time_ms(double time_ms) {
 362     _recorded_redirty_logged_cards_time_ms = time_ms;
 363   }
 364 
 365   void record_preserve_cm_referents_time_ms(double time_ms) {
 366     _recorded_preserve_cm_referents_time_ms = time_ms;
 367   }
 368 
 369   void record_start_new_cset_time_ms(double time_ms) {
 370     _recorded_start_new_cset_time_ms = time_ms;
 371   }
 372 
 373   void record_cur_collection_start_sec(double time_ms) {
 374     _cur_collection_start_sec = time_ms;
 375   }
 376 
 377   void record_verify_before_time_ms(double time_ms) {
 378     _cur_verify_before_time_ms = time_ms;
 379   }
 380 
 381   void record_verify_after_time_ms(double time_ms) {
 382     _cur_verify_after_time_ms = time_ms;
 383   }
 384 
 385   void inc_external_accounted_time_ms(double time_ms) {
 386     _external_accounted_time_ms += time_ms;
 387   }
 388 
 389   void record_prepare_heap_roots_time_ms(double recorded_prepare_heap_roots_time_ms) {
 390     _recorded_prepare_heap_roots_time_ms = recorded_prepare_heap_roots_time_ms;
 391   }
 392 
 393   void record_clear_claimed_marks_time_ms(double recorded_clear_claimed_marks_time_ms) {
 394     _recorded_clear_claimed_marks_time_ms = recorded_clear_claimed_marks_time_ms;
 395   }
 396 
 397   double cur_collection_start_sec() {
 398     return _cur_collection_start_sec;
 399   }
 400 
 401   double cur_collection_par_time_ms() {
 402     return _cur_collection_initial_evac_time_ms + _cur_optional_evac_time_ms;
 403   }
 404 
 405   double cur_clear_ct_time_ms() {
 406     return _cur_clear_ct_time_ms;
 407   }
 408 
 409   double cur_expand_heap_time_ms() {
 410     return _cur_expand_heap_time_ms;
 411   }
 412 
 413   double root_region_scan_wait_time_ms() {
 414     return _root_region_scan_wait_time_ms;
 415   }
 416 
 417   double young_cset_choice_time_ms() {
 418     return _recorded_young_cset_choice_time_ms;
 419   }
 420 
 421   double total_free_cset_time_ms() {
 422     return _recorded_total_free_cset_time_ms;
 423   }
 424 
 425   double total_rebuild_freelist_time_ms() {
 426     return _recorded_total_rebuild_freelist_time_ms;
 427   }
 428 
 429   double non_young_cset_choice_time_ms() {
 430     return _recorded_non_young_cset_choice_time_ms;
 431   }
 432 
 433   double fast_reclaim_humongous_time_ms() {
 434     return _cur_fast_reclaim_humongous_time_ms;
 435   }
 436 
 437   size_t fast_reclaim_humongous_candidates() const {
 438     return _cur_fast_reclaim_humongous_candidates;
 439   }
 440 
 441   ReferenceProcessorPhaseTimes* ref_phase_times() { return &_ref_phase_times; }
 442 
 443   WeakProcessorPhaseTimes* weak_phase_times() { return &_weak_phase_times; }
 444 };
 445 
  446 class G1EvacPhaseWithTrimTimeTracker : public StackObj {  // StackObj helper built from a G1ParScanThreadState and two Tickspan references.
  447   G1ParScanThreadState* _pss;
  448   Ticks _start;  // NOTE(review): Ticks/Tickspan are not included directly by this header; presumably transitive - confirm.
  449 
  450   Tickspan& _total_time;  // Reference member; presumably bound to the constructor's total_time argument - confirm in the .cpp.
  451   Tickspan& _trim_time;   // Reference member; presumably bound to the constructor's trim_time argument - confirm in the .cpp.
  452 
  453   bool _stopped;  // presumably set by stop() so the destructor does not stop a second time - TODO confirm.
  454 public:
  455   G1EvacPhaseWithTrimTimeTracker(G1ParScanThreadState* pss, Tickspan& total_time, Tickspan& trim_time);
  456   ~G1EvacPhaseWithTrimTimeTracker();
  457 
  458   void stop();  // Defined out of line; semantics are not visible in this header.
  459 };
 460 
 461 #endif // SHARE_GC_G1_G1GCPHASETIMES_HPP
--- EOF ---