1 # HG changeset patch
   2 # User brutisso
   3 # Date 1426151480 -3600
   4 #      Thu Mar 12 10:11:20 2015 +0100
   5 # Node ID 42c9c92d320aea9bdbf7df06207675ebf8d14009
   6 # Parent  af3b0db916597ad07785becaa95103fa00bb929f
   7 8074037: Refactor the G1GCPhaseTime logging to make it easier to add new phases
   8 Reviewed-by: tschatzl, mgerdin, ecaspole
   9 
  10 diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
  11 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
  12 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
  13 @@ -2206,11 +2206,11 @@
  14    hot_card_cache->drain(worker_i, g1_rem_set(), into_cset_dcq);
  15  
  16    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
  17 -  int n_completed_buffers = 0;
  18 +  size_t n_completed_buffers = 0;
  19    while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) {
  20      n_completed_buffers++;
  21    }
  22 -  g1_policy()->phase_times()->record_update_rs_processed_buffers(worker_i, n_completed_buffers);
  23 +  g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, n_completed_buffers);
  24    dcqs.clear_n_completed_buffers();
  25    assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!");
  26  }
  27 @@ -3751,9 +3751,9 @@
  28  
  29      TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty);
  30  
  31 -    int active_workers = workers()->active_workers();
  32 +    uint active_workers = workers()->active_workers();
  33      double pause_start_sec = os::elapsedTime();
  34 -    g1_policy()->phase_times()->note_gc_start(active_workers);
  35 +    g1_policy()->phase_times()->note_gc_start(active_workers, mark_in_progress());
  36      log_gc_header();
  37  
  38      TraceCollectorStats tcs(g1mm()->incremental_collection_counters());
  39 @@ -4486,8 +4486,7 @@
  40    void work(uint worker_id) {
  41      if (worker_id >= _n_workers) return;  // no work needed this round
  42  
  43 -    double start_time_ms = os::elapsedTime() * 1000.0;
  44 -    _g1h->g1_policy()->phase_times()->record_gc_worker_start_time(worker_id, start_time_ms);
  45 +    _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerStart, worker_id, os::elapsedTime());
  46  
  47      {
  48        ResourceMark rm;
  49 @@ -4567,10 +4566,11 @@
  50          double start = os::elapsedTime();
  51          G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
  52          evac.do_void();
  53 -        double elapsed_ms = (os::elapsedTime()-start)*1000.0;
  54 -        double term_ms = pss.term_time()*1000.0;
  55 -        _g1h->g1_policy()->phase_times()->add_obj_copy_time(worker_id, elapsed_ms-term_ms);
  56 -        _g1h->g1_policy()->phase_times()->record_termination(worker_id, term_ms, pss.term_attempts());
  57 +        double elapsed_sec = os::elapsedTime() - start;
  58 +        double term_sec = pss.term_time();
  59 +        _g1h->g1_policy()->phase_times()->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_id, elapsed_sec - term_sec);
  60 +        _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::Termination, worker_id, term_sec);
  61 +        _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, pss.term_attempts());
  62        }
  63        _g1h->g1_policy()->record_thread_age_table(pss.age_table());
  64        _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
  65 @@ -4586,9 +4586,7 @@
  66        // destructors are executed here and are included as part of the
  67        // "GC Worker Time".
  68      }
  69 -
  70 -    double end_time_ms = os::elapsedTime() * 1000.0;
  71 -    _g1h->g1_policy()->phase_times()->record_gc_worker_end_time(worker_id, end_time_ms);
  72 +    _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerEnd, worker_id, os::elapsedTime());
  73    }
  74  };
  75  
  76 @@ -4650,27 +4648,20 @@
  77    double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds()
  78        + buf_scan_non_heap_weak_roots.closure_app_seconds();
  79  
  80 -  g1_policy()->phase_times()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
  81 -
  82 -  double ext_root_time_ms =
  83 -    ((os::elapsedTime() - ext_roots_start) - obj_copy_time_sec) * 1000.0;
  84 -
  85 -  g1_policy()->phase_times()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
  86 +  g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::ObjCopy, worker_i, obj_copy_time_sec);
  87 +
  88 +  double ext_root_time_sec = os::elapsedTime() - ext_roots_start - obj_copy_time_sec;
  89 +  g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::ExtRootScan, worker_i, ext_root_time_sec);
  90  
  91    // During conc marking we have to filter the per-thread SATB buffers
  92    // to make sure we remove any oops into the CSet (which will show up
  93    // as implicitly live).
  94 -  double satb_filtering_ms = 0.0;
  95 -  if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers)) {
  96 -    if (mark_in_progress()) {
  97 -      double satb_filter_start = os::elapsedTime();
  98 -
  99 +  {
 100 +    G1GCParPhaseTimesTracker x(g1_policy()->phase_times(), G1GCPhaseTimes::SATBFiltering, worker_i);
 101 +    if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers) && mark_in_progress()) {
 102        JavaThread::satb_mark_queue_set().filter_thread_buffers();
 103 -
 104 -      satb_filtering_ms = (os::elapsedTime() - satb_filter_start) * 1000.0;
 105      }
 106    }
 107 -  g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms);
 108  
 109    // Now scan the complement of the collection set.
 110    G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots);
 111 @@ -5073,14 +5064,13 @@
 112    G1RedirtyLoggedCardsTask(DirtyCardQueueSet* queue) : AbstractGangTask("Redirty Cards"), _queue(queue) { }
 113  
 114    virtual void work(uint worker_id) {
 115 -    double start_time = os::elapsedTime();
 116 +    G1GCPhaseTimes* phase_times = G1CollectedHeap::heap()->g1_policy()->phase_times();
 117 +    G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::RedirtyCards, worker_id);
 118  
 119      RedirtyLoggedCardTableEntryClosure cl;
 120      _queue->par_apply_closure_to_all_completed_buffers(&cl);
 121  
 122 -    G1GCPhaseTimes* timer = G1CollectedHeap::heap()->g1_policy()->phase_times();
 123 -    timer->record_redirty_logged_cards_time_ms(worker_id, (os::elapsedTime() - start_time) * 1000.0);
 124 -    timer->record_redirty_logged_cards_processed_cards(worker_id, cl.num_processed());
 125 +    phase_times->record_thread_work_item(G1GCPhaseTimes::RedirtyCards, worker_id, cl.num_processed());
 126    }
 127  };
 128  
 129 @@ -5658,12 +5648,14 @@
 130      // reported parallel time.
 131    }
 132  
 133 +  G1GCPhaseTimes* phase_times = g1_policy()->phase_times();
 134 +
 135    double par_time_ms = (end_par_time_sec - start_par_time_sec) * 1000.0;
 136 -  g1_policy()->phase_times()->record_par_time(par_time_ms);
 137 +  phase_times->record_par_time(par_time_ms);
 138  
 139    double code_root_fixup_time_ms =
 140          (os::elapsedTime() - end_par_time_sec) * 1000.0;
 141 -  g1_policy()->phase_times()->record_code_root_fixup_time(code_root_fixup_time_ms);
 142 +  phase_times->record_code_root_fixup_time(code_root_fixup_time_ms);
 143  
 144    set_par_threads(0);
 145  
 146 @@ -5675,9 +5667,14 @@
 147    process_discovered_references(n_workers);
 148  
 149    if (G1StringDedup::is_enabled()) {
 150 +    double fixup_start = os::elapsedTime();
 151 +
 152      G1STWIsAliveClosure is_alive(this);
 153      G1KeepAliveClosure keep_alive(this);
 154 -    G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive);
 155 +    G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive, true, phase_times);
 156 +
 157 +    double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0;
 158 +    phase_times->record_string_dedup_fixup_time(fixup_time_ms);
 159    }
 160  
 161    _allocator->release_gc_alloc_regions(n_workers, evacuation_info);
 162 diff --git a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
 163 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
 164 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
 165 @@ -1073,7 +1073,7 @@
 166    if (update_stats) {
 167      double cost_per_card_ms = 0.0;
 168      if (_pending_cards > 0) {
 169 -      cost_per_card_ms = phase_times()->average_last_update_rs_time() / (double) _pending_cards;
 170 +      cost_per_card_ms = phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) / (double) _pending_cards;
 171        _cost_per_card_ms_seq->add(cost_per_card_ms);
 172      }
 173  
 174 @@ -1081,7 +1081,7 @@
 175  
 176      double cost_per_entry_ms = 0.0;
 177      if (cards_scanned > 10) {
 178 -      cost_per_entry_ms = phase_times()->average_last_scan_rs_time() / (double) cards_scanned;
 179 +      cost_per_entry_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) / (double) cards_scanned;
 180        if (_last_gc_was_young) {
 181          _cost_per_entry_ms_seq->add(cost_per_entry_ms);
 182        } else {
 183 @@ -1123,7 +1123,7 @@
 184      double cost_per_byte_ms = 0.0;
 185  
 186      if (copied_bytes > 0) {
 187 -      cost_per_byte_ms = phase_times()->average_last_obj_copy_time() / (double) copied_bytes;
 188 +      cost_per_byte_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) / (double) copied_bytes;
 189        if (_in_marking_window) {
 190          _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms);
 191        } else {
 192 @@ -1132,8 +1132,8 @@
 193      }
 194  
 195      double all_other_time_ms = pause_time_ms -
 196 -      (phase_times()->average_last_update_rs_time() + phase_times()->average_last_scan_rs_time()
 197 -      + phase_times()->average_last_obj_copy_time() + phase_times()->average_last_termination_time());
 198 +      (phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) + phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) +
 199 +          phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) + phase_times()->average_time_ms(G1GCPhaseTimes::Termination));
 200  
 201      double young_other_time_ms = 0.0;
 202      if (young_cset_region_length() > 0) {
 203 @@ -1174,8 +1174,8 @@
 204  
 205    // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
 206    double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
 207 -  adjust_concurrent_refinement(phase_times()->average_last_update_rs_time(),
 208 -                               phase_times()->sum_last_update_rs_processed_buffers(), update_rs_time_goal_ms);
 209 +  adjust_concurrent_refinement(phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS),
 210 +                               phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), update_rs_time_goal_ms);
 211  
 212    _collectionSetChooser->verify();
 213  }
 214 @@ -2114,19 +2114,19 @@
 215      _other.add(pause_time_ms - phase_times->accounted_time_ms());
 216      _root_region_scan_wait.add(phase_times->root_region_scan_wait_time_ms());
 217      _parallel.add(phase_times->cur_collection_par_time_ms());
 218 -    _ext_root_scan.add(phase_times->average_last_ext_root_scan_time());
 219 -    _satb_filtering.add(phase_times->average_last_satb_filtering_times_ms());
 220 -    _update_rs.add(phase_times->average_last_update_rs_time());
 221 -    _scan_rs.add(phase_times->average_last_scan_rs_time());
 222 -    _obj_copy.add(phase_times->average_last_obj_copy_time());
 223 -    _termination.add(phase_times->average_last_termination_time());
 224 +    _ext_root_scan.add(phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan));
 225 +    _satb_filtering.add(phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering));
 226 +    _update_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS));
 227 +    _scan_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::ScanRS));
 228 +    _obj_copy.add(phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy));
 229 +    _termination.add(phase_times->average_time_ms(G1GCPhaseTimes::Termination));
 230  
 231 -    double parallel_known_time = phase_times->average_last_ext_root_scan_time() +
 232 -      phase_times->average_last_satb_filtering_times_ms() +
 233 -      phase_times->average_last_update_rs_time() +
 234 -      phase_times->average_last_scan_rs_time() +
 235 -      phase_times->average_last_obj_copy_time() +
 236 -      + phase_times->average_last_termination_time();
 237 +    double parallel_known_time = phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan) +
 238 +      phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering) +
 239 +      phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS) +
 240 +      phase_times->average_time_ms(G1GCPhaseTimes::ScanRS) +
 241 +      phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy) +
 242 +      phase_times->average_time_ms(G1GCPhaseTimes::Termination);
 243  
 244      double parallel_other_time = phase_times->cur_collection_par_time_ms() - parallel_known_time;
 245      _parallel_other.add(parallel_other_time);
 246 diff --git a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
 247 --- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
 248 +++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp
 249 @@ -22,13 +22,13 @@
 250   *
 251   */
 252  
 253 -
 254  #include "precompiled.hpp"
 255  #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 256  #include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 257  #include "gc_implementation/g1/g1Log.hpp"
 258  #include "gc_implementation/g1/g1StringDedup.hpp"
 259 -#include "runtime/atomic.inline.hpp"
 260 +#include "memory/allocation.hpp"
 261 +#include "runtime/os.hpp"
 262  
 263  // Helper class for avoiding interleaved logging
 264  class LineBuffer: public StackObj {
 265 @@ -71,184 +71,243 @@
 266      va_end(ap);
 267    }
 268  
 269 +  void print_cr() {
 270 +    gclog_or_tty->print_cr("%s", _buffer);
 271 +    _cur = _indent_level * INDENT_CHARS;
 272 +  }
 273 +
 274    void append_and_print_cr(const char* format, ...)  ATTRIBUTE_PRINTF(2, 3) {
 275      va_list ap;
 276      va_start(ap, format);
 277      vappend(format, ap);
 278      va_end(ap);
 279 -    gclog_or_tty->print_cr("%s", _buffer);
 280 -    _cur = _indent_level * INDENT_CHARS;
 281 +    print_cr();
 282    }
 283  };
 284  
 285 -PRAGMA_DIAG_PUSH
 286 -PRAGMA_FORMAT_NONLITERAL_IGNORED
 287  template <class T>
 288 -void WorkerDataArray<T>::print(int level, const char* title) {
 289 -  if (_length == 1) {
 290 -    // No need for min, max, average and sum for only one worker
 291 -    LineBuffer buf(level);
 292 -    buf.append("[%s:  ", title);
 293 -    buf.append(_print_format, _data[0]);
 294 -    buf.append_and_print_cr("]");
 295 -    return;
 296 +class WorkerDataArray  : public CHeapObj<mtGC> {
 297 +  friend class G1GCParPhasePrinter;
 298 +  T*          _data;
 299 +  uint        _length;
 300 +  const char* _title;
 301 +  bool        _print_sum;
 302 +  int         _log_level;
 303 +  uint        _indent_level;
 304 +  bool        _enabled;
 305 +
 306 +  WorkerDataArray<size_t>* _thread_work_items;
 307 +
 308 +  NOT_PRODUCT(T uninitialized();)
 309 +
 310 +  // We are caching the sum and average to only have to calculate them once.
 311 +  // This is not done in an MT-safe way. It is intended to allow single
 312 +  // threaded code to call sum() and average() multiple times in any order
 313 +  // without having to worry about the cost.
 314 +  bool   _has_new_data;
 315 +  T      _sum;
 316 +  T      _min;
 317 +  T      _max;
 318 +  double _average;
 319 +
 320 + public:
 321 +  WorkerDataArray(uint length, const char* title, bool print_sum, int log_level, uint indent_level) :
 322 +    _title(title), _length(0), _print_sum(print_sum), _log_level(log_level), _indent_level(indent_level),
 323 +    _has_new_data(true), _thread_work_items(NULL), _enabled(true) {
 324 +    assert(length > 0, "Must have some workers to store data for");
 325 +    _length = length;
 326 +    _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
 327    }
 328  
 329 -  T min = _data[0];
 330 -  T max = _data[0];
 331 -  T sum = 0;
 332 -
 333 -  LineBuffer buf(level);
 334 -  buf.append("[%s:", title);
 335 -  for (uint i = 0; i < _length; ++i) {
 336 -    T val = _data[i];
 337 -    min = MIN2(val, min);
 338 -    max = MAX2(val, max);
 339 -    sum += val;
 340 -    if (G1Log::finest()) {
 341 -      buf.append("  ");
 342 -      buf.append(_print_format, val);
 343 -    }
 344 +  ~WorkerDataArray() {
 345 +    FREE_C_HEAP_ARRAY(T, _data);
 346    }
 347  
 348 -  if (G1Log::finest()) {
 349 -    buf.append_and_print_cr("%s", "");
 350 +  void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items) {
 351 +    _thread_work_items = thread_work_items;
 352    }
 353  
 354 -  double avg = (double)sum / (double)_length;
 355 -  buf.append(" Min: ");
 356 -  buf.append(_print_format, min);
 357 -  buf.append(", Avg: ");
 358 -  buf.append("%.1lf", avg); // Always print average as a double
 359 -  buf.append(", Max: ");
 360 -  buf.append(_print_format, max);
 361 -  buf.append(", Diff: ");
 362 -  buf.append(_print_format, max - min);
 363 -  if (_print_sum) {
 364 -    // for things like the start and end times the sum is not
 365 -    // that relevant
 366 -    buf.append(", Sum: ");
 367 -    buf.append(_print_format, sum);
 368 +  WorkerDataArray<size_t>* thread_work_items() { return _thread_work_items; }
 369 +
 370 +  void set(uint worker_i, T value) {
 371 +    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
 372 +    assert(_data[worker_i] == WorkerDataArray<T>::uninitialized(), err_msg("Overwriting data for worker %d in %s", worker_i, _title));
 373 +    _data[worker_i] = value;
 374 +    _has_new_data = true;
 375    }
 376 -  buf.append_and_print_cr("]");
 377 -}
 378 -PRAGMA_DIAG_POP
 379 +
 380 +  void set_thread_work_item(uint worker_i, size_t value) {
 381 +    assert(_thread_work_items != NULL, "No sub count");
 382 +    _thread_work_items->set(worker_i, value);
 383 +  }
 384 +
 385 +  T get(uint worker_i) {
 386 +    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
 387 +    assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data added for worker %d", worker_i));
 388 +    return _data[worker_i];
 389 +  }
 390 +
 391 +  void add(uint worker_i, T value) {
 392 +    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
 393 +    assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data to add to for worker %d", worker_i));
 394 +    _data[worker_i] += value;
 395 +    _has_new_data = true;
 396 +  }
 397 +
 398 +  double average(){
 399 +    calculate_totals();
 400 +    return _average;
 401 +  }
 402 +
 403 +  T sum() {
 404 +    calculate_totals();
 405 +    return _sum;
 406 +  }
 407 +
 408 +  T minimum() {
 409 +    calculate_totals();
 410 +    return _min;
 411 +  }
 412 +
 413 +  T maximum() {
 414 +    calculate_totals();
 415 +    return _max;
 416 +  }
 417 +
 418 +  void reset() PRODUCT_RETURN;
 419 +  void verify() PRODUCT_RETURN;
 420 +
 421 +  void set_enabled(bool enabled) { _enabled = enabled; }
 422 +
 423 +  int log_level() { return _log_level;  }
 424 +
 425 + private:
 426 +
 427 +  void calculate_totals(){
 428 +    if (!_has_new_data) {
 429 +      return;
 430 +    }
 431 +
 432 +    _sum = (T)0;
 433 +    _min = _data[0];
 434 +    _max = _min;
 435 +    for (uint i = 0; i < _length; ++i) {
 436 +      T val = _data[i];
 437 +      _sum += val;
 438 +      _min = MIN2(_min, val);
 439 +      _max = MAX2(_max, val);
 440 +    }
 441 +    _average = (double)_sum / (double)_length;
 442 +    _has_new_data = false;
 443 +  }
 444 +};
 445 +
 446  
 447  #ifndef PRODUCT
 448  
 449 -template <> const int WorkerDataArray<int>::_uninitialized = -1;
 450 -template <> const double WorkerDataArray<double>::_uninitialized = -1.0;
 451 -template <> const size_t WorkerDataArray<size_t>::_uninitialized = (size_t)-1;
 452 +template <>
 453 +size_t WorkerDataArray<size_t>::uninitialized() {
 454 +  return (size_t)-1;
 455 +}
 456 +
 457 +template <>
 458 +double WorkerDataArray<double>::uninitialized() {
 459 +  return -1.0;
 460 +}
 461  
 462  template <class T>
 463  void WorkerDataArray<T>::reset() {
 464    for (uint i = 0; i < _length; i++) {
 465 -    _data[i] = (T)_uninitialized;
 466 +    _data[i] = WorkerDataArray<T>::uninitialized();
 467 +  }
 468 +  if (_thread_work_items != NULL) {
 469 +    _thread_work_items->reset();
 470    }
 471  }
 472  
 473  template <class T>
 474  void WorkerDataArray<T>::verify() {
 475 +  if (!_enabled) {
 476 +    return;
 477 +  }
 478 +
 479    for (uint i = 0; i < _length; i++) {
 480 -    assert(_data[i] != _uninitialized,
 481 -        err_msg("Invalid data for worker %u, data: %lf, uninitialized: %lf",
 482 -            i, (double)_data[i], (double)_uninitialized));
 483 +    assert(_data[i] != WorkerDataArray<T>::uninitialized(),
 484 +        err_msg("Invalid data for worker %u in '%s'", i, _title));
 485 +  }
 486 +  if (_thread_work_items != NULL) {
 487 +    _thread_work_items->verify();
 488    }
 489  }
 490  
 491  #endif
 492  
 493  G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
 494 -  _max_gc_threads(max_gc_threads),
 495 -  _last_gc_worker_start_times_ms(_max_gc_threads, "%.1lf", false),
 496 -  _last_ext_root_scan_times_ms(_max_gc_threads, "%.1lf"),
 497 -  _last_satb_filtering_times_ms(_max_gc_threads, "%.1lf"),
 498 -  _last_update_rs_times_ms(_max_gc_threads, "%.1lf"),
 499 -  _last_update_rs_processed_buffers(_max_gc_threads, "%d"),
 500 -  _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"),
 501 -  _last_strong_code_root_scan_times_ms(_max_gc_threads, "%.1lf"),
 502 -  _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"),
 503 -  _last_termination_times_ms(_max_gc_threads, "%.1lf"),
 504 -  _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
 505 -  _last_gc_worker_end_times_ms(_max_gc_threads, "%.1lf", false),
 506 -  _last_gc_worker_times_ms(_max_gc_threads, "%.1lf"),
 507 -  _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf"),
 508 -  _last_redirty_logged_cards_time_ms(_max_gc_threads, "%.1lf"),
 509 -  _last_redirty_logged_cards_processed_cards(_max_gc_threads, SIZE_FORMAT),
 510 -  _cur_string_dedup_queue_fixup_worker_times_ms(_max_gc_threads, "%.1lf"),
 511 -  _cur_string_dedup_table_fixup_worker_times_ms(_max_gc_threads, "%.1lf")
 512 +  _max_gc_threads(max_gc_threads)
 513  {
 514    assert(max_gc_threads > 0, "Must have some GC threads");
 515 +
 516 +  _gc_par_phases[GCWorkerStart] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Start (ms)", false, G1Log::LevelFiner, 2);
 517 +  _gc_par_phases[ExtRootScan] = new WorkerDataArray<double>(max_gc_threads, "Ext Root Scanning (ms)", true, G1Log::LevelFiner, 2);
 518 +  _gc_par_phases[SATBFiltering] = new WorkerDataArray<double>(max_gc_threads, "SATB Filtering (ms)", true, G1Log::LevelFiner, 2);
 519 +  _gc_par_phases[UpdateRS] = new WorkerDataArray<double>(max_gc_threads, "Update RS (ms)", true, G1Log::LevelFiner, 2);
 520 +  _gc_par_phases[ScanRS] = new WorkerDataArray<double>(max_gc_threads, "Scan RS (ms)", true, G1Log::LevelFiner, 2);
 521 +  _gc_par_phases[CodeRoots] = new WorkerDataArray<double>(max_gc_threads, "Code Root Scanning (ms)", true, G1Log::LevelFiner, 2);
 522 +  _gc_par_phases[ObjCopy] = new WorkerDataArray<double>(max_gc_threads, "Object Copy (ms)", true, G1Log::LevelFiner, 2);
 523 +  _gc_par_phases[Termination] = new WorkerDataArray<double>(max_gc_threads, "Termination (ms)", true, G1Log::LevelFiner, 2);
 524 +  _gc_par_phases[GCWorkerTotal] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Total (ms)", true, G1Log::LevelFiner, 2);
 525 +  _gc_par_phases[GCWorkerEnd] = new WorkerDataArray<double>(max_gc_threads, "GC Worker End (ms)", false, G1Log::LevelFiner, 2);
 526 +  _gc_par_phases[Other] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Other (ms)", true, G1Log::LevelFiner, 2);
 527 +
 528 +  _update_rs_processed_buffers = new WorkerDataArray<size_t>(max_gc_threads, "Processed Buffers", true, G1Log::LevelFiner, 3);
 529 +  _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers);
 530 +
 531 +  _termination_attempts = new WorkerDataArray<size_t>(max_gc_threads, "Termination Attempts", true, G1Log::LevelFinest, 3);
 532 +  _gc_par_phases[Termination]->link_thread_work_items(_termination_attempts);
 533 +
 534 +  _gc_par_phases[StringDedupQueueFixup] = new WorkerDataArray<double>(max_gc_threads, "Queue Fixup (ms)", true, G1Log::LevelFiner, 2);
 535 +  _gc_par_phases[StringDedupTableFixup] = new WorkerDataArray<double>(max_gc_threads, "Table Fixup (ms)", true, G1Log::LevelFiner, 2);
 536 +
 537 +  _gc_par_phases[RedirtyCards] = new WorkerDataArray<double>(max_gc_threads, "Parallel Redirty", true, G1Log::LevelFinest, 3);
 538 +  _redirtied_cards = new WorkerDataArray<size_t>(max_gc_threads, "Redirtied Cards", true, G1Log::LevelFinest, 3);
 539 +  _gc_par_phases[RedirtyCards]->link_thread_work_items(_redirtied_cards);
 540  }
 541  
 542 -void G1GCPhaseTimes::note_gc_start(uint active_gc_threads) {
 543 +void G1GCPhaseTimes::note_gc_start(uint active_gc_threads, bool mark_in_progress) {
 544    assert(active_gc_threads > 0, "The number of threads must be > 0");
 545 -  assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max nubmer of threads");
 546 +  assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max number of threads");
 547    _active_gc_threads = active_gc_threads;
 548  
 549 -  _last_gc_worker_start_times_ms.reset();
 550 -  _last_ext_root_scan_times_ms.reset();
 551 -  _last_satb_filtering_times_ms.reset();
 552 -  _last_update_rs_times_ms.reset();
 553 -  _last_update_rs_processed_buffers.reset();
 554 -  _last_scan_rs_times_ms.reset();
 555 -  _last_strong_code_root_scan_times_ms.reset();
 556 -  _last_obj_copy_times_ms.reset();
 557 -  _last_termination_times_ms.reset();
 558 -  _last_termination_attempts.reset();
 559 -  _last_gc_worker_end_times_ms.reset();
 560 -  _last_gc_worker_times_ms.reset();
 561 -  _last_gc_worker_other_times_ms.reset();
 562 +  for (int i = 0; i < GCParPhasesSentinel; i++) {
 563 +    _gc_par_phases[i]->reset();
 564 +  }
 565  
 566 -  _last_redirty_logged_cards_time_ms.reset();
 567 -  _last_redirty_logged_cards_processed_cards.reset();
 568 +  _gc_par_phases[SATBFiltering]->set_enabled(mark_in_progress);
 569  
 570 +  _gc_par_phases[StringDedupQueueFixup]->set_enabled(G1StringDedup::is_enabled());
 571 +  _gc_par_phases[StringDedupTableFixup]->set_enabled(G1StringDedup::is_enabled());
 572  }
 573  
 574  void G1GCPhaseTimes::note_gc_end() {
 575 -  _last_gc_worker_start_times_ms.verify();
 576 -  _last_ext_root_scan_times_ms.verify();
 577 -  _last_satb_filtering_times_ms.verify();
 578 -  _last_update_rs_times_ms.verify();
 579 -  _last_update_rs_processed_buffers.verify();
 580 -  _last_scan_rs_times_ms.verify();
 581 -  _last_strong_code_root_scan_times_ms.verify();
 582 -  _last_obj_copy_times_ms.verify();
 583 -  _last_termination_times_ms.verify();
 584 -  _last_termination_attempts.verify();
 585 -  _last_gc_worker_end_times_ms.verify();
 586 +  for (uint i = 0; i < _active_gc_threads; i++) {
 587 +    double worker_time = _gc_par_phases[GCWorkerEnd]->get(i) - _gc_par_phases[GCWorkerStart]->get(i);
 588 +    record_time_secs(GCWorkerTotal, i , worker_time);
 589  
 590 -  for (uint i = 0; i < _active_gc_threads; i++) {
 591 -    double worker_time = _last_gc_worker_end_times_ms.get(i) - _last_gc_worker_start_times_ms.get(i);
 592 -    _last_gc_worker_times_ms.set(i, worker_time);
 593 +    double worker_known_time =
 594 +        _gc_par_phases[ExtRootScan]->get(i) +
 595 +        _gc_par_phases[SATBFiltering]->get(i) +
 596 +        _gc_par_phases[UpdateRS]->get(i) +
 597 +        _gc_par_phases[ScanRS]->get(i) +
 598 +        _gc_par_phases[CodeRoots]->get(i) +
 599 +        _gc_par_phases[ObjCopy]->get(i) +
 600 +        _gc_par_phases[Termination]->get(i);
 601  
 602 -    double worker_known_time = _last_ext_root_scan_times_ms.get(i) +
 603 -                               _last_satb_filtering_times_ms.get(i) +
 604 -                               _last_update_rs_times_ms.get(i) +
 605 -                               _last_scan_rs_times_ms.get(i) +
 606 -                               _last_strong_code_root_scan_times_ms.get(i) +
 607 -                               _last_obj_copy_times_ms.get(i) +
 608 -                               _last_termination_times_ms.get(i);
 609 -
 610 -    double worker_other_time = worker_time - worker_known_time;
 611 -    _last_gc_worker_other_times_ms.set(i, worker_other_time);
 612 +    record_time_secs(Other, i, worker_time - worker_known_time);
 613    }
 614  
 615 -  _last_gc_worker_times_ms.verify();
 616 -  _last_gc_worker_other_times_ms.verify();
 617 -
 618 -  _last_redirty_logged_cards_time_ms.verify();
 619 -  _last_redirty_logged_cards_processed_cards.verify();
 620 -}
 621 -
 622 -void G1GCPhaseTimes::note_string_dedup_fixup_start() {
 623 -  _cur_string_dedup_queue_fixup_worker_times_ms.reset();
 624 -  _cur_string_dedup_table_fixup_worker_times_ms.reset();
 625 -}
 626 -
 627 -void G1GCPhaseTimes::note_string_dedup_fixup_end() {
 628 -  _cur_string_dedup_queue_fixup_worker_times_ms.verify();
 629 -  _cur_string_dedup_table_fixup_worker_times_ms.verify();
 630 +  for (int i = 0; i < GCParPhasesSentinel; i++) {
 631 +    _gc_par_phases[i]->verify();
 632 +  }
 633  }
 634  
 635  void G1GCPhaseTimes::print_stats(int level, const char* str, double value) {
 636 @@ -288,35 +347,172 @@
 637      return misc_time_ms;
 638  }
 639  
 640 +// record the time a phase took in seconds
 641 +void G1GCPhaseTimes::record_time_secs(GCParPhases phase, uint worker_i, double secs) {
 642 +  _gc_par_phases[phase]->set(worker_i, secs);
 643 +}
 644 +
 645 +// add a number of seconds to a phase
 646 +void G1GCPhaseTimes::add_time_secs(GCParPhases phase, uint worker_i, double secs) {
 647 +  _gc_par_phases[phase]->add(worker_i, secs);
 648 +}
 649 +
 650 +void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count) {
 651 +  _gc_par_phases[phase]->set_thread_work_item(worker_i, count);
 652 +}
 653 +
 654 +// return the average time for a phase in milliseconds
 655 +double G1GCPhaseTimes::average_time_ms(GCParPhases phase) {
 656 +  return _gc_par_phases[phase]->average() * 1000.0;
 657 +}
 658 +
 659 +double G1GCPhaseTimes::get_time_ms(GCParPhases phase, uint worker_i) {
 660 +  return _gc_par_phases[phase]->get(worker_i) * 1000.0;
 661 +}
 662 +
 663 +double G1GCPhaseTimes::sum_time_ms(GCParPhases phase) {
 664 +  return _gc_par_phases[phase]->sum() * 1000.0;
 665 +}
 666 +
 667 +double G1GCPhaseTimes::min_time_ms(GCParPhases phase) {
 668 +  return _gc_par_phases[phase]->minimum() * 1000.0;
 669 +}
 670 +
 671 +double G1GCPhaseTimes::max_time_ms(GCParPhases phase) {
 672 +  return _gc_par_phases[phase]->maximum() * 1000.0;
 673 +}
 674 +
 675 +size_t G1GCPhaseTimes::get_thread_work_item(GCParPhases phase, uint worker_i) {
 676 +  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
 677 +  return _gc_par_phases[phase]->thread_work_items()->get(worker_i);
 678 +}
 679 +
 680 +size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase) {
 681 +  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
 682 +  return _gc_par_phases[phase]->thread_work_items()->sum();
 683 +}
 684 +
 685 +double G1GCPhaseTimes::average_thread_work_items(GCParPhases phase) {
 686 +  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
 687 +  return _gc_par_phases[phase]->thread_work_items()->average();
 688 +}
 689 +
 690 +size_t G1GCPhaseTimes::min_thread_work_items(GCParPhases phase) {
 691 +  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
 692 +  return _gc_par_phases[phase]->thread_work_items()->minimum();
 693 +}
 694 +
 695 +size_t G1GCPhaseTimes::max_thread_work_items(GCParPhases phase) {
 696 +  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
 697 +  return _gc_par_phases[phase]->thread_work_items()->maximum();
 698 +}
 699 +
 700 +class G1GCParPhasePrinter : public StackObj {
 701 +  G1GCPhaseTimes* _phase_times;
 702 + public:
 703 +  G1GCParPhasePrinter(G1GCPhaseTimes* phase_times) : _phase_times(phase_times) {}
 704 +
 705 +  void print(G1GCPhaseTimes::GCParPhases phase_id) {
 706 +    WorkerDataArray<double>* phase = _phase_times->_gc_par_phases[phase_id];
 707 +
 708 +    if (phase->_log_level > G1Log::level() || !phase->_enabled) {
 709 +      return;
 710 +    }
 711 +
 712 +    if (phase->_length == 1) {
 713 +      print_single_length(phase_id, phase);
 714 +    } else {
 715 +      print_multi_length(phase_id, phase);
 716 +    }
 717 +  }
 718 +
 719 + private:
 720 +
 721 +  void print_single_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
 722 +    // No need for min, max, average and sum for only one worker
 723 +    LineBuffer buf(phase->_indent_level);
 724 +    buf.append_and_print_cr("[%s:  %.1lf]", phase->_title, _phase_times->get_time_ms(phase_id, 0));
 725 +
 726 +    if (phase->_thread_work_items != NULL) {
 727 +      LineBuffer buf2(phase->_thread_work_items->_indent_level);
 728 +      buf2.append_and_print_cr("[%s:  " SIZE_FORMAT "]", phase->_thread_work_items->_title, _phase_times->sum_thread_work_items(phase_id));
 729 +    }
 730 +  }
 731 +
 732 +  void print_time_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
 733 +    for (uint i = 0; i < phase->_length; ++i) {
 734 +      buf.append("  %.1lf", _phase_times->get_time_ms(phase_id, i));
 735 +    }
 736 +    buf.print_cr();
 737 +  }
 738 +
 739 +  void print_count_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) {
 740 +    for (uint i = 0; i < thread_work_items->_length; ++i) {
 741 +      buf.append("  " SIZE_FORMAT, _phase_times->get_thread_work_item(phase_id, i));
 742 +    }
 743 +    buf.print_cr();
 744 +  }
 745 +
 746 +  void print_thread_work_items(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) {
 747 +    LineBuffer buf(thread_work_items->_indent_level);
 748 +    buf.append("[%s:", thread_work_items->_title);
 749 +
 750 +    if (G1Log::finest()) {
 751 +      print_count_values(buf, phase_id, thread_work_items);
 752 +    }
 753 +
 754 +    assert(thread_work_items->_print_sum, err_msg("%s does not have print sum true even though it is a count", thread_work_items->_title));
 755 +
 756 +    buf.append_and_print_cr(" Min: " SIZE_FORMAT ", Avg: %.1lf, Max: " SIZE_FORMAT ", Diff: " SIZE_FORMAT ", Sum: " SIZE_FORMAT "]",
 757 +        _phase_times->min_thread_work_items(phase_id), _phase_times->average_thread_work_items(phase_id), _phase_times->max_thread_work_items(phase_id),
 758 +        _phase_times->max_thread_work_items(phase_id) - _phase_times->min_thread_work_items(phase_id), _phase_times->sum_thread_work_items(phase_id));
 759 +  }
 760 +
 761 +  void print_multi_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) {
 762 +    LineBuffer buf(phase->_indent_level);
 763 +    buf.append("[%s:", phase->_title);
 764 +
 765 +    if (G1Log::finest()) {
 766 +      print_time_values(buf, phase_id, phase);
 767 +    }
 768 +
 769 +    buf.append(" Min: %.1lf, Avg: %.1lf, Max: %.1lf, Diff: %.1lf",
 770 +        _phase_times->min_time_ms(phase_id), _phase_times->average_time_ms(phase_id), _phase_times->max_time_ms(phase_id),
 771 +        _phase_times->max_time_ms(phase_id) - _phase_times->min_time_ms(phase_id));
 772 +
 773 +    if (phase->_print_sum) {
 774 +      // for things like the start and end times the sum is not
 775 +      // that relevant
 776 +      buf.append(", Sum: %.1lf", _phase_times->sum_time_ms(phase_id));
 777 +    }
 778 +
 779 +    buf.append_and_print_cr("]");
 780 +
 781 +    if (phase->_thread_work_items != NULL) {
 782 +      print_thread_work_items(phase_id, phase->_thread_work_items);
 783 +    }
 784 +  }
 785 +};
 786 +
 787  void G1GCPhaseTimes::print(double pause_time_sec) {
 788 +  G1GCParPhasePrinter par_phase_printer(this);
 789 +
 790    if (_root_region_scan_wait_time_ms > 0.0) {
 791      print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
 792    }
 793 +
 794    print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
 795 -  _last_gc_worker_start_times_ms.print(2, "GC Worker Start (ms)");
 796 -  _last_ext_root_scan_times_ms.print(2, "Ext Root Scanning (ms)");
 797 -  if (_last_satb_filtering_times_ms.sum() > 0.0) {
 798 -    _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
 799 +  for (int i = 0; i <= GCMainParPhasesLast; i++) {
 800 +    par_phase_printer.print((GCParPhases) i);
 801    }
 802 -  _last_update_rs_times_ms.print(2, "Update RS (ms)");
 803 -    _last_update_rs_processed_buffers.print(3, "Processed Buffers");
 804 -  _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
 805 -  _last_strong_code_root_scan_times_ms.print(2, "Code Root Scanning (ms)");
 806 -  _last_obj_copy_times_ms.print(2, "Object Copy (ms)");
 807 -  _last_termination_times_ms.print(2, "Termination (ms)");
 808 -  if (G1Log::finest()) {
 809 -    _last_termination_attempts.print(3, "Termination Attempts");
 810 -  }
 811 -  _last_gc_worker_other_times_ms.print(2, "GC Worker Other (ms)");
 812 -  _last_gc_worker_times_ms.print(2, "GC Worker Total (ms)");
 813 -  _last_gc_worker_end_times_ms.print(2, "GC Worker End (ms)");
 814  
 815    print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
 816    print_stats(1, "Code Root Purge", _cur_strong_code_root_purge_time_ms);
 817    if (G1StringDedup::is_enabled()) {
 818      print_stats(1, "String Dedup Fixup", _cur_string_dedup_fixup_time_ms, _active_gc_threads);
 819 -    _cur_string_dedup_queue_fixup_worker_times_ms.print(2, "Queue Fixup (ms)");
 820 -    _cur_string_dedup_table_fixup_worker_times_ms.print(2, "Table Fixup (ms)");
 821 +    for (int i = StringDedupPhasesFirst; i <= StringDedupPhasesLast; i++) {
 822 +      par_phase_printer.print((GCParPhases) i);
 823 +    }
 824    }
 825    print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
 826    double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms();
 827 @@ -340,10 +536,7 @@
 828    print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
 829    print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
 830    print_stats(2, "Redirty Cards", _recorded_redirty_logged_cards_time_ms);
 831 -  if (G1Log::finest()) {
 832 -    _last_redirty_logged_cards_time_ms.print(3, "Parallel Redirty");
 833 -    _last_redirty_logged_cards_processed_cards.print(3, "Redirtied Cards");
 834 -  }
 835 +  par_phase_printer.print(RedirtyCards);
 836    if (G1EagerReclaimHumongousObjects) {
 837      print_stats(2, "Humongous Register", _cur_fast_reclaim_humongous_register_time_ms);
 838      if (G1Log::finest()) {
 839 @@ -366,3 +559,17 @@
 840      print_stats(2, "Verify After", _cur_verify_after_time_ms);
 841    }
 842  }
 843 +
 844 +G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id) :
 845 +    _phase_times(phase_times), _phase(phase), _worker_id(worker_id) {
 846 +  if (_phase_times != NULL) {
 847 +    _start_time = os::elapsedTime();
 848 +  }
 849 +}
 850 +
 851 +G1GCParPhaseTimesTracker::~G1GCParPhaseTimesTracker() {
 852 +  if (_phase_times != NULL) {
 853 +    _phase_times->record_time_secs(_phase, _worker_id, os::elapsedTime() - _start_time);
 854 +  }
 855 +}
 856 +
 857 diff --git a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
 858 --- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
 859 +++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp
 860 @@ -26,106 +26,46 @@
 861  #define SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
 862  
 863  #include "memory/allocation.hpp"
 864 -#include "gc_interface/gcCause.hpp"
 865  
 866 -template <class T>
 867 -class WorkerDataArray  : public CHeapObj<mtGC> {
 868 -  T*          _data;
 869 -  uint        _length;
 870 -  const char* _print_format;
 871 -  bool        _print_sum;
 872 +class LineBuffer;
 873  
 874 -  NOT_PRODUCT(static const T _uninitialized;)
 875 -
 876 -  // We are caching the sum and average to only have to calculate them once.
 877 -  // This is not done in an MT-safe way. It is intended to allow single
 878 -  // threaded code to call sum() and average() multiple times in any order
 879 -  // without having to worry about the cost.
 880 -  bool   _has_new_data;
 881 -  T      _sum;
 882 -  double _average;
 883 -
 884 - public:
 885 -  WorkerDataArray(uint length, const char* print_format, bool print_sum = true) :
 886 -  _length(length), _print_format(print_format), _print_sum(print_sum), _has_new_data(true) {
 887 -    assert(length > 0, "Must have some workers to store data for");
 888 -    _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
 889 -  }
 890 -
 891 -  ~WorkerDataArray() {
 892 -    FREE_C_HEAP_ARRAY(T, _data);
 893 -  }
 894 -
 895 -  void set(uint worker_i, T value) {
 896 -    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
 897 -    assert(_data[worker_i] == (T)-1, err_msg("Overwriting data for worker %d", worker_i));
 898 -    _data[worker_i] = value;
 899 -    _has_new_data = true;
 900 -  }
 901 -
 902 -  T get(uint worker_i) {
 903 -    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
 904 -    assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i));
 905 -    return _data[worker_i];
 906 -  }
 907 -
 908 -  void add(uint worker_i, T value) {
 909 -    assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length));
 910 -    assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i));
 911 -    _data[worker_i] += value;
 912 -    _has_new_data = true;
 913 -  }
 914 -
 915 -  double average(){
 916 -    if (_has_new_data) {
 917 -      calculate_totals();
 918 -    }
 919 -    return _average;
 920 -  }
 921 -
 922 -  T sum() {
 923 -    if (_has_new_data) {
 924 -      calculate_totals();
 925 -    }
 926 -    return _sum;
 927 -  }
 928 -
 929 -  void print(int level, const char* title);
 930 -
 931 -  void reset() PRODUCT_RETURN;
 932 -  void verify() PRODUCT_RETURN;
 933 -
 934 - private:
 935 -
 936 -  void calculate_totals(){
 937 -    _sum = (T)0;
 938 -    for (uint i = 0; i < _length; ++i) {
 939 -      _sum += _data[i];
 940 -    }
 941 -    _average = (double)_sum / (double)_length;
 942 -    _has_new_data = false;
 943 -  }
 944 -};
 945 +template <class T> class WorkerDataArray;
 946  
 947  class G1GCPhaseTimes : public CHeapObj<mtGC> {
 948 +  friend class G1GCParPhasePrinter;
 949  
 950 - private:
 951    uint _active_gc_threads;
 952    uint _max_gc_threads;
 953  
 954 -  WorkerDataArray<double> _last_gc_worker_start_times_ms;
 955 -  WorkerDataArray<double> _last_ext_root_scan_times_ms;
 956 -  WorkerDataArray<double> _last_satb_filtering_times_ms;
 957 -  WorkerDataArray<double> _last_update_rs_times_ms;
 958 -  WorkerDataArray<int>    _last_update_rs_processed_buffers;
 959 -  WorkerDataArray<double> _last_scan_rs_times_ms;
 960 -  WorkerDataArray<double> _last_strong_code_root_scan_times_ms;
 961 -  WorkerDataArray<double> _last_obj_copy_times_ms;
 962 -  WorkerDataArray<double> _last_termination_times_ms;
 963 -  WorkerDataArray<size_t> _last_termination_attempts;
 964 -  WorkerDataArray<double> _last_gc_worker_end_times_ms;
 965 -  WorkerDataArray<double> _last_gc_worker_times_ms;
 966 -  WorkerDataArray<double> _last_gc_worker_other_times_ms;
 967 + public:
 968 +  enum GCParPhases {
 969 +    GCWorkerStart,
 970 +    ExtRootScan,
 971 +    SATBFiltering,
 972 +    UpdateRS,
 973 +    ScanRS,
 974 +    CodeRoots,
 975 +    ObjCopy,
 976 +    Termination,
 977 +    Other,
 978 +    GCWorkerTotal,
 979 +    GCWorkerEnd,
 980 +    StringDedupQueueFixup,
 981 +    StringDedupTableFixup,
 982 +    RedirtyCards,
 983 +    GCParPhasesSentinel
 984 +  };
 985 +
 986 + private:
 987 +  // Markers for grouping the phases in the GCPhases enum above
 988 +  static const int GCMainParPhasesLast = GCWorkerEnd;
 989 +  static const int StringDedupPhasesFirst = StringDedupQueueFixup;
 990 +  static const int StringDedupPhasesLast = StringDedupTableFixup;
 991 +
 992 +  WorkerDataArray<double>* _gc_par_phases[GCParPhasesSentinel];
 993 +  WorkerDataArray<size_t>* _update_rs_processed_buffers;
 994 +  WorkerDataArray<size_t>* _termination_attempts;
 995 +  WorkerDataArray<size_t>* _redirtied_cards;
 996  
 997    double _cur_collection_par_time_ms;
 998    double _cur_collection_code_root_fixup_time_ms;
 999 @@ -135,9 +75,7 @@
1000    double _cur_evac_fail_restore_remsets;
1001    double _cur_evac_fail_remove_self_forwards;
1002  
1003 -  double                  _cur_string_dedup_fixup_time_ms;
1004 -  WorkerDataArray<double> _cur_string_dedup_queue_fixup_worker_times_ms;
1005 -  WorkerDataArray<double> _cur_string_dedup_table_fixup_worker_times_ms;
1006 +  double _cur_string_dedup_fixup_time_ms;
1007  
1008    double _cur_clear_ct_time_ms;
1009    double _cur_ref_proc_time_ms;
1010 @@ -149,8 +87,6 @@
1011    double _recorded_young_cset_choice_time_ms;
1012    double _recorded_non_young_cset_choice_time_ms;
1013  
1014 -  WorkerDataArray<double> _last_redirty_logged_cards_time_ms;
1015 -  WorkerDataArray<size_t> _last_redirty_logged_cards_processed_cards;
1016    double _recorded_redirty_logged_cards_time_ms;
1017  
1018    double _recorded_young_free_cset_time_ms;
1019 @@ -172,54 +108,34 @@
1020  
1021   public:
1022    G1GCPhaseTimes(uint max_gc_threads);
1023 -  void note_gc_start(uint active_gc_threads);
1024 +  void note_gc_start(uint active_gc_threads, bool mark_in_progress);
1025    void note_gc_end();
1026    void print(double pause_time_sec);
1027  
1028 -  void record_gc_worker_start_time(uint worker_i, double ms) {
1029 -    _last_gc_worker_start_times_ms.set(worker_i, ms);
1030 -  }
1031 +  // record the time a phase took in seconds
1032 +  void record_time_secs(GCParPhases phase, uint worker_i, double secs);
1033  
1034 -  void record_ext_root_scan_time(uint worker_i, double ms) {
1035 -    _last_ext_root_scan_times_ms.set(worker_i, ms);
1036 -  }
1037 +  // add a number of seconds to a phase
1038 +  void add_time_secs(GCParPhases phase, uint worker_i, double secs);
1039  
1040 -  void record_satb_filtering_time(uint worker_i, double ms) {
1041 -    _last_satb_filtering_times_ms.set(worker_i, ms);
1042 -  }
1043 +  void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count);
1044  
1045 -  void record_update_rs_time(uint worker_i, double ms) {
1046 -    _last_update_rs_times_ms.set(worker_i, ms);
1047 -  }
1048 +  // return the average time for a phase in milliseconds
1049 +  double average_time_ms(GCParPhases phase);
1050  
1051 -  void record_update_rs_processed_buffers(uint worker_i, int processed_buffers) {
1052 -    _last_update_rs_processed_buffers.set(worker_i, processed_buffers);
1053 -  }
1054 +  size_t sum_thread_work_items(GCParPhases phase);
1055  
1056 -  void record_scan_rs_time(uint worker_i, double ms) {
1057 -    _last_scan_rs_times_ms.set(worker_i, ms);
1058 -  }
1059 + private:
1060 +  double get_time_ms(GCParPhases phase, uint worker_i);
1061 +  double sum_time_ms(GCParPhases phase);
1062 +  double min_time_ms(GCParPhases phase);
1063 +  double max_time_ms(GCParPhases phase);
1064 +  size_t get_thread_work_item(GCParPhases phase, uint worker_i);
1065 +  double average_thread_work_items(GCParPhases phase);
1066 +  size_t min_thread_work_items(GCParPhases phase);
1067 +  size_t max_thread_work_items(GCParPhases phase);
1068  
1069 -  void record_strong_code_root_scan_time(uint worker_i, double ms) {
1070 -    _last_strong_code_root_scan_times_ms.set(worker_i, ms);
1071 -  }
1072 -
1073 -  void record_obj_copy_time(uint worker_i, double ms) {
1074 -    _last_obj_copy_times_ms.set(worker_i, ms);
1075 -  }
1076 -
1077 -  void add_obj_copy_time(uint worker_i, double ms) {
1078 -    _last_obj_copy_times_ms.add(worker_i, ms);
1079 -  }
1080 -
1081 -  void record_termination(uint worker_i, double ms, size_t attempts) {
1082 -    _last_termination_times_ms.set(worker_i, ms);
1083 -    _last_termination_attempts.set(worker_i, attempts);
1084 -  }
1085 -
1086 -  void record_gc_worker_end_time(uint worker_i, double ms) {
1087 -    _last_gc_worker_end_times_ms.set(worker_i, ms);
1088 -  }
1089 + public:
1090  
1091    void record_clear_ct_time(double ms) {
1092      _cur_clear_ct_time_ms = ms;
1093 @@ -249,21 +165,10 @@
1094      _cur_evac_fail_remove_self_forwards = ms;
1095    }
1096  
1097 -  void note_string_dedup_fixup_start();
1098 -  void note_string_dedup_fixup_end();
1099 -
1100    void record_string_dedup_fixup_time(double ms) {
1101      _cur_string_dedup_fixup_time_ms = ms;
1102    }
1103  
1104 -  void record_string_dedup_queue_fixup_worker_time(uint worker_id, double ms) {
1105 -    _cur_string_dedup_queue_fixup_worker_times_ms.set(worker_id, ms);
1106 -  }
1107 -
1108 -  void record_string_dedup_table_fixup_worker_time(uint worker_id, double ms) {
1109 -    _cur_string_dedup_table_fixup_worker_times_ms.set(worker_id, ms);
1110 -  }
1111 -
1112    void record_ref_proc_time(double ms) {
1113      _cur_ref_proc_time_ms = ms;
1114    }
1115 @@ -303,14 +208,6 @@
1116      _recorded_non_young_cset_choice_time_ms = time_ms;
1117    }
1118  
1119 -  void record_redirty_logged_cards_time_ms(uint worker_i, double time_ms) {
1120 -    _last_redirty_logged_cards_time_ms.set(worker_i, time_ms);
1121 -  }
1122 -
1123 -  void record_redirty_logged_cards_processed_cards(uint worker_i, size_t processed_buffers) {
1124 -    _last_redirty_logged_cards_processed_cards.set(worker_i, processed_buffers);
1125 -  }
1126 -
1127    void record_redirty_logged_cards_time_ms(double time_ms) {
1128      _recorded_redirty_logged_cards_time_ms = time_ms;
1129    }
1130 @@ -364,38 +261,16 @@
1131    double fast_reclaim_humongous_time_ms() {
1132      return _cur_fast_reclaim_humongous_time_ms;
1133    }
1134 +};
1135  
1136 -  double average_last_update_rs_time() {
1137 -    return _last_update_rs_times_ms.average();
1138 -  }
1139 -
1140 -  int sum_last_update_rs_processed_buffers() {
1141 -    return _last_update_rs_processed_buffers.sum();
1142 -  }
1143 -
1144 -  double average_last_scan_rs_time(){
1145 -    return _last_scan_rs_times_ms.average();
1146 -  }
1147 -
1148 -  double average_last_strong_code_root_scan_time(){
1149 -    return _last_strong_code_root_scan_times_ms.average();
1150 -  }
1151 -
1152 -  double average_last_obj_copy_time() {
1153 -    return _last_obj_copy_times_ms.average();
1154 -  }
1155 -
1156 -  double average_last_termination_time() {
1157 -    return _last_termination_times_ms.average();
1158 -  }
1159 -
1160 -  double average_last_ext_root_scan_time() {
1161 -    return _last_ext_root_scan_times_ms.average();
1162 -  }
1163 -
1164 -  double average_last_satb_filtering_times_ms() {
1165 -    return _last_satb_filtering_times_ms.average();
1166 -  }
1167 +class G1GCParPhaseTimesTracker : public StackObj {
1168 +  double _start_time;
1169 +  G1GCPhaseTimes::GCParPhases _phase;
1170 +  G1GCPhaseTimes* _phase_times;
1171 +  uint _worker_id;
1172 +public:
1173 +  G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id);
1174 +  ~G1GCParPhaseTimesTracker();
1175  };
1176  
1177  #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP
1178 diff --git a/src/share/vm/gc_implementation/g1/g1Log.hpp b/src/share/vm/gc_implementation/g1/g1Log.hpp
1179 --- a/src/share/vm/gc_implementation/g1/g1Log.hpp
1180 +++ b/src/share/vm/gc_implementation/g1/g1Log.hpp
1181 @@ -28,6 +28,7 @@
1182  #include "memory/allocation.hpp"
1183  
1184  class G1Log : public AllStatic {
1185 + public:
1186    typedef enum {
1187      LevelNone,
1188      LevelFine,
1189 @@ -35,6 +36,7 @@
1190      LevelFinest
1191    } LogLevel;
1192  
1193 + private:
1194    static LogLevel _level;
1195  
1196   public:
1197 @@ -50,6 +52,10 @@
1198      return _level == LevelFinest;
1199    }
1200  
1201 +  static LogLevel level() {
1202 +    return _level;
1203 +  }
1204 +
1205    static void init();
1206  };
1207  
1208 diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/src/share/vm/gc_implementation/g1/g1RemSet.cpp
1209 --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp
1210 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp
1211 @@ -248,9 +248,8 @@
1212    assert(_cards_scanned != NULL, "invariant");
1213    _cards_scanned[worker_i] = scanRScl.cards_done();
1214  
1215 -  _g1p->phase_times()->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0);
1216 -  _g1p->phase_times()->record_strong_code_root_scan_time(worker_i,
1217 -                                                         scanRScl.strong_code_root_scan_time_sec() * 1000.0);
1218 +  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
1219 +  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, scanRScl.strong_code_root_scan_time_sec());
1220  }
1221  
1222  // Closure used for updating RSets and recording references that
1223 @@ -287,13 +286,11 @@
1224  };
1225  
1226  void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i) {
1227 -  double start = os::elapsedTime();
1228 +  G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i);
1229    // Apply the given closure to all remaining log entries.
1230    RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq);
1231  
1232    _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i);
1233 -
1234 -  _g1p->phase_times()->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
1235  }
1236  
1237  void G1RemSet::cleanupHRRS() {
1238 diff --git a/src/share/vm/gc_implementation/g1/g1StringDedup.cpp b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp
1239 --- a/src/share/vm/gc_implementation/g1/g1StringDedup.cpp
1240 +++ b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp
1241 @@ -106,7 +106,7 @@
1242  
1243  void G1StringDedup::oops_do(OopClosure* keep_alive) {
1244    assert(is_enabled(), "String deduplication not enabled");
1245 -  unlink_or_oops_do(NULL, keep_alive);
1246 +  unlink_or_oops_do(NULL, keep_alive, true /* allow_resize_and_rehash */);
1247  }
1248  
1249  void G1StringDedup::unlink(BoolObjectClosure* is_alive) {
1250 @@ -123,45 +123,39 @@
1251  class G1StringDedupUnlinkOrOopsDoTask : public AbstractGangTask {
1252  private:
1253    G1StringDedupUnlinkOrOopsDoClosure _cl;
1254 +  G1GCPhaseTimes* _phase_times;
1255  
1256  public:
1257    G1StringDedupUnlinkOrOopsDoTask(BoolObjectClosure* is_alive,
1258                                    OopClosure* keep_alive,
1259 -                                  bool allow_resize_and_rehash) :
1260 +                                  bool allow_resize_and_rehash,
1261 +                                  G1GCPhaseTimes* phase_times) :
1262      AbstractGangTask("G1StringDedupUnlinkOrOopsDoTask"),
1263 -    _cl(is_alive, keep_alive, allow_resize_and_rehash) {
1264 -  }
1265 +    _cl(is_alive, keep_alive, allow_resize_and_rehash), _phase_times(phase_times) { }
1266  
1267    virtual void work(uint worker_id) {
1268 -    double queue_fixup_start = os::elapsedTime();
1269 -    G1StringDedupQueue::unlink_or_oops_do(&_cl);
1270 -
1271 -    double table_fixup_start = os::elapsedTime();
1272 -    G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id);
1273 -
1274 -    double queue_fixup_time_ms = (table_fixup_start - queue_fixup_start) * 1000.0;
1275 -    double table_fixup_time_ms = (os::elapsedTime() - table_fixup_start) * 1000.0;
1276 -    G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1277 -    g1p->phase_times()->record_string_dedup_queue_fixup_worker_time(worker_id, queue_fixup_time_ms);
1278 -    g1p->phase_times()->record_string_dedup_table_fixup_worker_time(worker_id, table_fixup_time_ms);
1279 +    {
1280 +      G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupQueueFixup, worker_id);
1281 +      G1StringDedupQueue::unlink_or_oops_do(&_cl);
1282 +    }
1283 +    {
1284 +      G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupTableFixup, worker_id);
1285 +      G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id);
1286 +    }
1287    }
1288  };
1289  
1290 -void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, bool allow_resize_and_rehash) {
1291 +void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive,
1292 +                                      OopClosure* keep_alive,
1293 +                                      bool allow_resize_and_rehash,
1294 +                                      G1GCPhaseTimes* phase_times) {
1295    assert(is_enabled(), "String deduplication not enabled");
1296 -  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1297 -  g1p->phase_times()->note_string_dedup_fixup_start();
1298 -  double fixup_start = os::elapsedTime();
1299  
1300 -  G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash);
1301 +  G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash, phase_times);
1302    G1CollectedHeap* g1h = G1CollectedHeap::heap();
1303    g1h->set_par_threads();
1304    g1h->workers()->run_task(&task);
1305    g1h->set_par_threads(0);
1306 -
1307 -  double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0;
1308 -  g1p->phase_times()->record_string_dedup_fixup_time(fixup_time_ms);
1309 -  g1p->phase_times()->note_string_dedup_fixup_end();
1310  }
1311  
1312  void G1StringDedup::threads_do(ThreadClosure* tc) {
1313 diff --git a/src/share/vm/gc_implementation/g1/g1StringDedup.hpp b/src/share/vm/gc_implementation/g1/g1StringDedup.hpp
1314 --- a/src/share/vm/gc_implementation/g1/g1StringDedup.hpp
1315 +++ b/src/share/vm/gc_implementation/g1/g1StringDedup.hpp
1316 @@ -91,6 +91,7 @@
1317  class ThreadClosure;
1318  class outputStream;
1319  class G1StringDedupTable;
1320 +class G1GCPhaseTimes;
1321  
1322  //
1323  // Main interface for interacting with string deduplication.
1324 @@ -131,7 +132,7 @@
1325    static void oops_do(OopClosure* keep_alive);
1326    static void unlink(BoolObjectClosure* is_alive);
1327    static void unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive,
1328 -                                bool allow_resize_and_rehash = true);
1329 +                                bool allow_resize_and_rehash, G1GCPhaseTimes* phase_times = NULL);
1330  
1331    static void threads_do(ThreadClosure* tc);
1332    static void print_worker_threads_on(outputStream* st);