1 # HG changeset patch 2 # User brutisso 3 # Date 1426151480 -3600 4 # Thu Mar 12 10:11:20 2015 +0100 5 # Node ID 42c9c92d320aea9bdbf7df06207675ebf8d14009 6 # Parent af3b0db916597ad07785becaa95103fa00bb929f 7 8074037: Refactor the G1GCPhaseTime logging to make it easier to add new phases 8 Reviewed-by: tschatzl, mgerdin, ecaspole 9 10 diff --git a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp 11 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp 12 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp 13 @@ -2206,11 +2206,11 @@ 14 hot_card_cache->drain(worker_i, g1_rem_set(), into_cset_dcq); 15 16 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 17 - int n_completed_buffers = 0; 18 + size_t n_completed_buffers = 0; 19 while (dcqs.apply_closure_to_completed_buffer(cl, worker_i, 0, true)) { 20 n_completed_buffers++; 21 } 22 - g1_policy()->phase_times()->record_update_rs_processed_buffers(worker_i, n_completed_buffers); 23 + g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, n_completed_buffers); 24 dcqs.clear_n_completed_buffers(); 25 assert(!dcqs.completed_buffers_exist_dirty(), "Completed buffers exist!"); 26 } 27 @@ -3751,9 +3751,9 @@ 28 29 TraceCPUTime tcpu(G1Log::finer(), true, gclog_or_tty); 30 31 - int active_workers = workers()->active_workers(); 32 + uint active_workers = workers()->active_workers(); 33 double pause_start_sec = os::elapsedTime(); 34 - g1_policy()->phase_times()->note_gc_start(active_workers); 35 + g1_policy()->phase_times()->note_gc_start(active_workers, mark_in_progress()); 36 log_gc_header(); 37 38 TraceCollectorStats tcs(g1mm()->incremental_collection_counters()); 39 @@ -4486,8 +4486,7 @@ 40 void work(uint worker_id) { 41 if (worker_id >= _n_workers) return; // no work needed this round 42 43 - double start_time_ms = os::elapsedTime() * 1000.0; 44 - _g1h->g1_policy()->phase_times()->record_gc_worker_start_time(worker_id, start_time_ms); 45 + _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerStart, worker_id, os::elapsedTime()); 46 47 { 48 ResourceMark rm; 49 @@ -4567,10 +4566,11 @@ 50 double start = os::elapsedTime(); 51 G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator); 52 evac.do_void(); 53 - double elapsed_ms = (os::elapsedTime()-start)*1000.0; 54 - double term_ms = pss.term_time()*1000.0; 55 - _g1h->g1_policy()->phase_times()->add_obj_copy_time(worker_id, elapsed_ms-term_ms); 56 - _g1h->g1_policy()->phase_times()->record_termination(worker_id, term_ms, pss.term_attempts()); 57 + double elapsed_sec = os::elapsedTime() - start; 58 + double term_sec = pss.term_time(); 59 + _g1h->g1_policy()->phase_times()->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_id, elapsed_sec - term_sec); 60 + _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::Termination, worker_id, term_sec); 61 + _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, pss.term_attempts()); 62 } 63 _g1h->g1_policy()->record_thread_age_table(pss.age_table()); 64 _g1h->update_surviving_young_words(pss.surviving_young_words()+1); 65 @@ -4586,9 +4586,7 @@ 66 // destructors are executed here and are included as part of the 67 // "GC Worker Time". 68 } 69 - 70 - double end_time_ms = os::elapsedTime() * 1000.0; 71 - _g1h->g1_policy()->phase_times()->record_gc_worker_end_time(worker_id, end_time_ms); 72 + _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::GCWorkerEnd, worker_id, os::elapsedTime()); 73 } 74 }; 75 76 @@ -4650,27 +4648,20 @@ 77 double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds() 78 + buf_scan_non_heap_weak_roots.closure_app_seconds(); 79 80 - g1_policy()->phase_times()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0); 81 - 82 - double ext_root_time_ms = 83 - ((os::elapsedTime() - ext_roots_start) - obj_copy_time_sec) * 1000.0; 84 - 85 - g1_policy()->phase_times()->record_ext_root_scan_time(worker_i, ext_root_time_ms); 86 + g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::ObjCopy, worker_i, obj_copy_time_sec); 87 + 88 + double ext_root_time_sec = os::elapsedTime() - ext_roots_start - obj_copy_time_sec; 89 + g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::ExtRootScan, worker_i, ext_root_time_sec); 90 91 // During conc marking we have to filter the per-thread SATB buffers 92 // to make sure we remove any oops into the CSet (which will show up 93 // as implicitly live). 94 - double satb_filtering_ms = 0.0; 95 - if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers)) { 96 - if (mark_in_progress()) { 97 - double satb_filter_start = os::elapsedTime(); 98 - 99 + { 100 + G1GCParPhaseTimesTracker x(g1_policy()->phase_times(), G1GCPhaseTimes::SATBFiltering, worker_i); 101 + if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers) && mark_in_progress()) { 102 JavaThread::satb_mark_queue_set().filter_thread_buffers(); 103 - 104 - satb_filtering_ms = (os::elapsedTime() - satb_filter_start) * 1000.0; 105 } 106 } 107 - g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms); 108 109 // Now scan the complement of the collection set. 110 G1CodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots); 111 @@ -5073,14 +5064,13 @@ 112 G1RedirtyLoggedCardsTask(DirtyCardQueueSet* queue) : AbstractGangTask("Redirty Cards"), _queue(queue) { } 113 114 virtual void work(uint worker_id) { 115 - double start_time = os::elapsedTime(); 116 + G1GCPhaseTimes* phase_times = G1CollectedHeap::heap()->g1_policy()->phase_times(); 117 + G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::RedirtyCards, worker_id); 118 119 RedirtyLoggedCardTableEntryClosure cl; 120 _queue->par_apply_closure_to_all_completed_buffers(&cl); 121 122 - G1GCPhaseTimes* timer = G1CollectedHeap::heap()->g1_policy()->phase_times(); 123 - timer->record_redirty_logged_cards_time_ms(worker_id, (os::elapsedTime() - start_time) * 1000.0); 124 - timer->record_redirty_logged_cards_processed_cards(worker_id, cl.num_processed()); 125 + phase_times->record_thread_work_item(G1GCPhaseTimes::RedirtyCards, worker_id, cl.num_processed()); 126 } 127 }; 128 129 @@ -5658,12 +5648,14 @@ 130 // reported parallel time. 131 } 132 133 + G1GCPhaseTimes* phase_times = g1_policy()->phase_times(); 134 + 135 double par_time_ms = (end_par_time_sec - start_par_time_sec) * 1000.0; 136 - g1_policy()->phase_times()->record_par_time(par_time_ms); 137 + phase_times->record_par_time(par_time_ms); 138 139 double code_root_fixup_time_ms = 140 (os::elapsedTime() - end_par_time_sec) * 1000.0; 141 - g1_policy()->phase_times()->record_code_root_fixup_time(code_root_fixup_time_ms); 142 + phase_times->record_code_root_fixup_time(code_root_fixup_time_ms); 143 144 set_par_threads(0); 145 146 @@ -5675,9 +5667,14 @@ 147 process_discovered_references(n_workers); 148 149 if (G1StringDedup::is_enabled()) { 150 + double fixup_start = os::elapsedTime(); 151 + 152 G1STWIsAliveClosure is_alive(this); 153 G1KeepAliveClosure keep_alive(this); 154 - G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive); 155 + G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive, true, phase_times); 156 + 157 + double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0; 158 + phase_times->record_string_dedup_fixup_time(fixup_time_ms); 159 } 160 161 _allocator->release_gc_alloc_regions(n_workers, evacuation_info); 162 diff --git a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp 163 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp 164 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp 165 @@ -1073,7 +1073,7 @@ 166 if (update_stats) { 167 double cost_per_card_ms = 0.0; 168 if (_pending_cards > 0) { 169 - cost_per_card_ms = phase_times()->average_last_update_rs_time() / (double) _pending_cards; 170 + cost_per_card_ms = phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) / (double) _pending_cards; 171 _cost_per_card_ms_seq->add(cost_per_card_ms); 172 } 173 174 @@ -1081,7 +1081,7 @@ 175 176 double cost_per_entry_ms = 0.0; 177 if (cards_scanned > 10) { 178 - cost_per_entry_ms = phase_times()->average_last_scan_rs_time() / (double) cards_scanned; 179 + cost_per_entry_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) / (double) cards_scanned; 180 if (_last_gc_was_young) { 181 _cost_per_entry_ms_seq->add(cost_per_entry_ms); 182 } else { 183 @@ -1123,7 +1123,7 @@ 184 double cost_per_byte_ms = 0.0; 185 186 if (copied_bytes > 0) { 187 - cost_per_byte_ms = phase_times()->average_last_obj_copy_time() / (double) copied_bytes; 188 + cost_per_byte_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) / (double) copied_bytes; 189 if (_in_marking_window) { 190 _cost_per_byte_ms_during_cm_seq->add(cost_per_byte_ms); 191 } else { 192 @@ -1132,8 +1132,8 @@ 193 } 194 195 double all_other_time_ms = pause_time_ms - 196 - (phase_times()->average_last_update_rs_time() + phase_times()->average_last_scan_rs_time() 197 - + phase_times()->average_last_obj_copy_time() + phase_times()->average_last_termination_time()); 198 + (phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS) + phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) + 199 + phase_times()->average_time_ms(G1GCPhaseTimes::ObjCopy) + phase_times()->average_time_ms(G1GCPhaseTimes::Termination)); 200 201 double young_other_time_ms = 0.0; 202 if (young_cset_region_length() > 0) { 203 @@ -1174,8 +1174,8 @@ 204 205 // Note that _mmu_tracker->max_gc_time() returns the time in seconds. 206 double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0; 207 - adjust_concurrent_refinement(phase_times()->average_last_update_rs_time(), 208 - phase_times()->sum_last_update_rs_processed_buffers(), update_rs_time_goal_ms); 209 + adjust_concurrent_refinement(phase_times()->average_time_ms(G1GCPhaseTimes::UpdateRS), 210 + phase_times()->sum_thread_work_items(G1GCPhaseTimes::UpdateRS), update_rs_time_goal_ms); 211 212 _collectionSetChooser->verify(); 213 } 214 @@ -2114,19 +2114,19 @@ 215 _other.add(pause_time_ms - phase_times->accounted_time_ms()); 216 _root_region_scan_wait.add(phase_times->root_region_scan_wait_time_ms()); 217 _parallel.add(phase_times->cur_collection_par_time_ms()); 218 - _ext_root_scan.add(phase_times->average_last_ext_root_scan_time()); 219 - _satb_filtering.add(phase_times->average_last_satb_filtering_times_ms()); 220 - _update_rs.add(phase_times->average_last_update_rs_time()); 221 - _scan_rs.add(phase_times->average_last_scan_rs_time()); 222 - _obj_copy.add(phase_times->average_last_obj_copy_time()); 223 - _termination.add(phase_times->average_last_termination_time()); 224 + _ext_root_scan.add(phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan)); 225 + _satb_filtering.add(phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering)); 226 + _update_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS)); 227 + _scan_rs.add(phase_times->average_time_ms(G1GCPhaseTimes::ScanRS)); 228 + _obj_copy.add(phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy)); 229 + _termination.add(phase_times->average_time_ms(G1GCPhaseTimes::Termination)); 230 231 - double parallel_known_time = phase_times->average_last_ext_root_scan_time() + 232 - phase_times->average_last_satb_filtering_times_ms() + 233 - phase_times->average_last_update_rs_time() + 234 - phase_times->average_last_scan_rs_time() + 235 - phase_times->average_last_obj_copy_time() + 236 - + phase_times->average_last_termination_time(); 237 + double parallel_known_time = phase_times->average_time_ms(G1GCPhaseTimes::ExtRootScan) + 238 + phase_times->average_time_ms(G1GCPhaseTimes::SATBFiltering) + 239 + phase_times->average_time_ms(G1GCPhaseTimes::UpdateRS) + 240 + phase_times->average_time_ms(G1GCPhaseTimes::ScanRS) + 241 + phase_times->average_time_ms(G1GCPhaseTimes::ObjCopy) + 242 + phase_times->average_time_ms(G1GCPhaseTimes::Termination); 243 244 double parallel_other_time = phase_times->cur_collection_par_time_ms() - parallel_known_time; 245 _parallel_other.add(parallel_other_time); 246 diff --git a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp 247 --- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp 248 +++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp 249 @@ -22,13 +22,13 @@ 250 * 251 */ 252 253 - 254 #include "precompiled.hpp" 255 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" 256 #include "gc_implementation/g1/g1GCPhaseTimes.hpp" 257 #include "gc_implementation/g1/g1Log.hpp" 258 #include "gc_implementation/g1/g1StringDedup.hpp" 259 -#include "runtime/atomic.inline.hpp" 260 +#include "memory/allocation.hpp" 261 +#include "runtime/os.hpp" 262 263 // Helper class for avoiding interleaved logging 264 class LineBuffer: public StackObj { 265 @@ -71,184 +71,243 @@ 266 va_end(ap); 267 } 268 269 + void print_cr() { 270 + gclog_or_tty->print_cr("%s", _buffer); 271 + _cur = _indent_level * INDENT_CHARS; 272 + } 273 + 274 void append_and_print_cr(const char* format, ...) ATTRIBUTE_PRINTF(2, 3) { 275 va_list ap; 276 va_start(ap, format); 277 vappend(format, ap); 278 va_end(ap); 279 - gclog_or_tty->print_cr("%s", _buffer); 280 - _cur = _indent_level * INDENT_CHARS; 281 + print_cr(); 282 } 283 }; 284 285 -PRAGMA_DIAG_PUSH 286 -PRAGMA_FORMAT_NONLITERAL_IGNORED 287 template <class T> 288 -void WorkerDataArray<T>::print(int level, const char* title) { 289 - if (_length == 1) { 290 - // No need for min, max, average and sum for only one worker 291 - LineBuffer buf(level); 292 - buf.append("[%s: ", title); 293 - buf.append(_print_format, _data[0]); 294 - buf.append_and_print_cr("]"); 295 - return; 296 +class WorkerDataArray : public CHeapObj<mtGC> { 297 + friend class G1GCParPhasePrinter; 298 + T* _data; 299 + uint _length; 300 + const char* _title; 301 + bool _print_sum; 302 + int _log_level; 303 + uint _indent_level; 304 + bool _enabled; 305 + 306 + WorkerDataArray<size_t>* _thread_work_items; 307 + 308 + NOT_PRODUCT(T uninitialized();) 309 + 310 + // We are caching the sum and average to only have to calculate them once. 311 + // This is not done in an MT-safe way. It is intended to allow single 312 + // threaded code to call sum() and average() multiple times in any order 313 + // without having to worry about the cost. 314 + bool _has_new_data; 315 + T _sum; 316 + T _min; 317 + T _max; 318 + double _average; 319 + 320 + public: 321 + WorkerDataArray(uint length, const char* title, bool print_sum, int log_level, uint indent_level) : 322 + _title(title), _length(0), _print_sum(print_sum), _log_level(log_level), _indent_level(indent_level), 323 + _has_new_data(true), _thread_work_items(NULL), _enabled(true) { 324 + assert(length > 0, "Must have some workers to store data for"); 325 + _length = length; 326 + _data = NEW_C_HEAP_ARRAY(T, _length, mtGC); 327 } 328 329 - T min = _data[0]; 330 - T max = _data[0]; 331 - T sum = 0; 332 - 333 - LineBuffer buf(level); 334 - buf.append("[%s:", title); 335 - for (uint i = 0; i < _length; ++i) { 336 - T val = _data[i]; 337 - min = MIN2(val, min); 338 - max = MAX2(val, max); 339 - sum += val; 340 - if (G1Log::finest()) { 341 - buf.append(" "); 342 - buf.append(_print_format, val); 343 - } 344 + ~WorkerDataArray() { 345 + FREE_C_HEAP_ARRAY(T, _data); 346 } 347 348 - if (G1Log::finest()) { 349 - buf.append_and_print_cr("%s", ""); 350 + void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items) { 351 + _thread_work_items = thread_work_items; 352 } 353 354 - double avg = (double)sum / (double)_length; 355 - buf.append(" Min: "); 356 - buf.append(_print_format, min); 357 - buf.append(", Avg: "); 358 - buf.append("%.1lf", avg); // Always print average as a double 359 - buf.append(", Max: "); 360 - buf.append(_print_format, max); 361 - buf.append(", Diff: "); 362 - buf.append(_print_format, max - min); 363 - if (_print_sum) { 364 - // for things like the start and end times the sum is not 365 - // that relevant 366 - buf.append(", Sum: "); 367 - buf.append(_print_format, sum); 368 + WorkerDataArray<size_t>* thread_work_items() { return _thread_work_items; } 369 + 370 + void set(uint worker_i, T value) { 371 + assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); 372 + assert(_data[worker_i] == WorkerDataArray<T>::uninitialized(), err_msg("Overwriting data for worker %d in %s", worker_i, _title)); 373 + _data[worker_i] = value; 374 + _has_new_data = true; 375 } 376 - buf.append_and_print_cr("]"); 377 -} 378 -PRAGMA_DIAG_POP 379 + 380 + void set_thread_work_item(uint worker_i, size_t value) { 381 + assert(_thread_work_items != NULL, "No sub count"); 382 + _thread_work_items->set(worker_i, value); 383 + } 384 + 385 + T get(uint worker_i) { 386 + assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); 387 + assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data added for worker %d", worker_i)); 388 + return _data[worker_i]; 389 + } 390 + 391 + void add(uint worker_i, T value) { 392 + assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); 393 + assert(_data[worker_i] != WorkerDataArray<T>::uninitialized(), err_msg("No data to add to for worker %d", worker_i)); 394 + _data[worker_i] += value; 395 + _has_new_data = true; 396 + } 397 + 398 + double average(){ 399 + calculate_totals(); 400 + return _average; 401 + } 402 + 403 + T sum() { 404 + calculate_totals(); 405 + return _sum; 406 + } 407 + 408 + T minimum() { 409 + calculate_totals(); 410 + return _min; 411 + } 412 + 413 + T maximum() { 414 + calculate_totals(); 415 + return _max; 416 + } 417 + 418 + void reset() PRODUCT_RETURN; 419 + void verify() PRODUCT_RETURN; 420 + 421 + void set_enabled(bool enabled) { _enabled = enabled; } 422 + 423 + int log_level() { return _log_level; } 424 + 425 + private: 426 + 427 + void calculate_totals(){ 428 + if (!_has_new_data) { 429 + return; 430 + } 431 + 432 + _sum = (T)0; 433 + _min = _data[0]; 434 + _max = _min; 435 + for (uint i = 0; i < _length; ++i) { 436 + T val = _data[i]; 437 + _sum += val; 438 + _min = MIN2(_min, val); 439 + _max = MAX2(_max, val); 440 + } 441 + _average = (double)_sum / (double)_length; 442 + _has_new_data = false; 443 + } 444 +}; 445 + 446 447 #ifndef PRODUCT 448 449 -template <> const int WorkerDataArray<int>::_uninitialized = -1; 450 -template <> const double WorkerDataArray<double>::_uninitialized = -1.0; 451 -template <> const size_t WorkerDataArray<size_t>::_uninitialized = (size_t)-1; 452 +template <> 453 +size_t WorkerDataArray<size_t>::uninitialized() { 454 + return (size_t)-1; 455 +} 456 + 457 +template <> 458 +double WorkerDataArray<double>::uninitialized() { 459 + return -1.0; 460 +} 461 462 template <class T> 463 void WorkerDataArray<T>::reset() { 464 for (uint i = 0; i < _length; i++) { 465 - _data[i] = (T)_uninitialized; 466 + _data[i] = WorkerDataArray<T>::uninitialized(); 467 + } 468 + if (_thread_work_items != NULL) { 469 + _thread_work_items->reset(); 470 } 471 } 472 473 template <class T> 474 void WorkerDataArray<T>::verify() { 475 + if (!_enabled) { 476 + return; 477 + } 478 + 479 for (uint i = 0; i < _length; i++) { 480 - assert(_data[i] != _uninitialized, 481 - err_msg("Invalid data for worker %u, data: %lf, uninitialized: %lf", 482 - i, (double)_data[i], (double)_uninitialized)); 483 + assert(_data[i] != WorkerDataArray<T>::uninitialized(), 484 + err_msg("Invalid data for worker %u in '%s'", i, _title)); 485 + } 486 + if (_thread_work_items != NULL) { 487 + _thread_work_items->verify(); 488 } 489 } 490 491 #endif 492 493 G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) : 494 - _max_gc_threads(max_gc_threads), 495 - _last_gc_worker_start_times_ms(_max_gc_threads, "%.1lf", false), 496 - _last_ext_root_scan_times_ms(_max_gc_threads, "%.1lf"), 497 - _last_satb_filtering_times_ms(_max_gc_threads, "%.1lf"), 498 - _last_update_rs_times_ms(_max_gc_threads, "%.1lf"), 499 - _last_update_rs_processed_buffers(_max_gc_threads, "%d"), 500 - _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"), 501 - _last_strong_code_root_scan_times_ms(_max_gc_threads, "%.1lf"), 502 - _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"), 503 - _last_termination_times_ms(_max_gc_threads, "%.1lf"), 504 - _last_termination_attempts(_max_gc_threads, SIZE_FORMAT), 505 - _last_gc_worker_end_times_ms(_max_gc_threads, "%.1lf", false), 506 - _last_gc_worker_times_ms(_max_gc_threads, "%.1lf"), 507 - _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf"), 508 - _last_redirty_logged_cards_time_ms(_max_gc_threads, "%.1lf"), 509 - _last_redirty_logged_cards_processed_cards(_max_gc_threads, SIZE_FORMAT), 510 - _cur_string_dedup_queue_fixup_worker_times_ms(_max_gc_threads, "%.1lf"), 511 - _cur_string_dedup_table_fixup_worker_times_ms(_max_gc_threads, "%.1lf") 512 + _max_gc_threads(max_gc_threads) 513 { 514 assert(max_gc_threads > 0, "Must have some GC threads"); 515 + 516 + _gc_par_phases[GCWorkerStart] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Start (ms)", false, G1Log::LevelFiner, 2); 517 + _gc_par_phases[ExtRootScan] = new WorkerDataArray<double>(max_gc_threads, "Ext Root Scanning (ms)", true, G1Log::LevelFiner, 2); 518 + _gc_par_phases[SATBFiltering] = new WorkerDataArray<double>(max_gc_threads, "SATB Filtering (ms)", true, G1Log::LevelFiner, 2); 519 + _gc_par_phases[UpdateRS] = new WorkerDataArray<double>(max_gc_threads, "Update RS (ms)", true, G1Log::LevelFiner, 2); 520 + _gc_par_phases[ScanRS] = new WorkerDataArray<double>(max_gc_threads, "Scan RS (ms)", true, G1Log::LevelFiner, 2); 521 + _gc_par_phases[CodeRoots] = new WorkerDataArray<double>(max_gc_threads, "Code Root Scanning (ms)", true, G1Log::LevelFiner, 2); 522 + _gc_par_phases[ObjCopy] = new WorkerDataArray<double>(max_gc_threads, "Object Copy (ms)", true, G1Log::LevelFiner, 2); 523 + _gc_par_phases[Termination] = new WorkerDataArray<double>(max_gc_threads, "Termination (ms)", true, G1Log::LevelFiner, 2); 524 + _gc_par_phases[GCWorkerTotal] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Total (ms)", true, G1Log::LevelFiner, 2); 525 + _gc_par_phases[GCWorkerEnd] = new WorkerDataArray<double>(max_gc_threads, "GC Worker End (ms)", false, G1Log::LevelFiner, 2); 526 + _gc_par_phases[Other] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Other (ms)", true, G1Log::LevelFiner, 2); 527 + 528 + _update_rs_processed_buffers = new WorkerDataArray<size_t>(max_gc_threads, "Processed Buffers", true, G1Log::LevelFiner, 3); 529 + _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers); 530 + 531 + _termination_attempts = new WorkerDataArray<size_t>(max_gc_threads, "Termination Attempts", true, G1Log::LevelFinest, 3); 532 + _gc_par_phases[Termination]->link_thread_work_items(_termination_attempts); 533 + 534 + _gc_par_phases[StringDedupQueueFixup] = new WorkerDataArray<double>(max_gc_threads, "Queue Fixup (ms)", true, G1Log::LevelFiner, 2); 535 + _gc_par_phases[StringDedupTableFixup] = new WorkerDataArray<double>(max_gc_threads, "Table Fixup (ms)", true, G1Log::LevelFiner, 2); 536 + 537 + _gc_par_phases[RedirtyCards] = new WorkerDataArray<double>(max_gc_threads, "Parallel Redirty", true, G1Log::LevelFinest, 3); 538 + _redirtied_cards = new WorkerDataArray<size_t>(max_gc_threads, "Redirtied Cards", true, G1Log::LevelFinest, 3); 539 + _gc_par_phases[RedirtyCards]->link_thread_work_items(_redirtied_cards); 540 } 541 542 -void G1GCPhaseTimes::note_gc_start(uint active_gc_threads) { 543 +void G1GCPhaseTimes::note_gc_start(uint active_gc_threads, bool mark_in_progress) { 544 assert(active_gc_threads > 0, "The number of threads must be > 0"); 545 - assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max nubmer of threads"); 546 + assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max number of threads"); 547 _active_gc_threads = active_gc_threads; 548 549 - _last_gc_worker_start_times_ms.reset(); 550 - _last_ext_root_scan_times_ms.reset(); 551 - _last_satb_filtering_times_ms.reset(); 552 - _last_update_rs_times_ms.reset(); 553 - _last_update_rs_processed_buffers.reset(); 554 - _last_scan_rs_times_ms.reset(); 555 - _last_strong_code_root_scan_times_ms.reset(); 556 - _last_obj_copy_times_ms.reset(); 557 - _last_termination_times_ms.reset(); 558 - _last_termination_attempts.reset(); 559 - _last_gc_worker_end_times_ms.reset(); 560 - _last_gc_worker_times_ms.reset(); 561 - _last_gc_worker_other_times_ms.reset(); 562 + for (int i = 0; i < GCParPhasesSentinel; i++) { 563 + _gc_par_phases[i]->reset(); 564 + } 565 566 - _last_redirty_logged_cards_time_ms.reset(); 567 - _last_redirty_logged_cards_processed_cards.reset(); 568 + _gc_par_phases[SATBFiltering]->set_enabled(mark_in_progress); 569 570 + _gc_par_phases[StringDedupQueueFixup]->set_enabled(G1StringDedup::is_enabled()); 571 + _gc_par_phases[StringDedupTableFixup]->set_enabled(G1StringDedup::is_enabled()); 572 } 573 574 void G1GCPhaseTimes::note_gc_end() { 575 - _last_gc_worker_start_times_ms.verify(); 576 - _last_ext_root_scan_times_ms.verify(); 577 - _last_satb_filtering_times_ms.verify(); 578 - _last_update_rs_times_ms.verify(); 579 - _last_update_rs_processed_buffers.verify(); 580 - _last_scan_rs_times_ms.verify(); 581 - _last_strong_code_root_scan_times_ms.verify(); 582 - _last_obj_copy_times_ms.verify(); 583 - _last_termination_times_ms.verify(); 584 - _last_termination_attempts.verify(); 585 - _last_gc_worker_end_times_ms.verify(); 586 + for (uint i = 0; i < _active_gc_threads; i++) { 587 + double worker_time = _gc_par_phases[GCWorkerEnd]->get(i) - _gc_par_phases[GCWorkerStart]->get(i); 588 + record_time_secs(GCWorkerTotal, i , worker_time); 589 590 - for (uint i = 0; i < _active_gc_threads; i++) { 591 - double worker_time = _last_gc_worker_end_times_ms.get(i) - _last_gc_worker_start_times_ms.get(i); 592 - _last_gc_worker_times_ms.set(i, worker_time); 593 + double worker_known_time = 594 + _gc_par_phases[ExtRootScan]->get(i) + 595 + _gc_par_phases[SATBFiltering]->get(i) + 596 + _gc_par_phases[UpdateRS]->get(i) + 597 + _gc_par_phases[ScanRS]->get(i) + 598 + _gc_par_phases[CodeRoots]->get(i) + 599 + _gc_par_phases[ObjCopy]->get(i) + 600 + _gc_par_phases[Termination]->get(i); 601 602 - double worker_known_time = _last_ext_root_scan_times_ms.get(i) + 603 - _last_satb_filtering_times_ms.get(i) + 604 - _last_update_rs_times_ms.get(i) + 605 - _last_scan_rs_times_ms.get(i) + 606 - _last_strong_code_root_scan_times_ms.get(i) + 607 - _last_obj_copy_times_ms.get(i) + 608 - _last_termination_times_ms.get(i); 609 - 610 - double worker_other_time = worker_time - worker_known_time; 611 - _last_gc_worker_other_times_ms.set(i, worker_other_time); 612 + record_time_secs(Other, i, worker_time - worker_known_time); 613 } 614 615 - _last_gc_worker_times_ms.verify(); 616 - _last_gc_worker_other_times_ms.verify(); 617 - 618 - _last_redirty_logged_cards_time_ms.verify(); 619 - _last_redirty_logged_cards_processed_cards.verify(); 620 -} 621 - 622 -void G1GCPhaseTimes::note_string_dedup_fixup_start() { 623 - _cur_string_dedup_queue_fixup_worker_times_ms.reset(); 624 - _cur_string_dedup_table_fixup_worker_times_ms.reset(); 625 -} 626 - 627 -void G1GCPhaseTimes::note_string_dedup_fixup_end() { 628 - _cur_string_dedup_queue_fixup_worker_times_ms.verify(); 629 - _cur_string_dedup_table_fixup_worker_times_ms.verify(); 630 + for (int i = 0; i < GCParPhasesSentinel; i++) { 631 + _gc_par_phases[i]->verify(); 632 + } 633 } 634 635 void G1GCPhaseTimes::print_stats(int level, const char* str, double value) { 636 @@ -288,35 +347,172 @@ 637 return misc_time_ms; 638 } 639 640 +// record the time a phase took in seconds 641 +void G1GCPhaseTimes::record_time_secs(GCParPhases phase, uint worker_i, double secs) { 642 + _gc_par_phases[phase]->set(worker_i, secs); 643 +} 644 + 645 +// add a number of seconds to a phase 646 +void G1GCPhaseTimes::add_time_secs(GCParPhases phase, uint worker_i, double secs) { 647 + _gc_par_phases[phase]->add(worker_i, secs); 648 +} 649 + 650 +void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count) { 651 + _gc_par_phases[phase]->set_thread_work_item(worker_i, count); 652 +} 653 + 654 +// return the average time for a phase in milliseconds 655 +double G1GCPhaseTimes::average_time_ms(GCParPhases phase) { 656 + return _gc_par_phases[phase]->average() * 1000.0; 657 +} 658 + 659 +double G1GCPhaseTimes::get_time_ms(GCParPhases phase, uint worker_i) { 660 + return _gc_par_phases[phase]->get(worker_i) * 1000.0; 661 +} 662 + 663 +double G1GCPhaseTimes::sum_time_ms(GCParPhases phase) { 664 + return _gc_par_phases[phase]->sum() * 1000.0; 665 +} 666 + 667 +double G1GCPhaseTimes::min_time_ms(GCParPhases phase) { 668 + return _gc_par_phases[phase]->minimum() * 1000.0; 669 +} 670 + 671 +double G1GCPhaseTimes::max_time_ms(GCParPhases phase) { 672 + return _gc_par_phases[phase]->maximum() * 1000.0; 673 +} 674 + 675 +size_t G1GCPhaseTimes::get_thread_work_item(GCParPhases phase, uint worker_i) { 676 + assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count"); 677 + return _gc_par_phases[phase]->thread_work_items()->get(worker_i); 678 +} 679 + 680 +size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase) { 681 + assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count"); 682 + return _gc_par_phases[phase]->thread_work_items()->sum(); 683 +} 684 + 685 +double G1GCPhaseTimes::average_thread_work_items(GCParPhases phase) { 686 + assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count"); 687 + return _gc_par_phases[phase]->thread_work_items()->average(); 688 +} 689 + 690 +size_t G1GCPhaseTimes::min_thread_work_items(GCParPhases phase) { 691 + assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count"); 692 + return _gc_par_phases[phase]->thread_work_items()->minimum(); 693 +} 694 + 695 +size_t G1GCPhaseTimes::max_thread_work_items(GCParPhases phase) { 696 + assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count"); 697 + return _gc_par_phases[phase]->thread_work_items()->maximum(); 698 +} 699 + 700 +class G1GCParPhasePrinter : public StackObj { 701 + G1GCPhaseTimes* _phase_times; 702 + public: 703 + G1GCParPhasePrinter(G1GCPhaseTimes* phase_times) : _phase_times(phase_times) {} 704 + 705 + void print(G1GCPhaseTimes::GCParPhases phase_id) { 706 + WorkerDataArray<double>* phase = _phase_times->_gc_par_phases[phase_id]; 707 + 708 + if (phase->_log_level > G1Log::level() || !phase->_enabled) { 709 + return; 710 + } 711 + 712 + if (phase->_length == 1) { 713 + print_single_length(phase_id, phase); 714 + } else { 715 + print_multi_length(phase_id, phase); 716 + } 717 + } 718 + 719 + private: 720 + 721 + void print_single_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) { 722 + // No need for min, max, average and sum for only one worker 723 + LineBuffer buf(phase->_indent_level); 724 + buf.append_and_print_cr("[%s: %.1lf]", phase->_title, _phase_times->get_time_ms(phase_id, 0)); 725 + 726 + if (phase->_thread_work_items != NULL) { 727 + LineBuffer buf2(phase->_thread_work_items->_indent_level); 728 + buf2.append_and_print_cr("[%s: "SIZE_FORMAT"]", phase->_thread_work_items->_title, _phase_times->sum_thread_work_items(phase_id)); 729 + } 730 + } 731 + 732 + void print_time_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) { 733 + for (uint i = 0; i < phase->_length; ++i) { 734 + buf.append(" %.1lf", _phase_times->get_time_ms(phase_id, i)); 735 + } 736 + buf.print_cr(); 737 + } 738 + 739 + void print_count_values(LineBuffer& buf, G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) { 740 + for (uint i = 0; i < thread_work_items->_length; ++i) { 741 + buf.append(" " SIZE_FORMAT, _phase_times->get_thread_work_item(phase_id, i)); 742 + } 743 + buf.print_cr(); 744 + } 745 + 746 + void print_thread_work_items(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<size_t>* thread_work_items) { 747 + LineBuffer buf(thread_work_items->_indent_level); 748 + buf.append("[%s:", thread_work_items->_title); 749 + 750 + if (G1Log::finest()) { 751 + print_count_values(buf, phase_id, thread_work_items); 752 + } 753 + 754 + assert(thread_work_items->_print_sum, err_msg("%s does not have print sum true even though it is a count", thread_work_items->_title)); 755 + 756 + buf.append_and_print_cr(" Min: " SIZE_FORMAT ", Avg: %.1lf, Max: " SIZE_FORMAT ", Diff: " SIZE_FORMAT ", Sum: " SIZE_FORMAT "]", 757 + _phase_times->min_thread_work_items(phase_id), _phase_times->average_thread_work_items(phase_id), _phase_times->max_thread_work_items(phase_id), 758 + _phase_times->max_thread_work_items(phase_id) - _phase_times->min_thread_work_items(phase_id), _phase_times->sum_thread_work_items(phase_id)); 759 + } 760 + 761 + void print_multi_length(G1GCPhaseTimes::GCParPhases phase_id, WorkerDataArray<double>* phase) { 762 + LineBuffer buf(phase->_indent_level); 763 + buf.append("[%s:", phase->_title); 764 + 765 + if (G1Log::finest()) { 766 + print_time_values(buf, phase_id, phase); 767 + } 768 + 769 + buf.append(" Min: %.1lf, Avg: %.1lf, Max: %.1lf, Diff: %.1lf", 770 + _phase_times->min_time_ms(phase_id), _phase_times->average_time_ms(phase_id), _phase_times->max_time_ms(phase_id), 771 + _phase_times->max_time_ms(phase_id) - _phase_times->min_time_ms(phase_id)); 772 + 773 + if (phase->_print_sum) { 774 + // for things like the start and end times the sum is not 775 + // that relevant 776 + buf.append(", Sum: %.1lf", _phase_times->sum_time_ms(phase_id)); 777 + } 778 + 779 + buf.append_and_print_cr("]"); 780 + 781 + if (phase->_thread_work_items != NULL) { 782 + print_thread_work_items(phase_id, phase->_thread_work_items); 783 + } 784 + } 785 +}; 786 + 787 void G1GCPhaseTimes::print(double pause_time_sec) { 788 + G1GCParPhasePrinter par_phase_printer(this); 789 + 790 if (_root_region_scan_wait_time_ms > 0.0) { 791 print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms); 792 } 793 + 794 print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads); 795 - _last_gc_worker_start_times_ms.print(2, "GC Worker Start (ms)"); 796 - _last_ext_root_scan_times_ms.print(2, "Ext Root Scanning (ms)"); 797 - if (_last_satb_filtering_times_ms.sum() > 0.0) { 798 - _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)"); 799 + for (int i = 0; i <= GCMainParPhasesLast; i++) { 800 + par_phase_printer.print((GCParPhases) i); 801 } 802 - _last_update_rs_times_ms.print(2, "Update RS (ms)"); 803 - _last_update_rs_processed_buffers.print(3, "Processed Buffers"); 804 - _last_scan_rs_times_ms.print(2, "Scan RS (ms)"); 805 - _last_strong_code_root_scan_times_ms.print(2, "Code Root Scanning (ms)"); 806 - _last_obj_copy_times_ms.print(2, "Object Copy (ms)"); 807 - _last_termination_times_ms.print(2, "Termination (ms)"); 808 - if (G1Log::finest()) { 809 - _last_termination_attempts.print(3, "Termination Attempts"); 810 - } 811 - _last_gc_worker_other_times_ms.print(2, "GC Worker Other (ms)"); 812 - _last_gc_worker_times_ms.print(2, "GC Worker Total (ms)"); 813 - _last_gc_worker_end_times_ms.print(2, "GC Worker End (ms)"); 814 815 print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms); 816 print_stats(1, "Code Root Purge", _cur_strong_code_root_purge_time_ms); 817 if (G1StringDedup::is_enabled()) { 818 print_stats(1, "String Dedup Fixup", _cur_string_dedup_fixup_time_ms, _active_gc_threads); 819 - _cur_string_dedup_queue_fixup_worker_times_ms.print(2, "Queue Fixup (ms)"); 820 - _cur_string_dedup_table_fixup_worker_times_ms.print(2, "Table Fixup (ms)"); 821 + for (int i = StringDedupPhasesFirst; i <= StringDedupPhasesLast; i++) { 822 + par_phase_printer.print((GCParPhases) i); 823 + } 824 } 825 print_stats(1, "Clear CT", _cur_clear_ct_time_ms); 826 double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms(); 827 @@ -340,10 +536,7 @@ 828 print_stats(2, "Ref Proc", _cur_ref_proc_time_ms); 829 print_stats(2, "Ref Enq", _cur_ref_enq_time_ms); 830 print_stats(2, "Redirty Cards", _recorded_redirty_logged_cards_time_ms); 831 - if (G1Log::finest()) { 832 - _last_redirty_logged_cards_time_ms.print(3, "Parallel Redirty"); 833 - _last_redirty_logged_cards_processed_cards.print(3, "Redirtied Cards"); 834 - } 835 + par_phase_printer.print(RedirtyCards); 836 if (G1EagerReclaimHumongousObjects) { 837 print_stats(2, "Humongous Register", _cur_fast_reclaim_humongous_register_time_ms); 838 if (G1Log::finest()) { 839 @@ -366,3 +559,17 @@ 840 print_stats(2, "Verify After", _cur_verify_after_time_ms); 841 } 842 } 843 + 844 +G1GCParPhaseTimesTracker::G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id) : 845 + _phase_times(phase_times), _phase(phase), _worker_id(worker_id) { 846 + if (_phase_times != NULL) { 847 + _start_time = os::elapsedTime(); 848 + } 849 +} 850 + 851 +G1GCParPhaseTimesTracker::~G1GCParPhaseTimesTracker() { 852 + if (_phase_times != NULL) { 853 + _phase_times->record_time_secs(_phase, _worker_id, os::elapsedTime() - _start_time); 854 + } 855 +} 856 + 857 diff --git a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp 858 --- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp 859 +++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp 860 @@ -26,106 +26,46 @@ 861 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP 862 863 #include "memory/allocation.hpp" 864 -#include "gc_interface/gcCause.hpp" 865 866 -template <class T> 867 -class WorkerDataArray : public CHeapObj<mtGC> { 868 - T* _data; 869 - uint _length; 870 - const char* _print_format; 871 - bool _print_sum; 872 +class LineBuffer; 873 874 - NOT_PRODUCT(static const T _uninitialized;) 875 - 876 - // We are caching the sum and average to only have to calculate them once. 877 - // This is not done in an MT-safe way. It is intended to allow single 878 - // threaded code to call sum() and average() multiple times in any order 879 - // without having to worry about the cost. 880 - bool _has_new_data; 881 - T _sum; 882 - double _average; 883 - 884 - public: 885 - WorkerDataArray(uint length, const char* print_format, bool print_sum = true) : 886 - _length(length), _print_format(print_format), _print_sum(print_sum), _has_new_data(true) { 887 - assert(length > 0, "Must have some workers to store data for"); 888 - _data = NEW_C_HEAP_ARRAY(T, _length, mtGC); 889 - } 890 - 891 - ~WorkerDataArray() { 892 - FREE_C_HEAP_ARRAY(T, _data); 893 - } 894 - 895 - void set(uint worker_i, T value) { 896 - assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); 897 - assert(_data[worker_i] == (T)-1, err_msg("Overwriting data for worker %d", worker_i)); 898 - _data[worker_i] = value; 899 - _has_new_data = true; 900 - } 901 - 902 - T get(uint worker_i) { 903 - assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); 904 - assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i)); 905 - return _data[worker_i]; 906 - } 907 - 908 - void add(uint worker_i, T value) { 909 - assert(worker_i < _length, err_msg("Worker %d is greater than max: %d", worker_i, _length)); 910 - assert(_data[worker_i] != (T)-1, err_msg("No data to add to for worker %d", worker_i)); 911 - _data[worker_i] += value; 912 - _has_new_data = true; 913 - } 914 - 915 - double average(){ 916 - if (_has_new_data) { 917 - calculate_totals(); 918 - } 919 - return _average; 920 - } 921 - 922 - T sum() { 923 - if (_has_new_data) { 924 - calculate_totals(); 925 - } 926 - return _sum; 927 - } 928 - 929 - void print(int level, const char* title); 930 - 931 - void reset() PRODUCT_RETURN; 932 - void verify() PRODUCT_RETURN; 933 - 934 - private: 935 - 936 - void calculate_totals(){ 937 - _sum = (T)0; 938 - for (uint i = 0; i < _length; ++i) { 939 - _sum += _data[i]; 940 - } 941 - _average = (double)_sum / (double)_length; 942 - _has_new_data = false; 943 - } 944 -}; 945 +template <class T> class WorkerDataArray; 946 947 class G1GCPhaseTimes : public CHeapObj<mtGC> { 948 + friend class G1GCParPhasePrinter; 949 950 - private: 951 uint _active_gc_threads; 952 uint _max_gc_threads; 953 954 - WorkerDataArray<double> _last_gc_worker_start_times_ms; 955 - WorkerDataArray<double> _last_ext_root_scan_times_ms; 956 - WorkerDataArray<double> _last_satb_filtering_times_ms; 957 - WorkerDataArray<double> _last_update_rs_times_ms; 958 - WorkerDataArray<int> _last_update_rs_processed_buffers; 959 - WorkerDataArray<double> _last_scan_rs_times_ms; 960 - WorkerDataArray<double> _last_strong_code_root_scan_times_ms; 961 - WorkerDataArray<double> _last_obj_copy_times_ms; 962 - WorkerDataArray<double> _last_termination_times_ms; 963 - WorkerDataArray<size_t> _last_termination_attempts; 964 - WorkerDataArray<double> _last_gc_worker_end_times_ms; 965 - WorkerDataArray<double> _last_gc_worker_times_ms; 966 - WorkerDataArray<double> _last_gc_worker_other_times_ms; 967 + public: 968 + enum GCParPhases { 969 + GCWorkerStart, 970 + ExtRootScan, 971 + SATBFiltering, 972 + UpdateRS, 973 + ScanRS, 974 + CodeRoots, 975 + ObjCopy, 976 + Termination, 977 + Other, 978 + GCWorkerTotal, 979 + GCWorkerEnd, 980 + StringDedupQueueFixup, 981 + StringDedupTableFixup, 982 + RedirtyCards, 983 + GCParPhasesSentinel 984 + }; 985 + 986 + private: 987 + // Markers for grouping the phases in the GCPhases enum above 988 + static const int GCMainParPhasesLast = GCWorkerEnd; 989 + static const int StringDedupPhasesFirst = StringDedupQueueFixup; 990 + static const int StringDedupPhasesLast = StringDedupTableFixup; 991 + 992 + WorkerDataArray<double>* _gc_par_phases[GCParPhasesSentinel]; 993 + WorkerDataArray<size_t>* _update_rs_processed_buffers; 994 + WorkerDataArray<size_t>* _termination_attempts; 995 + WorkerDataArray<size_t>* _redirtied_cards; 996 997 double _cur_collection_par_time_ms; 998 double _cur_collection_code_root_fixup_time_ms; 999 @@ -135,9 +75,7 @@ 1000 double _cur_evac_fail_restore_remsets; 1001 double _cur_evac_fail_remove_self_forwards; 1002 1003 - double _cur_string_dedup_fixup_time_ms; 1004 - WorkerDataArray<double> _cur_string_dedup_queue_fixup_worker_times_ms; 1005 - WorkerDataArray<double> _cur_string_dedup_table_fixup_worker_times_ms; 1006 + double _cur_string_dedup_fixup_time_ms; 1007 1008 double _cur_clear_ct_time_ms; 1009 double _cur_ref_proc_time_ms; 1010 @@ -149,8 +87,6 @@ 1011 double _recorded_young_cset_choice_time_ms; 1012 double _recorded_non_young_cset_choice_time_ms; 1013 1014 - WorkerDataArray<double> _last_redirty_logged_cards_time_ms; 1015 - WorkerDataArray<size_t> _last_redirty_logged_cards_processed_cards; 1016 double _recorded_redirty_logged_cards_time_ms; 1017 1018 double _recorded_young_free_cset_time_ms; 1019 @@ -172,54 +108,34 @@ 1020 1021 public: 1022 G1GCPhaseTimes(uint max_gc_threads); 1023 - void note_gc_start(uint active_gc_threads); 1024 + void note_gc_start(uint active_gc_threads, bool mark_in_progress); 1025 void note_gc_end(); 1026 void print(double pause_time_sec); 1027 1028 - void record_gc_worker_start_time(uint worker_i, double ms) { 1029 - _last_gc_worker_start_times_ms.set(worker_i, ms); 1030 - } 1031 + // record the time a phase took in seconds 1032 + void record_time_secs(GCParPhases phase, uint worker_i, double secs); 1033 1034 - void record_ext_root_scan_time(uint worker_i, double ms) { 1035 - _last_ext_root_scan_times_ms.set(worker_i, ms); 1036 - } 1037 + // add a number of seconds to a phase 1038 + void add_time_secs(GCParPhases phase, uint worker_i, double secs); 1039 1040 - void record_satb_filtering_time(uint worker_i, double ms) { 1041 - _last_satb_filtering_times_ms.set(worker_i, ms); 1042 - } 1043 + void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count); 1044 1045 - void record_update_rs_time(uint worker_i, double ms) { 1046 - _last_update_rs_times_ms.set(worker_i, ms); 1047 - } 1048 + // return the average time for a phase in milliseconds 1049 + double average_time_ms(GCParPhases phase); 1050 1051 - void record_update_rs_processed_buffers(uint worker_i, int processed_buffers) { 1052 - _last_update_rs_processed_buffers.set(worker_i, processed_buffers); 1053 - } 1054 + size_t sum_thread_work_items(GCParPhases phase); 1055 1056 - void record_scan_rs_time(uint worker_i, double ms) { 1057 - _last_scan_rs_times_ms.set(worker_i, ms); 1058 - } 1059 + private: 1060 + double get_time_ms(GCParPhases phase, uint worker_i); 1061 + double sum_time_ms(GCParPhases phase); 1062 + double min_time_ms(GCParPhases phase); 1063 + double max_time_ms(GCParPhases phase); 1064 + size_t get_thread_work_item(GCParPhases phase, uint worker_i); 1065 + double average_thread_work_items(GCParPhases phase); 1066 + size_t min_thread_work_items(GCParPhases phase); 1067 + size_t max_thread_work_items(GCParPhases phase); 1068 1069 - void record_strong_code_root_scan_time(uint worker_i, double ms) { 1070 - _last_strong_code_root_scan_times_ms.set(worker_i, ms); 1071 - } 1072 - 1073 - void record_obj_copy_time(uint worker_i, double ms) { 1074 - _last_obj_copy_times_ms.set(worker_i, ms); 1075 - } 1076 - 1077 - void add_obj_copy_time(uint worker_i, double ms) { 1078 - _last_obj_copy_times_ms.add(worker_i, ms); 1079 - } 1080 - 1081 - void record_termination(uint worker_i, double ms, size_t attempts) { 1082 - _last_termination_times_ms.set(worker_i, ms); 1083 - _last_termination_attempts.set(worker_i, attempts); 1084 - } 1085 - 1086 - void record_gc_worker_end_time(uint worker_i, double ms) { 1087 - _last_gc_worker_end_times_ms.set(worker_i, ms); 1088 - } 1089 + public: 1090 1091 void record_clear_ct_time(double ms) { 1092 _cur_clear_ct_time_ms = ms; 1093 @@ -249,21 +165,10 @@ 1094 _cur_evac_fail_remove_self_forwards = ms; 1095 } 1096 1097 - void note_string_dedup_fixup_start(); 1098 - void note_string_dedup_fixup_end(); 1099 - 1100 void record_string_dedup_fixup_time(double ms) { 1101 _cur_string_dedup_fixup_time_ms = ms; 1102 } 1103 1104 - void record_string_dedup_queue_fixup_worker_time(uint worker_id, double ms) { 1105 - _cur_string_dedup_queue_fixup_worker_times_ms.set(worker_id, ms); 1106 - } 1107 - 1108 - void record_string_dedup_table_fixup_worker_time(uint worker_id, double ms) { 1109 - _cur_string_dedup_table_fixup_worker_times_ms.set(worker_id, ms); 1110 - } 1111 - 1112 void record_ref_proc_time(double ms) { 1113 _cur_ref_proc_time_ms = ms; 1114 } 1115 @@ -303,14 +208,6 @@ 1116 _recorded_non_young_cset_choice_time_ms = time_ms; 1117 } 1118 1119 - void record_redirty_logged_cards_time_ms(uint worker_i, double time_ms) { 1120 - _last_redirty_logged_cards_time_ms.set(worker_i, time_ms); 1121 - } 1122 - 1123 - void record_redirty_logged_cards_processed_cards(uint worker_i, size_t processed_buffers) { 1124 - _last_redirty_logged_cards_processed_cards.set(worker_i, processed_buffers); 1125 - } 1126 - 1127 void record_redirty_logged_cards_time_ms(double time_ms) { 1128 _recorded_redirty_logged_cards_time_ms = time_ms; 1129 } 1130 @@ -364,38 +261,16 @@ 1131 double fast_reclaim_humongous_time_ms() { 1132 return _cur_fast_reclaim_humongous_time_ms; 1133 } 1134 +}; 1135 1136 - double average_last_update_rs_time() { 1137 - return _last_update_rs_times_ms.average(); 1138 - } 1139 - 1140 - int sum_last_update_rs_processed_buffers() { 1141 - return _last_update_rs_processed_buffers.sum(); 1142 - } 1143 - 1144 - double average_last_scan_rs_time(){ 1145 - return _last_scan_rs_times_ms.average(); 1146 - } 1147 - 1148 - double average_last_strong_code_root_scan_time(){ 1149 - return _last_strong_code_root_scan_times_ms.average(); 1150 - } 1151 - 1152 - double average_last_obj_copy_time() { 1153 - return _last_obj_copy_times_ms.average(); 1154 - } 1155 - 1156 - double average_last_termination_time() { 1157 - return _last_termination_times_ms.average(); 1158 - } 1159 - 1160 - double average_last_ext_root_scan_time() { 1161 - return _last_ext_root_scan_times_ms.average(); 1162 - } 1163 - 1164 - double average_last_satb_filtering_times_ms() { 1165 - return _last_satb_filtering_times_ms.average(); 1166 - } 1167 +class G1GCParPhaseTimesTracker : public StackObj { 1168 + double _start_time; 1169 + G1GCPhaseTimes::GCParPhases _phase; 1170 + G1GCPhaseTimes* _phase_times; 1171 + uint _worker_id; 1172 +public: 1173 + G1GCParPhaseTimesTracker(G1GCPhaseTimes* phase_times, G1GCPhaseTimes::GCParPhases phase, uint worker_id); 1174 + ~G1GCParPhaseTimesTracker(); 1175 }; 1176 1177 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1GCPHASETIMESLOG_HPP 1178 diff --git a/src/share/vm/gc_implementation/g1/g1Log.hpp b/src/share/vm/gc_implementation/g1/g1Log.hpp 1179 --- a/src/share/vm/gc_implementation/g1/g1Log.hpp 1180 +++ b/src/share/vm/gc_implementation/g1/g1Log.hpp 1181 @@ -28,6 +28,7 @@ 1182 #include "memory/allocation.hpp" 1183 1184 class G1Log : public AllStatic { 1185 + public: 1186 typedef enum { 1187 LevelNone, 1188 LevelFine, 1189 @@ -35,6 +36,7 @@ 1190 LevelFinest 1191 } LogLevel; 1192 1193 + private: 1194 static LogLevel _level; 1195 1196 public: 1197 @@ -50,6 +52,10 @@ 1198 return _level == LevelFinest; 1199 } 1200 1201 + static LogLevel level() { 1202 + return _level; 1203 + } 1204 + 1205 static void init(); 1206 }; 1207 1208 diff --git a/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/src/share/vm/gc_implementation/g1/g1RemSet.cpp 1209 --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp 1210 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp 1211 @@ -248,9 +248,8 @@ 1212 assert(_cards_scanned != NULL, "invariant"); 1213 _cards_scanned[worker_i] = scanRScl.cards_done(); 1214 1215 - _g1p->phase_times()->record_scan_rs_time(worker_i, scan_rs_time_sec * 1000.0); 1216 - _g1p->phase_times()->record_strong_code_root_scan_time(worker_i, 1217 - scanRScl.strong_code_root_scan_time_sec() * 1000.0); 1218 + _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec); 1219 + _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, scanRScl.strong_code_root_scan_time_sec()); 1220 } 1221 1222 // Closure used for updating RSets and recording references that 1223 @@ -287,13 +286,11 @@ 1224 }; 1225 1226 void G1RemSet::updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i) { 1227 - double start = os::elapsedTime(); 1228 + G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i); 1229 // Apply the given closure to all remaining log entries. 1230 RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq); 1231 1232 _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, into_cset_dcq, false, worker_i); 1233 - 1234 - _g1p->phase_times()->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0); 1235 } 1236 1237 void G1RemSet::cleanupHRRS() { 1238 diff --git a/src/share/vm/gc_implementation/g1/g1StringDedup.cpp b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp 1239 --- a/src/share/vm/gc_implementation/g1/g1StringDedup.cpp 1240 +++ b/src/share/vm/gc_implementation/g1/g1StringDedup.cpp 1241 @@ -106,7 +106,7 @@ 1242 1243 void G1StringDedup::oops_do(OopClosure* keep_alive) { 1244 assert(is_enabled(), "String deduplication not enabled"); 1245 - unlink_or_oops_do(NULL, keep_alive); 1246 + unlink_or_oops_do(NULL, keep_alive, true /* allow_resize_and_rehash */); 1247 } 1248 1249 void G1StringDedup::unlink(BoolObjectClosure* is_alive) { 1250 @@ -123,45 +123,39 @@ 1251 class G1StringDedupUnlinkOrOopsDoTask : public AbstractGangTask { 1252 private: 1253 G1StringDedupUnlinkOrOopsDoClosure _cl; 1254 + G1GCPhaseTimes* _phase_times; 1255 1256 public: 1257 G1StringDedupUnlinkOrOopsDoTask(BoolObjectClosure* is_alive, 1258 OopClosure* keep_alive, 1259 - bool allow_resize_and_rehash) : 1260 + bool allow_resize_and_rehash, 1261 + G1GCPhaseTimes* phase_times) : 1262 AbstractGangTask("G1StringDedupUnlinkOrOopsDoTask"), 1263 - _cl(is_alive, keep_alive, allow_resize_and_rehash) { 1264 - } 1265 + _cl(is_alive, keep_alive, allow_resize_and_rehash), _phase_times(phase_times) { } 1266 1267 virtual void work(uint worker_id) { 1268 - double queue_fixup_start = os::elapsedTime(); 1269 - G1StringDedupQueue::unlink_or_oops_do(&_cl); 1270 - 1271 - double table_fixup_start = os::elapsedTime(); 1272 - G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id); 1273 - 1274 - double queue_fixup_time_ms = (table_fixup_start - queue_fixup_start) * 1000.0; 1275 - double table_fixup_time_ms = (os::elapsedTime() - table_fixup_start) * 1000.0; 1276 - G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1277 - g1p->phase_times()->record_string_dedup_queue_fixup_worker_time(worker_id, queue_fixup_time_ms); 1278 - g1p->phase_times()->record_string_dedup_table_fixup_worker_time(worker_id, table_fixup_time_ms); 1279 + { 1280 + G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupQueueFixup, worker_id); 1281 + G1StringDedupQueue::unlink_or_oops_do(&_cl); 1282 + } 1283 + { 1284 + G1GCParPhaseTimesTracker x(_phase_times, G1GCPhaseTimes::StringDedupTableFixup, worker_id); 1285 + G1StringDedupTable::unlink_or_oops_do(&_cl, worker_id); 1286 + } 1287 } 1288 }; 1289 1290 -void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, bool allow_resize_and_rehash) { 1291 +void G1StringDedup::unlink_or_oops_do(BoolObjectClosure* is_alive, 1292 + OopClosure* keep_alive, 1293 + bool allow_resize_and_rehash, 1294 + G1GCPhaseTimes* phase_times) { 1295 assert(is_enabled(), "String deduplication not enabled"); 1296 - G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1297 - g1p->phase_times()->note_string_dedup_fixup_start(); 1298 - double fixup_start = os::elapsedTime(); 1299 1300 - G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash); 1301 + G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash, phase_times); 1302 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1303 g1h->set_par_threads(); 1304 g1h->workers()->run_task(&task); 1305 g1h->set_par_threads(0); 1306 - 1307 - double fixup_time_ms = (os::elapsedTime() - fixup_start) * 1000.0; 1308 - g1p->phase_times()->record_string_dedup_fixup_time(fixup_time_ms); 1309 - g1p->phase_times()->note_string_dedup_fixup_end(); 1310 } 1311 1312 void G1StringDedup::threads_do(ThreadClosure* tc) { 1313 diff --git a/src/share/vm/gc_implementation/g1/g1StringDedup.hpp b/src/share/vm/gc_implementation/g1/g1StringDedup.hpp 1314 --- a/src/share/vm/gc_implementation/g1/g1StringDedup.hpp 1315 +++ b/src/share/vm/gc_implementation/g1/g1StringDedup.hpp 1316 @@ -91,6 +91,7 @@ 1317 class ThreadClosure; 1318 class outputStream; 1319 class G1StringDedupTable; 1320 +class G1GCPhaseTimes; 1321 1322 // 1323 // Main interface for interacting with string deduplication. 1324 @@ -131,7 +132,7 @@ 1325 static void oops_do(OopClosure* keep_alive); 1326 static void unlink(BoolObjectClosure* is_alive); 1327 static void unlink_or_oops_do(BoolObjectClosure* is_alive, OopClosure* keep_alive, 1328 - bool allow_resize_and_rehash = true); 1329 + bool allow_resize_and_rehash, G1GCPhaseTimes* phase_times = NULL); 1330 1331 static void threads_do(ThreadClosure* tc); 1332 static void print_worker_threads_on(outputStream* st);