rev 3986 : 7176479: G1: JVM crashes on T5-8 system with 1.5 TB heap
Summary: Refactor G1's hot card cache and card counts table into their own files. Simplify the card counts table, including removing the encoding of the card index in each entry. The card counts table now has a 1:1 correspondence with the cards spanned by heap. Space for the card counts table is reserved from virtual memory (rather than C heap) during JVM startup and is committed/expanded when the heap is expanded.
Reviewed-by:

   1 /*
   2  * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 
  26 #include "precompiled.hpp"
  27 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  28 #include "gc_implementation/g1/g1GCPhaseTimes.hpp"
  29 #include "gc_implementation/g1/g1Log.hpp"
  30 
  31 // Helper class for avoiding interleaved logging
  32 class LineBuffer: public StackObj {
  33 
  34 private:
  35   static const int BUFFER_LEN = 1024;
  36   static const int INDENT_CHARS = 3;
  37   char _buffer[BUFFER_LEN];
  38   int _indent_level;
  39   int _cur;
  40 
  41   void vappend(const char* format, va_list ap) {
  42     int res = vsnprintf(&_buffer[_cur], BUFFER_LEN - _cur, format, ap);
  43     if (res != -1) {
  44       _cur += res;
  45     } else {
  46       DEBUG_ONLY(warning("buffer too small in LineBuffer");)
  47       _buffer[BUFFER_LEN -1] = 0;
  48       _cur = BUFFER_LEN; // vsnprintf above should not add to _buffer if we are called again
  49     }
  50   }
  51 
  52 public:
  53   explicit LineBuffer(int indent_level): _indent_level(indent_level), _cur(0) {
  54     for (; (_cur < BUFFER_LEN && _cur < (_indent_level * INDENT_CHARS)); _cur++) {
  55       _buffer[_cur] = ' ';
  56     }
  57   }
  58 
  59 #ifndef PRODUCT
  60   ~LineBuffer() {
  61     assert(_cur == _indent_level * INDENT_CHARS, "pending data in buffer - append_and_print_cr() not called?");
  62   }
  63 #endif
  64 
  65   void append(const char* format, ...) {
  66     va_list ap;
  67     va_start(ap, format);
  68     vappend(format, ap);
  69     va_end(ap);
  70   }
  71 
  72   void append_and_print_cr(const char* format, ...) {
  73     va_list ap;
  74     va_start(ap, format);
  75     vappend(format, ap);
  76     va_end(ap);
  77     gclog_or_tty->print_cr("%s", _buffer);
  78     _cur = _indent_level * INDENT_CHARS;
  79   }
  80 };
  81 
  82 template <class T>
  83 void WorkerDataArray<T>::print(int level, const char* title) {
  84   if (_length == 1) {
  85     // No need for min, max, average and sum for only one worker
  86     LineBuffer buf(level);
  87     buf.append("[%s:  ", title);
  88     buf.append(_print_format, _data[0]);
  89     buf.append_and_print_cr("]");
  90     return;
  91   }
  92 
  93   T min = _data[0];
  94   T max = _data[0];
  95   T sum = 0;
  96 
  97   LineBuffer buf(level);
  98   buf.append("[%s:", title);
  99   for (uint i = 0; i < _length; ++i) {
 100     T val = _data[i];
 101     min = MIN2(val, min);
 102     max = MAX2(val, max);
 103     sum += val;
 104     if (G1Log::finest()) {
 105       buf.append("  ");
 106       buf.append(_print_format, val);
 107     }
 108   }
 109 
 110   if (G1Log::finest()) {
 111     buf.append_and_print_cr("");
 112   }
 113 
 114   double avg = (double)sum / (double)_length;
 115   buf.append(" Min: ");
 116   buf.append(_print_format, min);
 117   buf.append(", Avg: ");
 118   buf.append("%.1lf", avg); // Always print average as a double
 119   buf.append(", Max: ");
 120   buf.append(_print_format, max);
 121   buf.append(", Diff: ");
 122   buf.append(_print_format, max - min);
 123   if (_print_sum) {
 124     // for things like the start and end times the sum is not
 125     // that relevant
 126     buf.append(", Sum: ");
 127     buf.append(_print_format, sum);
 128   }
 129   buf.append_and_print_cr("]");
 130 }
 131 
 132 #ifndef PRODUCT
 133 
 134 template <class T>
 135 void WorkerDataArray<T>::reset() {
 136   for (uint i = 0; i < _length; i++) {
 137     _data[i] = (T)-1;
 138   }
 139 }
 140 
 141 template <class T>
 142 void WorkerDataArray<T>::verify() {
 143   for (uint i = 0; i < _length; i++) {
 144     assert(_data[i] >= (T)0, err_msg("Invalid data for worker %d", i));
 145   }
 146 }
 147 
 148 #endif
 149 
 150 G1GCPhaseTimes::G1GCPhaseTimes(uint max_gc_threads) :
 151   _max_gc_threads(max_gc_threads),
 152   _min_clear_cc_time_ms(-1.0),
 153   _max_clear_cc_time_ms(-1.0),
 154   _cur_clear_cc_time_ms(0.0),
 155   _cum_clear_cc_time_ms(0.0),
 156   _num_cc_clears(0L),
 157   _last_gc_worker_start_times_ms(_max_gc_threads, "%.1lf", false),
 158   _last_ext_root_scan_times_ms(_max_gc_threads, "%.1lf"),
 159   _last_satb_filtering_times_ms(_max_gc_threads, "%.1lf"),
 160   _last_update_rs_times_ms(_max_gc_threads, "%.1lf"),
 161   _last_update_rs_processed_buffers(_max_gc_threads, "%d"),
 162   _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"),
 163   _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"),
 164   _last_termination_times_ms(_max_gc_threads, "%.1lf"),
 165   _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
 166   _last_gc_worker_end_times_ms(_max_gc_threads, "%.1lf", false),
 167   _last_gc_worker_times_ms(_max_gc_threads, "%.1lf"),
 168   _last_gc_worker_other_times_ms(_max_gc_threads, "%.1lf")
 169 {
 170   assert(max_gc_threads > 0, "Must have some GC threads");
 171 }
 172 
 173 void G1GCPhaseTimes::note_gc_start(uint active_gc_threads) {
 174   assert(active_gc_threads > 0, "The number of threads must be > 0");
 175   assert(active_gc_threads <= _max_gc_threads, "The number of active threads must be <= the max nubmer of threads");
 176   _active_gc_threads = active_gc_threads;
 177 
 178   _last_gc_worker_start_times_ms.reset();
 179   _last_ext_root_scan_times_ms.reset();
 180   _last_satb_filtering_times_ms.reset();
 181   _last_update_rs_times_ms.reset();
 182   _last_update_rs_processed_buffers.reset();
 183   _last_scan_rs_times_ms.reset();
 184   _last_obj_copy_times_ms.reset();
 185   _last_termination_times_ms.reset();
 186   _last_termination_attempts.reset();
 187   _last_gc_worker_end_times_ms.reset();
 188   _last_gc_worker_times_ms.reset();
 189   _last_gc_worker_other_times_ms.reset();
 190 }
 191 
 192 void G1GCPhaseTimes::note_gc_end() {
 193   _last_gc_worker_start_times_ms.verify();
 194   _last_ext_root_scan_times_ms.verify();
 195   _last_satb_filtering_times_ms.verify();
 196   _last_update_rs_times_ms.verify();
 197   _last_update_rs_processed_buffers.verify();
 198   _last_scan_rs_times_ms.verify();
 199   _last_obj_copy_times_ms.verify();
 200   _last_termination_times_ms.verify();
 201   _last_termination_attempts.verify();
 202   _last_gc_worker_end_times_ms.verify();
 203 
 204     for (uint i = 0; i < _active_gc_threads; i++) {
 205       double worker_time = _last_gc_worker_end_times_ms.get(i) - _last_gc_worker_start_times_ms.get(i);
 206       _last_gc_worker_times_ms.set(i, worker_time);
 207 
 208       double worker_known_time = _last_ext_root_scan_times_ms.get(i) +
 209         _last_satb_filtering_times_ms.get(i) +
 210         _last_update_rs_times_ms.get(i) +
 211         _last_scan_rs_times_ms.get(i) +
 212         _last_obj_copy_times_ms.get(i) +
 213         _last_termination_times_ms.get(i);
 214 
 215       double worker_other_time = worker_time - worker_known_time;
 216       _last_gc_worker_other_times_ms.set(i, worker_other_time);
 217     }
 218 
 219   _last_gc_worker_times_ms.verify();
 220   _last_gc_worker_other_times_ms.verify();
 221 }
 222 
 223 void G1GCPhaseTimes::print_stats(int level, const char* str, double value) {
 224   LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value);
 225 }
 226 
 227 void G1GCPhaseTimes::print_stats(int level, const char* str, double value, int workers) {
 228   LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: %d]", str, value, workers);
 229 }
 230 
 231 double G1GCPhaseTimes::accounted_time_ms() {
 232     // Subtract the root region scanning wait time. It's initialized to
 233     // zero at the start of the pause.
 234     double misc_time_ms = _root_region_scan_wait_time_ms;
 235 
 236     misc_time_ms += _cur_collection_par_time_ms;
 237 
 238     // Now subtract the time taken to fix up roots in generated code
 239     misc_time_ms += _cur_collection_code_root_fixup_time_ms;
 240 
 241     // Subtract the time taken to clean the card table from the
 242     // current value of "other time"
 243     misc_time_ms += _cur_clear_ct_time_ms;
 244 
 245     return misc_time_ms;
 246 }
 247 
 248 void G1GCPhaseTimes::print(double pause_time_sec) {
 249   if (_root_region_scan_wait_time_ms > 0.0) {
 250     print_stats(1, "Root Region Scan Waiting", _root_region_scan_wait_time_ms);
 251   }
 252   if (G1CollectedHeap::use_parallel_gc_threads()) {
 253     print_stats(1, "Parallel Time", _cur_collection_par_time_ms, _active_gc_threads);
 254     _last_gc_worker_start_times_ms.print(2, "GC Worker Start (ms)");
 255     _last_ext_root_scan_times_ms.print(2, "Ext Root Scanning (ms)");
 256     if (_last_satb_filtering_times_ms.sum() > 0.0) {
 257       _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
 258     }
 259     _last_update_rs_times_ms.print(2, "Update RS (ms)");
 260       _last_update_rs_processed_buffers.print(3, "Processed Buffers");
 261     _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
 262     _last_obj_copy_times_ms.print(2, "Object Copy (ms)");
 263     _last_termination_times_ms.print(2, "Termination (ms)");
 264     if (G1Log::finest()) {
 265       _last_termination_attempts.print(3, "Termination Attempts");
 266     }
 267     _last_gc_worker_other_times_ms.print(2, "GC Worker Other (ms)");
 268     _last_gc_worker_times_ms.print(2, "GC Worker Total (ms)");
 269     _last_gc_worker_end_times_ms.print(2, "GC Worker End (ms)");
 270   } else {
 271     _last_ext_root_scan_times_ms.print(1, "Ext Root Scanning (ms)");
 272     if (_last_satb_filtering_times_ms.sum() > 0.0) {
 273       _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)");
 274     }
 275     _last_update_rs_times_ms.print(1, "Update RS (ms)");
 276       _last_update_rs_processed_buffers.print(2, "Processed Buffers");
 277     _last_scan_rs_times_ms.print(1, "Scan RS (ms)");
 278     _last_obj_copy_times_ms.print(1, "Object Copy (ms)");
 279   }
 280   print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
 281   print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
 282   if (Verbose && G1Log::finest()) {
 283     print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms);
 284     print_stats(1, "Cum Clear CC", _cum_clear_cc_time_ms);
 285     print_stats(1, "Min Clear CC", _min_clear_cc_time_ms);
 286     print_stats(1, "Max Clear CC", _max_clear_cc_time_ms);
 287     if (_num_cc_clears > 0) {
 288       print_stats(1, "Avg Clear CC", _cum_clear_cc_time_ms / ((double)_num_cc_clears));
 289     }
 290   }
 291   double misc_time_ms = pause_time_sec * MILLIUNITS - accounted_time_ms();
 292   print_stats(1, "Other", misc_time_ms);
 293   if (_cur_verify_before_time_ms > 0.0) {
 294     print_stats(2, "Verify Before", _cur_verify_before_time_ms);
 295   }
 296   print_stats(2, "Choose CSet",
 297     (_recorded_young_cset_choice_time_ms +
 298     _recorded_non_young_cset_choice_time_ms));
 299   print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
 300   print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
 301   print_stats(2, "Free CSet",
 302     (_recorded_young_free_cset_time_ms +
 303     _recorded_non_young_free_cset_time_ms));
 304   if (_cur_verify_after_time_ms > 0.0) {
 305     print_stats(2, "Verify After", _cur_verify_after_time_ms);
 306   }
 307 }
 308 
 309 void G1GCPhaseTimes::record_cc_clear_time_ms(double ms) {
 310   if (!(Verbose && G1Log::finest())) {
 311     return;
 312   }
 313 
 314   if (_min_clear_cc_time_ms < 0.0 || ms <= _min_clear_cc_time_ms) {
 315     _min_clear_cc_time_ms = ms;
 316   }
 317   if (_max_clear_cc_time_ms < 0.0 || ms >= _max_clear_cc_time_ms) {
 318     _max_clear_cc_time_ms = ms;
 319   }
 320   _cur_clear_cc_time_ms = ms;
 321   _cum_clear_cc_time_ms += ms;
 322   _num_cc_clears++;
 323 }
--- EOF ---