11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 package org.graalvm.compiler.hotspot; 24 25 import static org.graalvm.compiler.hotspot.HotSpotGraalCompiler.fmt; 26 27 import java.util.Arrays; 28 29 import org.graalvm.compiler.debug.TTY; 30 import org.graalvm.compiler.options.Option; 31 import org.graalvm.compiler.options.OptionType; 32 import org.graalvm.compiler.options.OptionValue; 33 34 import jdk.vm.ci.meta.ResolvedJavaMethod; 35 36 /** 37 * A watch dog for reporting long running compilations. This is designed to be an always on 38 * mechanism for the purpose of getting better reports from customer sites. As such, it only exits 39 * the VM when it is very sure about a stuck compilation as opposed to only observing a long running 40 * compilation. In both cases, it logs messages to {@link TTY}. 41 * 42 * A watch dog thread is associated with each compiler thread. It wakes up every 43 * {@value #SPIN_TIMEOUT_MS} milliseconds to observe the state of the compiler thread. After the 44 * first {@link Options#CompilationWatchDogStartDelay} seconds of a specific compilation, the watch 45 * dog reports a long running compilation. Every 46 * {@link Options#CompilationWatchDogStackTraceInterval} seconds after that point in time where the 47 * same compilation is still executing, the watch dog takes a stack trace of the compiler thread. If 48 * more than {@value Options#NonFatalIdenticalCompilationSnapshots} contiguous identical stack 49 * traces are seen, the watch dog reports a stuck compilation and exits the VM. 50 */ 51 class CompilationWatchDog extends Thread implements AutoCloseable { 52 53 public static class Options { 54 // @formatter:off 55 @Option(help = "Delay in seconds before watch dog monitoring a compilation (0 disables monitoring).", type = OptionType.Debug) 56 public static final OptionValue<Double> CompilationWatchDogStartDelay = new OptionValue<>(30.0D); 57 @Option(help = "Interval in seconds between a watch dog reporting stack traces for long running compilations.", type = OptionType.Debug) 58 public static final OptionValue<Double> CompilationWatchDogStackTraceInterval = new OptionValue<>(30.0D); 59 @Option(help = "Number of contiguous identical compiler thread stack traces allowed before the VM exits " + 60 "on the basis of a stuck compilation.", type = OptionType.Debug) 61 public static final OptionValue<Integer> NonFatalIdenticalCompilationSnapshots = new OptionValue<>(10); 62 // @formatter:on 63 } 64 65 private enum WatchDogState { 66 /** 67 * The watch dog thread sleeps currently, either no method is currently compiled, or no 68 * method is compiled long enough to be monitored. 69 */ 70 SLEEPING, 71 /** 72 * The watch dog thread identified a compilation that already takes long enough to be 73 * interesting. It will sleep and wake up periodically and check if the current compilation 74 * takes too long. If it takes too long it will start collecting stack traces from the 75 * compiler thread. 76 */ 77 WATCHING_WITHOUT_STACK_INSPECTION, 78 /** 79 * The watch dog thread is fully monitoring the compiler thread. It takes stack traces 80 * periodically and sleeps again until the next period. If the number of stack traces 81 * reaches a certain upper bound and those stack traces are equal it will shut down the 82 * entire VM with an error. 83 */ 84 WATCHING_WITH_STACK_INSPECTION 85 } 86 87 /** 88 * The number of milliseconds a watch dog thread sleeps between observing the state of the 89 * compilation thread it is associated with. Most compilations are expected to complete within 90 * this time period and thus not be actively monitored by the watch dog. 91 */ 92 private static final int SPIN_TIMEOUT_MS = 1000; 93 private static final long START_DELAY_MS = ms(Options.CompilationWatchDogStartDelay.getValue()); 94 private static final long STACK_TRACE_INTERVAL_MS = ms(Options.CompilationWatchDogStackTraceInterval.getValue()); 95 private static final boolean ENABLED = START_DELAY_MS > 0.0D; 96 97 private WatchDogState state = WatchDogState.SLEEPING; 98 private final Thread compilerThread; 99 private volatile ResolvedJavaMethod currentMethod; 100 private volatile int currentId; 101 private ResolvedJavaMethod lastWatched; 102 103 // The 4 fields below are for a single compilation being watched 104 private long elapsed; 105 private int traceIntervals; 106 private int numberOfIdenticalStackTraces; 107 private StackTraceElement[] lastStackTrace; 108 109 CompilationWatchDog(Thread compilerThread) { 110 this.compilerThread = compilerThread; 111 this.setName("WatchDog" + getId() + "[" + compilerThread.getName() + "]"); 112 this.setPriority(Thread.MAX_PRIORITY); 113 this.setDaemon(true); 114 } 115 116 public void startCompilation(ResolvedJavaMethod method, int id) { 117 trace("start %s", fmt(method)); 118 this.currentMethod = method; 119 this.currentId = id; 120 } 121 122 public void stopCompilation() { 123 trace(" stop %s", fmt(currentMethod)); 124 this.currentMethod = null; 125 } 126 127 private void reset() { 128 elapsed = 0; 129 traceIntervals = 0; 130 numberOfIdenticalStackTraces = 0; 131 lastWatched = null; 132 lastStackTrace = null; 133 state = WatchDogState.SLEEPING; 166 if (DEBUG) { 167 TTY.println(this + ": " + String.format(format, args)); 168 } 169 } 170 171 private static long ms(double seconds) { 172 return (long) seconds * 1000; 173 } 174 175 private static double secs(long ms) { 176 return (double) ms / 1000; 177 } 178 179 @Override 180 public String toString() { 181 return getName(); 182 } 183 184 @Override 185 public void run() { 186 trace("Started%n", this); 187 try { 188 while (true) { 189 // get a copy of the last set method 190 final ResolvedJavaMethod currentlyCompiling = currentMethod; 191 if (currentlyCompiling == null) { 192 // continue sleeping, compilation is either over before starting 193 // to watch the compiler thread or no compilation at all started 194 reset(); 195 } else { 196 switch (state) { 197 case SLEEPING: 198 lastWatched = currentlyCompiling; 199 elapsed = 0; 200 tick(WatchDogState.WATCHING_WITHOUT_STACK_INSPECTION); 201 break; 202 case WATCHING_WITHOUT_STACK_INSPECTION: 203 if (currentlyCompiling == lastWatched) { 204 if (elapsed >= START_DELAY_MS) { 205 // we looked at the same compilation for a certain time 206 // so now we start to collect stack traces 207 tick(WatchDogState.WATCHING_WITH_STACK_INSPECTION); 208 trace("changes mode to watching with stack traces"); 209 } else { 210 // we still compile the same method but won't collect traces yet 211 trace("watching without stack traces [%.2f seconds]", secs(elapsed)); 212 } 213 elapsed += SPIN_TIMEOUT_MS; 214 } else { 215 // compilation finished before we exceeded initial watching period 216 reset(); 217 } 218 break; 219 case WATCHING_WITH_STACK_INSPECTION: 220 if (currentlyCompiling == lastWatched) { 221 if (elapsed >= START_DELAY_MS + (traceIntervals * STACK_TRACE_INTERVAL_MS)) { 222 trace("took a stack trace"); 223 boolean newStackTrace = recordStackTrace(compilerThread.getStackTrace()); 224 if (!newStackTrace) { 225 trace("%d identical stack traces in a row", numberOfIdenticalStackTraces); 226 numberOfIdenticalStackTraces = 0; 227 } 228 numberOfIdenticalStackTraces++; 229 if (numberOfIdenticalStackTraces > Options.NonFatalIdenticalCompilationSnapshots.getValue()) { 230 synchronized (CompilationWatchDog.class) { 231 TTY.printf("======================= WATCH DOG THREAD =======================%n" + 232 "%s took %d identical stack traces, which indicates a stuck compilation (id=%d) of %s%n%sExiting VM%n", this, 233 numberOfIdenticalStackTraces, currentId, fmt(currentMethod), fmt(lastStackTrace)); 234 System.exit(-1); 235 } 236 } else if (newStackTrace) { 237 synchronized (CompilationWatchDog.class) { 238 TTY.printf("======================= WATCH DOG THREAD =======================%n" + 239 "%s detected long running compilation (id=%d) of %s [%.2f seconds]%n%s", this, currentId, fmt(currentMethod), 240 secs(elapsed), fmt(lastStackTrace)); 241 } 242 } 243 traceIntervals++; 244 } else { 245 // we still watch the compilation in the same trace interval 246 trace("watching with stack traces [%.2f seconds]", secs(elapsed)); 247 } 248 elapsed += SPIN_TIMEOUT_MS; 249 } else { 250 // compilation finished before we are able to collect stack traces 251 reset(); 252 } 253 break; 254 default: 255 break; 256 } 257 } 258 Thread.sleep(SPIN_TIMEOUT_MS); 259 } 260 } catch (Throwable t) { 261 synchronized (CompilationWatchDog.class) { 262 TTY.printf("%s encountered an exception%n%s%n", this, fmt(t)); 263 } 264 } 265 } 266 267 private static final ThreadLocal<CompilationWatchDog> WATCH_DOGS = ENABLED ? new ThreadLocal<>() : null; 268 269 /** 270 * Opens a scope for watching the compilation of a given method. 271 * 272 * @param method a method about to be compiled 273 * @param id compilation request identifier 274 * @return {@code null} if the compilation watch dog is disabled otherwise this object. The 275 * returned value should be used in a {@code try}-with-resources statement whose scope 276 * is the whole compilation so that leaving the scope will cause {@link #close()} to be 277 * called. 278 */ 279 static CompilationWatchDog watch(ResolvedJavaMethod method, int id) { 280 if (ENABLED) { 281 // Lazily get a watch dog thread for the current compiler thread 282 CompilationWatchDog watchDog = WATCH_DOGS.get(); 283 if (watchDog == null) { 284 Thread currentThread = currentThread(); 285 watchDog = new CompilationWatchDog(currentThread); 286 WATCH_DOGS.set(watchDog); 287 watchDog.start(); 288 } 289 watchDog.startCompilation(method, id); 290 return watchDog; 291 } 292 return null; 293 } 294 295 @Override 296 public void close() { 297 stopCompilation(); 298 } 299 } | 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 package org.graalvm.compiler.hotspot; 24 25 import static org.graalvm.compiler.hotspot.HotSpotGraalCompiler.fmt; 26 27 import java.util.Arrays; 28 29 import org.graalvm.compiler.debug.TTY; 30 import org.graalvm.compiler.options.Option; 31 import org.graalvm.compiler.options.OptionKey; 32 import org.graalvm.compiler.options.OptionType; 33 import org.graalvm.compiler.options.OptionValues; 34 35 import jdk.vm.ci.meta.ResolvedJavaMethod; 36 37 /** 38 * A watch dog for reporting long running compilations. This is designed to be an always on 39 * mechanism for the purpose of getting better reports from customer sites. As such, it only exits 40 * the VM when it is very sure about a stuck compilation as opposed to only observing a long running 41 * compilation. In both cases, it logs messages to {@link TTY}. 42 * 43 * A watch dog thread is associated with each compiler thread. It wakes up every 44 * {@value #SPIN_TIMEOUT_MS} milliseconds to observe the state of the compiler thread. After the 45 * first {@link Options#CompilationWatchDogStartDelay} seconds of a specific compilation, the watch 46 * dog reports a long running compilation. Every 47 * {@link Options#CompilationWatchDogStackTraceInterval} seconds after that point in time where the 48 * same compilation is still executing, the watch dog takes a stack trace of the compiler thread. If 49 * more than {@value Options#NonFatalIdenticalCompilationSnapshots} contiguous identical stack 50 * traces are seen, the watch dog reports a stuck compilation and exits the VM. 51 */ 52 class CompilationWatchDog extends Thread implements AutoCloseable { 53 54 public static class Options { 55 // @formatter:off 56 @Option(help = "Delay in seconds before watch dog monitoring a compilation (0 disables monitoring).", type = OptionType.Debug) 57 public static final OptionKey<Double> CompilationWatchDogStartDelay = new OptionKey<>(0.0D); 58 @Option(help = "Interval in seconds between a watch dog reporting stack traces for long running compilations.", type = OptionType.Debug) 59 public static final OptionKey<Double> CompilationWatchDogStackTraceInterval = new OptionKey<>(60.0D); 60 @Option(help = "Number of contiguous identical compiler thread stack traces allowed before the VM exits " + 61 "on the basis of a stuck compilation.", type = OptionType.Debug) 62 public static final OptionKey<Integer> NonFatalIdenticalCompilationSnapshots = new OptionKey<>(20); 63 // @formatter:on 64 } 65 66 private enum WatchDogState { 67 /** 68 * The watch dog thread sleeps currently, either no method is currently compiled, or no 69 * method is compiled long enough to be monitored. 70 */ 71 SLEEPING, 72 /** 73 * The watch dog thread identified a compilation that already takes long enough to be 74 * interesting. It will sleep and wake up periodically and check if the current compilation 75 * takes too long. If it takes too long it will start collecting stack traces from the 76 * compiler thread. 77 */ 78 WATCHING_WITHOUT_STACK_INSPECTION, 79 /** 80 * The watch dog thread is fully monitoring the compiler thread. It takes stack traces 81 * periodically and sleeps again until the next period. If the number of stack traces 82 * reaches a certain upper bound and those stack traces are equal it will shut down the 83 * entire VM with an error. 84 */ 85 WATCHING_WITH_STACK_INSPECTION 86 } 87 88 /** 89 * The number of milliseconds a watch dog thread sleeps between observing the state of the 90 * compilation thread it is associated with. Most compilations are expected to complete within 91 * this time period and thus not be actively monitored by the watch dog. 92 */ 93 private static final int SPIN_TIMEOUT_MS = 1000; 94 95 private WatchDogState state = WatchDogState.SLEEPING; 96 private final Thread compilerThread; 97 private final long startDelayMilliseconds; 98 private final long stackTraceIntervalMilliseconds; 99 private final int nonFatalIdenticalCompilationSnapshots; 100 private volatile ResolvedJavaMethod currentMethod; 101 private volatile int currentId; 102 private ResolvedJavaMethod lastWatched; 103 104 // The 4 fields below are for a single compilation being watched 105 private long elapsed; 106 private int traceIntervals; 107 private int numberOfIdenticalStackTraces; 108 private StackTraceElement[] lastStackTrace; 109 110 CompilationWatchDog(Thread compilerThread, long startDelayMilliseconds, long stackTraceIntervalMilliseconds, int nonFatalIdenticalCompilationSnapshots) { 111 this.compilerThread = compilerThread; 112 this.setName("WatchDog" + getId() + "[" + compilerThread.getName() + "]"); 113 this.setPriority(Thread.MAX_PRIORITY); 114 this.setDaemon(true); 115 this.startDelayMilliseconds = startDelayMilliseconds; 116 this.stackTraceIntervalMilliseconds = stackTraceIntervalMilliseconds; 117 this.nonFatalIdenticalCompilationSnapshots = nonFatalIdenticalCompilationSnapshots; 118 } 119 120 public void startCompilation(ResolvedJavaMethod method, int id) { 121 trace("start %s", fmt(method)); 122 this.currentMethod = method; 123 this.currentId = id; 124 } 125 126 public void stopCompilation() { 127 trace(" stop %s", fmt(currentMethod)); 128 this.currentMethod = null; 129 } 130 131 private void reset() { 132 elapsed = 0; 133 traceIntervals = 0; 134 numberOfIdenticalStackTraces = 0; 135 lastWatched = null; 136 lastStackTrace = null; 137 state = WatchDogState.SLEEPING; 170 if (DEBUG) { 171 TTY.println(this + ": " + String.format(format, args)); 172 } 173 } 174 175 private static long ms(double seconds) { 176 return (long) seconds * 1000; 177 } 178 179 private static double secs(long ms) { 180 return (double) ms / 1000; 181 } 182 183 @Override 184 public String toString() { 185 return getName(); 186 } 187 188 @Override 189 public void run() { 190 try { 191 trace("Started%n", this); 192 while (true) { 193 // get a copy of the last set method 194 final ResolvedJavaMethod currentlyCompiling = currentMethod; 195 if (currentlyCompiling == null) { 196 // continue sleeping, compilation is either over before starting 197 // to watch the compiler thread or no compilation at all started 198 reset(); 199 } else { 200 switch (state) { 201 case SLEEPING: 202 lastWatched = currentlyCompiling; 203 elapsed = 0; 204 tick(WatchDogState.WATCHING_WITHOUT_STACK_INSPECTION); 205 break; 206 case WATCHING_WITHOUT_STACK_INSPECTION: 207 if (currentlyCompiling.equals(lastWatched)) { 208 if (elapsed >= startDelayMilliseconds) { 209 // we looked at the same compilation for a certain time 210 // so now we start to collect stack traces 211 tick(WatchDogState.WATCHING_WITH_STACK_INSPECTION); 212 trace("changes mode to watching with stack traces"); 213 } else { 214 // we still compile the same method but won't collect traces 215 // yet 216 trace("watching without stack traces [%.2f seconds]", secs(elapsed)); 217 } 218 elapsed += SPIN_TIMEOUT_MS; 219 } else { 220 // compilation finished before we exceeded initial watching 221 // period 222 reset(); 223 } 224 break; 225 case WATCHING_WITH_STACK_INSPECTION: 226 if (currentlyCompiling.equals(lastWatched)) { 227 if (elapsed >= startDelayMilliseconds + (traceIntervals * stackTraceIntervalMilliseconds)) { 228 trace("took a stack trace"); 229 boolean newStackTrace = recordStackTrace(compilerThread.getStackTrace()); 230 if (!newStackTrace) { 231 trace("%d identical stack traces in a row", numberOfIdenticalStackTraces); 232 numberOfIdenticalStackTraces = 0; 233 } 234 numberOfIdenticalStackTraces++; 235 if (numberOfIdenticalStackTraces > nonFatalIdenticalCompilationSnapshots) { 236 synchronized (CompilationWatchDog.class) { 237 TTY.printf("======================= WATCH DOG THREAD =======================%n" + 238 "%s took %d identical stack traces, which indicates a stuck compilation (id=%d) of %s%n%sExiting VM%n", this, 239 numberOfIdenticalStackTraces, currentId, fmt(currentMethod), fmt(lastStackTrace)); 240 System.exit(-1); 241 } 242 } else if (newStackTrace) { 243 synchronized (CompilationWatchDog.class) { 244 TTY.printf("======================= WATCH DOG THREAD =======================%n" + 245 "%s detected long running compilation (id=%d) of %s [%.2f seconds]%n%s", this, currentId, fmt(currentMethod), 246 secs(elapsed), fmt(lastStackTrace)); 247 } 248 } 249 traceIntervals++; 250 } else { 251 // we still watch the compilation in the same trace interval 252 trace("watching with stack traces [%.2f seconds]", secs(elapsed)); 253 } 254 elapsed += SPIN_TIMEOUT_MS; 255 } else { 256 // compilation finished before we are able to collect stack 257 // traces 258 reset(); 259 } 260 break; 261 default: 262 break; 263 } 264 } 265 try { 266 Thread.sleep(SPIN_TIMEOUT_MS); 267 } catch (InterruptedException e) { 268 // Silently swallow 269 } 270 } 271 } catch (VirtualMachineError vmError) { 272 /* 273 * We encounter a VM error. This includes for example OutOfMemoryExceptions. In such a 274 * case we silently swallow the error. If it happens again the application thread will 275 * most likely encounter the same problem. If not the watchdog thread will no longer 276 * monitor the compilation and thus the error cannot happen again. 277 */ 278 } catch (Throwable t) { 279 /* 280 * A real exception happened on the compilation watchdog. This is unintended behavior 281 * and must not happen in any case. 282 */ 283 throw new InternalError(String.format("%s encountered an exception%n%s%n", this, fmt(t)), t); 284 } 285 } 286 287 private static final ThreadLocal<CompilationWatchDog> WATCH_DOGS = new ThreadLocal<>(); 288 289 /** 290 * Opens a scope for watching the compilation of a given method. 291 * 292 * @param method a method about to be compiled 293 * @param id compilation request identifier 294 * @return {@code null} if the compilation watch dog is disabled otherwise this object. The 295 * returned value should be used in a {@code try}-with-resources statement whose scope 296 * is the whole compilation so that leaving the scope will cause {@link #close()} to be 297 * called. 298 */ 299 static CompilationWatchDog watch(ResolvedJavaMethod method, int id, OptionValues options) { 300 long startDelayMilliseconds = ms(Options.CompilationWatchDogStartDelay.getValue(options)); 301 if (startDelayMilliseconds > 0.0D) { 302 // Lazily get a watch dog thread for the current compiler thread 303 CompilationWatchDog watchDog = WATCH_DOGS.get(); 304 if (watchDog == null) { 305 Thread currentThread = currentThread(); 306 long stackTraceIntervalMilliseconds = ms(Options.CompilationWatchDogStackTraceInterval.getValue(options)); 307 int nonFatalIdenticalCompilationSnapshots = Options.NonFatalIdenticalCompilationSnapshots.getValue(options); 308 watchDog = new CompilationWatchDog(currentThread, startDelayMilliseconds, stackTraceIntervalMilliseconds, nonFatalIdenticalCompilationSnapshots); 309 WATCH_DOGS.set(watchDog); 310 watchDog.start(); 311 } 312 watchDog.startCompilation(method, id); 313 return watchDog; 314 } 315 return null; 316 } 317 318 @Override 319 public void close() { 320 stopCompilation(); 321 } 322 } |