1 /* 2 * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 package org.graalvm.compiler.hotspot; 24 25 import static org.graalvm.compiler.hotspot.HotSpotGraalCompiler.fmt; 26 27 import java.util.Arrays; 28 29 import org.graalvm.compiler.debug.TTY; 30 import org.graalvm.compiler.options.Option; 31 import org.graalvm.compiler.options.OptionKey; 32 import org.graalvm.compiler.options.OptionType; 33 import org.graalvm.compiler.options.OptionValues; 34 35 import jdk.vm.ci.meta.ResolvedJavaMethod; 36 37 /** 38 * A watch dog for reporting long running compilations. This is designed to be an always on 39 * mechanism for the purpose of getting better reports from customer sites. As such, it only exits 40 * the VM when it is very sure about a stuck compilation as opposed to only observing a long running 41 * compilation. In both cases, it logs messages to {@link TTY}. 42 * 43 * A watch dog thread is associated with each compiler thread. It wakes up every 44 * {@value #SPIN_TIMEOUT_MS} milliseconds to observe the state of the compiler thread. After the 45 * first {@link Options#CompilationWatchDogStartDelay} seconds of a specific compilation, the watch 46 * dog reports a long running compilation. Every 47 * {@link Options#CompilationWatchDogStackTraceInterval} seconds after that point in time where the 48 * same compilation is still executing, the watch dog takes a stack trace of the compiler thread. If 49 * more than {@value Options#NonFatalIdenticalCompilationSnapshots} contiguous identical stack 50 * traces are seen, the watch dog reports a stuck compilation and exits the VM. 51 */ 52 class CompilationWatchDog extends Thread implements AutoCloseable { 53 54 public static class Options { 55 // @formatter:off 56 @Option(help = "Delay in seconds before watch dog monitoring a compilation (0 disables monitoring).", type = OptionType.Debug) 57 public static final OptionKey<Double> CompilationWatchDogStartDelay = new OptionKey<>(0.0D); 58 @Option(help = "Interval in seconds between a watch dog reporting stack traces for long running compilations.", type = OptionType.Debug) 59 public static final OptionKey<Double> CompilationWatchDogStackTraceInterval = new OptionKey<>(60.0D); 60 @Option(help = "Number of contiguous identical compiler thread stack traces allowed before the VM exits " + 61 "on the basis of a stuck compilation.", type = OptionType.Debug) 62 public static final OptionKey<Integer> NonFatalIdenticalCompilationSnapshots = new OptionKey<>(20); 63 // @formatter:on 64 } 65 66 private enum WatchDogState { 67 /** 68 * The watch dog thread sleeps currently, either no method is currently compiled, or no 69 * method is compiled long enough to be monitored. 70 */ 71 SLEEPING, 72 /** 73 * The watch dog thread identified a compilation that already takes long enough to be 74 * interesting. It will sleep and wake up periodically and check if the current compilation 75 * takes too long. If it takes too long it will start collecting stack traces from the 76 * compiler thread. 77 */ 78 WATCHING_WITHOUT_STACK_INSPECTION, 79 /** 80 * The watch dog thread is fully monitoring the compiler thread. It takes stack traces 81 * periodically and sleeps again until the next period. If the number of stack traces 82 * reaches a certain upper bound and those stack traces are equal it will shut down the 83 * entire VM with an error. 84 */ 85 WATCHING_WITH_STACK_INSPECTION 86 } 87 88 /** 89 * The number of milliseconds a watch dog thread sleeps between observing the state of the 90 * compilation thread it is associated with. Most compilations are expected to complete within 91 * this time period and thus not be actively monitored by the watch dog. 92 */ 93 private static final int SPIN_TIMEOUT_MS = 1000; 94 95 private WatchDogState state = WatchDogState.SLEEPING; 96 private final Thread compilerThread; 97 private final long startDelayMilliseconds; 98 private final long stackTraceIntervalMilliseconds; 99 private final int nonFatalIdenticalCompilationSnapshots; 100 private volatile ResolvedJavaMethod currentMethod; 101 private volatile int currentId; 102 private ResolvedJavaMethod lastWatched; 103 104 // The 4 fields below are for a single compilation being watched 105 private long elapsed; 106 private int traceIntervals; 107 private int numberOfIdenticalStackTraces; 108 private StackTraceElement[] lastStackTrace; 109 110 CompilationWatchDog(Thread compilerThread, long startDelayMilliseconds, long stackTraceIntervalMilliseconds, int nonFatalIdenticalCompilationSnapshots) { 111 this.compilerThread = compilerThread; 112 this.setName("WatchDog" + getId() + "[" + compilerThread.getName() + "]"); 113 this.setPriority(Thread.MAX_PRIORITY); 114 this.setDaemon(true); 115 this.startDelayMilliseconds = startDelayMilliseconds; 116 this.stackTraceIntervalMilliseconds = stackTraceIntervalMilliseconds; 117 this.nonFatalIdenticalCompilationSnapshots = nonFatalIdenticalCompilationSnapshots; 118 } 119 120 public void startCompilation(ResolvedJavaMethod method, int id) { 121 trace("start %s", fmt(method)); 122 this.currentMethod = method; 123 this.currentId = id; 124 } 125 126 public void stopCompilation() { 127 trace(" stop %s", fmt(currentMethod)); 128 this.currentMethod = null; 129 } 130 131 private void reset() { 132 elapsed = 0; 133 traceIntervals = 0; 134 numberOfIdenticalStackTraces = 0; 135 lastWatched = null; 136 lastStackTrace = null; 137 state = WatchDogState.SLEEPING; 138 } 139 140 private void tick(WatchDogState newState) { 141 state = newState; 142 } 143 144 /** 145 * Saves the current stack trace {@link StackTraceElement} of the monitored compiler thread 146 * {@link CompilationWatchDog#compilerThread}. 147 * 148 * @param newStackTrace the current stack trace of the monitored compiler thread 149 * @return {@code true} if the stack trace is equal to the last stack trace (or if it is the 150 * first one) and {@code false} if it is not equal to the last one. 151 */ 152 private boolean recordStackTrace(StackTraceElement[] newStackTrace) { 153 if (lastStackTrace == null) { 154 lastStackTrace = newStackTrace; 155 return true; 156 } 157 if (!Arrays.equals(lastStackTrace, newStackTrace)) { 158 lastStackTrace = newStackTrace; 159 return false; 160 } 161 return true; 162 } 163 164 /** 165 * Set to true to debug the watch dog. 166 */ 167 private static final boolean DEBUG = Boolean.getBoolean("debug.graal.CompilationWatchDog"); 168 169 private void trace(String format, Object... args) { 170 if (DEBUG) { 171 TTY.println(this + ": " + String.format(format, args)); 172 } 173 } 174 175 private static long ms(double seconds) { 176 return (long) seconds * 1000; 177 } 178 179 private static double secs(long ms) { 180 return (double) ms / 1000; 181 } 182 183 @Override 184 public String toString() { 185 return getName(); 186 } 187 188 @Override 189 public void run() { 190 try { 191 trace("Started%n", this); 192 while (true) { 193 // get a copy of the last set method 194 final ResolvedJavaMethod currentlyCompiling = currentMethod; 195 if (currentlyCompiling == null) { 196 // continue sleeping, compilation is either over before starting 197 // to watch the compiler thread or no compilation at all started 198 reset(); 199 } else { 200 switch (state) { 201 case SLEEPING: 202 lastWatched = currentlyCompiling; 203 elapsed = 0; 204 tick(WatchDogState.WATCHING_WITHOUT_STACK_INSPECTION); 205 break; 206 case WATCHING_WITHOUT_STACK_INSPECTION: 207 if (currentlyCompiling.equals(lastWatched)) { 208 if (elapsed >= startDelayMilliseconds) { 209 // we looked at the same compilation for a certain time 210 // so now we start to collect stack traces 211 tick(WatchDogState.WATCHING_WITH_STACK_INSPECTION); 212 trace("changes mode to watching with stack traces"); 213 } else { 214 // we still compile the same method but won't collect traces 215 // yet 216 trace("watching without stack traces [%.2f seconds]", secs(elapsed)); 217 } 218 elapsed += SPIN_TIMEOUT_MS; 219 } else { 220 // compilation finished before we exceeded initial watching 221 // period 222 reset(); 223 } 224 break; 225 case WATCHING_WITH_STACK_INSPECTION: 226 if (currentlyCompiling.equals(lastWatched)) { 227 if (elapsed >= startDelayMilliseconds + (traceIntervals * stackTraceIntervalMilliseconds)) { 228 trace("took a stack trace"); 229 boolean newStackTrace = recordStackTrace(compilerThread.getStackTrace()); 230 if (!newStackTrace) { 231 trace("%d identical stack traces in a row", numberOfIdenticalStackTraces); 232 numberOfIdenticalStackTraces = 0; 233 } 234 numberOfIdenticalStackTraces++; 235 if (numberOfIdenticalStackTraces > nonFatalIdenticalCompilationSnapshots) { 236 synchronized (CompilationWatchDog.class) { 237 TTY.printf("======================= WATCH DOG THREAD =======================%n" + 238 "%s took %d identical stack traces, which indicates a stuck compilation (id=%d) of %s%n%sExiting VM%n", this, 239 numberOfIdenticalStackTraces, currentId, fmt(currentMethod), fmt(lastStackTrace)); 240 System.exit(-1); 241 } 242 } else if (newStackTrace) { 243 synchronized (CompilationWatchDog.class) { 244 TTY.printf("======================= WATCH DOG THREAD =======================%n" + 245 "%s detected long running compilation (id=%d) of %s [%.2f seconds]%n%s", this, currentId, fmt(currentMethod), 246 secs(elapsed), fmt(lastStackTrace)); 247 } 248 } 249 traceIntervals++; 250 } else { 251 // we still watch the compilation in the same trace interval 252 trace("watching with stack traces [%.2f seconds]", secs(elapsed)); 253 } 254 elapsed += SPIN_TIMEOUT_MS; 255 } else { 256 // compilation finished before we are able to collect stack 257 // traces 258 reset(); 259 } 260 break; 261 default: 262 break; 263 } 264 } 265 try { 266 Thread.sleep(SPIN_TIMEOUT_MS); 267 } catch (InterruptedException e) { 268 // Silently swallow 269 } 270 } 271 } catch (VirtualMachineError vmError) { 272 /* 273 * We encounter a VM error. This includes for example OutOfMemoryExceptions. In such a 274 * case we silently swallow the error. If it happens again the application thread will 275 * most likely encounter the same problem. If not the watchdog thread will no longer 276 * monitor the compilation and thus the error cannot happen again. 277 */ 278 } catch (Throwable t) { 279 /* 280 * A real exception happened on the compilation watchdog. This is unintended behavior 281 * and must not happen in any case. 282 */ 283 throw new InternalError(String.format("%s encountered an exception%n%s%n", this, fmt(t)), t); 284 } 285 } 286 287 private static final ThreadLocal<CompilationWatchDog> WATCH_DOGS = new ThreadLocal<>(); 288 289 /** 290 * Opens a scope for watching the compilation of a given method. 291 * 292 * @param method a method about to be compiled 293 * @param id compilation request identifier 294 * @return {@code null} if the compilation watch dog is disabled otherwise this object. The 295 * returned value should be used in a {@code try}-with-resources statement whose scope 296 * is the whole compilation so that leaving the scope will cause {@link #close()} to be 297 * called. 298 */ 299 static CompilationWatchDog watch(ResolvedJavaMethod method, int id, OptionValues options) { 300 long startDelayMilliseconds = ms(Options.CompilationWatchDogStartDelay.getValue(options)); 301 if (startDelayMilliseconds > 0.0D) { 302 // Lazily get a watch dog thread for the current compiler thread 303 CompilationWatchDog watchDog = WATCH_DOGS.get(); 304 if (watchDog == null) { 305 Thread currentThread = currentThread(); 306 long stackTraceIntervalMilliseconds = ms(Options.CompilationWatchDogStackTraceInterval.getValue(options)); 307 int nonFatalIdenticalCompilationSnapshots = Options.NonFatalIdenticalCompilationSnapshots.getValue(options); 308 watchDog = new CompilationWatchDog(currentThread, startDelayMilliseconds, stackTraceIntervalMilliseconds, nonFatalIdenticalCompilationSnapshots); 309 WATCH_DOGS.set(watchDog); 310 watchDog.start(); 311 } 312 watchDog.startCompilation(method, id); 313 return watchDog; 314 } 315 return null; 316 } 317 318 @Override 319 public void close() { 320 stopCompilation(); 321 } 322 }