1 /* 2 * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 25 package org.graalvm.compiler.hotspot; 26 27 import static org.graalvm.compiler.hotspot.HotSpotGraalCompiler.fmt; 28 29 import java.util.Arrays; 30 31 import org.graalvm.compiler.debug.TTY; 32 import org.graalvm.compiler.options.Option; 33 import org.graalvm.compiler.options.OptionKey; 34 import org.graalvm.compiler.options.OptionType; 35 import org.graalvm.compiler.options.OptionValues; 36 37 import jdk.vm.ci.meta.ResolvedJavaMethod; 38 import jdk.vm.ci.services.Services; 39 40 /** 41 * A watch dog for reporting long running compilations. This is designed to be an always on 42 * mechanism for the purpose of getting better reports from customer sites. As such, it only exits 43 * the VM when it is very sure about a stuck compilation as opposed to only observing a long running 44 * compilation. In both cases, it logs messages to {@link TTY}. 45 * 46 * A watch dog thread is associated with each compiler thread. It wakes up every 47 * {@value #SPIN_TIMEOUT_MS} milliseconds to observe the state of the compiler thread. After the 48 * first {@link Options#CompilationWatchDogStartDelay} seconds of a specific compilation, the watch 49 * dog reports a long running compilation. Every 50 * {@link Options#CompilationWatchDogStackTraceInterval} seconds after that point in time where the 51 * same compilation is still executing, the watch dog takes a stack trace of the compiler thread. If 52 * more than {@value Options#NonFatalIdenticalCompilationSnapshots} contiguous identical stack 53 * traces are seen, the watch dog reports a stuck compilation and exits the VM. 54 */ 55 class CompilationWatchDog extends Thread implements AutoCloseable { 56 57 public static class Options { 58 // @formatter:off 59 @Option(help = "Delay in seconds before watch dog monitoring a compilation (0 disables monitoring).", type = OptionType.Debug) 60 public static final OptionKey<Double> CompilationWatchDogStartDelay = new OptionKey<>(0.0D); 61 @Option(help = "Interval in seconds between a watch dog reporting stack traces for long running compilations.", type = OptionType.Debug) 62 public static final OptionKey<Double> CompilationWatchDogStackTraceInterval = new OptionKey<>(60.0D); 63 @Option(help = "Number of contiguous identical compiler thread stack traces allowed before the VM exits " + 64 "on the basis of a stuck compilation.", type = OptionType.Debug) 65 public static final OptionKey<Integer> NonFatalIdenticalCompilationSnapshots = new OptionKey<>(20); 66 // @formatter:on 67 } 68 69 private enum WatchDogState { 70 /** 71 * The watch dog thread sleeps currently, either no method is currently compiled, or no 72 * method is compiled long enough to be monitored. 73 */ 74 SLEEPING, 75 /** 76 * The watch dog thread identified a compilation that already takes long enough to be 77 * interesting. It will sleep and wake up periodically and check if the current compilation 78 * takes too long. If it takes too long it will start collecting stack traces from the 79 * compiler thread. 80 */ 81 WATCHING_WITHOUT_STACK_INSPECTION, 82 /** 83 * The watch dog thread is fully monitoring the compiler thread. It takes stack traces 84 * periodically and sleeps again until the next period. If the number of stack traces 85 * reaches a certain upper bound and those stack traces are equal it will shut down the 86 * entire VM with an error. 87 */ 88 WATCHING_WITH_STACK_INSPECTION 89 } 90 91 /** 92 * The number of milliseconds a watch dog thread sleeps between observing the state of the 93 * compilation thread it is associated with. Most compilations are expected to complete within 94 * this time period and thus not be actively monitored by the watch dog. 95 */ 96 private static final int SPIN_TIMEOUT_MS = 1000; 97 98 private WatchDogState state = WatchDogState.SLEEPING; 99 private final Thread compilerThread; 100 private final long startDelayMilliseconds; 101 private final long stackTraceIntervalMilliseconds; 102 private final int nonFatalIdenticalCompilationSnapshots; 103 private volatile ResolvedJavaMethod currentMethod; 104 private volatile int currentId; 105 private ResolvedJavaMethod lastWatched; 106 107 // The 4 fields below are for a single compilation being watched 108 private long elapsed; 109 private int traceIntervals; 110 private int numberOfIdenticalStackTraces; 111 private StackTraceElement[] lastStackTrace; 112 113 CompilationWatchDog(Thread compilerThread, long startDelayMilliseconds, long stackTraceIntervalMilliseconds, int nonFatalIdenticalCompilationSnapshots) { 114 this.compilerThread = compilerThread; 115 this.setName("WatchDog" + getId() + "[" + compilerThread.getName() + "]"); 116 this.setPriority(Thread.MAX_PRIORITY); 117 this.setDaemon(true); 118 this.startDelayMilliseconds = startDelayMilliseconds; 119 this.stackTraceIntervalMilliseconds = stackTraceIntervalMilliseconds; 120 this.nonFatalIdenticalCompilationSnapshots = nonFatalIdenticalCompilationSnapshots; 121 } 122 123 public void startCompilation(ResolvedJavaMethod method, int id) { 124 trace("start %s", fmt(method)); 125 this.currentMethod = method; 126 this.currentId = id; 127 } 128 129 public void stopCompilation() { 130 trace(" stop %s", fmt(currentMethod)); 131 this.currentMethod = null; 132 } 133 134 private void reset() { 135 elapsed = 0; 136 traceIntervals = 0; 137 numberOfIdenticalStackTraces = 0; 138 lastWatched = null; 139 lastStackTrace = null; 140 state = WatchDogState.SLEEPING; 141 } 142 143 private void tick(WatchDogState newState) { 144 state = newState; 145 } 146 147 /** 148 * Saves the current stack trace {@link StackTraceElement} of the monitored compiler thread 149 * {@link CompilationWatchDog#compilerThread}. 150 * 151 * @param newStackTrace the current stack trace of the monitored compiler thread 152 * @return {@code true} if the stack trace is equal to the last stack trace (or if it is the 153 * first one) and {@code false} if it is not equal to the last one. 154 */ 155 private boolean recordStackTrace(StackTraceElement[] newStackTrace) { 156 if (lastStackTrace == null) { 157 lastStackTrace = newStackTrace; 158 return true; 159 } 160 if (!Arrays.equals(lastStackTrace, newStackTrace)) { 161 lastStackTrace = newStackTrace; 162 return false; 163 } 164 return true; 165 } 166 167 /** 168 * Set to true to debug the watch dog. 169 */ 170 private static final boolean DEBUG = Boolean.parseBoolean(Services.getSavedProperties().get("debug.graal.CompilationWatchDog")); 171 172 private void trace(String format, Object... args) { 173 if (DEBUG) { 174 TTY.println(this + ": " + String.format(format, args)); 175 } 176 } 177 178 private static long ms(double seconds) { 179 return (long) seconds * 1000; 180 } 181 182 private static double secs(long ms) { 183 return (double) ms / 1000; 184 } 185 186 @Override 187 public String toString() { 188 return getName(); 189 } 190 191 @Override 192 public void run() { 193 try { 194 trace("Started%n", this); 195 while (true) { 196 // get a copy of the last set method 197 final ResolvedJavaMethod currentlyCompiling = currentMethod; 198 if (currentlyCompiling == null) { 199 // continue sleeping, compilation is either over before starting 200 // to watch the compiler thread or no compilation at all started 201 reset(); 202 } else { 203 switch (state) { 204 case SLEEPING: 205 lastWatched = currentlyCompiling; 206 elapsed = 0; 207 tick(WatchDogState.WATCHING_WITHOUT_STACK_INSPECTION); 208 break; 209 case WATCHING_WITHOUT_STACK_INSPECTION: 210 if (currentlyCompiling.equals(lastWatched)) { 211 if (elapsed >= startDelayMilliseconds) { 212 // we looked at the same compilation for a certain time 213 // so now we start to collect stack traces 214 tick(WatchDogState.WATCHING_WITH_STACK_INSPECTION); 215 trace("changes mode to watching with stack traces"); 216 } else { 217 // we still compile the same method but won't collect traces 218 // yet 219 trace("watching without stack traces [%.2f seconds]", secs(elapsed)); 220 } 221 elapsed += SPIN_TIMEOUT_MS; 222 } else { 223 // compilation finished before we exceeded initial watching 224 // period 225 reset(); 226 } 227 break; 228 case WATCHING_WITH_STACK_INSPECTION: 229 if (currentlyCompiling.equals(lastWatched)) { 230 if (elapsed >= startDelayMilliseconds + (traceIntervals * stackTraceIntervalMilliseconds)) { 231 trace("took a stack trace"); 232 boolean newStackTrace = recordStackTrace(compilerThread.getStackTrace()); 233 if (!newStackTrace) { 234 trace("%d identical stack traces in a row", numberOfIdenticalStackTraces); 235 numberOfIdenticalStackTraces = 0; 236 } 237 numberOfIdenticalStackTraces++; 238 if (numberOfIdenticalStackTraces > nonFatalIdenticalCompilationSnapshots) { 239 synchronized (CompilationWatchDog.class) { 240 TTY.printf("======================= WATCH DOG THREAD =======================%n" + 241 "%s took %d identical stack traces, which indicates a stuck compilation (id=%d) of %s%n%sExiting VM%n", this, 242 numberOfIdenticalStackTraces, currentId, fmt(currentMethod), fmt(lastStackTrace)); 243 System.exit(-1); 244 } 245 } else if (newStackTrace) { 246 synchronized (CompilationWatchDog.class) { 247 TTY.printf("======================= WATCH DOG THREAD =======================%n" + 248 "%s detected long running compilation (id=%d) of %s [%.2f seconds]%n%s", this, currentId, fmt(currentMethod), 249 secs(elapsed), fmt(lastStackTrace)); 250 } 251 } 252 traceIntervals++; 253 } else { 254 // we still watch the compilation in the same trace interval 255 trace("watching with stack traces [%.2f seconds]", secs(elapsed)); 256 } 257 elapsed += SPIN_TIMEOUT_MS; 258 } else { 259 // compilation finished before we are able to collect stack 260 // traces 261 reset(); 262 } 263 break; 264 default: 265 break; 266 } 267 } 268 try { 269 Thread.sleep(SPIN_TIMEOUT_MS); 270 } catch (InterruptedException e) { 271 // Silently swallow 272 } 273 } 274 } catch (VirtualMachineError vmError) { 275 /* 276 * We encounter a VM error. This includes for example OutOfMemoryExceptions. In such a 277 * case we silently swallow the error. If it happens again the application thread will 278 * most likely encounter the same problem. If not the watchdog thread will no longer 279 * monitor the compilation and thus the error cannot happen again. 280 */ 281 } catch (Throwable t) { 282 /* 283 * A real exception happened on the compilation watchdog. This is unintended behavior 284 * and must not happen in any case. 285 */ 286 throw new InternalError(String.format("%s encountered an exception%n%s%n", this, fmt(t)), t); 287 } 288 } 289 290 private static final ThreadLocal<CompilationWatchDog> WATCH_DOGS = new ThreadLocal<>(); 291 292 /** 293 * Opens a scope for watching the compilation of a given method. 294 * 295 * @param method a method about to be compiled 296 * @param id compilation request identifier 297 * @return {@code null} if the compilation watch dog is disabled otherwise this object. The 298 * returned value should be used in a {@code try}-with-resources statement whose scope 299 * is the whole compilation so that leaving the scope will cause {@link #close()} to be 300 * called. 301 */ 302 static CompilationWatchDog watch(ResolvedJavaMethod method, int id, OptionValues options) { 303 long startDelayMilliseconds = ms(Options.CompilationWatchDogStartDelay.getValue(options)); 304 if (startDelayMilliseconds > 0.0D) { 305 // Lazily get a watch dog thread for the current compiler thread 306 CompilationWatchDog watchDog = WATCH_DOGS.get(); 307 if (watchDog == null) { 308 Thread currentThread = currentThread(); 309 long stackTraceIntervalMilliseconds = ms(Options.CompilationWatchDogStackTraceInterval.getValue(options)); 310 int nonFatalIdenticalCompilationSnapshots = Options.NonFatalIdenticalCompilationSnapshots.getValue(options); 311 watchDog = new CompilationWatchDog(currentThread, startDelayMilliseconds, stackTraceIntervalMilliseconds, nonFatalIdenticalCompilationSnapshots); 312 WATCH_DOGS.set(watchDog); 313 watchDog.start(); 314 } 315 watchDog.startCompilation(method, id); 316 return watchDog; 317 } 318 return null; 319 } 320 321 @Override 322 public void close() { 323 stopCompilation(); 324 } 325 }