1 /*
   2  * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 
  25 package org.graalvm.compiler.hotspot;
  26 
  27 import static org.graalvm.compiler.hotspot.HotSpotGraalCompiler.fmt;
  28 
  29 import java.util.Arrays;
  30 
  31 import org.graalvm.compiler.core.GraalServiceThread;
  32 import org.graalvm.compiler.debug.TTY;
  33 import org.graalvm.compiler.options.Option;
  34 import org.graalvm.compiler.options.OptionKey;
  35 import org.graalvm.compiler.options.OptionType;
  36 import org.graalvm.compiler.options.OptionValues;
  37 
  38 import jdk.vm.ci.meta.ResolvedJavaMethod;
  39 import jdk.vm.ci.services.Services;
  40 
  41 /**
  42  * A watch dog for reporting long running compilations. This is designed to be an always on
  43  * mechanism for the purpose of getting better reports from customer sites. As such, it only exits
  44  * the VM when it is very sure about a stuck compilation as opposed to only observing a long running
  45  * compilation. In both cases, it logs messages to {@link TTY}.
  46  *
  47  * A watch dog thread is associated with each compiler thread. It wakes up every
  48  * {@value #SPIN_TIMEOUT_MS} milliseconds to observe the state of the compiler thread. After the
  49  * first {@link Options#CompilationWatchDogStartDelay} seconds of a specific compilation, the watch
  50  * dog reports a long running compilation. Every
  51  * {@link Options#CompilationWatchDogStackTraceInterval} seconds after that point in time where the
  52  * same compilation is still executing, the watch dog takes a stack trace of the compiler thread. If
  53  * more than {@link Options#NonFatalIdenticalCompilationSnapshots} contiguous identical stack traces
  54  * are seen, the watch dog reports a stuck compilation and exits the VM.
  55  */
  56 class CompilationWatchDog implements Runnable, AutoCloseable {
  57 
  58     public static class Options {
  59         // @formatter:off
  60         @Option(help = "Delay in seconds before watch dog monitoring a compilation (0 disables monitoring).", type = OptionType.Debug)
  61         public static final OptionKey<Double> CompilationWatchDogStartDelay = new OptionKey<>(0.0D);
  62         @Option(help = "Interval in seconds between a watch dog reporting stack traces for long running compilations.", type = OptionType.Debug)
  63         public static final OptionKey<Double> CompilationWatchDogStackTraceInterval = new OptionKey<>(60.0D);
  64         @Option(help = "Number of contiguous identical compiler thread stack traces allowed before the VM exits " +
  65                        "on the basis of a stuck compilation.", type = OptionType.Debug)
  66         public static final OptionKey<Integer> NonFatalIdenticalCompilationSnapshots = new OptionKey<>(20);
  67         // @formatter:on
  68     }
  69 
  70     private enum WatchDogState {
  71         /**
  72          * The watch dog thread sleeps currently, either no method is currently compiled, or no
  73          * method is compiled long enough to be monitored.
  74          */
  75         SLEEPING,
  76         /**
  77          * The watch dog thread identified a compilation that already takes long enough to be
  78          * interesting. It will sleep and wake up periodically and check if the current compilation
  79          * takes too long. If it takes too long it will start collecting stack traces from the
  80          * compiler thread.
  81          */
  82         WATCHING_WITHOUT_STACK_INSPECTION,
  83         /**
  84          * The watch dog thread is fully monitoring the compiler thread. It takes stack traces
  85          * periodically and sleeps again until the next period. If the number of stack traces
  86          * reaches a certain upper bound and those stack traces are equal it will shut down the
  87          * entire VM with an error.
  88          */
  89         WATCHING_WITH_STACK_INSPECTION
  90     }
  91 
  92     /**
  93      * The number of milliseconds a watch dog thread sleeps between observing the state of the
  94      * compilation thread it is associated with. Most compilations are expected to complete within
  95      * this time period and thus not be actively monitored by the watch dog.
  96      */
  97     private static final int SPIN_TIMEOUT_MS = 1000;
  98 
  99     private WatchDogState state = WatchDogState.SLEEPING;
 100     private final Thread compilerThread;
 101     private final long startDelayMilliseconds;
 102     private final long stackTraceIntervalMilliseconds;
 103     private final int nonFatalIdenticalCompilationSnapshots;
 104     private volatile ResolvedJavaMethod currentMethod;
 105     private volatile int currentId;
 106     private ResolvedJavaMethod lastWatched;
 107 
 108     // The 4 fields below are for a single compilation being watched
 109     private long elapsed;
 110     private int traceIntervals;
 111     private int numberOfIdenticalStackTraces;
 112     private StackTraceElement[] lastStackTrace;
 113 
 114     CompilationWatchDog(Thread compilerThread, long startDelayMilliseconds, long stackTraceIntervalMilliseconds, int nonFatalIdenticalCompilationSnapshots) {
 115         this.compilerThread = compilerThread;
 116         this.startDelayMilliseconds = startDelayMilliseconds;
 117         this.stackTraceIntervalMilliseconds = stackTraceIntervalMilliseconds;
 118         this.nonFatalIdenticalCompilationSnapshots = nonFatalIdenticalCompilationSnapshots;
 119     }
 120 
 121     public void startCompilation(ResolvedJavaMethod method, int id) {
 122         trace("start %s", fmt(method));
 123         this.currentMethod = method;
 124         this.currentId = id;
 125     }
 126 
 127     public void stopCompilation() {
 128         trace(" stop %s", fmt(currentMethod));
 129         this.currentMethod = null;
 130     }
 131 
 132     private void reset() {
 133         elapsed = 0;
 134         traceIntervals = 0;
 135         numberOfIdenticalStackTraces = 0;
 136         lastWatched = null;
 137         lastStackTrace = null;
 138         state = WatchDogState.SLEEPING;
 139     }
 140 
 141     private void tick(WatchDogState newState) {
 142         state = newState;
 143     }
 144 
 145     /**
 146      * Saves the current stack trace {@link StackTraceElement} of the monitored compiler thread
 147      * {@link CompilationWatchDog#compilerThread}.
 148      *
 149      * @param newStackTrace the current stack trace of the monitored compiler thread
 150      * @return {@code true} if the stack trace is equal to the last stack trace (or if it is the
 151      *         first one) and {@code false} if it is not equal to the last one.
 152      */
 153     private boolean recordStackTrace(StackTraceElement[] newStackTrace) {
 154         if (lastStackTrace == null) {
 155             lastStackTrace = newStackTrace;
 156             return true;
 157         }
 158         if (!Arrays.equals(lastStackTrace, newStackTrace)) {
 159             lastStackTrace = newStackTrace;
 160             return false;
 161         }
 162         return true;
 163     }
 164 
 165     /**
 166      * Set to true to debug the watch dog.
 167      */
 168     private static final boolean DEBUG = Boolean.parseBoolean(Services.getSavedProperties().get("debug.graal.CompilationWatchDog"));
 169 
 170     private void trace(String format, Object... args) {
 171         if (DEBUG) {
 172             TTY.println(this + ": " + String.format(format, args));
 173         }
 174     }
 175 
 176     private static long ms(double seconds) {
 177         return (long) seconds * 1000;
 178     }
 179 
 180     private static double secs(long ms) {
 181         return (double) ms / 1000;
 182     }
 183 
 184     @Override
 185     public String toString() {
 186         return "WatchDog[" + compilerThread.getName() + "]";
 187     }
 188 
 189     @Override
 190     public void run() {
 191         try {
 192             trace("Started%n", this);
 193             while (true) {
 194                 // get a copy of the last set method
 195                 final ResolvedJavaMethod currentlyCompiling = currentMethod;
 196                 if (currentlyCompiling == null) {
 197                     // continue sleeping, compilation is either over before starting
 198                     // to watch the compiler thread or no compilation at all started
 199                     reset();
 200                 } else {
 201                     switch (state) {
 202                         case SLEEPING:
 203                             lastWatched = currentlyCompiling;
 204                             elapsed = 0;
 205                             tick(WatchDogState.WATCHING_WITHOUT_STACK_INSPECTION);
 206                             break;
 207                         case WATCHING_WITHOUT_STACK_INSPECTION:
 208                             if (currentlyCompiling.equals(lastWatched)) {
 209                                 if (elapsed >= startDelayMilliseconds) {
 210                                     // we looked at the same compilation for a certain time
 211                                     // so now we start to collect stack traces
 212                                     tick(WatchDogState.WATCHING_WITH_STACK_INSPECTION);
 213                                     trace("changes mode to watching with stack traces");
 214                                 } else {
 215                                     // we still compile the same method but won't collect traces
 216                                     // yet
 217                                     trace("watching without stack traces [%.2f seconds]", secs(elapsed));
 218                                 }
 219                                 elapsed += SPIN_TIMEOUT_MS;
 220                             } else {
 221                                 // compilation finished before we exceeded initial watching
 222                                 // period
 223                                 reset();
 224                             }
 225                             break;
 226                         case WATCHING_WITH_STACK_INSPECTION:
 227                             if (currentlyCompiling.equals(lastWatched)) {
 228                                 if (elapsed >= startDelayMilliseconds + (traceIntervals * stackTraceIntervalMilliseconds)) {
 229                                     trace("took a stack trace");
 230                                     boolean newStackTrace = recordStackTrace(compilerThread.getStackTrace());
 231                                     if (!newStackTrace) {
 232                                         trace("%d identical stack traces in a row", numberOfIdenticalStackTraces);
 233                                         numberOfIdenticalStackTraces = 0;
 234                                     }
 235                                     numberOfIdenticalStackTraces++;
 236                                     if (numberOfIdenticalStackTraces > nonFatalIdenticalCompilationSnapshots) {
 237                                         synchronized (CompilationWatchDog.class) {
 238                                             TTY.printf("======================= WATCH DOG THREAD =======================%n" +
 239                                                             "%s took %d identical stack traces, which indicates a stuck compilation (id=%d) of %s%n%sExiting VM%n", this,
 240                                                             numberOfIdenticalStackTraces, currentId, fmt(currentMethod), fmt(lastStackTrace));
 241                                             HotSpotGraalServices.exit(-1);
 242                                         }
 243                                     } else if (newStackTrace) {
 244                                         synchronized (CompilationWatchDog.class) {
 245                                             TTY.printf("======================= WATCH DOG THREAD =======================%n" +
 246                                                             "%s detected long running compilation (id=%d) of %s [%.2f seconds]%n%s", this, currentId, fmt(currentMethod),
 247                                                             secs(elapsed), fmt(lastStackTrace));
 248                                         }
 249                                     }
 250                                     traceIntervals++;
 251                                 } else {
 252                                     // we still watch the compilation in the same trace interval
 253                                     trace("watching with stack traces [%.2f seconds]", secs(elapsed));
 254                                 }
 255                                 elapsed += SPIN_TIMEOUT_MS;
 256                             } else {
 257                                 // compilation finished before we are able to collect stack
 258                                 // traces
 259                                 reset();
 260                             }
 261                             break;
 262                         default:
 263                             break;
 264                     }
 265                 }
 266                 try {
 267                     Thread.sleep(SPIN_TIMEOUT_MS);
 268                 } catch (InterruptedException e) {
 269                     // Silently swallow
 270                 }
 271             }
 272         } catch (VirtualMachineError vmError) {
 273             /*
 274              * We encounter a VM error. This includes for example OutOfMemoryExceptions. In such a
 275              * case we silently swallow the error. If it happens again the application thread will
 276              * most likely encounter the same problem. If not the watchdog thread will no longer
 277              * monitor the compilation and thus the error cannot happen again.
 278              */
 279         } catch (Throwable t) {
 280             /*
 281              * A real exception happened on the compilation watchdog. This is unintended behavior
 282              * and must not happen in any case.
 283              */
 284             throw new InternalError(String.format("%s encountered an exception%n%s%n", this, fmt(t)), t);
 285         }
 286     }
 287 
 288     private static final ThreadLocal<CompilationWatchDog> WATCH_DOGS = new ThreadLocal<>();
 289 
 290     /**
 291      * Opens a scope for watching the compilation of a given method.
 292      *
 293      * @param method a method about to be compiled
 294      * @param id compilation request identifier
 295      * @return {@code null} if the compilation watch dog is disabled otherwise this object. The
 296      *         returned value should be used in a {@code try}-with-resources statement whose scope
 297      *         is the whole compilation so that leaving the scope will cause {@link #close()} to be
 298      *         called.
 299      */
 300     static CompilationWatchDog watch(ResolvedJavaMethod method, int id, OptionValues options) {
 301         long startDelayMilliseconds = ms(Options.CompilationWatchDogStartDelay.getValue(options));
 302         if (startDelayMilliseconds > 0.0D) {
 303             // Lazily get a watch dog thread for the current compiler thread
 304             CompilationWatchDog watchDog = WATCH_DOGS.get();
 305             if (watchDog == null) {
 306                 Thread currentThread = Thread.currentThread();
 307                 long stackTraceIntervalMilliseconds = ms(Options.CompilationWatchDogStackTraceInterval.getValue(options));
 308                 int nonFatalIdenticalCompilationSnapshots = Options.NonFatalIdenticalCompilationSnapshots.getValue(options);
 309                 watchDog = new CompilationWatchDog(currentThread, startDelayMilliseconds, stackTraceIntervalMilliseconds, nonFatalIdenticalCompilationSnapshots);
 310                 WATCH_DOGS.set(watchDog);
 311                 GraalServiceThread thread = new GraalServiceThread(watchDog);
 312                 thread.setName(thread.getId() + " " + watchDog.toString());
 313                 thread.setPriority(Thread.MAX_PRIORITY);
 314                 thread.setDaemon(true);
 315                 thread.start();
 316             }
 317             watchDog.startCompilation(method, id);
 318             return watchDog;
 319         }
 320         return null;
 321     }
 322 
 323     @Override
 324     public void close() {
 325         stopCompilation();
 326     }
 327 }