1 /*
   2  * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 package org.graalvm.compiler.hotspot;
  24 
  25 import static org.graalvm.compiler.hotspot.HotSpotGraalCompiler.fmt;
  26 
  27 import java.util.Arrays;
  28 
  29 import org.graalvm.compiler.debug.TTY;
  30 import org.graalvm.compiler.options.Option;
  31 import org.graalvm.compiler.options.OptionType;
  32 import org.graalvm.compiler.options.OptionValue;
  33 
  34 import jdk.vm.ci.meta.ResolvedJavaMethod;
  35 
  36 /**
  37  * A watch dog for reporting long running compilations. This is designed to be an always on
  38  * mechanism for the purpose of getting better reports from customer sites. As such, it only exits
  39  * the VM when it is very sure about a stuck compilation as opposed to only observing a long running
  40  * compilation. In both cases, it logs messages to {@link TTY}.
  41  *
  42  * A watch dog thread is associated with each compiler thread. It wakes up every
  43  * {@value #SPIN_TIMEOUT_MS} milliseconds to observe the state of the compiler thread. After the
  44  * first {@link Options#CompilationWatchDogStartDelay} seconds of a specific compilation, the watch
  45  * dog reports a long running compilation. Every
  46  * {@link Options#CompilationWatchDogStackTraceInterval} seconds after that point in time where the
  47  * same compilation is still executing, the watch dog takes a stack trace of the compiler thread. If
  48  * more than {@value Options#NonFatalIdenticalCompilationSnapshots} contiguous identical stack
  49  * traces are seen, the watch dog reports a stuck compilation and exits the VM.
  50  */
  51 class CompilationWatchDog extends Thread implements AutoCloseable {
  52 
  53     public static class Options {
  54         // @formatter:off
  55         @Option(help = "Delay in seconds before watch dog monitoring a compilation (0 disables monitoring).", type = OptionType.Debug)
  56         public static final OptionValue<Double> CompilationWatchDogStartDelay = new OptionValue<>(30.0D);
  57         @Option(help = "Interval in seconds between a watch dog reporting stack traces for long running compilations.", type = OptionType.Debug)
  58         public static final OptionValue<Double> CompilationWatchDogStackTraceInterval = new OptionValue<>(30.0D);
  59         @Option(help = "Number of contiguous identical compiler thread stack traces allowed before the VM exits " +
  60                        "on the basis of a stuck compilation.", type = OptionType.Debug)
  61          public static final OptionValue<Integer> NonFatalIdenticalCompilationSnapshots = new OptionValue<>(10);
  62         // @formatter:on
  63     }
  64 
  65     private enum WatchDogState {
  66         /**
  67          * The watch dog thread sleeps currently, either no method is currently compiled, or no
  68          * method is compiled long enough to be monitored.
  69          */
  70         SLEEPING,
  71         /**
  72          * The watch dog thread identified a compilation that already takes long enough to be
  73          * interesting. It will sleep and wake up periodically and check if the current compilation
  74          * takes too long. If it takes too long it will start collecting stack traces from the
  75          * compiler thread.
  76          */
  77         WATCHING_WITHOUT_STACK_INSPECTION,
  78         /**
  79          * The watch dog thread is fully monitoring the compiler thread. It takes stack traces
  80          * periodically and sleeps again until the next period. If the number of stack traces
  81          * reaches a certain upper bound and those stack traces are equal it will shut down the
  82          * entire VM with an error.
  83          */
  84         WATCHING_WITH_STACK_INSPECTION
  85     }
  86 
  87     /**
  88      * The number of milliseconds a watch dog thread sleeps between observing the state of the
  89      * compilation thread it is associated with. Most compilations are expected to complete within
  90      * this time period and thus not be actively monitored by the watch dog.
  91      */
  92     private static final int SPIN_TIMEOUT_MS = 1000;
  93     private static final long START_DELAY_MS = ms(Options.CompilationWatchDogStartDelay.getValue());
  94     private static final long STACK_TRACE_INTERVAL_MS = ms(Options.CompilationWatchDogStackTraceInterval.getValue());
  95     private static final boolean ENABLED = START_DELAY_MS > 0.0D;
  96 
  97     private WatchDogState state = WatchDogState.SLEEPING;
  98     private final Thread compilerThread;
  99     private volatile ResolvedJavaMethod currentMethod;
 100     private volatile int currentId;
 101     private ResolvedJavaMethod lastWatched;
 102 
 103     // The 4 fields below are for a single compilation being watched
 104     private long elapsed;
 105     private int traceIntervals;
 106     private int numberOfIdenticalStackTraces;
 107     private StackTraceElement[] lastStackTrace;
 108 
 109     CompilationWatchDog(Thread compilerThread) {
 110         this.compilerThread = compilerThread;
 111         this.setName("WatchDog" + getId() + "[" + compilerThread.getName() + "]");
 112         this.setPriority(Thread.MAX_PRIORITY);
 113         this.setDaemon(true);
 114     }
 115 
 116     public void startCompilation(ResolvedJavaMethod method, int id) {
 117         trace("start %s", fmt(method));
 118         this.currentMethod = method;
 119         this.currentId = id;
 120     }
 121 
 122     public void stopCompilation() {
 123         trace(" stop %s", fmt(currentMethod));
 124         this.currentMethod = null;
 125     }
 126 
 127     private void reset() {
 128         elapsed = 0;
 129         traceIntervals = 0;
 130         numberOfIdenticalStackTraces = 0;
 131         lastWatched = null;
 132         lastStackTrace = null;
 133         state = WatchDogState.SLEEPING;
 134     }
 135 
 136     private void tick(WatchDogState newState) {
 137         state = newState;
 138     }
 139 
 140     /**
 141      * Saves the current stack trace {@link StackTraceElement} of the monitored compiler thread
 142      * {@link CompilationWatchDog#compilerThread}.
 143      *
 144      * @param newStackTrace the current stack trace of the monitored compiler thread
 145      * @return {@code true} if the stack trace is equal to the last stack trace (or if it is the
 146      *         first one) and {@code false} if it is not equal to the last one.
 147      */
 148     private boolean recordStackTrace(StackTraceElement[] newStackTrace) {
 149         if (lastStackTrace == null) {
 150             lastStackTrace = newStackTrace;
 151             return true;
 152         }
 153         if (!Arrays.equals(lastStackTrace, newStackTrace)) {
 154             lastStackTrace = newStackTrace;
 155             return false;
 156         }
 157         return true;
 158     }
 159 
 160     /**
 161      * Set to true to debug the watch dog.
 162      */
 163     private static final boolean DEBUG = Boolean.getBoolean("debug.graal.CompilationWatchDog");
 164 
 165     private void trace(String format, Object... args) {
 166         if (DEBUG) {
 167             TTY.println(this + ": " + String.format(format, args));
 168         }
 169     }
 170 
 171     private static long ms(double seconds) {
 172         return (long) seconds * 1000;
 173     }
 174 
 175     private static double secs(long ms) {
 176         return (double) ms / 1000;
 177     }
 178 
 179     @Override
 180     public String toString() {
 181         return getName();
 182     }
 183 
 184     @Override
 185     public void run() {
 186         trace("Started%n", this);
 187         try {
 188             while (true) {
 189                 // get a copy of the last set method
 190                 final ResolvedJavaMethod currentlyCompiling = currentMethod;
 191                 if (currentlyCompiling == null) {
 192                     // continue sleeping, compilation is either over before starting
 193                     // to watch the compiler thread or no compilation at all started
 194                     reset();
 195                 } else {
 196                     switch (state) {
 197                         case SLEEPING:
 198                             lastWatched = currentlyCompiling;
 199                             elapsed = 0;
 200                             tick(WatchDogState.WATCHING_WITHOUT_STACK_INSPECTION);
 201                             break;
 202                         case WATCHING_WITHOUT_STACK_INSPECTION:
 203                             if (currentlyCompiling == lastWatched) {
 204                                 if (elapsed >= START_DELAY_MS) {
 205                                     // we looked at the same compilation for a certain time
 206                                     // so now we start to collect stack traces
 207                                     tick(WatchDogState.WATCHING_WITH_STACK_INSPECTION);
 208                                     trace("changes mode to watching with stack traces");
 209                                 } else {
 210                                     // we still compile the same method but won't collect traces yet
 211                                     trace("watching without stack traces [%.2f seconds]", secs(elapsed));
 212                                 }
 213                                 elapsed += SPIN_TIMEOUT_MS;
 214                             } else {
 215                                 // compilation finished before we exceeded initial watching period
 216                                 reset();
 217                             }
 218                             break;
 219                         case WATCHING_WITH_STACK_INSPECTION:
 220                             if (currentlyCompiling == lastWatched) {
 221                                 if (elapsed >= START_DELAY_MS + (traceIntervals * STACK_TRACE_INTERVAL_MS)) {
 222                                     trace("took a stack trace");
 223                                     boolean newStackTrace = recordStackTrace(compilerThread.getStackTrace());
 224                                     if (!newStackTrace) {
 225                                         trace("%d identical stack traces in a row", numberOfIdenticalStackTraces);
 226                                         numberOfIdenticalStackTraces = 0;
 227                                     }
 228                                     numberOfIdenticalStackTraces++;
 229                                     if (numberOfIdenticalStackTraces > Options.NonFatalIdenticalCompilationSnapshots.getValue()) {
 230                                         synchronized (CompilationWatchDog.class) {
 231                                             TTY.printf("======================= WATCH DOG THREAD =======================%n" +
 232                                                             "%s took %d identical stack traces, which indicates a stuck compilation (id=%d) of %s%n%sExiting VM%n", this,
 233                                                             numberOfIdenticalStackTraces, currentId, fmt(currentMethod), fmt(lastStackTrace));
 234                                             System.exit(-1);
 235                                         }
 236                                     } else if (newStackTrace) {
 237                                         synchronized (CompilationWatchDog.class) {
 238                                             TTY.printf("======================= WATCH DOG THREAD =======================%n" +
 239                                                             "%s detected long running compilation (id=%d) of %s [%.2f seconds]%n%s", this, currentId, fmt(currentMethod),
 240                                                             secs(elapsed), fmt(lastStackTrace));
 241                                         }
 242                                     }
 243                                     traceIntervals++;
 244                                 } else {
 245                                     // we still watch the compilation in the same trace interval
 246                                     trace("watching with stack traces [%.2f seconds]", secs(elapsed));
 247                                 }
 248                                 elapsed += SPIN_TIMEOUT_MS;
 249                             } else {
 250                                 // compilation finished before we are able to collect stack traces
 251                                 reset();
 252                             }
 253                             break;
 254                         default:
 255                             break;
 256                     }
 257                 }
 258                 Thread.sleep(SPIN_TIMEOUT_MS);
 259             }
 260         } catch (Throwable t) {
 261             synchronized (CompilationWatchDog.class) {
 262                 TTY.printf("%s encountered an exception%n%s%n", this, fmt(t));
 263             }
 264         }
 265     }
 266 
 267     private static final ThreadLocal<CompilationWatchDog> WATCH_DOGS = ENABLED ? new ThreadLocal<>() : null;
 268 
 269     /**
 270      * Opens a scope for watching the compilation of a given method.
 271      *
 272      * @param method a method about to be compiled
 273      * @param id compilation request identifier
 274      * @return {@code null} if the compilation watch dog is disabled otherwise this object. The
 275      *         returned value should be used in a {@code try}-with-resources statement whose scope
 276      *         is the whole compilation so that leaving the scope will cause {@link #close()} to be
 277      *         called.
 278      */
 279     static CompilationWatchDog watch(ResolvedJavaMethod method, int id) {
 280         if (ENABLED) {
 281             // Lazily get a watch dog thread for the current compiler thread
 282             CompilationWatchDog watchDog = WATCH_DOGS.get();
 283             if (watchDog == null) {
 284                 Thread currentThread = currentThread();
 285                 watchDog = new CompilationWatchDog(currentThread);
 286                 WATCH_DOGS.set(watchDog);
 287                 watchDog.start();
 288             }
 289             watchDog.startCompilation(method, id);
 290             return watchDog;
 291         }
 292         return null;
 293     }
 294 
 295     @Override
 296     public void close() {
 297         stopCompilation();
 298     }
 299 }