< prev index next >
src/share/vm/utilities/vmError.cpp
Print this page
rev 12487 : 8166944: Hanging Error Reporting steps may lead to torn error logs.
Reviewed-by: cjplummer, dholmes
Summary: Interupt error reporting if reporting steps hang to enable subsequent reporting steps to run.
*** 36,45 ****
--- 36,46 ----
#include "runtime/init.hpp"
#include "runtime/os.hpp"
#include "runtime/thread.inline.hpp"
#include "runtime/vmThread.hpp"
#include "runtime/vm_operations.hpp"
+ #include "runtime/vm_version.hpp"
#include "services/memTracker.hpp"
#include "trace/traceMacros.hpp"
#include "utilities/debug.hpp"
#include "utilities/decoder.hpp"
#include "utilities/defaultStream.hpp"
*** 310,323 ****
// segment.
int VMError::_current_step;
const char* VMError::_current_step_info;
void VMError::report(outputStream* st, bool _verbose) {
# define BEGIN if (_current_step == 0) { _current_step = __LINE__;
! # define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s;
# define END }
// don't allocate large buffer on stack
static char buf[O_BUFLEN];
--- 311,350 ----
// segment.
int VMError::_current_step;
const char* VMError::_current_step_info;
+ // Timeout handling.
+ static volatile jlong reporting_start_time = -1;
+ static volatile bool reporting_timeout = false;
+ static volatile jlong step_start_time = -1;
+ static volatile bool step_timeout = false;
+
+ // Helper function, sets a timestamp to the current time.
+ static void set_to_now(volatile jlong* p) {
+ const jlong now = os::javaTimeMillis();
+ if (VM_Version::supports_cx8()) {
+ Atomic::store(now, p);
+ } else {
+ *p = now;
+ }
+ }
+
+ // helper function, returns value of time stamp.
+ static jlong get_timestamp(volatile jlong* p) {
+ if (VM_Version::supports_cx8()) {
+ return Atomic::load(p);
+ } else {
+ return *p;
+ }
+ }
+
void VMError::report(outputStream* st, bool _verbose) {
# define BEGIN if (_current_step == 0) { _current_step = __LINE__;
! # define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s; \
! set_to_now(&step_start_time); step_timeout = false;
# define END }
// don't allocate large buffer on stack
static char buf[O_BUFLEN];
*** 350,359 ****
--- 377,397 ----
st->print_cr("Will crash now (TestCrashInErrorHandler=" UINTX_FORMAT ")...",
TestCrashInErrorHandler);
controlled_crash(TestCrashInErrorHandler);
}
+ // TestUnresponsiveErrorHandler: three times to trigger first a step timeout, then
+ // the global error reporting timeout.
+ STEP("test unresponsive error reporting step 1")
+ if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
+
+ STEP("test unresponsive error reporting step 2")
+ if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
+
+ STEP("test unresponsive error reporting step 3")
+ if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
+
STEP("test safefetch in error handler")
// test whether it is safe to use SafeFetch32 in Crash Handler. Test twice
// to test that resetting the signal handler works correctly.
if (_verbose && TestSafeFetchInErrorHandler) {
st->print_cr("Will test SafeFetch...");
*** 1174,1183 ****
--- 1212,1224 ----
jio_vsnprintf(_detail_msg, sizeof(_detail_msg), detail_fmt, detail_args);
// first time
set_error_reported();
+ reporting_started();
+ set_to_now(&reporting_start_time);
+
if (ShowMessageBoxOnError || PauseAtExit) {
show_message_box(buffer, sizeof(buffer));
// User has asked JVM to abort. Reset ShowMessageBoxOnError so the
// WatcherThread can kill JVM if the error handler hangs.
*** 1214,1234 ****
if (recursive_error_count++ > 30) {
out.print_raw_cr("[Too many errors, abort]");
os::die();
}
jio_snprintf(buffer, sizeof(buffer),
"[error occurred during error reporting (%s), id 0x%x]",
_current_step_info, _id);
! if (log.is_open()) {
! log.cr();
! log.print_raw_cr(buffer);
! log.cr();
! } else {
! out.cr();
! out.print_raw_cr(buffer);
! out.cr();
}
}
}
// print to screen
--- 1255,1288 ----
if (recursive_error_count++ > 30) {
out.print_raw_cr("[Too many errors, abort]");
os::die();
}
+ outputStream* const st = log.is_open() ? &log : &out;
+ st->cr();
+
+ // Timeout handling.
+ if (step_timeout) {
+ // The current step had a timeout. Lets continue reporting with the next step.
+ st->print_raw("[timeout occurred during error reporting in step \"");
+ st->print_raw(_current_step_info);
+ st->print_cr("\"] after " INT64_FORMAT " ms.", os::javaTimeMillis() - step_start_time);
+ } else if (reporting_timeout) {
+ // We hit ErrorLogTimeout. Reporting will stop altogether. Lets wrap things
+ // up, the process is about to be stopped by the WatcherThread.
+ st->print_cr("------ Timout during error reporting after " INT64_FORMAT "ms. ------",
+ os::javaTimeMillis() - reporting_start_time);
+ st->flush();
+ // Watcherthread is about to call os::die. Lets just wait.
+ os::infinite_sleep();
+ } else {
+ // Crash or assert during error reporting. Lets continue reporting with the next step.
jio_snprintf(buffer, sizeof(buffer),
"[error occurred during error reporting (%s), id 0x%x]",
_current_step_info, _id);
! st->print_raw_cr(buffer);
! st->cr();
}
}
}
// print to screen
*** 1419,1423 ****
--- 1473,1523 ----
do {
error_string(buf, buflen);
yes = os::start_debugging(buf,buflen);
} while (yes);
}
+
+ // Timeout handling: check if a timeout happened (either a single step did
+ // timeout or the whole of error reporting hit ErrorLogTimeout). Interrupt
+ // the reporting thread if that is the case.
+ bool VMError::check_timeout() {
+
+ if (ErrorLogTimeout == 0) {
+ return false;
+ }
+
+ // Do not check for timeouts if we still have a message box to show to the
+ // user or if there are OnError handlers to be run.
+ if (ShowMessageBoxOnError
+ || (OnError != NULL && OnError[0] != '\0')
+ || Arguments::abort_hook() != NULL) {
+ return false;
+ }
+
+ const jlong reporting_start_time_l = get_timestamp(&reporting_start_time);
+ const jlong now = os::javaTimeMillis();
+ if (reporting_start_time_l > 0) {
+ const jlong end = reporting_start_time_l + ErrorLogTimeout * 1000;
+ if (end <= now) {
+ reporting_timeout = true;
+ VMError::interrupt_reporting_thread();
+ return true; // global timeout
+ }
+ }
+
+ const jlong step_start_time_l = get_timestamp(&step_start_time);
+ if (step_start_time_l > 0) {
+ // A step times out after half of the total timeout. Steps are mostly fast unless they
+ // hang for some reason, so this simple rule allows for one hanging step and still
+ // leaves time enough for the rest of the steps to finish.
+ const jlong end = step_start_time_l + ErrorLogTimeout * 1000 / 2;
+ if (end <= now) {
+ step_timeout = true;
+ VMError::interrupt_reporting_thread();
+ return false; // (Not a global timeout)
+ }
+ }
+
+ return false;
+
+ }
+
< prev index next >