< prev index next >
src/share/vm/utilities/vmError.cpp
Print this page
rev 12487 : 8166944: Hanging Error Reporting steps may lead to torn error logs.
Reviewed-by: cjplummer, dholmes
Summary: Interupt error reporting if reporting steps hang to enable subsequent reporting steps to run.
rev 12488 : [mq]: 8166944-Hanging-Error-Reporting-2
*** 1,7 ****
/*
! * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
--- 1,7 ----
/*
! * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*** 311,350 ****
// segment.
int VMError::_current_step;
const char* VMError::_current_step_info;
! // Timeout handling.
! static volatile jlong reporting_start_time = -1;
! static volatile bool reporting_timeout = false;
! static volatile jlong step_start_time = -1;
! static volatile bool step_timeout = false;
! // Helper function, sets a timestamp to the current time.
! static void set_to_now(volatile jlong* p) {
! const jlong now = os::javaTimeMillis();
! if (VM_Version::supports_cx8()) {
! Atomic::store(now, p);
! } else {
! *p = now;
! }
}
! // helper function, returns value of time stamp.
! static jlong get_timestamp(volatile jlong* p) {
! if (VM_Version::supports_cx8()) {
! return Atomic::load(p);
! } else {
! return *p;
! }
}
void VMError::report(outputStream* st, bool _verbose) {
# define BEGIN if (_current_step == 0) { _current_step = __LINE__;
# define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s; \
! set_to_now(&step_start_time); step_timeout = false;
# define END }
// don't allocate large buffer on stack
static char buf[O_BUFLEN];
--- 311,355 ----
// segment.
int VMError::_current_step;
const char* VMError::_current_step_info;
! volatile jlong VMError::_reporting_start_time = -1;
! volatile bool VMError::_reporting_did_timeout = false;
! volatile jlong VMError::_step_start_time = -1;
! volatile bool VMError::_step_did_timeout = false;
! // Helper, return current timestamp for timeout handling.
! jlong VMError::get_current_timestamp() {
! return os::javaTimeNanos();
}
+ // Factor to translate the timestamp to seconds.
+ #define TIMESTAMP_TO_SECONDS_FACTOR (1000 * 1000 * 1000)
! void VMError::record_reporting_start_time() {
! const jlong now = get_current_timestamp();
! Atomic::store(now, &_reporting_start_time);
! }
!
! jlong VMError::get_reporting_start_time() {
! return Atomic::load(&_reporting_start_time);
! }
!
! void VMError::record_step_start_time() {
! const jlong now = get_current_timestamp();
! Atomic::store(now, &_step_start_time);
! }
!
! jlong VMError::get_step_start_time() {
! return Atomic::load(&_step_start_time);
}
void VMError::report(outputStream* st, bool _verbose) {
# define BEGIN if (_current_step == 0) { _current_step = __LINE__;
# define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s; \
! record_step_start_time(); _step_did_timeout = false;
# define END }
// don't allocate large buffer on stack
static char buf[O_BUFLEN];
*** 377,396 ****
st->print_cr("Will crash now (TestCrashInErrorHandler=" UINTX_FORMAT ")...",
TestCrashInErrorHandler);
controlled_crash(TestCrashInErrorHandler);
}
! // TestUnresponsiveErrorHandler: three times to trigger first a step timeout, then
! // the global error reporting timeout.
! STEP("test unresponsive error reporting step 1")
! if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
!
! STEP("test unresponsive error reporting step 2")
! if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
!
! STEP("test unresponsive error reporting step 3")
if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
STEP("test safefetch in error handler")
// test whether it is safe to use SafeFetch32 in Crash Handler. Test twice
// to test that resetting the signal handler works correctly.
if (_verbose && TestSafeFetchInErrorHandler) {
--- 382,402 ----
st->print_cr("Will crash now (TestCrashInErrorHandler=" UINTX_FORMAT ")...",
TestCrashInErrorHandler);
controlled_crash(TestCrashInErrorHandler);
}
! // TestUnresponsiveErrorHandler: We want to test both step timeouts and global timeout.
! // Step to global timeout ratio is 4:1, so in order to be absolutely sure we hit the
! // global timeout, lets execute the timeout step five times.
! // See corresponding test in test/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java
! #define TIMEOUT_TEST_STEP STEP("test unresponsive error reporting step") \
if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
+ TIMEOUT_TEST_STEP
+ TIMEOUT_TEST_STEP
+ TIMEOUT_TEST_STEP
+ TIMEOUT_TEST_STEP
+ TIMEOUT_TEST_STEP
STEP("test safefetch in error handler")
// test whether it is safe to use SafeFetch32 in Crash Handler. Test twice
// to test that resetting the signal handler works correctly.
if (_verbose && TestSafeFetchInErrorHandler) {
*** 1213,1223 ****
// first time
set_error_reported();
reporting_started();
! set_to_now(&reporting_start_time);
if (ShowMessageBoxOnError || PauseAtExit) {
show_message_box(buffer, sizeof(buffer));
// User has asked JVM to abort. Reset ShowMessageBoxOnError so the
--- 1219,1229 ----
// first time
set_error_reported();
reporting_started();
! record_reporting_start_time();
if (ShowMessageBoxOnError || PauseAtExit) {
show_message_box(buffer, sizeof(buffer));
// User has asked JVM to abort. Reset ShowMessageBoxOnError so the
*** 1259,1278 ****
outputStream* const st = log.is_open() ? &log : &out;
st->cr();
// Timeout handling.
! if (step_timeout) {
// The current step had a timeout. Lets continue reporting with the next step.
st->print_raw("[timeout occurred during error reporting in step \"");
st->print_raw(_current_step_info);
! st->print_cr("\"] after " INT64_FORMAT " ms.", os::javaTimeMillis() - step_start_time);
! } else if (reporting_timeout) {
! // We hit ErrorLogTimeout. Reporting will stop altogether. Lets wrap things
// up, the process is about to be stopped by the WatcherThread.
! st->print_cr("------ Timout during error reporting after " INT64_FORMAT "ms. ------",
! os::javaTimeMillis() - reporting_start_time);
st->flush();
// Watcherthread is about to call os::die. Lets just wait.
os::infinite_sleep();
} else {
// Crash or assert during error reporting. Lets continue reporting with the next step.
--- 1265,1285 ----
outputStream* const st = log.is_open() ? &log : &out;
st->cr();
// Timeout handling.
! if (_step_did_timeout) {
// The current step had a timeout. Lets continue reporting with the next step.
st->print_raw("[timeout occurred during error reporting in step \"");
st->print_raw(_current_step_info);
! st->print_cr("\"] after " INT64_FORMAT " s.",
! (get_current_timestamp() - _step_start_time) / TIMESTAMP_TO_SECONDS_FACTOR);
! } else if (_reporting_did_timeout) {
! // We hit ErrorLogTimeout. Reporting will stop altogether. Let's wrap things
// up, the process is about to be stopped by the WatcherThread.
! st->print_cr("------ Timeout during error reporting after " INT64_FORMAT " s. ------",
! (get_current_timestamp() - _reporting_start_time) / TIMESTAMP_TO_SECONDS_FACTOR);
st->flush();
// Watcherthread is about to call os::die. Lets just wait.
os::infinite_sleep();
} else {
// Crash or assert during error reporting. Lets continue reporting with the next step.
*** 1491,1520 ****
|| (OnError != NULL && OnError[0] != '\0')
|| Arguments::abort_hook() != NULL) {
return false;
}
! const jlong reporting_start_time_l = get_timestamp(&reporting_start_time);
! const jlong now = os::javaTimeMillis();
if (reporting_start_time_l > 0) {
! const jlong end = reporting_start_time_l + ErrorLogTimeout * 1000;
if (end <= now) {
! reporting_timeout = true;
! VMError::interrupt_reporting_thread();
return true; // global timeout
}
}
! const jlong step_start_time_l = get_timestamp(&step_start_time);
if (step_start_time_l > 0) {
! // A step times out after half of the total timeout. Steps are mostly fast unless they
! // hang for some reason, so this simple rule allows for one hanging step and still
! // leaves time enough for the rest of the steps to finish.
! const jlong end = step_start_time_l + ErrorLogTimeout * 1000 / 2;
if (end <= now) {
! step_timeout = true;
! VMError::interrupt_reporting_thread();
return false; // (Not a global timeout)
}
}
return false;
--- 1498,1528 ----
|| (OnError != NULL && OnError[0] != '\0')
|| Arguments::abort_hook() != NULL) {
return false;
}
! const jlong reporting_start_time_l = get_reporting_start_time();
! const jlong now = get_current_timestamp();
! // Timestamp is stored in nanos.
if (reporting_start_time_l > 0) {
! const jlong end = reporting_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR;
if (end <= now) {
! _reporting_did_timeout = true;
! interrupt_reporting_thread();
return true; // global timeout
}
}
! const jlong step_start_time_l = get_step_start_time();
if (step_start_time_l > 0) {
! // A step times out after a quarter of the total timeout. Steps are mostly fast unless they
! // hang for some reason, so this simple rule allows for three hanging step and still
! // hopefully leaves time enough for the rest of the steps to finish.
! const jlong end = step_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR / 4;
if (end <= now) {
! _step_did_timeout = true;
! interrupt_reporting_thread();
return false; // (Not a global timeout)
}
}
return false;
< prev index next >