< prev index next >

src/share/vm/utilities/vmError.cpp

Print this page
rev 12487 : 8166944: Hanging Error Reporting steps may lead to torn error logs.
Reviewed-by: cjplummer, dholmes
Summary: Interupt error reporting if reporting steps hang to enable subsequent reporting steps to run.

*** 36,45 **** --- 36,46 ---- #include "runtime/init.hpp" #include "runtime/os.hpp" #include "runtime/thread.inline.hpp" #include "runtime/vmThread.hpp" #include "runtime/vm_operations.hpp" + #include "runtime/vm_version.hpp" #include "services/memTracker.hpp" #include "trace/traceMacros.hpp" #include "utilities/debug.hpp" #include "utilities/decoder.hpp" #include "utilities/defaultStream.hpp"
*** 310,323 **** // segment. int VMError::_current_step; const char* VMError::_current_step_info; void VMError::report(outputStream* st, bool _verbose) { # define BEGIN if (_current_step == 0) { _current_step = __LINE__; ! # define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s; # define END } // don't allocate large buffer on stack static char buf[O_BUFLEN]; --- 311,350 ---- // segment. int VMError::_current_step; const char* VMError::_current_step_info; + // Timeout handling. + static volatile jlong reporting_start_time = -1; + static volatile bool reporting_timeout = false; + static volatile jlong step_start_time = -1; + static volatile bool step_timeout = false; + + // Helper function, sets a timestamp to the current time. + static void set_to_now(volatile jlong* p) { + const jlong now = os::javaTimeMillis(); + if (VM_Version::supports_cx8()) { + Atomic::store(now, p); + } else { + *p = now; + } + } + + // helper function, returns value of time stamp. + static jlong get_timestamp(volatile jlong* p) { + if (VM_Version::supports_cx8()) { + return Atomic::load(p); + } else { + return *p; + } + } + void VMError::report(outputStream* st, bool _verbose) { # define BEGIN if (_current_step == 0) { _current_step = __LINE__; ! # define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s; \ ! set_to_now(&step_start_time); step_timeout = false; # define END } // don't allocate large buffer on stack static char buf[O_BUFLEN];
*** 350,359 **** --- 377,397 ---- st->print_cr("Will crash now (TestCrashInErrorHandler=" UINTX_FORMAT ")...", TestCrashInErrorHandler); controlled_crash(TestCrashInErrorHandler); } + // TestUnresponsiveErrorHandler: three times to trigger first a step timeout, then + // the global error reporting timeout. + STEP("test unresponsive error reporting step 1") + if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); } + + STEP("test unresponsive error reporting step 2") + if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); } + + STEP("test unresponsive error reporting step 3") + if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); } + STEP("test safefetch in error handler") // test whether it is safe to use SafeFetch32 in Crash Handler. Test twice // to test that resetting the signal handler works correctly. if (_verbose && TestSafeFetchInErrorHandler) { st->print_cr("Will test SafeFetch...");
*** 1174,1183 **** --- 1212,1224 ---- jio_vsnprintf(_detail_msg, sizeof(_detail_msg), detail_fmt, detail_args); // first time set_error_reported(); + reporting_started(); + set_to_now(&reporting_start_time); + if (ShowMessageBoxOnError || PauseAtExit) { show_message_box(buffer, sizeof(buffer)); // User has asked JVM to abort. Reset ShowMessageBoxOnError so the // WatcherThread can kill JVM if the error handler hangs.
*** 1214,1234 **** if (recursive_error_count++ > 30) { out.print_raw_cr("[Too many errors, abort]"); os::die(); } jio_snprintf(buffer, sizeof(buffer), "[error occurred during error reporting (%s), id 0x%x]", _current_step_info, _id); ! if (log.is_open()) { ! log.cr(); ! log.print_raw_cr(buffer); ! log.cr(); ! } else { ! out.cr(); ! out.print_raw_cr(buffer); ! out.cr(); } } } // print to screen --- 1255,1288 ---- if (recursive_error_count++ > 30) { out.print_raw_cr("[Too many errors, abort]"); os::die(); } + outputStream* const st = log.is_open() ? &log : &out; + st->cr(); + + // Timeout handling. + if (step_timeout) { + // The current step had a timeout. Lets continue reporting with the next step. + st->print_raw("[timeout occurred during error reporting in step \""); + st->print_raw(_current_step_info); + st->print_cr("\"] after " INT64_FORMAT " ms.", os::javaTimeMillis() - step_start_time); + } else if (reporting_timeout) { + // We hit ErrorLogTimeout. Reporting will stop altogether. Lets wrap things + // up, the process is about to be stopped by the WatcherThread. + st->print_cr("------ Timout during error reporting after " INT64_FORMAT "ms. ------", + os::javaTimeMillis() - reporting_start_time); + st->flush(); + // Watcherthread is about to call os::die. Lets just wait. + os::infinite_sleep(); + } else { + // Crash or assert during error reporting. Lets continue reporting with the next step. jio_snprintf(buffer, sizeof(buffer), "[error occurred during error reporting (%s), id 0x%x]", _current_step_info, _id); ! st->print_raw_cr(buffer); ! st->cr(); } } } // print to screen
*** 1419,1423 **** --- 1473,1523 ---- do { error_string(buf, buflen); yes = os::start_debugging(buf,buflen); } while (yes); } + + // Timeout handling: check if a timeout happened (either a single step did + // timeout or the whole of error reporting hit ErrorLogTimeout). Interrupt + // the reporting thread if that is the case. + bool VMError::check_timeout() { + + if (ErrorLogTimeout == 0) { + return false; + } + + // Do not check for timeouts if we still have a message box to show to the + // user or if there are OnError handlers to be run. + if (ShowMessageBoxOnError + || (OnError != NULL && OnError[0] != '\0') + || Arguments::abort_hook() != NULL) { + return false; + } + + const jlong reporting_start_time_l = get_timestamp(&reporting_start_time); + const jlong now = os::javaTimeMillis(); + if (reporting_start_time_l > 0) { + const jlong end = reporting_start_time_l + ErrorLogTimeout * 1000; + if (end <= now) { + reporting_timeout = true; + VMError::interrupt_reporting_thread(); + return true; // global timeout + } + } + + const jlong step_start_time_l = get_timestamp(&step_start_time); + if (step_start_time_l > 0) { + // A step times out after half of the total timeout. Steps are mostly fast unless they + // hang for some reason, so this simple rule allows for one hanging step and still + // leaves time enough for the rest of the steps to finish. + const jlong end = step_start_time_l + ErrorLogTimeout * 1000 / 2; + if (end <= now) { + step_timeout = true; + VMError::interrupt_reporting_thread(); + return false; // (Not a global timeout) + } + } + + return false; + + } +
< prev index next >