< prev index next >

src/share/vm/utilities/vmError.cpp

Print this page
rev 12487 : 8166944: Hanging Error Reporting steps may lead to torn error logs.
Reviewed-by: cjplummer, dholmes
Summary: Interupt error reporting if reporting steps hang to enable subsequent reporting steps to run.
rev 12488 : [mq]: 8166944-Hanging-Error-Reporting-2

*** 1,7 **** /* ! * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. --- 1,7 ---- /* ! * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation.
*** 311,350 **** // segment. int VMError::_current_step; const char* VMError::_current_step_info; ! // Timeout handling. ! static volatile jlong reporting_start_time = -1; ! static volatile bool reporting_timeout = false; ! static volatile jlong step_start_time = -1; ! static volatile bool step_timeout = false; ! // Helper function, sets a timestamp to the current time. ! static void set_to_now(volatile jlong* p) { ! const jlong now = os::javaTimeMillis(); ! if (VM_Version::supports_cx8()) { ! Atomic::store(now, p); ! } else { ! *p = now; ! } } ! // helper function, returns value of time stamp. ! static jlong get_timestamp(volatile jlong* p) { ! if (VM_Version::supports_cx8()) { ! return Atomic::load(p); ! } else { ! return *p; ! } } void VMError::report(outputStream* st, bool _verbose) { # define BEGIN if (_current_step == 0) { _current_step = __LINE__; # define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s; \ ! set_to_now(&step_start_time); step_timeout = false; # define END } // don't allocate large buffer on stack static char buf[O_BUFLEN]; --- 311,355 ---- // segment. int VMError::_current_step; const char* VMError::_current_step_info; ! volatile jlong VMError::_reporting_start_time = -1; ! volatile bool VMError::_reporting_did_timeout = false; ! volatile jlong VMError::_step_start_time = -1; ! volatile bool VMError::_step_did_timeout = false; ! // Helper, return current timestamp for timeout handling. ! jlong VMError::get_current_timestamp() { ! return os::javaTimeNanos(); } + // Factor to translate the timestamp to seconds. + #define TIMESTAMP_TO_SECONDS_FACTOR (1000 * 1000 * 1000) ! void VMError::record_reporting_start_time() { ! const jlong now = get_current_timestamp(); ! Atomic::store(now, &_reporting_start_time); ! } ! ! jlong VMError::get_reporting_start_time() { ! return Atomic::load(&_reporting_start_time); ! } ! ! void VMError::record_step_start_time() { ! const jlong now = get_current_timestamp(); ! Atomic::store(now, &_step_start_time); ! } ! ! jlong VMError::get_step_start_time() { ! return Atomic::load(&_step_start_time); } void VMError::report(outputStream* st, bool _verbose) { # define BEGIN if (_current_step == 0) { _current_step = __LINE__; # define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s; \ ! record_step_start_time(); _step_did_timeout = false; # define END } // don't allocate large buffer on stack static char buf[O_BUFLEN];
*** 377,396 **** st->print_cr("Will crash now (TestCrashInErrorHandler=" UINTX_FORMAT ")...", TestCrashInErrorHandler); controlled_crash(TestCrashInErrorHandler); } ! // TestUnresponsiveErrorHandler: three times to trigger first a step timeout, then ! // the global error reporting timeout. ! STEP("test unresponsive error reporting step 1") ! if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); } ! ! STEP("test unresponsive error reporting step 2") ! if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); } ! ! STEP("test unresponsive error reporting step 3") if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); } STEP("test safefetch in error handler") // test whether it is safe to use SafeFetch32 in Crash Handler. Test twice // to test that resetting the signal handler works correctly. if (_verbose && TestSafeFetchInErrorHandler) { --- 382,402 ---- st->print_cr("Will crash now (TestCrashInErrorHandler=" UINTX_FORMAT ")...", TestCrashInErrorHandler); controlled_crash(TestCrashInErrorHandler); } ! // TestUnresponsiveErrorHandler: We want to test both step timeouts and global timeout. ! // Step to global timeout ratio is 4:1, so in order to be absolutely sure we hit the ! // global timeout, lets execute the timeout step five times. ! // See corresponding test in test/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java ! #define TIMEOUT_TEST_STEP STEP("test unresponsive error reporting step") \ if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); } + TIMEOUT_TEST_STEP + TIMEOUT_TEST_STEP + TIMEOUT_TEST_STEP + TIMEOUT_TEST_STEP + TIMEOUT_TEST_STEP STEP("test safefetch in error handler") // test whether it is safe to use SafeFetch32 in Crash Handler. Test twice // to test that resetting the signal handler works correctly. if (_verbose && TestSafeFetchInErrorHandler) {
*** 1213,1223 **** // first time set_error_reported(); reporting_started(); ! set_to_now(&reporting_start_time); if (ShowMessageBoxOnError || PauseAtExit) { show_message_box(buffer, sizeof(buffer)); // User has asked JVM to abort. Reset ShowMessageBoxOnError so the --- 1219,1229 ---- // first time set_error_reported(); reporting_started(); ! record_reporting_start_time(); if (ShowMessageBoxOnError || PauseAtExit) { show_message_box(buffer, sizeof(buffer)); // User has asked JVM to abort. Reset ShowMessageBoxOnError so the
*** 1259,1278 **** outputStream* const st = log.is_open() ? &log : &out; st->cr(); // Timeout handling. ! if (step_timeout) { // The current step had a timeout. Lets continue reporting with the next step. st->print_raw("[timeout occurred during error reporting in step \""); st->print_raw(_current_step_info); ! st->print_cr("\"] after " INT64_FORMAT " ms.", os::javaTimeMillis() - step_start_time); ! } else if (reporting_timeout) { ! // We hit ErrorLogTimeout. Reporting will stop altogether. Lets wrap things // up, the process is about to be stopped by the WatcherThread. ! st->print_cr("------ Timout during error reporting after " INT64_FORMAT "ms. ------", ! os::javaTimeMillis() - reporting_start_time); st->flush(); // Watcherthread is about to call os::die. Lets just wait. os::infinite_sleep(); } else { // Crash or assert during error reporting. Lets continue reporting with the next step. --- 1265,1285 ---- outputStream* const st = log.is_open() ? &log : &out; st->cr(); // Timeout handling. ! if (_step_did_timeout) { // The current step had a timeout. Lets continue reporting with the next step. st->print_raw("[timeout occurred during error reporting in step \""); st->print_raw(_current_step_info); ! st->print_cr("\"] after " INT64_FORMAT " s.", ! (get_current_timestamp() - _step_start_time) / TIMESTAMP_TO_SECONDS_FACTOR); ! } else if (_reporting_did_timeout) { ! // We hit ErrorLogTimeout. Reporting will stop altogether. Let's wrap things // up, the process is about to be stopped by the WatcherThread. ! st->print_cr("------ Timeout during error reporting after " INT64_FORMAT " s. ------", ! (get_current_timestamp() - _reporting_start_time) / TIMESTAMP_TO_SECONDS_FACTOR); st->flush(); // Watcherthread is about to call os::die. Lets just wait. os::infinite_sleep(); } else { // Crash or assert during error reporting. Lets continue reporting with the next step.
*** 1491,1520 **** || (OnError != NULL && OnError[0] != '\0') || Arguments::abort_hook() != NULL) { return false; } ! const jlong reporting_start_time_l = get_timestamp(&reporting_start_time); ! const jlong now = os::javaTimeMillis(); if (reporting_start_time_l > 0) { ! const jlong end = reporting_start_time_l + ErrorLogTimeout * 1000; if (end <= now) { ! reporting_timeout = true; ! VMError::interrupt_reporting_thread(); return true; // global timeout } } ! const jlong step_start_time_l = get_timestamp(&step_start_time); if (step_start_time_l > 0) { ! // A step times out after half of the total timeout. Steps are mostly fast unless they ! // hang for some reason, so this simple rule allows for one hanging step and still ! // leaves time enough for the rest of the steps to finish. ! const jlong end = step_start_time_l + ErrorLogTimeout * 1000 / 2; if (end <= now) { ! step_timeout = true; ! VMError::interrupt_reporting_thread(); return false; // (Not a global timeout) } } return false; --- 1498,1528 ---- || (OnError != NULL && OnError[0] != '\0') || Arguments::abort_hook() != NULL) { return false; } ! const jlong reporting_start_time_l = get_reporting_start_time(); ! const jlong now = get_current_timestamp(); ! // Timestamp is stored in nanos. if (reporting_start_time_l > 0) { ! const jlong end = reporting_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR; if (end <= now) { ! _reporting_did_timeout = true; ! interrupt_reporting_thread(); return true; // global timeout } } ! const jlong step_start_time_l = get_step_start_time(); if (step_start_time_l > 0) { ! // A step times out after a quarter of the total timeout. Steps are mostly fast unless they ! // hang for some reason, so this simple rule allows for three hanging step and still ! // hopefully leaves time enough for the rest of the steps to finish. ! const jlong end = step_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR / 4; if (end <= now) { ! _step_did_timeout = true; ! interrupt_reporting_thread(); return false; // (Not a global timeout) } } return false;
< prev index next >