1 /*
2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
296 // information that may be unsafe to get after a fatal error. If it happens,
297 // you may find nested report_and_die() frames when you look at the stack
298 // in a debugger.
299 //
300 // In general, a hang in error handler is much worse than a crash or internal
301 // error, as it's harder to recover from a hang. Deadlock can happen if we
302 // try to grab a lock that is already owned by current thread, or if the
303 // owner is blocked forever (e.g. in os::infinite_sleep()). If possible, the
304 // error handler and all the functions it called should avoid grabbing any
305 // lock. An important thing to notice is that memory allocation needs a lock.
306 //
307 // We should avoid using large stack allocated buffers. Many errors happen
308 // when stack space is already low. Making things even worse is that there
309 // could be nested report_and_die() calls on stack (see above). Only one
310 // thread can report error, so large buffers are statically allocated in data
311 // segment.
312
313 int VMError::_current_step;
314 const char* VMError::_current_step_info;
315
316 // Timeout handling.
317 static volatile jlong reporting_start_time = -1;
318 static volatile bool reporting_timeout = false;
319 static volatile jlong step_start_time = -1;
320 static volatile bool step_timeout = false;
321
322 // Helper function, sets a timestamp to the current time.
323 static void set_to_now(volatile jlong* p) {
324 const jlong now = os::javaTimeMillis();
325 if (VM_Version::supports_cx8()) {
326 Atomic::store(now, p);
327 } else {
328 *p = now;
329 }
330 }
331
332 // helper function, returns value of time stamp.
333 static jlong get_timestamp(volatile jlong* p) {
334 if (VM_Version::supports_cx8()) {
335 return Atomic::load(p);
336 } else {
337 return *p;
338 }
339 }
340
341 void VMError::report(outputStream* st, bool _verbose) {
342
343 # define BEGIN if (_current_step == 0) { _current_step = __LINE__;
344 # define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s; \
345 set_to_now(&step_start_time); step_timeout = false;
346 # define END }
347
348 // don't allocate large buffer on stack
349 static char buf[O_BUFLEN];
350
351 BEGIN
352
353 STEP("printing fatal error message")
354
355 st->print_cr("#");
356 if (should_report_bug(_id)) {
357 st->print_cr("# A fatal error has been detected by the Java Runtime Environment:");
358 } else {
359 st->print_cr("# There is insufficient memory for the Java "
360 "Runtime Environment to continue.");
361 }
362
363 #ifndef PRODUCT
364 // Error handler self tests
365
366 // test secondary error handling. Test it twice, to test that resetting
367 // error handler after a secondary crash works.
368 STEP("test secondary crash 1")
369 if (_verbose && TestCrashInErrorHandler != 0) {
370 st->print_cr("Will crash now (TestCrashInErrorHandler=" UINTX_FORMAT ")...",
371 TestCrashInErrorHandler);
372 controlled_crash(TestCrashInErrorHandler);
373 }
374
375 STEP("test secondary crash 2")
376 if (_verbose && TestCrashInErrorHandler != 0) {
377 st->print_cr("Will crash now (TestCrashInErrorHandler=" UINTX_FORMAT ")...",
378 TestCrashInErrorHandler);
379 controlled_crash(TestCrashInErrorHandler);
380 }
381
382 // TestUnresponsiveErrorHandler: three times to trigger first a step timeout, then
383 // the global error reporting timeout.
384 STEP("test unresponsive error reporting step 1")
385 if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
386
387 STEP("test unresponsive error reporting step 2")
388 if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
389
390 STEP("test unresponsive error reporting step 3")
391 if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
392
393 STEP("test safefetch in error handler")
394 // test whether it is safe to use SafeFetch32 in Crash Handler. Test twice
395 // to test that resetting the signal handler works correctly.
396 if (_verbose && TestSafeFetchInErrorHandler) {
397 st->print_cr("Will test SafeFetch...");
398 if (CanUseSafeFetch32()) {
399 int* const invalid_pointer = (int*) get_segfault_address();
400 const int x = 0x76543210;
401 int i1 = SafeFetch32(invalid_pointer, x);
402 int i2 = SafeFetch32(invalid_pointer, x);
403 if (i1 == x && i2 == x) {
404 st->print_cr("SafeFetch OK."); // Correctly deflected and returned default pattern
405 } else {
406 st->print_cr("??");
407 }
408 } else {
409 st->print_cr("not possible; skipped.");
410 }
411 }
1198
1199 // Initialize time stamps to use the same base.
1200 out.time_stamp().update_to(1);
1201 log.time_stamp().update_to(1);
1202
1203 _id = id;
1204 _message = message;
1205 _thread = thread;
1206 _pc = pc;
1207 _siginfo = siginfo;
1208 _context = context;
1209 _filename = filename;
1210 _lineno = lineno;
1211 _size = size;
1212 jio_vsnprintf(_detail_msg, sizeof(_detail_msg), detail_fmt, detail_args);
1213
1214 // first time
1215 set_error_reported();
1216
1217 reporting_started();
1218 set_to_now(&reporting_start_time);
1219
1220 if (ShowMessageBoxOnError || PauseAtExit) {
1221 show_message_box(buffer, sizeof(buffer));
1222
1223 // User has asked JVM to abort. Reset ShowMessageBoxOnError so the
1224 // WatcherThread can kill JVM if the error handler hangs.
1225 ShowMessageBoxOnError = false;
1226 }
1227
1228 os::check_dump_limit(buffer, sizeof(buffer));
1229
1230 // reset signal handlers or exception filter; make sure recursive crashes
1231 // are handled properly.
1232 reset_signal_handlers();
1233
1234 TRACE_VM_ERROR();
1235
1236 } else {
1237 // If UseOsErrorReporting we call this for each level of the call stack
1238 // while searching for the exception handler. Only the first level needs
1244 if (first_error_tid != mytid) {
1245 char msgbuf[64];
1246 jio_snprintf(msgbuf, sizeof(msgbuf),
1247 "[thread " INTX_FORMAT " also had an error]",
1248 mytid);
1249 out.print_raw_cr(msgbuf);
1250
1251 // error reporting is not MT-safe, block current thread
1252 os::infinite_sleep();
1253
1254 } else {
1255 if (recursive_error_count++ > 30) {
1256 out.print_raw_cr("[Too many errors, abort]");
1257 os::die();
1258 }
1259
1260 outputStream* const st = log.is_open() ? &log : &out;
1261 st->cr();
1262
1263 // Timeout handling.
1264 if (step_timeout) {
1265 // The current step had a timeout. Lets continue reporting with the next step.
1266 st->print_raw("[timeout occurred during error reporting in step \"");
1267 st->print_raw(_current_step_info);
1268 st->print_cr("\"] after " INT64_FORMAT " ms.", os::javaTimeMillis() - step_start_time);
1269 } else if (reporting_timeout) {
1270 // We hit ErrorLogTimeout. Reporting will stop altogether. Lets wrap things
1271 // up, the process is about to be stopped by the WatcherThread.
1272 st->print_cr("------ Timout during error reporting after " INT64_FORMAT "ms. ------",
1273 os::javaTimeMillis() - reporting_start_time);
1274 st->flush();
1275 // Watcherthread is about to call os::die. Lets just wait.
1276 os::infinite_sleep();
1277 } else {
1278 // Crash or assert during error reporting. Lets continue reporting with the next step.
1279 jio_snprintf(buffer, sizeof(buffer),
1280 "[error occurred during error reporting (%s), id 0x%x]",
1281 _current_step_info, _id);
1282 st->print_raw_cr(buffer);
1283 st->cr();
1284 }
1285 }
1286 }
1287
1288 // print to screen
1289 if (!out_done) {
1290 report(&out, false);
1291
1292 out_done = true;
1293
1476 } while (yes);
1477 }
1478
1479 // Timeout handling: check if a timeout happened (either a single step did
1480 // timeout or the whole of error reporting hit ErrorLogTimeout). Interrupt
1481 // the reporting thread if that is the case.
1482 bool VMError::check_timeout() {
1483
1484 if (ErrorLogTimeout == 0) {
1485 return false;
1486 }
1487
1488 // Do not check for timeouts if we still have a message box to show to the
1489 // user or if there are OnError handlers to be run.
1490 if (ShowMessageBoxOnError
1491 || (OnError != NULL && OnError[0] != '\0')
1492 || Arguments::abort_hook() != NULL) {
1493 return false;
1494 }
1495
1496 const jlong reporting_start_time_l = get_timestamp(&reporting_start_time);
1497 const jlong now = os::javaTimeMillis();
1498 if (reporting_start_time_l > 0) {
1499 const jlong end = reporting_start_time_l + ErrorLogTimeout * 1000;
1500 if (end <= now) {
1501 reporting_timeout = true;
1502 VMError::interrupt_reporting_thread();
1503 return true; // global timeout
1504 }
1505 }
1506
1507 const jlong step_start_time_l = get_timestamp(&step_start_time);
1508 if (step_start_time_l > 0) {
1509 // A step times out after half of the total timeout. Steps are mostly fast unless they
1510 // hang for some reason, so this simple rule allows for one hanging step and still
1511 // leaves time enough for the rest of the steps to finish.
1512 const jlong end = step_start_time_l + ErrorLogTimeout * 1000 / 2;
1513 if (end <= now) {
1514 step_timeout = true;
1515 VMError::interrupt_reporting_thread();
1516 return false; // (Not a global timeout)
1517 }
1518 }
1519
1520 return false;
1521
1522 }
1523
|
1 /*
2 * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
296 // information that may be unsafe to get after a fatal error. If it happens,
297 // you may find nested report_and_die() frames when you look at the stack
298 // in a debugger.
299 //
300 // In general, a hang in error handler is much worse than a crash or internal
301 // error, as it's harder to recover from a hang. Deadlock can happen if we
302 // try to grab a lock that is already owned by current thread, or if the
303 // owner is blocked forever (e.g. in os::infinite_sleep()). If possible, the
304 // error handler and all the functions it called should avoid grabbing any
305 // lock. An important thing to notice is that memory allocation needs a lock.
306 //
307 // We should avoid using large stack allocated buffers. Many errors happen
308 // when stack space is already low. Making things even worse is that there
309 // could be nested report_and_die() calls on stack (see above). Only one
310 // thread can report error, so large buffers are statically allocated in data
311 // segment.
312
313 int VMError::_current_step;
314 const char* VMError::_current_step_info;
315
316 volatile jlong VMError::_reporting_start_time = -1;
317 volatile bool VMError::_reporting_did_timeout = false;
318 volatile jlong VMError::_step_start_time = -1;
319 volatile bool VMError::_step_did_timeout = false;
320
321 // Helper, return current timestamp for timeout handling.
322 jlong VMError::get_current_timestamp() {
323 return os::javaTimeNanos();
324 }
325 // Factor to translate the timestamp to seconds.
326 #define TIMESTAMP_TO_SECONDS_FACTOR (1000 * 1000 * 1000)
327
328 void VMError::record_reporting_start_time() {
329 const jlong now = get_current_timestamp();
330 Atomic::store(now, &_reporting_start_time);
331 }
332
333 jlong VMError::get_reporting_start_time() {
334 return Atomic::load(&_reporting_start_time);
335 }
336
337 void VMError::record_step_start_time() {
338 const jlong now = get_current_timestamp();
339 Atomic::store(now, &_step_start_time);
340 }
341
342 jlong VMError::get_step_start_time() {
343 return Atomic::load(&_step_start_time);
344 }
345
346 void VMError::report(outputStream* st, bool _verbose) {
347
348 # define BEGIN if (_current_step == 0) { _current_step = __LINE__;
349 # define STEP(s) } if (_current_step < __LINE__) { _current_step = __LINE__; _current_step_info = s; \
350 record_step_start_time(); _step_did_timeout = false;
351 # define END }
352
353 // don't allocate large buffer on stack
354 static char buf[O_BUFLEN];
355
356 BEGIN
357
358 STEP("printing fatal error message")
359
360 st->print_cr("#");
361 if (should_report_bug(_id)) {
362 st->print_cr("# A fatal error has been detected by the Java Runtime Environment:");
363 } else {
364 st->print_cr("# There is insufficient memory for the Java "
365 "Runtime Environment to continue.");
366 }
367
368 #ifndef PRODUCT
369 // Error handler self tests
370
371 // test secondary error handling. Test it twice, to test that resetting
372 // error handler after a secondary crash works.
373 STEP("test secondary crash 1")
374 if (_verbose && TestCrashInErrorHandler != 0) {
375 st->print_cr("Will crash now (TestCrashInErrorHandler=" UINTX_FORMAT ")...",
376 TestCrashInErrorHandler);
377 controlled_crash(TestCrashInErrorHandler);
378 }
379
380 STEP("test secondary crash 2")
381 if (_verbose && TestCrashInErrorHandler != 0) {
382 st->print_cr("Will crash now (TestCrashInErrorHandler=" UINTX_FORMAT ")...",
383 TestCrashInErrorHandler);
384 controlled_crash(TestCrashInErrorHandler);
385 }
386
387 // TestUnresponsiveErrorHandler: We want to test both step timeouts and global timeout.
388 // Step to global timeout ratio is 4:1, so in order to be absolutely sure we hit the
389 // global timeout, lets execute the timeout step five times.
390 // See corresponding test in test/runtime/ErrorHandling/TimeoutInErrorHandlingTest.java
391 #define TIMEOUT_TEST_STEP STEP("test unresponsive error reporting step") \
392 if (_verbose && TestUnresponsiveErrorHandler) { os::infinite_sleep(); }
393 TIMEOUT_TEST_STEP
394 TIMEOUT_TEST_STEP
395 TIMEOUT_TEST_STEP
396 TIMEOUT_TEST_STEP
397 TIMEOUT_TEST_STEP
398
399 STEP("test safefetch in error handler")
400 // test whether it is safe to use SafeFetch32 in Crash Handler. Test twice
401 // to test that resetting the signal handler works correctly.
402 if (_verbose && TestSafeFetchInErrorHandler) {
403 st->print_cr("Will test SafeFetch...");
404 if (CanUseSafeFetch32()) {
405 int* const invalid_pointer = (int*) get_segfault_address();
406 const int x = 0x76543210;
407 int i1 = SafeFetch32(invalid_pointer, x);
408 int i2 = SafeFetch32(invalid_pointer, x);
409 if (i1 == x && i2 == x) {
410 st->print_cr("SafeFetch OK."); // Correctly deflected and returned default pattern
411 } else {
412 st->print_cr("??");
413 }
414 } else {
415 st->print_cr("not possible; skipped.");
416 }
417 }
1204
1205 // Initialize time stamps to use the same base.
1206 out.time_stamp().update_to(1);
1207 log.time_stamp().update_to(1);
1208
1209 _id = id;
1210 _message = message;
1211 _thread = thread;
1212 _pc = pc;
1213 _siginfo = siginfo;
1214 _context = context;
1215 _filename = filename;
1216 _lineno = lineno;
1217 _size = size;
1218 jio_vsnprintf(_detail_msg, sizeof(_detail_msg), detail_fmt, detail_args);
1219
1220 // first time
1221 set_error_reported();
1222
1223 reporting_started();
1224 record_reporting_start_time();
1225
1226 if (ShowMessageBoxOnError || PauseAtExit) {
1227 show_message_box(buffer, sizeof(buffer));
1228
1229 // User has asked JVM to abort. Reset ShowMessageBoxOnError so the
1230 // WatcherThread can kill JVM if the error handler hangs.
1231 ShowMessageBoxOnError = false;
1232 }
1233
1234 os::check_dump_limit(buffer, sizeof(buffer));
1235
1236 // reset signal handlers or exception filter; make sure recursive crashes
1237 // are handled properly.
1238 reset_signal_handlers();
1239
1240 TRACE_VM_ERROR();
1241
1242 } else {
1243 // If UseOsErrorReporting we call this for each level of the call stack
1244 // while searching for the exception handler. Only the first level needs
1250 if (first_error_tid != mytid) {
1251 char msgbuf[64];
1252 jio_snprintf(msgbuf, sizeof(msgbuf),
1253 "[thread " INTX_FORMAT " also had an error]",
1254 mytid);
1255 out.print_raw_cr(msgbuf);
1256
1257 // error reporting is not MT-safe, block current thread
1258 os::infinite_sleep();
1259
1260 } else {
1261 if (recursive_error_count++ > 30) {
1262 out.print_raw_cr("[Too many errors, abort]");
1263 os::die();
1264 }
1265
1266 outputStream* const st = log.is_open() ? &log : &out;
1267 st->cr();
1268
1269 // Timeout handling.
1270 if (_step_did_timeout) {
1271 // The current step had a timeout. Lets continue reporting with the next step.
1272 st->print_raw("[timeout occurred during error reporting in step \"");
1273 st->print_raw(_current_step_info);
1274 st->print_cr("\"] after " INT64_FORMAT " s.",
1275 (get_current_timestamp() - _step_start_time) / TIMESTAMP_TO_SECONDS_FACTOR);
1276 } else if (_reporting_did_timeout) {
1277 // We hit ErrorLogTimeout. Reporting will stop altogether. Let's wrap things
1278 // up, the process is about to be stopped by the WatcherThread.
1279 st->print_cr("------ Timeout during error reporting after " INT64_FORMAT " s. ------",
1280 (get_current_timestamp() - _reporting_start_time) / TIMESTAMP_TO_SECONDS_FACTOR);
1281 st->flush();
1282 // Watcherthread is about to call os::die. Lets just wait.
1283 os::infinite_sleep();
1284 } else {
1285 // Crash or assert during error reporting. Lets continue reporting with the next step.
1286 jio_snprintf(buffer, sizeof(buffer),
1287 "[error occurred during error reporting (%s), id 0x%x]",
1288 _current_step_info, _id);
1289 st->print_raw_cr(buffer);
1290 st->cr();
1291 }
1292 }
1293 }
1294
1295 // print to screen
1296 if (!out_done) {
1297 report(&out, false);
1298
1299 out_done = true;
1300
1483 } while (yes);
1484 }
1485
1486 // Timeout handling: check if a timeout happened (either a single step did
1487 // timeout or the whole of error reporting hit ErrorLogTimeout). Interrupt
1488 // the reporting thread if that is the case.
1489 bool VMError::check_timeout() {
1490
1491 if (ErrorLogTimeout == 0) {
1492 return false;
1493 }
1494
1495 // Do not check for timeouts if we still have a message box to show to the
1496 // user or if there are OnError handlers to be run.
1497 if (ShowMessageBoxOnError
1498 || (OnError != NULL && OnError[0] != '\0')
1499 || Arguments::abort_hook() != NULL) {
1500 return false;
1501 }
1502
1503 const jlong reporting_start_time_l = get_reporting_start_time();
1504 const jlong now = get_current_timestamp();
1505 // Timestamp is stored in nanos.
1506 if (reporting_start_time_l > 0) {
1507 const jlong end = reporting_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR;
1508 if (end <= now) {
1509 _reporting_did_timeout = true;
1510 interrupt_reporting_thread();
1511 return true; // global timeout
1512 }
1513 }
1514
1515 const jlong step_start_time_l = get_step_start_time();
1516 if (step_start_time_l > 0) {
1517 // A step times out after a quarter of the total timeout. Steps are mostly fast unless they
1518 // hang for some reason, so this simple rule allows for three hanging step and still
1519 // hopefully leaves time enough for the rest of the steps to finish.
1520 const jlong end = step_start_time_l + (jlong)ErrorLogTimeout * TIMESTAMP_TO_SECONDS_FACTOR / 4;
1521 if (end <= now) {
1522 _step_did_timeout = true;
1523 interrupt_reporting_thread();
1524 return false; // (Not a global timeout)
1525 }
1526 }
1527
1528 return false;
1529
1530 }
1531
|