1 /* 2 * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "jfr/jfrEvents.hpp" 27 #include "jfr/jni/jfrJavaSupport.hpp" 28 #include "jfr/leakprofiler/leakProfiler.hpp" 29 #include "jfr/recorder/repository/jfrEmergencyDump.hpp" 30 #include "jfr/recorder/service/jfrPostBox.hpp" 31 #include "jfr/recorder/service/jfrRecorderService.hpp" 32 #include "jfr/utilities/jfrTypes.hpp" 33 #include "logging/log.hpp" 34 #include "runtime/atomic.hpp" 35 #include "runtime/globals.hpp" 36 #include "runtime/mutexLocker.hpp" 37 #include "runtime/os.hpp" 38 #include "runtime/thread.inline.hpp" 39 #include "utilities/growableArray.hpp" 40 #include "utilities/ostream.hpp" 41 42 static const char vm_error_filename_fmt[] = "hs_err_pid%p.jfr"; 43 static const char vm_oom_filename_fmt[] = "hs_oom_pid%p.jfr"; 44 static const char vm_soe_filename_fmt[] = "hs_soe_pid%p.jfr"; 45 static const char chunk_file_jfr_ext[] = ".jfr"; 46 static const size_t iso8601_len = 19; // "YYYY-MM-DDTHH:MM:SS" 47 static fio_fd emergency_fd = invalid_fd; 48 static const int64_t chunk_file_header_size = 68; 49 static const size_t chunk_file_extension_length = sizeof chunk_file_jfr_ext - 1; 50 51 /* 52 * The emergency dump logic is restrictive when it comes to 53 * using internal VM constructs such as ResourceArea / Handle / Arena. 54 * The reason being that the thread context is unknown. 55 * 56 * A single static buffer of size JVM_MAXPATHLEN is used for building paths. 57 * os::malloc / os::free are used in a few places. 58 */ 59 60 static const size_t _max_path_buffer_size = JVM_MAXPATHLEN; 61 static char _path_buffer[_max_path_buffer_size] = { 0 }; 62 63 static bool is_empty(const char* path) { 64 assert(path != NULL, "invariant"); 65 return path[0] == '\0'; 66 } 67 68 static bool is_path_empty() { 69 return *_path_buffer == '\0'; 70 } 71 72 static size_t append(size_t pos, const char* str) { 73 assert(_max_path_buffer_size - pos > 0, "invariant"); 74 const int result = jio_snprintf(_path_buffer + pos, _max_path_buffer_size - pos, "%s", str); 75 return result == -1 ? 0 : pos + (size_t)result; 76 } 77 78 // returns with an appended file separator (if successful) 79 static size_t get_current_directory() { 80 if (os::get_current_directory(_path_buffer, _max_path_buffer_size) == NULL) { 81 return 0; 82 } 83 return append(strlen(_path_buffer), os::file_separator()); 84 } 85 86 static fio_fd open_exclusivly(const char* path) { 87 assert(path != NULL, "invariant"); 88 assert(!is_empty(path), "invariant"); 89 return os::open(path, O_CREAT | O_RDWR, S_IREAD | S_IWRITE); 90 } 91 92 static bool is_emergency_dump_file_open() { 93 return emergency_fd != invalid_fd; 94 } 95 96 static bool open_emergency_dump_fd(const char* path) { 97 if (path == NULL) { 98 return false; 99 } 100 assert(emergency_fd == invalid_fd, "invariant"); 101 emergency_fd = open_exclusivly(path); 102 return emergency_fd != invalid_fd; 103 } 104 105 static void close_emergency_dump_file() { 106 if (is_emergency_dump_file_open()) { 107 os::close(emergency_fd); 108 } 109 } 110 111 static const char* create_emergency_dump_path() { 112 assert(is_path_empty(), "invariant"); 113 114 const size_t path_len = get_current_directory(); 115 if (path_len == 0) { 116 return NULL; 117 } 118 const char* filename_fmt = NULL; 119 // fetch specific error cause 120 switch (JfrJavaSupport::cause()) { 121 case JfrJavaSupport::OUT_OF_MEMORY: 122 filename_fmt = vm_oom_filename_fmt; 123 break; 124 case JfrJavaSupport::STACK_OVERFLOW: 125 filename_fmt = vm_soe_filename_fmt; 126 break; 127 default: 128 filename_fmt = vm_error_filename_fmt; 129 } 130 const bool result = Arguments::copy_expand_pid(filename_fmt, strlen(filename_fmt), _path_buffer + path_len, _max_path_buffer_size - path_len); 131 return result ? _path_buffer : NULL; 132 } 133 134 static bool open_emergency_dump_file() { 135 if (is_emergency_dump_file_open()) { 136 // opened already 137 return true; 138 } 139 return open_emergency_dump_fd(create_emergency_dump_path()); 140 } 141 142 static void report(outputStream* st, bool emergency_file_opened, const char* repository_path) { 143 assert(st != NULL, "invariant"); 144 if (emergency_file_opened) { 145 st->print_raw("# JFR recording file will be written. Location: "); 146 st->print_raw_cr(_path_buffer); 147 st->print_raw_cr("#"); 148 } else if (repository_path != NULL) { 149 st->print_raw("# The JFR repository may contain useful JFR files. Location: "); 150 st->print_raw_cr(repository_path); 151 st->print_raw_cr("#"); 152 } else if (!is_path_empty()) { 153 st->print_raw("# Unable to create a JFR recording file at location: "); 154 st->print_raw_cr(_path_buffer); 155 st->print_raw_cr("#"); 156 } 157 } 158 159 void JfrEmergencyDump::on_vm_error_report(outputStream* st, const char* repository_path) { 160 assert(st != NULL, "invariant"); 161 Thread* thread = Thread::current_or_null_safe(); 162 if (thread != NULL) { 163 report(st, open_emergency_dump_file(), repository_path); 164 } else if (repository_path != NULL) { 165 // a non-attached thread will not be able to write anything later 166 report(st, false, repository_path); 167 } 168 } 169 170 static int file_sort(const char** const file1, const char** file2) { 171 assert(NULL != *file1 && NULL != *file2, "invariant"); 172 int cmp = strncmp(*file1, *file2, iso8601_len); 173 if (0 == cmp) { 174 const char* const dot1 = strchr(*file1, '.'); 175 assert(NULL != dot1, "invariant"); 176 const char* const dot2 = strchr(*file2, '.'); 177 assert(NULL != dot2, "invariant"); 178 ptrdiff_t file1_len = dot1 - *file1; 179 ptrdiff_t file2_len = dot2 - *file2; 180 if (file1_len < file2_len) { 181 return -1; 182 } 183 if (file1_len > file2_len) { 184 return 1; 185 } 186 assert(file1_len == file2_len, "invariant"); 187 cmp = strncmp(*file1, *file2, file1_len); 188 } 189 assert(cmp != 0, "invariant"); 190 return cmp; 191 } 192 193 static void iso8601_to_date_time(char* iso8601_str) { 194 assert(iso8601_str != NULL, "invariant"); 195 assert(strlen(iso8601_str) == iso8601_len, "invariant"); 196 // "YYYY-MM-DDTHH:MM:SS" 197 for (size_t i = 0; i < iso8601_len; ++i) { 198 switch (iso8601_str[i]) { 199 case 'T': 200 case '-': 201 case ':': 202 iso8601_str[i] = '_'; 203 break; 204 } 205 } 206 // "YYYY_MM_DD_HH_MM_SS" 207 } 208 209 static void date_time(char* buffer, size_t buffer_len) { 210 assert(buffer != NULL, "invariant"); 211 assert(buffer_len >= iso8601_len, "buffer too small"); 212 os::iso8601_time(buffer, buffer_len); 213 assert(strlen(buffer) >= iso8601_len + 1, "invariant"); 214 // "YYYY-MM-DDTHH:MM:SS" 215 buffer[iso8601_len] = '\0'; 216 iso8601_to_date_time(buffer); 217 } 218 219 static int64_t file_size(fio_fd fd) { 220 assert(fd != invalid_fd, "invariant"); 221 const int64_t current_offset = os::current_file_offset(fd); 222 const int64_t size = os::lseek(fd, 0, SEEK_END); 223 os::seek_to_file_offset(fd, current_offset); 224 return size; 225 } 226 227 class RepositoryIterator : public StackObj { 228 private: 229 GrowableArray<const char*>* _file_names; 230 size_t _path_buffer_file_name_offset; 231 mutable int _iterator; 232 const char* fully_qualified(const char* file_name) const; 233 const char* filter(const char* file_name) const; 234 public: 235 RepositoryIterator(const char* repository_path); 236 ~RepositoryIterator(); 237 bool has_next() const; 238 const char* next() const; 239 }; 240 241 // append the file_name at the _path_buffer_file_name_offset position 242 const char* RepositoryIterator::fully_qualified(const char* file_name) const { 243 assert(NULL != file_name, "invariant"); 244 assert(!is_path_empty(), "invariant"); 245 assert(_path_buffer_file_name_offset != 0, "invariant"); 246 return append(_path_buffer_file_name_offset, file_name) != 0 ? _path_buffer : NULL; 247 } 248 249 // caller responsible for deallocation 250 const char* RepositoryIterator::filter(const char* file_name) const { 251 if (file_name == NULL) { 252 return NULL; 253 } 254 const size_t len = strlen(file_name); 255 if ((len < chunk_file_extension_length) || 256 (strncmp(&file_name[len - chunk_file_extension_length], 257 chunk_file_jfr_ext, 258 chunk_file_extension_length) != 0)) { 259 // not a .jfr file 260 return NULL; 261 } 262 const char* fqn = fully_qualified(file_name); 263 if (fqn == NULL) { 264 return NULL; 265 } 266 const fio_fd fd = open_exclusivly(fqn); 267 if (invalid_fd == fd) { 268 return NULL; 269 } 270 const int64_t size = file_size(fd); 271 os::close(fd); 272 if (size <= chunk_file_header_size) { 273 return NULL; 274 } 275 char* const file_name_copy = (char*)os::malloc(len + 1, mtTracing); 276 if (file_name_copy == NULL) { 277 log_error(jfr, system)("Unable to malloc memory during jfr emergency dump"); 278 return NULL; 279 } 280 strncpy(file_name_copy, file_name, len + 1); 281 return file_name_copy; 282 } 283 284 RepositoryIterator::RepositoryIterator(const char* repository_path) : 285 _file_names(NULL), 286 _path_buffer_file_name_offset(0), 287 _iterator(0) { 288 DIR* dirp = os::opendir(repository_path); 289 if (dirp == NULL) { 290 log_error(jfr, system)("Unable to open repository %s", repository_path); 291 return; 292 } 293 // store repository path in the path buffer 294 size_t result = append(0, repository_path); 295 if (result == 0) { 296 return; 297 } 298 // append a file separator and save that position 299 _path_buffer_file_name_offset = append(result, os::file_separator()); 300 if (_path_buffer_file_name_offset == 0) { 301 return; 302 } 303 _file_names = new (ResourceObj::C_HEAP, mtTracing) GrowableArray<const char*>(10, true, mtTracing); 304 if (_file_names == NULL) { 305 log_error(jfr, system)("Unable to malloc memory during jfr emergency dump"); 306 return; 307 } 308 // iterate files in the repository and append filtered file names to the files array 309 struct dirent* dentry; 310 while ((dentry = os::readdir(dirp)) != NULL) { 311 const char* file_name = filter(dentry->d_name); 312 if (file_name != NULL) { 313 _file_names->append(file_name); 314 } 315 } 316 os::closedir(dirp); 317 if (_file_names->length() > 1) { 318 _file_names->sort(file_sort); 319 } 320 } 321 322 RepositoryIterator::~RepositoryIterator() { 323 if (_file_names != NULL) { 324 for (int i = 0; i < _file_names->length(); ++i) { 325 os::free(const_cast<char*>(_file_names->at(i))); 326 } 327 delete _file_names; 328 } 329 } 330 331 bool RepositoryIterator::has_next() const { 332 return _file_names != NULL && _iterator < _file_names->length(); 333 } 334 335 const char* RepositoryIterator::next() const { 336 return _iterator >= _file_names->length() ? NULL : fully_qualified(_file_names->at(_iterator++)); 337 } 338 339 static void write_repository_files(const RepositoryIterator& iterator, char* const copy_block, size_t block_size) { 340 assert(is_emergency_dump_file_open(), "invariant"); 341 while (iterator.has_next()) { 342 fio_fd current_fd = invalid_fd; 343 const char* const fqn = iterator.next(); 344 assert(fqn != NULL, "invariant"); 345 current_fd = open_exclusivly(fqn); 346 if (current_fd != invalid_fd) { 347 const int64_t size = file_size(current_fd); 348 assert(size > 0, "invariant"); 349 int64_t bytes_read = 0; 350 int64_t bytes_written = 0; 351 while (bytes_read < size) { 352 const ssize_t read_result = os::read_at(current_fd, copy_block, (int)block_size, bytes_read); 353 if (-1 == read_result) { 354 log_info(jfr)( // For user, should not be "jfr, system" 355 "Unable to recover JFR data"); 356 break; 357 } 358 bytes_read += (int64_t)read_result; 359 assert(bytes_read - bytes_written <= (int64_t)block_size, "invariant"); 360 bytes_written += (int64_t)os::write(emergency_fd, copy_block, bytes_read - bytes_written); 361 assert(bytes_read == bytes_written, "invariant"); 362 } 363 os::close(current_fd); 364 } 365 } 366 } 367 368 static void write_emergency_dump_file(const RepositoryIterator& iterator) { 369 static const size_t block_size = 1 * M; // 1 mb 370 char* const copy_block = (char*)os::malloc(block_size, mtTracing); 371 if (copy_block == NULL) { 372 log_error(jfr, system)("Unable to malloc memory during jfr emergency dump"); 373 log_error(jfr, system)("Unable to write jfr emergency dump file"); 374 } 375 write_repository_files(iterator, copy_block, block_size); 376 os::free(copy_block); 377 } 378 379 void JfrEmergencyDump::on_vm_error(const char* repository_path) { 380 assert(repository_path != NULL, "invariant"); 381 if (open_emergency_dump_file()) { 382 RepositoryIterator iterator(repository_path); 383 write_emergency_dump_file(iterator); 384 close_emergency_dump_file(); 385 } 386 } 387 388 static const char* create_emergency_chunk_path(const char* repository_path) { 389 const size_t repository_path_len = strlen(repository_path); 390 char date_time_buffer[32] = { 0 }; 391 date_time(date_time_buffer, sizeof(date_time_buffer)); 392 // append the individual substrings 393 const int result = jio_snprintf(_path_buffer, 394 _max_path_buffer_size, 395 "%s%s%s%s", 396 repository_path, 397 os::file_separator(), 398 date_time_buffer, 399 chunk_file_jfr_ext); 400 return result == -1 ? NULL : _path_buffer; 401 } 402 403 const char* JfrEmergencyDump::chunk_path(const char* repository_path) { 404 if (repository_path == NULL) { 405 if (!open_emergency_dump_file()) { 406 return NULL; 407 } 408 // We can directly use the emergency dump file name as the chunk. 409 // The chunk writer will open its own fd so we close this descriptor. 410 close_emergency_dump_file(); 411 assert(!is_path_empty(), "invariant"); 412 return _path_buffer; 413 } 414 return create_emergency_chunk_path(repository_path); 415 } 416 417 /* 418 * We are just about to exit the VM, so we will be very aggressive 419 * at this point in order to increase overall success of dumping jfr data. 420 * 421 * If we end up deadlocking in the attempt of dumping out jfr data, 422 * we rely on the WatcherThread task "is_error_reported()", 423 * to exit the VM after a hard-coded timeout (disallow WatcherThread to emergency dump). 424 * This "safety net" somewhat explains the aggressiveness in this attempt. 425 * 426 */ 427 static bool prepare_for_emergency_dump(Thread* thread) { 428 assert(thread != NULL, "invariant"); 429 430 if (thread->is_Watcher_thread()) { 431 // need WatcherThread as a safeguard against potential deadlocks 432 return false; 433 } 434 if (JfrStream_lock->owned_by_self()) { 435 // crashed during jfr rotation, disallow recursion 436 return false; 437 } 438 439 #ifdef ASSERT 440 Mutex* owned_lock = thread->owned_locks(); 441 while (owned_lock != NULL) { 442 Mutex* next = owned_lock->next(); 443 owned_lock->unlock(); 444 owned_lock = next; 445 } 446 #endif // ASSERT 447 448 if (Threads_lock->owned_by_self()) { 449 Threads_lock->unlock(); 450 } 451 452 if (Module_lock->owned_by_self()) { 453 Module_lock->unlock(); 454 } 455 456 if (ClassLoaderDataGraph_lock->owned_by_self()) { 457 ClassLoaderDataGraph_lock->unlock(); 458 } 459 460 if (Heap_lock->owned_by_self()) { 461 Heap_lock->unlock(); 462 } 463 464 if (VMOperationQueue_lock->owned_by_self()) { 465 VMOperationQueue_lock->unlock(); 466 } 467 468 if (VMOperationRequest_lock->owned_by_self()) { 469 VMOperationRequest_lock->unlock(); 470 } 471 472 if (Service_lock->owned_by_self()) { 473 Service_lock->unlock(); 474 } 475 476 if (UseNotificationThread && Notification_lock->owned_by_self()) { 477 Notification_lock->unlock(); 478 } 479 480 if (CodeCache_lock->owned_by_self()) { 481 CodeCache_lock->unlock(); 482 } 483 484 if (PeriodicTask_lock->owned_by_self()) { 485 PeriodicTask_lock->unlock(); 486 } 487 488 if (JfrMsg_lock->owned_by_self()) { 489 JfrMsg_lock->unlock(); 490 } 491 492 if (JfrBuffer_lock->owned_by_self()) { 493 JfrBuffer_lock->unlock(); 494 } 495 496 if (JfrStacktrace_lock->owned_by_self()) { 497 JfrStacktrace_lock->unlock(); 498 } 499 return true; 500 } 501 502 static volatile int jfr_shutdown_lock = 0; 503 504 static bool guard_reentrancy() { 505 return Atomic::cmpxchg(&jfr_shutdown_lock, 0, 1) == 0; 506 } 507 508 class JavaThreadInVM : public StackObj { 509 private: 510 JavaThread* const _jt; 511 JavaThreadState _original_state; 512 public: 513 514 JavaThreadInVM(Thread* t) : _jt(t->is_Java_thread() ? (JavaThread*)t : NULL), 515 _original_state(_thread_max_state) { 516 if ((_jt != NULL) && (_jt->thread_state() != _thread_in_vm)) { 517 _original_state = _jt->thread_state(); 518 _jt->set_thread_state(_thread_in_vm); 519 } 520 } 521 522 ~JavaThreadInVM() { 523 if (_original_state != _thread_max_state) { 524 _jt->set_thread_state(_original_state); 525 } 526 } 527 528 }; 529 530 static void post_events(bool exception_handler) { 531 if (exception_handler) { 532 EventShutdown e; 533 e.set_reason("VM Error"); 534 e.commit(); 535 } else { 536 // OOM 537 LeakProfiler::emit_events(max_jlong, false); 538 } 539 EventDumpReason event; 540 event.set_reason(exception_handler ? "Crash" : "Out of Memory"); 541 event.set_recordingId(-1); 542 event.commit(); 543 } 544 545 void JfrEmergencyDump::on_vm_shutdown(bool exception_handler) { 546 if (!guard_reentrancy()) { 547 return; 548 } 549 Thread* thread = Thread::current_or_null_safe(); 550 if (thread == NULL) { 551 return; 552 } 553 // Ensure a JavaThread is _thread_in_vm when we make this call 554 JavaThreadInVM jtivm(thread); 555 if (!prepare_for_emergency_dump(thread)) { 556 return; 557 } 558 post_events(exception_handler); 559 const int messages = MSGBIT(MSG_VM_ERROR); 560 JfrRecorderService service; 561 service.rotate(messages); 562 }