1 /* 2 * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "jfr/jfrEvents.hpp" 27 #include "jfr/jni/jfrJavaSupport.hpp" 28 #include "jfr/leakprofiler/leakProfiler.hpp" 29 #include "jfr/recorder/repository/jfrEmergencyDump.hpp" 30 #include "jfr/recorder/service/jfrPostBox.hpp" 31 #include "jfr/recorder/service/jfrRecorderService.hpp" 32 #include "jfr/utilities/jfrTypes.hpp" 33 #include "logging/log.hpp" 34 #include "runtime/atomic.hpp" 35 #include "runtime/globals.hpp" 36 #include "runtime/mutexLocker.hpp" 37 #include "runtime/os.hpp" 38 #include "runtime/thread.inline.hpp" 39 #include "utilities/growableArray.hpp" 40 #include "utilities/ostream.hpp" 41 42 static const char vm_error_filename_fmt[] = "hs_err_pid%p.jfr"; 43 static const char vm_oom_filename_fmt[] = "hs_oom_pid%p.jfr"; 44 static const char vm_soe_filename_fmt[] = "hs_soe_pid%p.jfr"; 45 static const char chunk_file_jfr_ext[] = ".jfr"; 46 static const size_t iso8601_len = 19; // "YYYY-MM-DDTHH:MM:SS" 47 static fio_fd emergency_fd = invalid_fd; 48 static const int64_t chunk_file_header_size = 68; 49 static const size_t chunk_file_extension_length = sizeof chunk_file_jfr_ext - 1; 50 51 /* 52 * The emergency dump logic is restrictive when it comes to 53 * using internal VM constructs such as ResourceArea / Handle / Arena. 54 * The reason being that the thread context is unknown. 55 * 56 * A single static buffer of size JVM_MAXPATHLEN is used for building paths. 57 * os::malloc / os::free are used in a few places. 58 */ 59 60 static const size_t _max_path_buffer_size = JVM_MAXPATHLEN; 61 static char _path_buffer[_max_path_buffer_size] = { 0 }; 62 63 static bool is_path_empty() { 64 return *_path_buffer == '\0'; 65 } 66 67 // returns with an appended file separator (if successful) 68 static size_t get_current_directory() { 69 char cwd[_max_path_buffer_size]; 70 if (os::get_current_directory(cwd, sizeof(cwd)) == NULL) { 71 return 0; 72 } 73 const int result = jio_snprintf(_path_buffer, sizeof(_path_buffer), "%s%s", cwd, os::file_separator()); 74 return (result == -1) ? 0 : result; 75 } 76 77 static fio_fd open_exclusivly(const char* path) { 78 assert((path != NULL) && (*path != '\0'), "invariant"); 79 return os::open(path, O_CREAT | O_RDWR, S_IREAD | S_IWRITE); 80 } 81 82 static bool is_emergency_dump_file_open() { 83 return emergency_fd != invalid_fd; 84 } 85 86 static bool open_emergency_dump_fd(const char* path) { 87 if (path == NULL) { 88 return false; 89 } 90 assert(emergency_fd == invalid_fd, "invariant"); 91 emergency_fd = open_exclusivly(path); 92 return emergency_fd != invalid_fd; 93 } 94 95 static void close_emergency_dump_file() { 96 if (is_emergency_dump_file_open()) { 97 os::close(emergency_fd); 98 } 99 } 100 101 static const char* create_emergency_dump_path() { 102 assert(is_path_empty(), "invariant"); 103 104 const size_t path_len = get_current_directory(); 105 if (path_len == 0) { 106 return NULL; 107 } 108 const char* filename_fmt = NULL; 109 // fetch specific error cause 110 switch (JfrJavaSupport::cause()) { 111 case JfrJavaSupport::OUT_OF_MEMORY: 112 filename_fmt = vm_oom_filename_fmt; 113 break; 114 case JfrJavaSupport::STACK_OVERFLOW: 115 filename_fmt = vm_soe_filename_fmt; 116 break; 117 default: 118 filename_fmt = vm_error_filename_fmt; 119 } 120 const bool result = Arguments::copy_expand_pid(filename_fmt, strlen(filename_fmt), _path_buffer + path_len, _max_path_buffer_size - path_len); 121 return result ? _path_buffer : NULL; 122 } 123 124 static bool open_emergency_dump_file() { 125 if (is_emergency_dump_file_open()) { 126 // opened already 127 return true; 128 } 129 return open_emergency_dump_fd(create_emergency_dump_path()); 130 } 131 132 static void report(outputStream* st, bool emergency_file_opened, const char* repository_path) { 133 assert(st != NULL, "invariant"); 134 if (emergency_file_opened) { 135 st->print_raw("# JFR recording file will be written. Location: "); 136 st->print_raw_cr(_path_buffer); 137 st->print_raw_cr("#"); 138 } else if (repository_path != NULL) { 139 st->print_raw("# The JFR repository may contain useful JFR files. Location: "); 140 st->print_raw_cr(repository_path); 141 st->print_raw_cr("#"); 142 } else if (!is_path_empty()) { 143 st->print_raw("# Unable to create a JFR recording file at location: "); 144 st->print_raw_cr(_path_buffer); 145 st->print_raw_cr("#"); 146 } 147 } 148 149 void JfrEmergencyDump::on_vm_error_report(outputStream* st, const char* repository_path) { 150 assert(st != NULL, "invariant"); 151 Thread* thread = Thread::current_or_null_safe(); 152 if (thread != NULL) { 153 report(st, open_emergency_dump_file(), repository_path); 154 } else if (repository_path != NULL) { 155 // a non-attached thread will not be able to write anything later 156 report(st, false, repository_path); 157 } 158 } 159 160 static int file_sort(const char** const file1, const char** file2) { 161 assert(NULL != *file1 && NULL != *file2, "invariant"); 162 int cmp = strncmp(*file1, *file2, iso8601_len); 163 if (0 == cmp) { 164 const char* const dot1 = strchr(*file1, '.'); 165 assert(NULL != dot1, "invariant"); 166 const char* const dot2 = strchr(*file2, '.'); 167 assert(NULL != dot2, "invariant"); 168 ptrdiff_t file1_len = dot1 - *file1; 169 ptrdiff_t file2_len = dot2 - *file2; 170 if (file1_len < file2_len) { 171 return -1; 172 } 173 if (file1_len > file2_len) { 174 return 1; 175 } 176 assert(file1_len == file2_len, "invariant"); 177 cmp = strncmp(*file1, *file2, file1_len); 178 } 179 assert(cmp != 0, "invariant"); 180 return cmp; 181 } 182 183 static void iso8601_to_date_time(char* iso8601_str) { 184 assert(iso8601_str != NULL, "invariant"); 185 assert(strlen(iso8601_str) == iso8601_len, "invariant"); 186 // "YYYY-MM-DDTHH:MM:SS" 187 for (size_t i = 0; i < iso8601_len; ++i) { 188 switch (iso8601_str[i]) { 189 case 'T': 190 case '-': 191 case ':': 192 iso8601_str[i] = '_'; 193 break; 194 } 195 } 196 // "YYYY_MM_DD_HH_MM_SS" 197 } 198 199 static void date_time(char* buffer, size_t buffer_len) { 200 assert(buffer != NULL, "invariant"); 201 assert(buffer_len >= iso8601_len, "buffer too small"); 202 os::iso8601_time(buffer, buffer_len); 203 assert(strlen(buffer) >= iso8601_len + 1, "invariant"); 204 // "YYYY-MM-DDTHH:MM:SS" 205 buffer[iso8601_len] = '\0'; 206 iso8601_to_date_time(buffer); 207 } 208 209 static int64_t file_size(fio_fd fd) { 210 assert(fd != invalid_fd, "invariant"); 211 const int64_t current_offset = os::current_file_offset(fd); 212 const int64_t size = os::lseek(fd, 0, SEEK_END); 213 os::seek_to_file_offset(fd, current_offset); 214 return size; 215 } 216 217 class RepositoryIterator : public StackObj { 218 private: 219 GrowableArray<const char*>* _file_names; 220 int _path_buffer_file_name_offset; 221 mutable int _iterator; 222 const char* fully_qualified(const char* file_name) const; 223 const char* filter(const char* file_name) const; 224 public: 225 RepositoryIterator(const char* repository_path); 226 ~RepositoryIterator(); 227 bool has_next() const; 228 const char* next() const; 229 }; 230 231 // append the file_name at the _path_buffer_file_name_offset position 232 const char* RepositoryIterator::fully_qualified(const char* file_name) const { 233 assert(NULL != file_name, "invariant"); 234 assert(!is_path_empty(), "invariant"); 235 assert(_path_buffer_file_name_offset != 0, "invariant"); 236 237 const int result = jio_snprintf(_path_buffer + _path_buffer_file_name_offset, 238 sizeof(_path_buffer) - _path_buffer_file_name_offset, 239 "%s", 240 file_name); 241 return result != -1 ? _path_buffer : NULL; 242 } 243 244 // caller responsible for deallocation 245 const char* RepositoryIterator::filter(const char* file_name) const { 246 if (file_name == NULL) { 247 return NULL; 248 } 249 const size_t len = strlen(file_name); 250 if ((len < chunk_file_extension_length) || 251 (strncmp(&file_name[len - chunk_file_extension_length], 252 chunk_file_jfr_ext, 253 chunk_file_extension_length) != 0)) { 254 // not a .jfr file 255 return NULL; 256 } 257 const char* fqn = fully_qualified(file_name); 258 if (fqn == NULL) { 259 return NULL; 260 } 261 const fio_fd fd = open_exclusivly(fqn); 262 if (invalid_fd == fd) { 263 return NULL; 264 } 265 const int64_t size = file_size(fd); 266 os::close(fd); 267 if (size <= chunk_file_header_size) { 268 return NULL; 269 } 270 char* const file_name_copy = (char*)os::malloc(len + 1, mtTracing); 271 if (file_name_copy == NULL) { 272 log_error(jfr, system)("Unable to malloc memory during jfr emergency dump"); 273 return NULL; 274 } 275 strncpy(file_name_copy, file_name, len + 1); 276 return file_name_copy; 277 } 278 279 RepositoryIterator::RepositoryIterator(const char* repository_path) : 280 _file_names(NULL), 281 _path_buffer_file_name_offset(0), 282 _iterator(0) { 283 DIR* dirp = os::opendir(repository_path); 284 if (dirp == NULL) { 285 log_error(jfr, system)("Unable to open repository %s", repository_path); 286 return; 287 } 288 // store repository path in the path buffer and save that position 289 _path_buffer_file_name_offset = jio_snprintf(_path_buffer, 290 sizeof(_path_buffer), 291 "%s%s", 292 repository_path, 293 os::file_separator()); 294 if (_path_buffer_file_name_offset == -1) { 295 return; 296 } 297 _file_names = new (ResourceObj::C_HEAP, mtTracing) GrowableArray<const char*>(10, true, mtTracing); 298 if (_file_names == NULL) { 299 log_error(jfr, system)("Unable to malloc memory during jfr emergency dump"); 300 return; 301 } 302 // iterate files in the repository and append filtered file names to the files array 303 struct dirent* dentry; 304 while ((dentry = os::readdir(dirp)) != NULL) { 305 const char* file_name = filter(dentry->d_name); 306 if (file_name != NULL) { 307 _file_names->append(file_name); 308 } 309 } 310 os::closedir(dirp); 311 if (_file_names->length() > 1) { 312 _file_names->sort(file_sort); 313 } 314 } 315 316 RepositoryIterator::~RepositoryIterator() { 317 if (_file_names != NULL) { 318 for (int i = 0; i < _file_names->length(); ++i) { 319 os::free(const_cast<char*>(_file_names->at(i))); 320 } 321 delete _file_names; 322 } 323 } 324 325 bool RepositoryIterator::has_next() const { 326 return _file_names != NULL && _iterator < _file_names->length(); 327 } 328 329 const char* RepositoryIterator::next() const { 330 return _iterator >= _file_names->length() ? NULL : fully_qualified(_file_names->at(_iterator++)); 331 } 332 333 static void write_repository_files(const RepositoryIterator& iterator, char* const copy_block, size_t block_size) { 334 assert(is_emergency_dump_file_open(), "invariant"); 335 while (iterator.has_next()) { 336 fio_fd current_fd = invalid_fd; 337 const char* const fqn = iterator.next(); 338 assert(fqn != NULL, "invariant"); 339 current_fd = open_exclusivly(fqn); 340 if (current_fd != invalid_fd) { 341 const int64_t size = file_size(current_fd); 342 assert(size > 0, "invariant"); 343 int64_t bytes_read = 0; 344 int64_t bytes_written = 0; 345 while (bytes_read < size) { 346 const ssize_t read_result = os::read_at(current_fd, copy_block, (int)block_size, bytes_read); 347 if (-1 == read_result) { 348 log_info(jfr)( // For user, should not be "jfr, system" 349 "Unable to recover JFR data"); 350 break; 351 } 352 bytes_read += (int64_t)read_result; 353 assert(bytes_read - bytes_written <= (int64_t)block_size, "invariant"); 354 bytes_written += (int64_t)os::write(emergency_fd, copy_block, bytes_read - bytes_written); 355 assert(bytes_read == bytes_written, "invariant"); 356 } 357 os::close(current_fd); 358 } 359 } 360 } 361 362 static void write_emergency_dump_file(const RepositoryIterator& iterator) { 363 static const size_t block_size = 1 * M; // 1 mb 364 char* const copy_block = (char*)os::malloc(block_size, mtTracing); 365 if (copy_block == NULL) { 366 log_error(jfr, system)("Unable to malloc memory during jfr emergency dump"); 367 log_error(jfr, system)("Unable to write jfr emergency dump file"); 368 } 369 write_repository_files(iterator, copy_block, block_size); 370 os::free(copy_block); 371 } 372 373 void JfrEmergencyDump::on_vm_error(const char* repository_path) { 374 assert(repository_path != NULL, "invariant"); 375 if (open_emergency_dump_file()) { 376 RepositoryIterator iterator(repository_path); 377 write_emergency_dump_file(iterator); 378 close_emergency_dump_file(); 379 } 380 } 381 382 static const char* create_emergency_chunk_path(const char* repository_path) { 383 const size_t repository_path_len = strlen(repository_path); 384 char date_time_buffer[32] = { 0 }; 385 date_time(date_time_buffer, sizeof(date_time_buffer)); 386 // append the individual substrings 387 const int result = jio_snprintf(_path_buffer, 388 _max_path_buffer_size, 389 "%s%s%s%s", 390 repository_path, 391 os::file_separator(), 392 date_time_buffer, 393 chunk_file_jfr_ext); 394 return result == -1 ? NULL : _path_buffer; 395 } 396 397 const char* JfrEmergencyDump::chunk_path(const char* repository_path) { 398 if (repository_path == NULL) { 399 if (!open_emergency_dump_file()) { 400 return NULL; 401 } 402 // We can directly use the emergency dump file name as the chunk. 403 // The chunk writer will open its own fd so we close this descriptor. 404 close_emergency_dump_file(); 405 assert(!is_path_empty(), "invariant"); 406 return _path_buffer; 407 } 408 return create_emergency_chunk_path(repository_path); 409 } 410 411 /* 412 * We are just about to exit the VM, so we will be very aggressive 413 * at this point in order to increase overall success of dumping jfr data. 414 * 415 * If we end up deadlocking in the attempt of dumping out jfr data, 416 * we rely on the WatcherThread task "is_error_reported()", 417 * to exit the VM after a hard-coded timeout (disallow WatcherThread to emergency dump). 418 * This "safety net" somewhat explains the aggressiveness in this attempt. 419 * 420 */ 421 static bool prepare_for_emergency_dump(Thread* thread) { 422 assert(thread != NULL, "invariant"); 423 424 if (thread->is_Watcher_thread()) { 425 // need WatcherThread as a safeguard against potential deadlocks 426 return false; 427 } 428 if (JfrStream_lock->owned_by_self()) { 429 // crashed during jfr rotation, disallow recursion 430 return false; 431 } 432 433 #ifdef ASSERT 434 Mutex* owned_lock = thread->owned_locks(); 435 while (owned_lock != NULL) { 436 Mutex* next = owned_lock->next(); 437 owned_lock->unlock(); 438 owned_lock = next; 439 } 440 #endif // ASSERT 441 442 if (Threads_lock->owned_by_self()) { 443 Threads_lock->unlock(); 444 } 445 446 if (Module_lock->owned_by_self()) { 447 Module_lock->unlock(); 448 } 449 450 if (ClassLoaderDataGraph_lock->owned_by_self()) { 451 ClassLoaderDataGraph_lock->unlock(); 452 } 453 454 if (Heap_lock->owned_by_self()) { 455 Heap_lock->unlock(); 456 } 457 458 if (VMOperationQueue_lock->owned_by_self()) { 459 VMOperationQueue_lock->unlock(); 460 } 461 462 if (VMOperationRequest_lock->owned_by_self()) { 463 VMOperationRequest_lock->unlock(); 464 } 465 466 if (Service_lock->owned_by_self()) { 467 Service_lock->unlock(); 468 } 469 470 if (UseNotificationThread && Notification_lock->owned_by_self()) { 471 Notification_lock->unlock(); 472 } 473 474 if (CodeCache_lock->owned_by_self()) { 475 CodeCache_lock->unlock(); 476 } 477 478 if (PeriodicTask_lock->owned_by_self()) { 479 PeriodicTask_lock->unlock(); 480 } 481 482 if (JfrMsg_lock->owned_by_self()) { 483 JfrMsg_lock->unlock(); 484 } 485 486 if (JfrBuffer_lock->owned_by_self()) { 487 JfrBuffer_lock->unlock(); 488 } 489 490 if (JfrStacktrace_lock->owned_by_self()) { 491 JfrStacktrace_lock->unlock(); 492 } 493 return true; 494 } 495 496 static volatile int jfr_shutdown_lock = 0; 497 498 static bool guard_reentrancy() { 499 return Atomic::cmpxchg(&jfr_shutdown_lock, 0, 1) == 0; 500 } 501 502 class JavaThreadInVM : public StackObj { 503 private: 504 JavaThread* const _jt; 505 JavaThreadState _original_state; 506 public: 507 508 JavaThreadInVM(Thread* t) : _jt(t->is_Java_thread() ? (JavaThread*)t : NULL), 509 _original_state(_thread_max_state) { 510 if ((_jt != NULL) && (_jt->thread_state() != _thread_in_vm)) { 511 _original_state = _jt->thread_state(); 512 _jt->set_thread_state(_thread_in_vm); 513 } 514 } 515 516 ~JavaThreadInVM() { 517 if (_original_state != _thread_max_state) { 518 _jt->set_thread_state(_original_state); 519 } 520 } 521 522 }; 523 524 static void post_events(bool exception_handler) { 525 if (exception_handler) { 526 EventShutdown e; 527 e.set_reason("VM Error"); 528 e.commit(); 529 } else { 530 // OOM 531 LeakProfiler::emit_events(max_jlong, false); 532 } 533 EventDumpReason event; 534 event.set_reason(exception_handler ? "Crash" : "Out of Memory"); 535 event.set_recordingId(-1); 536 event.commit(); 537 } 538 539 void JfrEmergencyDump::on_vm_shutdown(bool exception_handler) { 540 if (!guard_reentrancy()) { 541 return; 542 } 543 Thread* thread = Thread::current_or_null_safe(); 544 if (thread == NULL) { 545 return; 546 } 547 // Ensure a JavaThread is _thread_in_vm when we make this call 548 JavaThreadInVM jtivm(thread); 549 if (!prepare_for_emergency_dump(thread)) { 550 return; 551 } 552 post_events(exception_handler); 553 const int messages = MSGBIT(MSG_VM_ERROR); 554 JfrRecorderService service; 555 service.rotate(messages); 556 }