1 /*
   2  * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "jfr/jfrEvents.hpp"
  27 #include "jfr/jni/jfrJavaSupport.hpp"
  28 #include "jfr/leakprofiler/leakProfiler.hpp"
  29 #include "jfr/recorder/repository/jfrEmergencyDump.hpp"
  30 #include "jfr/recorder/service/jfrPostBox.hpp"
  31 #include "jfr/recorder/service/jfrRecorderService.hpp"
  32 #include "jfr/utilities/jfrTypes.hpp"
  33 #include "logging/log.hpp"
  34 #include "runtime/atomic.hpp"
  35 #include "runtime/globals.hpp"
  36 #include "runtime/mutexLocker.hpp"
  37 #include "runtime/os.hpp"
  38 #include "runtime/thread.inline.hpp"
  39 #include "utilities/growableArray.hpp"
  40 #include "utilities/ostream.hpp"
  41 
// File name patterns for the emergency dump file. The pattern matching the
// error cause is passed to Arguments::copy_expand_pid() when the path is built.
static const char vm_error_filename_fmt[] = "hs_err_pid%p.jfr";
static const char vm_oom_filename_fmt[] = "hs_oom_pid%p.jfr";
static const char vm_soe_filename_fmt[] = "hs_soe_pid%p.jfr";
// Extension used to recognize chunk files in the repository.
static const char chunk_file_jfr_ext[] = ".jfr";
static const size_t iso8601_len = 19; // "YYYY-MM-DDTHH:MM:SS"
// Descriptor of the emergency dump file; invalid_fd until the file has been opened.
static fio_fd emergency_fd = invalid_fd;
// Repository files whose size does not exceed this value are skipped
// by RepositoryIterator::filter().
static const int64_t chunk_file_header_size = 68;
// Length of chunk_file_jfr_ext, not counting the terminating NUL.
static const size_t chunk_file_extension_length = sizeof chunk_file_jfr_ext - 1;
  50 
  51 /*
  52  * The emergency dump logic is restrictive when it comes to
  53  * using internal VM constructs such as ResourceArea / Handle / Arena.
  54  * The reason being that the thread context is unknown.
  55  *
  56  * A single static buffer of size JVM_MAXPATHLEN is used for building paths.
  57  * os::malloc / os::free are used in a few places.
  58  */
  59 
// Shared scratch buffer in which the paths used by this file are assembled.
// A leading NUL (see is_path_empty()) means that no path has been written yet.
static const size_t _max_path_buffer_size = JVM_MAXPATHLEN;
static char _path_buffer[_max_path_buffer_size] = { 0 };
  62 
  63 static bool is_path_empty() {
  64   return *_path_buffer == '\0';
  65 }
  66 
  67 // returns with an appended file separator (if successful)
  68 static size_t get_current_directory() {
  69   char cwd[_max_path_buffer_size];
  70   if (os::get_current_directory(cwd, sizeof(cwd)) == NULL) {
  71     return 0;
  72   }
  73   const int result = jio_snprintf(_path_buffer, sizeof(_path_buffer), "%s%s", cwd, os::file_separator());
  74   return (result == -1) ? 0 : result;
  75 }
  76 
  77 static fio_fd open_exclusivly(const char* path) {
  78   assert((path != NULL) && (*path != '\0'), "invariant");
  79   return os::open(path, O_CREAT | O_RDWR, S_IREAD | S_IWRITE);
  80 }
  81 
  82 static bool is_emergency_dump_file_open() {
  83   return emergency_fd != invalid_fd;
  84 }
  85 
  86 static bool open_emergency_dump_fd(const char* path) {
  87   if (path == NULL) {
  88     return false;
  89   }
  90   assert(emergency_fd == invalid_fd, "invariant");
  91   emergency_fd = open_exclusivly(path);
  92   return emergency_fd != invalid_fd;
  93 }
  94 
  95 static void close_emergency_dump_file() {
  96   if (is_emergency_dump_file_open()) {
  97     os::close(emergency_fd);
  98   }
  99 }
 100 
 101 static const char* create_emergency_dump_path() {
 102   assert(is_path_empty(), "invariant");
 103 
 104   const size_t path_len = get_current_directory();
 105   if (path_len == 0) {
 106     return NULL;
 107   }
 108   const char* filename_fmt = NULL;
 109   // fetch specific error cause
 110   switch (JfrJavaSupport::cause()) {
 111     case JfrJavaSupport::OUT_OF_MEMORY:
 112       filename_fmt = vm_oom_filename_fmt;
 113       break;
 114     case JfrJavaSupport::STACK_OVERFLOW:
 115       filename_fmt = vm_soe_filename_fmt;
 116       break;
 117     default:
 118       filename_fmt = vm_error_filename_fmt;
 119   }
 120   const bool result = Arguments::copy_expand_pid(filename_fmt, strlen(filename_fmt), _path_buffer + path_len, _max_path_buffer_size - path_len);
 121   return result ? _path_buffer : NULL;
 122 }
 123 
 124 static bool open_emergency_dump_file() {
 125   if (is_emergency_dump_file_open()) {
 126     // opened already
 127     return true;
 128   }
 129   return open_emergency_dump_fd(create_emergency_dump_path());
 130 }
 131 
 132 static void report(outputStream* st, bool emergency_file_opened, const char* repository_path) {
 133   assert(st != NULL, "invariant");
 134   if (emergency_file_opened) {
 135     st->print_raw("# JFR recording file will be written. Location: ");
 136     st->print_raw_cr(_path_buffer);
 137     st->print_raw_cr("#");
 138   } else if (repository_path != NULL) {
 139     st->print_raw("# The JFR repository may contain useful JFR files. Location: ");
 140     st->print_raw_cr(repository_path);
 141     st->print_raw_cr("#");
 142   } else if (!is_path_empty()) {
 143     st->print_raw("# Unable to create a JFR recording file at location: ");
 144     st->print_raw_cr(_path_buffer);
 145     st->print_raw_cr("#");
 146   }
 147 }
 148 
 149 void JfrEmergencyDump::on_vm_error_report(outputStream* st, const char* repository_path) {
 150   assert(st != NULL, "invariant");
 151   Thread* thread = Thread::current_or_null_safe();
 152   if (thread != NULL) {
 153     report(st, open_emergency_dump_file(), repository_path);
 154   } else if (repository_path != NULL) {
 155     // a non-attached thread will not be able to write anything later
 156     report(st, false, repository_path);
 157   }
 158 }
 159 
 160 static int file_sort(const char** const file1, const char** file2) {
 161   assert(NULL != *file1 && NULL != *file2, "invariant");
 162   int cmp = strncmp(*file1, *file2, iso8601_len);
 163   if (0 == cmp) {
 164     const char* const dot1 = strchr(*file1, '.');
 165     assert(NULL != dot1, "invariant");
 166     const char* const dot2 = strchr(*file2, '.');
 167     assert(NULL != dot2, "invariant");
 168     ptrdiff_t file1_len = dot1 - *file1;
 169     ptrdiff_t file2_len = dot2 - *file2;
 170     if (file1_len < file2_len) {
 171       return -1;
 172     }
 173     if (file1_len > file2_len) {
 174       return 1;
 175     }
 176     assert(file1_len == file2_len, "invariant");
 177     cmp = strncmp(*file1, *file2, file1_len);
 178   }
 179   assert(cmp != 0, "invariant");
 180   return cmp;
 181 }
 182 
 183 static void iso8601_to_date_time(char* iso8601_str) {
 184   assert(iso8601_str != NULL, "invariant");
 185   assert(strlen(iso8601_str) == iso8601_len, "invariant");
 186   // "YYYY-MM-DDTHH:MM:SS"
 187   for (size_t i = 0; i < iso8601_len; ++i) {
 188     switch (iso8601_str[i]) {
 189     case 'T':
 190     case '-':
 191     case ':':
 192       iso8601_str[i] = '_';
 193       break;
 194     }
 195   }
 196   // "YYYY_MM_DD_HH_MM_SS"
 197 }
 198 
 199 static void date_time(char* buffer, size_t buffer_len) {
 200   assert(buffer != NULL, "invariant");
 201   assert(buffer_len >= iso8601_len, "buffer too small");
 202   os::iso8601_time(buffer, buffer_len);
 203   assert(strlen(buffer) >= iso8601_len + 1, "invariant");
 204   // "YYYY-MM-DDTHH:MM:SS"
 205   buffer[iso8601_len] = '\0';
 206   iso8601_to_date_time(buffer);
 207 }
 208 
 209 static int64_t file_size(fio_fd fd) {
 210   assert(fd != invalid_fd, "invariant");
 211   const int64_t current_offset = os::current_file_offset(fd);
 212   const int64_t size = os::lseek(fd, 0, SEEK_END);
 213   os::seek_to_file_offset(fd, current_offset);
 214   return size;
 215 }
 216 
// Collects the names of the ".jfr" files found in a repository directory
// when constructed and hands out their fully qualified paths, one at a
// time, through has_next() / next().
class RepositoryIterator : public StackObj {
 private:
  // file names accepted by filter(); every entry is released with os::free() in the destructor
  GrowableArray<const char*>* _file_names;
  // position in the shared path buffer at which a file name is appended
  int _path_buffer_file_name_offset;
  // index of the entry returned by the next call to next(); mutable because next() is const
  mutable int _iterator;
  const char* fully_qualified(const char* file_name) const;
  const char* filter(const char* file_name) const;
 public:
  RepositoryIterator(const char* repository_path);
  ~RepositoryIterator();
  bool has_next() const;
  const char* next() const;
};
 230 
 231 // append the file_name at the _path_buffer_file_name_offset position
 232 const char* RepositoryIterator::fully_qualified(const char* file_name) const {
 233   assert(NULL != file_name, "invariant");
 234   assert(!is_path_empty(), "invariant");
 235   assert(_path_buffer_file_name_offset != 0, "invariant");
 236 
 237   const int result = jio_snprintf(_path_buffer + _path_buffer_file_name_offset,
 238                                   sizeof(_path_buffer) - _path_buffer_file_name_offset,
 239                                   "%s",
 240                                   file_name);
 241   return result != -1 ? _path_buffer : NULL;
 242 }
 243 
 244 // caller responsible for deallocation
 245 const char* RepositoryIterator::filter(const char* file_name) const {
 246   if (file_name == NULL) {
 247     return NULL;
 248   }
 249   const size_t len = strlen(file_name);
 250   if ((len < chunk_file_extension_length) ||
 251       (strncmp(&file_name[len - chunk_file_extension_length],
 252                chunk_file_jfr_ext,
 253                chunk_file_extension_length) != 0)) {
 254     // not a .jfr file
 255     return NULL;
 256   }
 257   const char* fqn = fully_qualified(file_name);
 258   if (fqn == NULL) {
 259     return NULL;
 260   }
 261   const fio_fd fd = open_exclusivly(fqn);
 262   if (invalid_fd == fd) {
 263     return NULL;
 264   }
 265   const int64_t size = file_size(fd);
 266   os::close(fd);
 267   if (size <= chunk_file_header_size) {
 268     return NULL;
 269   }
 270   char* const file_name_copy = (char*)os::malloc(len + 1, mtTracing);
 271   if (file_name_copy == NULL) {
 272     log_error(jfr, system)("Unable to malloc memory during jfr emergency dump");
 273     return NULL;
 274   }
 275   strncpy(file_name_copy, file_name, len + 1);
 276   return file_name_copy;
 277 }
 278 
 279 RepositoryIterator::RepositoryIterator(const char* repository_path) :
 280   _file_names(NULL),
 281   _path_buffer_file_name_offset(0),
 282   _iterator(0) {
 283     DIR* dirp = os::opendir(repository_path);
 284     if (dirp == NULL) {
 285       log_error(jfr, system)("Unable to open repository %s", repository_path);
 286       return;
 287     }
 288     // store repository path in the path buffer and save that position
 289     _path_buffer_file_name_offset = jio_snprintf(_path_buffer,
 290                                                  sizeof(_path_buffer),
 291                                                  "%s%s",
 292                                                  repository_path,
 293                                                  os::file_separator());
 294     if (_path_buffer_file_name_offset == -1) {
 295       return;
 296     }
 297     _file_names = new (ResourceObj::C_HEAP, mtTracing) GrowableArray<const char*>(10, true, mtTracing);
 298     if (_file_names == NULL) {
 299       log_error(jfr, system)("Unable to malloc memory during jfr emergency dump");
 300       return;
 301     }
 302     // iterate files in the repository and append filtered file names to the files array
 303     struct dirent* dentry;
 304     while ((dentry = os::readdir(dirp)) != NULL) {
 305       const char* file_name = filter(dentry->d_name);
 306       if (file_name != NULL) {
 307         _file_names->append(file_name);
 308       }
 309     }
 310     os::closedir(dirp);
 311     if (_file_names->length() > 1) {
 312       _file_names->sort(file_sort);
 313     }
 314 }
 315 
 316 RepositoryIterator::~RepositoryIterator() {
 317   if (_file_names != NULL) {
 318     for (int i = 0; i < _file_names->length(); ++i) {
 319       os::free(const_cast<char*>(_file_names->at(i)));
 320     }
 321     delete _file_names;
 322   }
 323 }
 324 
 325 bool RepositoryIterator::has_next() const {
 326   return _file_names != NULL && _iterator < _file_names->length();
 327 }
 328 
 329 const char* RepositoryIterator::next() const {
 330   return _iterator >= _file_names->length() ? NULL : fully_qualified(_file_names->at(_iterator++));
 331 }
 332 
 333 static void write_repository_files(const RepositoryIterator& iterator, char* const copy_block, size_t block_size) {
 334   assert(is_emergency_dump_file_open(), "invariant");
 335   while (iterator.has_next()) {
 336     fio_fd current_fd = invalid_fd;
 337     const char* const fqn = iterator.next();
 338     assert(fqn != NULL, "invariant");
 339     current_fd = open_exclusivly(fqn);
 340     if (current_fd != invalid_fd) {
 341       const int64_t size = file_size(current_fd);
 342       assert(size > 0, "invariant");
 343       int64_t bytes_read = 0;
 344       int64_t bytes_written = 0;
 345       while (bytes_read < size) {
 346         const ssize_t read_result = os::read_at(current_fd, copy_block, (int)block_size, bytes_read);
 347         if (-1 == read_result) {
 348           log_info(jfr)( // For user, should not be "jfr, system"
 349               "Unable to recover JFR data");
 350           break;
 351         }
 352         bytes_read += (int64_t)read_result;
 353         assert(bytes_read - bytes_written <= (int64_t)block_size, "invariant");
 354         bytes_written += (int64_t)os::write(emergency_fd, copy_block, bytes_read - bytes_written);
 355         assert(bytes_read == bytes_written, "invariant");
 356       }
 357       os::close(current_fd);
 358     }
 359   }
 360 }
 361 
 362 static void write_emergency_dump_file(const RepositoryIterator& iterator) {
 363   static const size_t block_size = 1 * M; // 1 mb
 364   char* const copy_block = (char*)os::malloc(block_size, mtTracing);
 365   if (copy_block == NULL) {
 366     log_error(jfr, system)("Unable to malloc memory during jfr emergency dump");
 367     log_error(jfr, system)("Unable to write jfr emergency dump file");
 368   }
 369   write_repository_files(iterator, copy_block, block_size);
 370   os::free(copy_block);
 371 }
 372 
 373 void JfrEmergencyDump::on_vm_error(const char* repository_path) {
 374   assert(repository_path != NULL, "invariant");
 375   if (open_emergency_dump_file()) {
 376     RepositoryIterator iterator(repository_path);
 377     write_emergency_dump_file(iterator);
 378     close_emergency_dump_file();
 379   }
 380 }
 381 
 382 static const char* create_emergency_chunk_path(const char* repository_path) {
 383   const size_t repository_path_len = strlen(repository_path);
 384   char date_time_buffer[32] = { 0 };
 385   date_time(date_time_buffer, sizeof(date_time_buffer));
 386   // append the individual substrings
 387   const int result = jio_snprintf(_path_buffer,
 388                                   _max_path_buffer_size,
 389                                   "%s%s%s%s",
 390                                   repository_path,
 391                                   os::file_separator(),
 392                                   date_time_buffer,
 393                                   chunk_file_jfr_ext);
 394   return result == -1 ? NULL : _path_buffer;
 395 }
 396 
 397 const char* JfrEmergencyDump::chunk_path(const char* repository_path) {
 398   if (repository_path == NULL) {
 399     if (!open_emergency_dump_file()) {
 400       return NULL;
 401     }
 402     // We can directly use the emergency dump file name as the chunk.
 403     // The chunk writer will open its own fd so we close this descriptor.
 404     close_emergency_dump_file();
 405     assert(!is_path_empty(), "invariant");
 406     return _path_buffer;
 407   }
 408   return create_emergency_chunk_path(repository_path);
 409 }
 410 
 411 /*
 412 * We are just about to exit the VM, so we will be very aggressive
 413 * at this point in order to increase overall success of dumping jfr data.
 414 *
 415 * If we end up deadlocking in the attempt of dumping out jfr data,
 416 * we rely on the WatcherThread task "is_error_reported()",
 417 * to exit the VM after a hard-coded timeout (disallow WatcherThread to emergency dump).
 418 * This "safety net" somewhat explains the aggressiveness in this attempt.
 419 *
 420 */
 421 static bool prepare_for_emergency_dump(Thread* thread) {
 422   assert(thread != NULL, "invariant");
 423 
 424   if (thread->is_Watcher_thread()) {
 425     // need WatcherThread as a safeguard against potential deadlocks
 426     return false;
 427   }
 428   if (JfrStream_lock->owned_by_self()) {
 429     // crashed during jfr rotation, disallow recursion
 430     return false;
 431   }
 432 
 433 #ifdef ASSERT
 434   Mutex* owned_lock = thread->owned_locks();
 435   while (owned_lock != NULL) {
 436     Mutex* next = owned_lock->next();
 437     owned_lock->unlock();
 438     owned_lock = next;
 439   }
 440 #endif // ASSERT
 441 
 442   if (Threads_lock->owned_by_self()) {
 443     Threads_lock->unlock();
 444   }
 445 
 446   if (Module_lock->owned_by_self()) {
 447     Module_lock->unlock();
 448   }
 449 
 450   if (ClassLoaderDataGraph_lock->owned_by_self()) {
 451     ClassLoaderDataGraph_lock->unlock();
 452   }
 453 
 454   if (Heap_lock->owned_by_self()) {
 455     Heap_lock->unlock();
 456   }
 457 
 458   if (VMOperationQueue_lock->owned_by_self()) {
 459     VMOperationQueue_lock->unlock();
 460   }
 461 
 462   if (VMOperationRequest_lock->owned_by_self()) {
 463     VMOperationRequest_lock->unlock();
 464   }
 465 
 466   if (Service_lock->owned_by_self()) {
 467     Service_lock->unlock();
 468   }
 469 
 470   if (UseNotificationThread && Notification_lock->owned_by_self()) {
 471     Notification_lock->unlock();
 472   }
 473 
 474   if (CodeCache_lock->owned_by_self()) {
 475     CodeCache_lock->unlock();
 476   }
 477 
 478   if (PeriodicTask_lock->owned_by_self()) {
 479     PeriodicTask_lock->unlock();
 480   }
 481 
 482   if (JfrMsg_lock->owned_by_self()) {
 483     JfrMsg_lock->unlock();
 484   }
 485 
 486   if (JfrBuffer_lock->owned_by_self()) {
 487     JfrBuffer_lock->unlock();
 488   }
 489 
 490   if (JfrStacktrace_lock->owned_by_self()) {
 491     JfrStacktrace_lock->unlock();
 492   }
 493   return true;
 494 }
 495 
 496 static volatile int jfr_shutdown_lock = 0;
 497 
 498 static bool guard_reentrancy() {
 499   return Atomic::cmpxchg(&jfr_shutdown_lock, 0, 1) == 0;
 500 }
 501 
 502 class JavaThreadInVM : public StackObj {
 503  private:
 504   JavaThread* const _jt;
 505   JavaThreadState _original_state;
 506  public:
 507 
 508   JavaThreadInVM(Thread* t) : _jt(t->is_Java_thread() ? (JavaThread*)t : NULL),
 509                               _original_state(_thread_max_state) {
 510     if ((_jt != NULL) && (_jt->thread_state() != _thread_in_vm)) {
 511       _original_state = _jt->thread_state();
 512       _jt->set_thread_state(_thread_in_vm);
 513     }
 514   }
 515 
 516   ~JavaThreadInVM() {
 517     if (_original_state != _thread_max_state) {
 518       _jt->set_thread_state(_original_state);
 519     }
 520   }
 521 
 522 };
 523 
 524 static void post_events(bool exception_handler) {
 525   if (exception_handler) {
 526     EventShutdown e;
 527     e.set_reason("VM Error");
 528     e.commit();
 529   } else {
 530     // OOM
 531     LeakProfiler::emit_events(max_jlong, false);
 532   }
 533   EventDumpReason event;
 534   event.set_reason(exception_handler ? "Crash" : "Out of Memory");
 535   event.set_recordingId(-1);
 536   event.commit();
 537 }
 538 
 539 void JfrEmergencyDump::on_vm_shutdown(bool exception_handler) {
 540   if (!guard_reentrancy()) {
 541     return;
 542   }
 543   Thread* thread = Thread::current_or_null_safe();
 544   if (thread == NULL) {
 545     return;
 546   }
 547   // Ensure a JavaThread is _thread_in_vm when we make this call
 548   JavaThreadInVM jtivm(thread);
 549   if (!prepare_for_emergency_dump(thread)) {
 550     return;
 551   }
 552   post_events(exception_handler);
 553   const int messages = MSGBIT(MSG_VM_ERROR);
 554   JfrRecorderService service;
 555   service.rotate(messages);
 556 }