1 /* 2 * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "jfr/jfrEvents.hpp" 27 #include "jfr/jni/jfrJavaSupport.hpp" 28 #include "jfr/leakprofiler/leakProfiler.hpp" 29 #include "jfr/recorder/repository/jfrEmergencyDump.hpp" 30 #include "jfr/recorder/service/jfrPostBox.hpp" 31 #include "jfr/recorder/service/jfrRecorderService.hpp" 32 #include "jfr/utilities/jfrTypes.hpp" 33 #include "logging/log.hpp" 34 #include "memory/resourceArea.hpp" 35 #include "runtime/atomic.hpp" 36 #include "runtime/handles.inline.hpp" 37 #include "runtime/globals.hpp" 38 #include "runtime/mutexLocker.hpp" 39 #include "runtime/os.hpp" 40 #include "runtime/thread.inline.hpp" 41 #include "utilities/growableArray.hpp" 42 43 static const char vm_error_filename_fmt[] = "hs_err_pid%p.jfr"; 44 static const char vm_oom_filename_fmt[] = "hs_oom_pid%p.jfr"; 45 static const char vm_soe_filename_fmt[] = "hs_soe_pid%p.jfr"; 46 static const char chunk_file_jfr_ext[] = ".jfr"; 47 static const size_t iso8601_len = 19; // "YYYY-MM-DDTHH:MM:SS" 48 49 char JfrEmergencyDump::_dump_path[JVM_MAXPATHLEN] = {0}; 50 51 static fio_fd open_exclusivly(const char* path) { 52 return os::open(path, O_CREAT | O_RDWR, S_IREAD | S_IWRITE); 53 } 54 55 static int file_sort(const char** const file1, const char** file2) { 56 assert(NULL != *file1 && NULL != *file2, "invariant"); 57 int cmp = strncmp(*file1, *file2, iso8601_len); 58 if (0 == cmp) { 59 const char* const dot1 = strchr(*file1, '.'); 60 assert(NULL != dot1, "invariant"); 61 const char* const dot2 = strchr(*file2, '.'); 62 assert(NULL != dot2, "invariant"); 63 ptrdiff_t file1_len = dot1 - *file1; 64 ptrdiff_t file2_len = dot2 - *file2; 65 if (file1_len < file2_len) { 66 return -1; 67 } 68 if (file1_len > file2_len) { 69 return 1; 70 } 71 assert(file1_len == file2_len, "invariant"); 72 cmp = strncmp(*file1, *file2, file1_len); 73 } 74 assert(cmp != 0, "invariant"); 75 return cmp; 76 } 77 78 static void iso8601_to_date_time(char* iso8601_str) { 79 assert(iso8601_str != NULL, "invariant"); 80 assert(strlen(iso8601_str) == iso8601_len, "invariant"); 81 // "YYYY-MM-DDTHH:MM:SS" 82 for (size_t i = 0; i < iso8601_len; ++i) { 83 switch (iso8601_str[i]) { 84 case 'T': 85 case '-': 86 case ':': 87 iso8601_str[i] = '_'; 88 break; 89 } 90 } 91 // "YYYY_MM_DD_HH_MM_SS" 92 } 93 94 static void date_time(char* buffer, size_t buffer_len) { 95 assert(buffer != NULL, "invariant"); 96 assert(buffer_len >= iso8601_len, "buffer too small"); 97 os::iso8601_time(buffer, buffer_len); 98 assert(strlen(buffer) >= iso8601_len + 1, "invariant"); 99 // "YYYY-MM-DDTHH:MM:SS" 100 buffer[iso8601_len] = '\0'; 101 iso8601_to_date_time(buffer); 102 } 103 104 static int64_t file_size(fio_fd fd) { 105 assert(fd != invalid_fd, "invariant"); 106 const int64_t current_offset = os::current_file_offset(fd); 107 const int64_t size = os::lseek(fd, 0, SEEK_END); 108 os::seek_to_file_offset(fd, current_offset); 109 return size; 110 } 111 112 class RepositoryIterator : public StackObj { 113 private: 114 const char* const _repo; 115 const size_t _repository_len; 116 GrowableArray<const char*>* _files; 117 const char* const fully_qualified(const char* entry) const; 118 mutable int _iterator; 119 120 public: 121 RepositoryIterator(const char* repository, size_t repository_len); 122 ~RepositoryIterator() {} 123 const char* const filter(const char* entry) const; 124 bool has_next() const; 125 const char* const next() const; 126 }; 127 128 const char* const RepositoryIterator::fully_qualified(const char* entry) const { 129 assert(NULL != entry, "invariant"); 130 char* file_path_entry = NULL; 131 // only use files that have content, not placeholders 132 const char* const file_separator = os::file_separator(); 133 if (NULL != file_separator) { 134 const size_t entry_len = strlen(entry); 135 const size_t file_separator_length = strlen(file_separator); 136 const size_t file_path_entry_length = _repository_len + file_separator_length + entry_len; 137 file_path_entry = NEW_RESOURCE_ARRAY_RETURN_NULL(char, file_path_entry_length + 1); 138 if (NULL == file_path_entry) { 139 return NULL; 140 } 141 int position = 0; 142 position += jio_snprintf(&file_path_entry[position], _repository_len + 1, "%s", _repo); 143 position += jio_snprintf(&file_path_entry[position], file_separator_length + 1, "%s", os::file_separator()); 144 position += jio_snprintf(&file_path_entry[position], entry_len + 1, "%s", entry); 145 file_path_entry[position] = '\0'; 146 assert((size_t)position == file_path_entry_length, "invariant"); 147 assert(strlen(file_path_entry) == (size_t)position, "invariant"); 148 } 149 return file_path_entry; 150 } 151 152 const char* const RepositoryIterator::filter(const char* entry) const { 153 if (entry == NULL) { 154 return NULL; 155 } 156 const size_t entry_len = strlen(entry); 157 if (entry_len <= 2) { 158 // for "." and ".." 159 return NULL; 160 } 161 char* entry_name = NEW_RESOURCE_ARRAY_RETURN_NULL(char, entry_len + 1); 162 if (entry_name == NULL) { 163 return NULL; 164 } 165 strncpy(entry_name, entry, entry_len + 1); 166 const char* const fully_qualified_path_entry = fully_qualified(entry_name); 167 if (NULL == fully_qualified_path_entry) { 168 return NULL; 169 } 170 const fio_fd entry_fd = open_exclusivly(fully_qualified_path_entry); 171 if (invalid_fd == entry_fd) { 172 return NULL; 173 } 174 const int64_t entry_size = file_size(entry_fd); 175 os::close(entry_fd); 176 if (0 == entry_size) { 177 return NULL; 178 } 179 return entry_name; 180 } 181 182 RepositoryIterator::RepositoryIterator(const char* repository, size_t repository_len) : 183 _repo(repository), 184 _repository_len(repository_len), 185 _files(NULL), 186 _iterator(0) { 187 if (NULL != _repo) { 188 assert(strlen(_repo) == _repository_len, "invariant"); 189 _files = new GrowableArray<const char*>(10); 190 DIR* dirp = os::opendir(_repo); 191 if (dirp == NULL) { 192 log_error(jfr, system)("Unable to open repository %s", _repo); 193 return; 194 } 195 struct dirent* dentry; 196 while ((dentry = os::readdir(dirp)) != NULL) { 197 const char* const entry_path = filter(dentry->d_name); 198 if (NULL != entry_path) { 199 _files->append(entry_path); 200 } 201 } 202 os::closedir(dirp); 203 if (_files->length() > 1) { 204 _files->sort(file_sort); 205 } 206 } 207 } 208 209 bool RepositoryIterator::has_next() const { 210 return (_files != NULL && _iterator < _files->length()); 211 } 212 213 const char* const RepositoryIterator::next() const { 214 return _iterator >= _files->length() ? NULL : fully_qualified(_files->at(_iterator++)); 215 } 216 217 static void write_emergency_file(fio_fd emergency_fd, const RepositoryIterator& iterator) { 218 assert(emergency_fd != invalid_fd, "invariant"); 219 const size_t size_of_file_copy_block = 1 * M; // 1 mb 220 jbyte* const file_copy_block = NEW_RESOURCE_ARRAY_RETURN_NULL(jbyte, size_of_file_copy_block); 221 if (file_copy_block == NULL) { 222 return; 223 } 224 while (iterator.has_next()) { 225 fio_fd current_fd = invalid_fd; 226 const char* const fqn = iterator.next(); 227 if (fqn != NULL) { 228 current_fd = open_exclusivly(fqn); 229 if (current_fd != invalid_fd) { 230 const int64_t current_filesize = file_size(current_fd); 231 assert(current_filesize > 0, "invariant"); 232 int64_t bytes_read = 0; 233 int64_t bytes_written = 0; 234 while (bytes_read < current_filesize) { 235 const ssize_t read_result = os::read_at(current_fd, file_copy_block, size_of_file_copy_block, bytes_read); 236 if (-1 == read_result) { 237 log_info(jfr)( // For user, should not be "jfr, system" 238 "Unable to recover JFR data"); 239 break; 240 } 241 bytes_read += (int64_t)read_result; 242 assert(bytes_read - bytes_written <= (int64_t)size_of_file_copy_block, "invariant"); 243 bytes_written += (int64_t)os::write(emergency_fd, file_copy_block, bytes_read - bytes_written); 244 assert(bytes_read == bytes_written, "invariant"); 245 } 246 os::close(current_fd); 247 } 248 } 249 } 250 } 251 252 const char* JfrEmergencyDump::create_emergency_dump_path() { 253 if (*_dump_path != '\0') { 254 return _dump_path; 255 } 256 257 const char* const cwd = os::get_current_directory(_dump_path, JVM_MAXPATHLEN); 258 if (NULL == cwd) { 259 return NULL; 260 } 261 size_t pos = strlen(cwd); 262 const int fsep_len = jio_snprintf(&_dump_path[pos], JVM_MAXPATHLEN - pos, "%s", os::file_separator()); 263 const char* filename_fmt = NULL; 264 // fetch specific error cause 265 switch (JfrJavaSupport::cause()) { 266 case JfrJavaSupport::OUT_OF_MEMORY: 267 filename_fmt = vm_oom_filename_fmt; 268 break; 269 case JfrJavaSupport::STACK_OVERFLOW: 270 filename_fmt = vm_soe_filename_fmt; 271 break; 272 default: 273 filename_fmt = vm_error_filename_fmt; 274 } 275 pos += fsep_len; 276 Arguments::copy_expand_pid(filename_fmt, strlen(filename_fmt), &_dump_path[pos], JVM_MAXPATHLEN - pos); 277 if (*_dump_path != '\0') { 278 log_info(jfr)( // For user, should not be "jfr, system" 279 "Attempting to recover JFR data, emergency jfr file: %s", _dump_path); 280 } 281 return _dump_path; 282 } 283 284 // Caller needs ResourceMark 285 const char* JfrEmergencyDump::create_emergency_chunk_path(const char* repository_path) { 286 assert(repository_path != NULL, "invariant"); 287 288 if (*_dump_path != '\0') { 289 return _dump_path; 290 } 291 292 const size_t repository_path_len = strlen(repository_path); 293 // date time 294 char date_time_buffer[32] = { 0 }; 295 date_time(date_time_buffer, sizeof(date_time_buffer)); 296 size_t date_time_len = strlen(date_time_buffer); 297 size_t chunkname_max_len = repository_path_len // repository_base_path 298 + 1 // "/" 299 + date_time_len // date_time 300 + strlen(chunk_file_jfr_ext) // .jfr 301 + 1; 302 // append the individual substrings 303 jio_snprintf(_dump_path, chunkname_max_len, "%s%s%s%s", repository_path, os::file_separator(), date_time_buffer, chunk_file_jfr_ext); 304 return _dump_path; 305 } 306 307 fio_fd JfrEmergencyDump::emergency_dump_file_descriptor() { 308 ResourceMark rm; 309 const char* const emergency_dump_path = create_emergency_dump_path(); 310 return emergency_dump_path != NULL ? open_exclusivly(emergency_dump_path) : invalid_fd; 311 } 312 313 const char* JfrEmergencyDump::build_dump_path(const char* repository_path) { 314 return repository_path == NULL ? create_emergency_dump_path() : create_emergency_chunk_path(repository_path); 315 } 316 317 void JfrEmergencyDump::on_vm_error(const char* repository_path) { 318 assert(repository_path != NULL, "invariant"); 319 ResourceMark rm; 320 const fio_fd emergency_fd = emergency_dump_file_descriptor(); 321 if (emergency_fd != invalid_fd) { 322 RepositoryIterator iterator(repository_path, strlen(repository_path)); 323 write_emergency_file(emergency_fd, iterator); 324 os::close(emergency_fd); 325 } 326 } 327 328 /* 329 * We are just about to exit the VM, so we will be very aggressive 330 * at this point in order to increase overall success of dumping jfr data: 331 * 332 * 1. if the thread state is not "_thread_in_vm", we will quick transition 333 * it to "_thread_in_vm". 334 * 2. if the thread is the owner of some critical lock(s), unlock them. 335 * 336 * If we end up deadlocking in the attempt of dumping out jfr data, 337 * we rely on the WatcherThread task "is_error_reported()", 338 * to exit the VM after a hard-coded timeout (disallow WatcherThread to emergency dump). 339 * This "safety net" somewhat explains the aggressiveness in this attempt. 340 * 341 */ 342 static bool prepare_for_emergency_dump() { 343 if (JfrStream_lock->owned_by_self()) { 344 // crashed during jfr rotation, disallow recursion 345 return false; 346 } 347 Thread* const thread = Thread::current(); 348 if (thread->is_Watcher_thread()) { 349 // need WatcherThread as a safeguard against potential deadlocks 350 return false; 351 } 352 353 if (thread->is_Java_thread()) { 354 ((JavaThread*)thread)->set_thread_state(_thread_in_vm); 355 } 356 357 #ifdef ASSERT 358 Mutex* owned_lock = thread->owned_locks(); 359 while (owned_lock != NULL) { 360 Mutex* next = owned_lock->next(); 361 owned_lock->unlock(); 362 owned_lock = next; 363 } 364 #endif // ASSERT 365 366 if (Threads_lock->owned_by_self()) { 367 Threads_lock->unlock(); 368 } 369 370 if (Module_lock->owned_by_self()) { 371 Module_lock->unlock(); 372 } 373 374 if (ClassLoaderDataGraph_lock->owned_by_self()) { 375 ClassLoaderDataGraph_lock->unlock(); 376 } 377 378 if (Heap_lock->owned_by_self()) { 379 Heap_lock->unlock(); 380 } 381 382 if (VMOperationQueue_lock->owned_by_self()) { 383 VMOperationQueue_lock->unlock(); 384 } 385 386 if (VMOperationRequest_lock->owned_by_self()) { 387 VMOperationRequest_lock->unlock(); 388 } 389 390 if (Service_lock->owned_by_self()) { 391 Service_lock->unlock(); 392 } 393 394 if (UseNotificationThread && Notification_lock->owned_by_self()) { 395 Notification_lock->unlock(); 396 } 397 398 if (CodeCache_lock->owned_by_self()) { 399 CodeCache_lock->unlock(); 400 } 401 402 if (PeriodicTask_lock->owned_by_self()) { 403 PeriodicTask_lock->unlock(); 404 } 405 406 if (JfrMsg_lock->owned_by_self()) { 407 JfrMsg_lock->unlock(); 408 } 409 410 if (JfrBuffer_lock->owned_by_self()) { 411 JfrBuffer_lock->unlock(); 412 } 413 414 if (JfrStacktrace_lock->owned_by_self()) { 415 JfrStacktrace_lock->unlock(); 416 } 417 return true; 418 } 419 420 static volatile int jfr_shutdown_lock = 0; 421 422 static bool guard_reentrancy() { 423 return Atomic::cmpxchg(1, &jfr_shutdown_lock, 0) == 0; 424 } 425 426 void JfrEmergencyDump::on_vm_shutdown(bool exception_handler) { 427 if (!(guard_reentrancy() && prepare_for_emergency_dump())) { 428 return; 429 } 430 EventDumpReason event; 431 if (event.should_commit()) { 432 event.set_reason(exception_handler ? "Crash" : "Out of Memory"); 433 event.set_recordingId(-1); 434 event.commit(); 435 } 436 if (!exception_handler) { 437 // OOM 438 LeakProfiler::emit_events(max_jlong, false); 439 } 440 const int messages = MSGBIT(MSG_VM_ERROR); 441 JfrRecorderService service; 442 service.rotate(messages); 443 }