1 /* 2 * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "classfile/vmSymbols.hpp" 27 #include "logging/log.hpp" 28 #include "memory/allocation.inline.hpp" 29 #include "memory/resourceArea.hpp" 30 #include "oops/oop.inline.hpp" 31 #include "os_linux.inline.hpp" 32 #include "runtime/handles.inline.hpp" 33 #include "runtime/os.hpp" 34 #include "runtime/perfMemory.hpp" 35 #include "services/memTracker.hpp" 36 #include "utilities/exceptions.hpp" 37 38 // put OS-includes here 39 # include <sys/types.h> 40 # include <sys/mman.h> 41 # include <errno.h> 42 # include <stdio.h> 43 # include <unistd.h> 44 # include <sys/stat.h> 45 # include <signal.h> 46 # include <pwd.h> 47 48 static char* backing_store_file_name = NULL; // name of the backing store 49 // file, if successfully created. 50 51 // Standard Memory Implementation Details 52 53 // create the PerfData memory region in standard memory. 
54 // 55 static char* create_standard_memory(size_t size) { 56 57 // allocate an aligned chuck of memory 58 char* mapAddress = os::reserve_memory(size); 59 60 if (mapAddress == NULL) { 61 return NULL; 62 } 63 64 // commit memory 65 if (!os::commit_memory(mapAddress, size, !ExecMem)) { 66 if (PrintMiscellaneous && Verbose) { 67 warning("Could not commit PerfData memory\n"); 68 } 69 os::release_memory(mapAddress, size); 70 return NULL; 71 } 72 73 return mapAddress; 74 } 75 76 // delete the PerfData memory region 77 // 78 static void delete_standard_memory(char* addr, size_t size) { 79 80 // there are no persistent external resources to cleanup for standard 81 // memory. since DestroyJavaVM does not support unloading of the JVM, 82 // cleanup of the memory resource is not performed. The memory will be 83 // reclaimed by the OS upon termination of the process. 84 // 85 return; 86 } 87 88 // save the specified memory region to the given file 89 // 90 // Note: this function might be called from signal handler (by os::abort()), 91 // don't allocate heap memory. 
92 // 93 static void save_memory_to_file(char* addr, size_t size) { 94 95 const char* destfile = PerfMemory::get_perfdata_file_path(); 96 assert(destfile[0] != '\0', "invalid PerfData file path"); 97 98 int result; 99 100 RESTARTABLE(::open(destfile, O_CREAT|O_WRONLY|O_TRUNC, S_IREAD|S_IWRITE), 101 result);; 102 if (result == OS_ERR) { 103 if (PrintMiscellaneous && Verbose) { 104 warning("Could not create Perfdata save file: %s: %s\n", 105 destfile, os::strerror(errno)); 106 } 107 } else { 108 int fd = result; 109 110 for (size_t remaining = size; remaining > 0;) { 111 112 RESTARTABLE(::write(fd, addr, remaining), result); 113 if (result == OS_ERR) { 114 if (PrintMiscellaneous && Verbose) { 115 warning("Could not write Perfdata save file: %s: %s\n", 116 destfile, os::strerror(errno)); 117 } 118 break; 119 } 120 121 remaining -= (size_t)result; 122 addr += result; 123 } 124 125 result = ::close(fd); 126 if (PrintMiscellaneous && Verbose) { 127 if (result == OS_ERR) { 128 warning("Could not close %s: %s\n", destfile, os::strerror(errno)); 129 } 130 } 131 } 132 FREE_C_HEAP_ARRAY(char, destfile); 133 } 134 135 136 // Shared Memory Implementation Details 137 138 // Note: the solaris and linux shared memory implementation uses the mmap 139 // interface with a backing store file to implement named shared memory. 140 // Using the file system as the name space for shared memory allows a 141 // common name space to be supported across a variety of platforms. It 142 // also provides a name space that Java applications can deal with through 143 // simple file apis. 144 // 145 // The solaris and linux implementations store the backing store file in 146 // a user specific temporary directory located in the /tmp file system, 147 // which is always a local file system and is sometimes a RAM based file 148 // system. 149 150 151 // return the user specific temporary directory name. 
152 // 153 // If containerized process, get dirname of 154 // /proc/{vmid}/root/tmp/{PERFDATA_NAME_user} 155 // otherwise /tmp/{PERFDATA_NAME_user} 156 // 157 // the caller is expected to free the allocated memory. 158 // 159 #define TMP_BUFFER_LEN (4+22) 160 static char* get_user_tmp_dir(const char* user, int vmid, int nspid) { 161 char buffer[TMP_BUFFER_LEN]; 162 char* tmpdir = (char *)os::get_temp_directory(); 163 assert(strlen(tmpdir) == 4, "No longer using /tmp - update buffer size"); 164 165 if (nspid != -1) { 166 jio_snprintf(buffer, TMP_BUFFER_LEN, "/proc/%d/root%s", vmid, tmpdir); 167 tmpdir = buffer; 168 } 169 170 const char* perfdir = PERFDATA_NAME; 171 size_t nbytes = strlen(tmpdir) + strlen(perfdir) + strlen(user) + 3; 172 char* dirname = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal); 173 174 // construct the path name to user specific tmp directory 175 snprintf(dirname, nbytes, "%s/%s_%s", tmpdir, perfdir, user); 176 177 return dirname; 178 } 179 180 // convert the given file name into a process id. if the file 181 // does not meet the file naming constraints, return 0. 182 // 183 static pid_t filename_to_pid(const char* filename) { 184 185 // a filename that doesn't begin with a digit is not a 186 // candidate for conversion. 187 // 188 if (!isdigit(*filename)) { 189 return 0; 190 } 191 192 // check if file name can be converted to an integer without 193 // any leftover characters. 194 // 195 char* remainder = NULL; 196 errno = 0; 197 pid_t pid = (pid_t)strtol(filename, &remainder, 10); 198 199 if (errno != 0) { 200 return 0; 201 } 202 203 // check for left over characters. If any, then the filename is 204 // not a candidate for conversion. 205 // 206 if (remainder != NULL && *remainder != '\0') { 207 return 0; 208 } 209 210 // successful conversion, return the pid 211 return pid; 212 } 213 214 215 // Check if the given statbuf is considered a secure directory for 216 // the backing store files. 
Returns true if the directory is considered 217 // a secure location. Returns false if the statbuf is a symbolic link or 218 // if an error occurred. 219 // 220 static bool is_statbuf_secure(struct stat *statp) { 221 if (S_ISLNK(statp->st_mode) || !S_ISDIR(statp->st_mode)) { 222 // The path represents a link or some non-directory file type, 223 // which is not what we expected. Declare it insecure. 224 // 225 return false; 226 } 227 // We have an existing directory, check if the permissions are safe. 228 // 229 if ((statp->st_mode & (S_IWGRP|S_IWOTH)) != 0) { 230 // The directory is open for writing and could be subjected 231 // to a symlink or a hard link attack. Declare it insecure. 232 // 233 return false; 234 } 235 // If user is not root then see if the uid of the directory matches the effective uid of the process. 236 uid_t euid = geteuid(); 237 if ((euid != 0) && (statp->st_uid != euid)) { 238 // The directory was not created by this user, declare it insecure. 239 // 240 return false; 241 } 242 return true; 243 } 244 245 246 // Check if the given path is considered a secure directory for 247 // the backing store files. Returns true if the directory exists 248 // and is considered a secure location. Returns false if the path 249 // is a symbolic link or if an error occurred. 250 // 251 static bool is_directory_secure(const char* path) { 252 struct stat statbuf; 253 int result = 0; 254 255 RESTARTABLE(::lstat(path, &statbuf), result); 256 if (result == OS_ERR) { 257 return false; 258 } 259 260 // The path exists, see if it is secure. 261 return is_statbuf_secure(&statbuf); 262 } 263 264 265 // Check if the given directory file descriptor is considered a secure 266 // directory for the backing store files. Returns true if the directory 267 // exists and is considered a secure location. Returns false if the path 268 // is a symbolic link or if an error occurred. 
//
// Unlike is_directory_secure(), this variant inspects an already open
// descriptor with fstat(), so the result describes the object that was
// actually opened.
//
static bool is_dirfd_secure(int dir_fd) {
  struct stat statbuf;
  int result = 0;

  RESTARTABLE(::fstat(dir_fd, &statbuf), result);
  if (result == OS_ERR) {
    return false;
  }

  // The path exists, now check its mode.
  return is_statbuf_secure(&statbuf);
}


// Check to make sure fd1 and fd2 are referencing the same file system object.
//
// Two descriptors are considered the same object when both fstat() calls
// succeed and the inode number and device match. Returns false if either
// fstat() fails.
//
static bool is_same_fsobject(int fd1, int fd2) {
  struct stat statbuf1;
  struct stat statbuf2;
  int result = 0;

  RESTARTABLE(::fstat(fd1, &statbuf1), result);
  if (result == OS_ERR) {
    return false;
  }
  RESTARTABLE(::fstat(fd2, &statbuf2), result);
  if (result == OS_ERR) {
    return false;
  }

  if ((statbuf1.st_ino == statbuf2.st_ino) &&
      (statbuf1.st_dev == statbuf2.st_dev)) {
    return true;
  } else {
    return false;
  }
}


// Open the directory of the given path and validate it.
// Return a DIR * of the open directory.
//
// Returns NULL if the path cannot be opened, is a symbolic link, fails the
// is_dirfd_secure() check, cannot be opened with opendir(), or if the two
// opens turn out to reference different file system objects. The temporary
// descriptor obtained from open() is closed on every path.
//
static DIR *open_directory_secure(const char* dirname) {
  // Open the directory using open() so that it can be verified
  // to be secure by calling is_dirfd_secure(), opendir() and then check
  // to see if they are the same file system object.  This method does not
  // introduce a window of opportunity for the directory to be attacked that
  // calling opendir() and is_directory_secure() does.
  int result;
  DIR *dirp = NULL;
  // O_NOFOLLOW makes the open fail with ELOOP when dirname is a symlink
  RESTARTABLE(::open(dirname, O_RDONLY|O_NOFOLLOW), result);
  if (result == OS_ERR) {
    if (PrintMiscellaneous && Verbose) {
      if (errno == ELOOP) {
        warning("directory %s is a symlink and is not secure\n", dirname);
      } else {
        warning("could not open directory %s: %s\n", dirname, os::strerror(errno));
      }
    }
    return dirp;
  }
  int fd = result;

  // Determine if the open directory is secure.
  if (!is_dirfd_secure(fd)) {
    // The directory is not a secure directory.
    os::close(fd);
    return dirp;
  }

  // Open the directory.
  dirp = ::opendir(dirname);
  if (dirp == NULL) {
    // The directory doesn't exist, close fd and return.
    os::close(fd);
    return dirp;
  }

  // Check to make sure fd and dirp are referencing the same file system object.
  if (!is_same_fsobject(fd, dirfd(dirp))) {
    // The directory is not secure.
    os::close(fd);
    os::closedir(dirp);
    dirp = NULL;
    return dirp;
  }

  // Close initial open now that we know directory is secure
  os::close(fd);

  return dirp;
}

// NOTE: The code below uses fchdir(), open() and unlink() because
// fdopendir(), openat() and unlinkat() are not supported on all
// versions.  Once the support for fdopendir(), openat() and unlinkat()
// is available on all supported versions the code can be changed
// to use these functions.

// Open the directory of the given path, validate it and set the
// current working directory to it.
// Return a DIR * of the open directory and the saved cwd fd.
//
// On success *saved_cwd_fd holds a descriptor for the previous working
// directory, or -1 if "." could not be opened. If fchdir() fails, any saved
// descriptor is closed, *saved_cwd_fd is reset to -1 and NULL is returned.
// If open_directory_secure() fails, NULL is returned and *saved_cwd_fd is
// not written.
//
static DIR *open_directory_secure_cwd(const char* dirname, int *saved_cwd_fd) {

  // Open the directory.
  DIR* dirp = open_directory_secure(dirname);
  if (dirp == NULL) {
    // Directory doesn't exist or is insecure, so there is nothing to cleanup.
    return dirp;
  }
  int fd = dirfd(dirp);

  // Open a fd to the cwd and save it off.
  int result;
  RESTARTABLE(::open(".", O_RDONLY), result);
  if (result == OS_ERR) {
    *saved_cwd_fd = -1;
  } else {
    *saved_cwd_fd = result;
  }

  // Set the current directory to dirname by using the fd of the directory and
  // handle errors, otherwise shared memory files will be created in cwd.
  result = fchdir(fd);
  if (result == OS_ERR) {
    if (PrintMiscellaneous && Verbose) {
      warning("could not change to directory %s", dirname);
    }
    if (*saved_cwd_fd != -1) {
      ::close(*saved_cwd_fd);
      *saved_cwd_fd = -1;
    }
    // Close the directory.
    os::closedir(dirp);
    return NULL;
  } else {
    return dirp;
  }
}

// Close the directory and restore the current working directory.
//
// Counterpart of open_directory_secure_cwd(). A saved_cwd_fd of -1 means
// there is no working directory to restore. The outcome of the fchdir()
// back to the saved directory is stored in 'result' but not examined.
//
static void close_directory_secure_cwd(DIR* dirp, int saved_cwd_fd) {

  int result;
  // If we have a saved cwd change back to it and close the fd.
  if (saved_cwd_fd != -1) {
    result = fchdir(saved_cwd_fd);
    ::close(saved_cwd_fd);
  }

  // Close the directory.
  os::closedir(dirp);
}

// Check if the given file descriptor is considered secure.
//
// The only properties checked here are that fstat() on 'fd' succeeds and
// that the file does not have more than one link. 'filename' is used for
// diagnostics only.
//
static bool is_file_secure(int fd, const char *filename) {

  int result;
  struct stat statbuf;

  // Determine if the file is secure.
  RESTARTABLE(::fstat(fd, &statbuf), result);
  if (result == OS_ERR) {
    if (PrintMiscellaneous && Verbose) {
      warning("fstat failed on %s: %s\n", filename, os::strerror(errno));
    }
    return false;
  }
  if (statbuf.st_nlink > 1) {
    // A file with multiple links is not expected.
    if (PrintMiscellaneous && Verbose) {
      warning("file %s has multiple links\n", filename);
    }
    return false;
  }
  return true;
}


// return the user name for the given user id
//
// Returns NULL if the passwd entry cannot be retrieved or if it carries a
// NULL or empty user name.
//
// the caller is expected to free the allocated memory.
//
static char* get_user_name(uid_t uid) {

  struct passwd pwent;

  // determine the max pwbuf size from sysconf, and hardcode
  // a default if this not available through sysconf.
  //
  long bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
  if (bufsize == -1)
    bufsize = 1024;

  char* pwbuf = NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);

  // POSIX interface to getpwuid_r is used on LINUX
  struct passwd* p;
  int result = getpwuid_r(uid, &pwent, pwbuf, (size_t)bufsize, &p);

  if (result != 0 || p == NULL || p->pw_name == NULL || *(p->pw_name) == '\0') {
    if (PrintMiscellaneous && Verbose) {
      if (result != 0) {
        warning("Could not retrieve passwd entry: %s\n",
                os::strerror(result));
      }
      else if (p == NULL) {
        // this check is added to protect against an observed problem
        // with getpwuid_r() on RedHat 9 where getpwuid_r returns 0,
        // indicating success, but has p == NULL. This was observed when
        // inserting a file descriptor exhaustion fault prior to the call
        // getpwuid_r() call. In this case, error is set to the appropriate
        // error condition, but this is undocumented behavior. This check
        // is safe under any condition, but the use of errno in the output
        // message may result in an erroneous message.
        // Bug Id 89052 was opened with RedHat.
        //
        warning("Could not retrieve passwd entry: %s\n",
                os::strerror(errno));
      }
      else {
        warning("Could not determine user name: %s\n",
                p->pw_name == NULL ? "pw_name = NULL" :
                                     "pw_name zero length");
      }
    }
    FREE_C_HEAP_ARRAY(char, pwbuf);
    return NULL;
  }

  // copy the name out of pwbuf before the buffer is released below
  char* user_name = NEW_C_HEAP_ARRAY(char, strlen(p->pw_name) + 1, mtInternal);
  strcpy(user_name, p->pw_name);

  FREE_C_HEAP_ARRAY(char, pwbuf);
  return user_name;
}

// return the name of the user that owns the process identified by vmid.
//
// This method uses a slow directory search algorithm to find the backing
// store file for the specified vmid and returns the user name, as determined
// by the user name suffix of the hsperfdata_<username> directory name.
515 // 516 // the caller is expected to free the allocated memory. 517 // 518 // If nspid != -1, look in /proc/{vmid}/root/tmp for directories 519 // containing nspid, otherwise just look for vmid in /tmp 520 // 521 static char* get_user_name_slow(int vmid, int nspid, TRAPS) { 522 523 // short circuit the directory search if the process doesn't even exist. 524 if (kill(vmid, 0) == OS_ERR) { 525 if (errno == ESRCH) { 526 THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(), 527 "Process not found"); 528 } 529 else /* EPERM */ { 530 THROW_MSG_0(vmSymbols::java_io_IOException(), os::strerror(errno)); 531 } 532 } 533 534 // directory search 535 char* oldest_user = NULL; 536 time_t oldest_ctime = 0; 537 char buffer[TMP_BUFFER_LEN]; 538 int searchpid; 539 char* tmpdirname = (char *)os::get_temp_directory(); 540 assert(strlen(tmpdirname) == 4, "No longer using /tmp - update buffer size"); 541 542 if (nspid == -1) { 543 searchpid = vmid; 544 } 545 else { 546 jio_snprintf(buffer, MAXPATHLEN, "/proc/%d/root%s", vmid, tmpdirname); 547 tmpdirname = buffer; 548 searchpid = nspid; 549 } 550 551 // open the temp directory 552 DIR* tmpdirp = os::opendir(tmpdirname); 553 554 if (tmpdirp == NULL) { 555 // Cannot open the directory to get the user name, return. 556 return NULL; 557 } 558 559 // for each entry in the directory that matches the pattern hsperfdata_*, 560 // open the directory and check if the file for the given vmid or nspid exists. 561 // The file with the expected name and the latest creation date is used 562 // to determine the user name for the process id. 
563 // 564 struct dirent* dentry; 565 errno = 0; 566 while ((dentry = os::readdir(tmpdirp, NULL)) != NULL) { 567 568 // check if the directory entry is a hsperfdata file 569 if (strncmp(dentry->d_name, PERFDATA_NAME, strlen(PERFDATA_NAME)) != 0) { 570 continue; 571 } 572 573 char* usrdir_name = NEW_C_HEAP_ARRAY(char, 574 strlen(tmpdirname) + strlen(dentry->d_name) + 2, mtInternal); 575 strcpy(usrdir_name, tmpdirname); 576 strcat(usrdir_name, "/"); 577 strcat(usrdir_name, dentry->d_name); 578 579 // open the user directory 580 DIR* subdirp = open_directory_secure(usrdir_name); 581 582 if (subdirp == NULL) { 583 FREE_C_HEAP_ARRAY(char, usrdir_name); 584 continue; 585 } 586 587 // Since we don't create the backing store files in directories 588 // pointed to by symbolic links, we also don't follow them when 589 // looking for the files. We check for a symbolic link after the 590 // call to opendir in order to eliminate a small window where the 591 // symlink can be exploited. 592 // 593 if (!is_directory_secure(usrdir_name)) { 594 FREE_C_HEAP_ARRAY(char, usrdir_name); 595 os::closedir(subdirp); 596 continue; 597 } 598 599 struct dirent* udentry; 600 errno = 0; 601 while ((udentry = os::readdir(subdirp, NULL)) != NULL) { 602 603 if (filename_to_pid(udentry->d_name) == searchpid) { 604 struct stat statbuf; 605 int result; 606 607 char* filename = NEW_C_HEAP_ARRAY(char, 608 strlen(usrdir_name) + strlen(udentry->d_name) + 2, mtInternal); 609 610 strcpy(filename, usrdir_name); 611 strcat(filename, "/"); 612 strcat(filename, udentry->d_name); 613 614 // don't follow symbolic links for the file 615 RESTARTABLE(::lstat(filename, &statbuf), result); 616 if (result == OS_ERR) { 617 FREE_C_HEAP_ARRAY(char, filename); 618 continue; 619 } 620 621 // skip over files that are not regular files. 
622 if (!S_ISREG(statbuf.st_mode)) { 623 FREE_C_HEAP_ARRAY(char, filename); 624 continue; 625 } 626 627 // compare and save filename with latest creation time 628 if (statbuf.st_size > 0 && statbuf.st_ctime > oldest_ctime) { 629 630 if (statbuf.st_ctime > oldest_ctime) { 631 char* user = strchr(dentry->d_name, '_') + 1; 632 633 if (oldest_user != NULL) FREE_C_HEAP_ARRAY(char, oldest_user); 634 oldest_user = NEW_C_HEAP_ARRAY(char, strlen(user)+1, mtInternal); 635 636 strcpy(oldest_user, user); 637 oldest_ctime = statbuf.st_ctime; 638 } 639 } 640 641 FREE_C_HEAP_ARRAY(char, filename); 642 } 643 } 644 os::closedir(subdirp); 645 FREE_C_HEAP_ARRAY(char, usrdir_name); 646 } 647 os::closedir(tmpdirp); 648 649 return(oldest_user); 650 } 651 652 // Determine if the vmid is the parent pid 653 // for a child in a PID namespace. 654 // return the namespace pid if so, otherwise -1 655 static int get_namespace_pid(int vmid) { 656 char fname[24]; 657 int retpid = -1; 658 659 snprintf(fname, sizeof(fname), "/proc/%d/status", vmid); 660 FILE *fp = fopen(fname, "r"); 661 662 if (fp) { 663 int pid, nspid; 664 int ret; 665 while (!feof(fp)) { 666 ret = fscanf(fp, "NSpid: %d %d", &pid, &nspid); 667 if (ret == 1) { 668 break; 669 } 670 if (ret == 2) { 671 retpid = nspid; 672 break; 673 } 674 for (;;) { 675 int ch = fgetc(fp); 676 if (ch == EOF || ch == (int)'\n') break; 677 } 678 } 679 fclose(fp); 680 } 681 return retpid; 682 } 683 684 // return the name of the user that owns the JVM indicated by the given vmid. 685 // 686 static char* get_user_name(int vmid, int *nspid, TRAPS) { 687 char *result = get_user_name_slow(vmid, *nspid, THREAD); 688 689 // If we are examining a container process without PID namespaces enabled 690 // we need to use /proc/{pid}/root/tmp to find hsperfdata files. 
691 if (result == NULL) { 692 result = get_user_name_slow(vmid, vmid, THREAD); 693 // Enable nspid logic going forward 694 if (result != NULL) *nspid = vmid; 695 } 696 return result; 697 } 698 699 // return the file name of the backing store file for the named 700 // shared memory region for the given user name and vmid. 701 // 702 // the caller is expected to free the allocated memory. 703 // 704 static char* get_sharedmem_filename(const char* dirname, int vmid, int nspid) { 705 706 int pid = (nspid == -1) ? vmid : nspid; 707 708 // add 2 for the file separator and a null terminator. 709 size_t nbytes = strlen(dirname) + UINT_CHARS + 2; 710 711 char* name = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal); 712 snprintf(name, nbytes, "%s/%d", dirname, pid); 713 714 return name; 715 } 716 717 718 // remove file 719 // 720 // this method removes the file specified by the given path 721 // 722 static void remove_file(const char* path) { 723 724 int result; 725 726 // if the file is a directory, the following unlink will fail. since 727 // we don't expect to find directories in the user temp directory, we 728 // won't try to handle this situation. even if accidentially or 729 // maliciously planted, the directory's presence won't hurt anything. 730 // 731 RESTARTABLE(::unlink(path), result); 732 if (PrintMiscellaneous && Verbose && result == OS_ERR) { 733 if (errno != ENOENT) { 734 warning("Could not unlink shared memory backing" 735 " store file %s : %s\n", path, os::strerror(errno)); 736 } 737 } 738 } 739 740 741 // cleanup stale shared memory resources 742 // 743 // This method attempts to remove all stale shared memory files in 744 // the named user temporary directory. It scans the named directory 745 // for files matching the pattern ^$[0-9]*$. For each file found, the 746 // process id is extracted from the file name and a test is run to 747 // determine if the process is alive. If the process is not alive, 748 // any stale file resources are removed. 
749 // 750 static void cleanup_sharedmem_resources(const char* dirname) { 751 752 int saved_cwd_fd; 753 // open the directory 754 DIR* dirp = open_directory_secure_cwd(dirname, &saved_cwd_fd); 755 if (dirp == NULL) { 756 // directory doesn't exist or is insecure, so there is nothing to cleanup 757 return; 758 } 759 760 // for each entry in the directory that matches the expected file 761 // name pattern, determine if the file resources are stale and if 762 // so, remove the file resources. Note, instrumented HotSpot processes 763 // for this user may start and/or terminate during this search and 764 // remove or create new files in this directory. The behavior of this 765 // loop under these conditions is dependent upon the implementation of 766 // opendir/readdir. 767 // 768 struct dirent* entry; 769 770 errno = 0; 771 while ((entry = os::readdir(dirp, NULL)) != NULL) { 772 773 pid_t pid = filename_to_pid(entry->d_name); 774 775 if (pid == 0) { 776 777 if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) { 778 // attempt to remove all unexpected files, except "." and ".." 779 unlink(entry->d_name); 780 } 781 782 errno = 0; 783 continue; 784 } 785 786 // we now have a file name that converts to a valid integer 787 // that could represent a process id . if this process id 788 // matches the current process id or the process is not running, 789 // then remove the stale file resources. 790 // 791 // process liveness is detected by sending signal number 0 to 792 // the process id (see kill(2)). if kill determines that the 793 // process does not exist, then the file resources are removed. 794 // if kill determines that that we don't have permission to 795 // signal the process, then the file resources are assumed to 796 // be stale and are removed because the resources for such a 797 // process should be in a different user specific directory. 
798 // 799 if ((pid == os::current_process_id()) || 800 (kill(pid, 0) == OS_ERR && (errno == ESRCH || errno == EPERM))) { 801 unlink(entry->d_name); 802 } 803 errno = 0; 804 } 805 806 // close the directory and reset the current working directory 807 close_directory_secure_cwd(dirp, saved_cwd_fd); 808 } 809 810 // make the user specific temporary directory. Returns true if 811 // the directory exists and is secure upon return. Returns false 812 // if the directory exists but is either a symlink, is otherwise 813 // insecure, or if an error occurred. 814 // 815 static bool make_user_tmp_dir(const char* dirname) { 816 817 // create the directory with 0755 permissions. note that the directory 818 // will be owned by euid::egid, which may not be the same as uid::gid. 819 // 820 if (mkdir(dirname, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) == OS_ERR) { 821 if (errno == EEXIST) { 822 // The directory already exists and was probably created by another 823 // JVM instance. However, this could also be the result of a 824 // deliberate symlink. Verify that the existing directory is safe. 825 // 826 if (!is_directory_secure(dirname)) { 827 // directory is not secure 828 if (PrintMiscellaneous && Verbose) { 829 warning("%s directory is insecure\n", dirname); 830 } 831 return false; 832 } 833 } 834 else { 835 // we encountered some other failure while attempting 836 // to create the directory 837 // 838 if (PrintMiscellaneous && Verbose) { 839 warning("could not create directory %s: %s\n", 840 dirname, os::strerror(errno)); 841 } 842 return false; 843 } 844 } 845 return true; 846 } 847 848 // create the shared memory file resources 849 // 850 // This method creates the shared memory file with the given size 851 // This method also creates the user specific temporary directory, if 852 // it does not yet exist. 
//
// 'filename' is opened relative to 'dirname' (the working directory is
// switched to it for the duration of the open). Returns the open file
// descriptor on success or -1 on failure.
//
static int create_sharedmem_resources(const char* dirname, const char* filename, size_t size) {

  // make the user temporary directory
  if (!make_user_tmp_dir(dirname)) {
    // could not make/find the directory or the found directory
    // was not secure
    return -1;
  }

  int saved_cwd_fd;
  // open the directory and set the current working directory to it
  DIR* dirp = open_directory_secure_cwd(dirname, &saved_cwd_fd);
  if (dirp == NULL) {
    // Directory doesn't exist or is insecure, so cannot create shared
    // memory file.
    return -1;
  }

  // Open the filename in the current directory.
  // Cannot use O_TRUNC here; truncation of an existing file has to happen
  // after the is_file_secure() check below.
  int result;
  RESTARTABLE(::open(filename, O_RDWR|O_CREAT|O_NOFOLLOW, S_IREAD|S_IWRITE), result);
  if (result == OS_ERR) {
    if (PrintMiscellaneous && Verbose) {
      if (errno == ELOOP) {
        warning("file %s is a symlink and is not secure\n", filename);
      } else {
        warning("could not create file %s: %s\n", filename, os::strerror(errno));
      }
    }
    // close the directory and reset the current working directory
    close_directory_secure_cwd(dirp, saved_cwd_fd);

    return -1;
  }
  // close the directory and reset the current working directory
  close_directory_secure_cwd(dirp, saved_cwd_fd);

  // save the file descriptor
  int fd = result;

  // check to see if the file is secure
  if (!is_file_secure(fd, filename)) {
    ::close(fd);
    return -1;
  }

  // truncate the file to get rid of any existing data
  RESTARTABLE(::ftruncate(fd, (off_t)0), result);
  if (result == OS_ERR) {
    if (PrintMiscellaneous && Verbose) {
      warning("could not truncate shared memory file: %s\n", os::strerror(errno));
    }
    ::close(fd);
    return -1;
  }
  // set the file size
  RESTARTABLE(::ftruncate(fd, (off_t)size), result);
  if (result == OS_ERR) {
    if (PrintMiscellaneous && Verbose) {
      warning("could not set shared memory file size: %s\n", os::strerror(errno));
    }
    ::close(fd);
    return -1;
  }

  // Verify that we have enough disk space for this file.
  // We'll get random SIGBUS crashes on memory accesses if
  // we don't.

  // one byte is written at every os::vm_page_size() offset; 'result' carries
  // the outcome of the last seek/write out of the loop
  for (size_t seekpos = 0; seekpos < size; seekpos += os::vm_page_size()) {
    int zero_int = 0;
    result = (int)os::seek_to_file_offset(fd, (jlong)(seekpos));
    if (result == -1 ) break;
    RESTARTABLE(::write(fd, &zero_int, 1), result);
    if (result != 1) {
      if (errno == ENOSPC) {
        warning("Insufficient space for shared memory file:\n %s\nTry using the -Djava.io.tmpdir= option to select an alternate temp location.\n", filename);
      }
      break;
    }
  }

  // NOTE(review): only a result of -1 is treated as failure here; a write
  // that returned something other than 1 or -1 would still return fd.
  if (result != -1) {
    return fd;
  } else {
    ::close(fd);
    return -1;
  }
}

// open the shared memory file for the given user and vmid. returns
// the file descriptor for the open file or -1 if the file could not
// be opened.
//
// A failed open() raises an exception through THROW_MSG_ (with OS_ERR as the
// return value): IllegalArgumentException for ENOENT and EACCES, IOException
// otherwise. A file that fails is_file_secure() is closed and -1 is returned
// without raising an exception here.
//
static int open_sharedmem_file(const char* filename, int oflags, TRAPS) {

  // open the file
  int result;
  RESTARTABLE(::open(filename, oflags), result);
  if (result == OS_ERR) {
    if (errno == ENOENT) {
      THROW_MSG_(vmSymbols::java_lang_IllegalArgumentException(),
                 "Process not found", OS_ERR);
    }
    else if (errno == EACCES) {
      THROW_MSG_(vmSymbols::java_lang_IllegalArgumentException(),
                 "Permission denied", OS_ERR);
    }
    else {
      THROW_MSG_(vmSymbols::java_io_IOException(),
                 os::strerror(errno), OS_ERR);
    }
  }
  int fd = result;

  // check to see if the file is secure
  if (!is_file_secure(fd, filename)) {
    ::close(fd);
    return -1;
  }

  return fd;
}

// create a named shared memory region. returns the address of the
// memory region on success or NULL on failure.
// A return value of
// NULL will ultimately disable the shared memory feature.
//
// On Linux, the name space for shared memory objects
// is the file system name space.
//
// A monitoring application attaching to a JVM does not need to know
// the file system name of the shared memory object. However, it may
// be convenient for applications to discover the existence of newly
// created and terminating JVMs by watching the file system name space
// for files being created or removed.
//
// On success the backing store file name is remembered in
// backing_store_file_name so that delete_shared_memory() can remove it.
//
static char* mmap_create_shared(size_t size) {

  int result;
  int fd;
  char* mapAddress;

  int vmid = os::current_process_id();

  char* user_name = get_user_name(geteuid());

  if (user_name == NULL)
    return NULL;

  // nspid of -1: use the plain temp directory and this process' own id
  char* dirname = get_user_tmp_dir(user_name, vmid, -1);
  char* filename = get_sharedmem_filename(dirname, vmid, -1);

  // get the short filename
  char* short_filename = strrchr(filename, '/');
  if (short_filename == NULL) {
    short_filename = filename;
  } else {
    short_filename++;
  }

  // cleanup any stale shared memory files
  cleanup_sharedmem_resources(dirname);

  assert(((size > 0) && (size % os::vm_page_size() == 0)),
         "unexpected PerfMemory region size");

  fd = create_sharedmem_resources(dirname, short_filename, size);

  FREE_C_HEAP_ARRAY(char, user_name);
  FREE_C_HEAP_ARRAY(char, dirname);

  if (fd == -1) {
    FREE_C_HEAP_ARRAY(char, filename);
    return NULL;
  }

  mapAddress = (char*)::mmap((char*)0, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);

  // the descriptor is closed here whether or not the mapping succeeded
  result = ::close(fd);
  assert(result != OS_ERR, "could not close file");

  if (mapAddress == MAP_FAILED) {
    if (PrintMiscellaneous && Verbose) {
      warning("mmap failed -  %s\n", os::strerror(errno));
    }
    remove_file(filename);
    FREE_C_HEAP_ARRAY(char, filename);
    return NULL;
  }

  // save the file name for use in delete_shared_memory()
  backing_store_file_name = filename;

  // clear the shared memory region
  (void)::memset((void*) mapAddress, 0, size);

  // it does not go through os api, the operation has to record from here
  MemTracker::record_virtual_memory_reserve_and_commit((address)mapAddress, size, CURRENT_PC, mtInternal);

  return mapAddress;
}

// release a named shared memory region
//
// Thin wrapper that hands the range to os::release_memory().
//
static void unmap_shared(char* addr, size_t bytes) {
  os::release_memory(addr, bytes);
}

// create the PerfData memory region in shared memory.
//
// Returns whatever mmap_create_shared() returns.
//
static char* create_shared_memory(size_t size) {

  // create the shared memory region.
  return mmap_create_shared(size);
}

// delete the shared PerfData memory region
//
// Neither 'addr' nor 'size' is used by the body; only the backing store
// file recorded in backing_store_file_name is removed.
//
static void delete_shared_memory(char* addr, size_t size) {

  // cleanup the persistent shared memory resources. since DestroyJavaVM does
  // not support unloading of the JVM, unmapping of the memory resource is
  // not performed. The memory will be reclaimed by the OS upon termination of
  // the process. The backing store file is deleted from the file system.

  assert(!PerfDisableSharedMem, "shouldn't be here");

  if (backing_store_file_name != NULL) {
    remove_file(backing_store_file_name);
    // Don't.. Free heap memory could deadlock os::abort() if it is called
    // from signal handler. OS will reclaim the heap memory.
    // FREE_C_HEAP_ARRAY(char, backing_store_file_name);
    backing_store_file_name = NULL;
  }
}

// return the size of the file for the given file descriptor
// or 0 if it is not a valid size for a shared memory file
//
// A valid size is non-zero and a multiple of os::vm_page_size(). Both the
// fstat() failure and the invalid-size case raise an exception through
// THROW_MSG_0 (IOException and Exception respectively).
//
static size_t sharedmem_filesize(int fd, TRAPS) {

  struct stat statbuf;
  int result;

  RESTARTABLE(::fstat(fd, &statbuf), result);
  if (result == OS_ERR) {
    if (PrintMiscellaneous && Verbose) {
      warning("fstat failed: %s\n", os::strerror(errno));
    }
    THROW_MSG_0(vmSymbols::java_io_IOException(),
                "Could not determine PerfMemory size");
  }

  if ((statbuf.st_size == 0) ||
     ((size_t)statbuf.st_size % os::vm_page_size() != 0)) {
    THROW_MSG_0(vmSymbols::java_lang_Exception(),
                "Invalid PerfMemory size");
  }

  return (size_t)statbuf.st_size;
}

// attach to a named shared memory region.
//
static void mmap_attach_shared(const char* user, int vmid, PerfMemory::PerfMemoryMode mode, char** addr, size_t* sizep, TRAPS) {

  char* mapAddress;
  int result;
  int fd;
  size_t size = 0;
  const char* luser = NULL;

  int mmap_prot;
  int file_flags;

  ResourceMark rm;

  // map the high level access mode to the appropriate permission
  // constructs for the file and the shared memory mapping.
1136 if (mode == PerfMemory::PERF_MODE_RO) { 1137 mmap_prot = PROT_READ; 1138 file_flags = O_RDONLY | O_NOFOLLOW; 1139 } 1140 else if (mode == PerfMemory::PERF_MODE_RW) { 1141 #ifdef LATER 1142 mmap_prot = PROT_READ | PROT_WRITE; 1143 file_flags = O_RDWR | O_NOFOLLOW; 1144 #else 1145 THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), 1146 "Unsupported access mode"); 1147 #endif 1148 } 1149 else { 1150 THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), 1151 "Illegal access mode"); 1152 } 1153 1154 // determine if vmid is for a containerized process 1155 int nspid = get_namespace_pid(vmid); 1156 1157 if (user == NULL || strlen(user) == 0) { 1158 luser = get_user_name(vmid, &nspid, CHECK); 1159 } 1160 else { 1161 luser = user; 1162 } 1163 1164 if (luser == NULL) { 1165 THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), 1166 "Could not map vmid to user Name"); 1167 } 1168 1169 char* dirname = get_user_tmp_dir(luser, vmid, nspid); 1170 1171 // since we don't follow symbolic links when creating the backing 1172 // store file, we don't follow them when attaching either. 1173 // 1174 if (!is_directory_secure(dirname)) { 1175 FREE_C_HEAP_ARRAY(char, dirname); 1176 if (luser != user) { 1177 FREE_C_HEAP_ARRAY(char, luser); 1178 } 1179 THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(), 1180 "Process not found"); 1181 } 1182 1183 char* filename = get_sharedmem_filename(dirname, vmid, nspid); 1184 1185 // copy heap memory to resource memory. the open_sharedmem_file 1186 // method below need to use the filename, but could throw an 1187 // exception. using a resource array prevents the leak that 1188 // would otherwise occur. 
1189 char* rfilename = NEW_RESOURCE_ARRAY(char, strlen(filename) + 1); 1190 strcpy(rfilename, filename); 1191 1192 // free the c heap resources that are no longer needed 1193 if (luser != user) FREE_C_HEAP_ARRAY(char, luser); 1194 FREE_C_HEAP_ARRAY(char, dirname); 1195 FREE_C_HEAP_ARRAY(char, filename); 1196 1197 // open the shared memory file for the give vmid 1198 fd = open_sharedmem_file(rfilename, file_flags, THREAD); 1199 1200 if (fd == OS_ERR) { 1201 return; 1202 } 1203 1204 if (HAS_PENDING_EXCEPTION) { 1205 ::close(fd); 1206 return; 1207 } 1208 1209 if (*sizep == 0) { 1210 size = sharedmem_filesize(fd, CHECK); 1211 } else { 1212 size = *sizep; 1213 } 1214 1215 assert(size > 0, "unexpected size <= 0"); 1216 1217 mapAddress = (char*)::mmap((char*)0, size, mmap_prot, MAP_SHARED, fd, 0); 1218 1219 result = ::close(fd); 1220 assert(result != OS_ERR, "could not close file"); 1221 1222 if (mapAddress == MAP_FAILED) { 1223 if (PrintMiscellaneous && Verbose) { 1224 warning("mmap failed: %s\n", os::strerror(errno)); 1225 } 1226 THROW_MSG(vmSymbols::java_lang_OutOfMemoryError(), 1227 "Could not map PerfMemory"); 1228 } 1229 1230 // it does not go through os api, the operation has to record from here 1231 MemTracker::record_virtual_memory_reserve_and_commit((address)mapAddress, size, CURRENT_PC, mtInternal); 1232 1233 *addr = mapAddress; 1234 *sizep = size; 1235 1236 log_debug(perf, memops)("mapped " SIZE_FORMAT " bytes for vmid %d at " 1237 INTPTR_FORMAT "\n", size, vmid, p2i((void*)mapAddress)); 1238 } 1239 1240 // create the PerfData memory region 1241 // 1242 // This method creates the memory region used to store performance 1243 // data for the JVM. The memory may be created in standard or 1244 // shared memory. 1245 // 1246 void PerfMemory::create_memory_region(size_t size) { 1247 1248 if (PerfDisableSharedMem) { 1249 // do not share the memory for the performance data. 
1250 _start = create_standard_memory(size); 1251 } 1252 else { 1253 _start = create_shared_memory(size); 1254 if (_start == NULL) { 1255 1256 // creation of the shared memory region failed, attempt 1257 // to create a contiguous, non-shared memory region instead. 1258 // 1259 if (PrintMiscellaneous && Verbose) { 1260 warning("Reverting to non-shared PerfMemory region.\n"); 1261 } 1262 PerfDisableSharedMem = true; 1263 _start = create_standard_memory(size); 1264 } 1265 } 1266 1267 if (_start != NULL) _capacity = size; 1268 1269 } 1270 1271 // delete the PerfData memory region 1272 // 1273 // This method deletes the memory region used to store performance 1274 // data for the JVM. The memory region indicated by the <address, size> 1275 // tuple will be inaccessible after a call to this method. 1276 // 1277 void PerfMemory::delete_memory_region() { 1278 1279 assert((start() != NULL && capacity() > 0), "verify proper state"); 1280 1281 // If user specifies PerfDataSaveFile, it will save the performance data 1282 // to the specified file name no matter whether PerfDataSaveToFile is specified 1283 // or not. In other word, -XX:PerfDataSaveFile=.. overrides flag 1284 // -XX:+PerfDataSaveToFile. 1285 if (PerfDataSaveToFile || PerfDataSaveFile != NULL) { 1286 save_memory_to_file(start(), capacity()); 1287 } 1288 1289 if (PerfDisableSharedMem) { 1290 delete_standard_memory(start(), capacity()); 1291 } 1292 else { 1293 delete_shared_memory(start(), capacity()); 1294 } 1295 } 1296 1297 // attach to the PerfData memory region for another JVM 1298 // 1299 // This method returns an <address, size> tuple that points to 1300 // a memory buffer that is kept reasonably synchronized with 1301 // the PerfData memory region for the indicated JVM. This 1302 // buffer may be kept in synchronization via shared memory 1303 // or some other mechanism that keeps the buffer updated. 
1304 // 1305 // If the JVM chooses not to support the attachability feature, 1306 // this method should throw an UnsupportedOperation exception. 1307 // 1308 // This implementation utilizes named shared memory to map 1309 // the indicated process's PerfData memory region into this JVMs 1310 // address space. 1311 // 1312 void PerfMemory::attach(const char* user, int vmid, PerfMemoryMode mode, char** addrp, size_t* sizep, TRAPS) { 1313 1314 if (vmid == 0 || vmid == os::current_process_id()) { 1315 *addrp = start(); 1316 *sizep = capacity(); 1317 return; 1318 } 1319 1320 mmap_attach_shared(user, vmid, mode, addrp, sizep, CHECK); 1321 } 1322 1323 // detach from the PerfData memory region of another JVM 1324 // 1325 // This method detaches the PerfData memory region of another 1326 // JVM, specified as an <address, size> tuple of a buffer 1327 // in this process's address space. This method may perform 1328 // arbitrary actions to accomplish the detachment. The memory 1329 // region specified by <address, size> will be inaccessible after 1330 // a call to this method. 1331 // 1332 // If the JVM chooses not to support the attachability feature, 1333 // this method should throw an UnsupportedOperation exception. 1334 // 1335 // This implementation utilizes named shared memory to detach 1336 // the indicated process's PerfData memory region from this 1337 // process's address space. 1338 // 1339 void PerfMemory::detach(char* addr, size_t bytes, TRAPS) { 1340 1341 assert(addr != 0, "address sanity check"); 1342 assert(bytes > 0, "capacity sanity check"); 1343 1344 if (PerfMemory::contains(addr) || PerfMemory::contains(addr + bytes - 1)) { 1345 // prevent accidental detachment of this process's PerfMemory region 1346 return; 1347 } 1348 1349 unmap_shared(addr, bytes); 1350 }