1 /*
   2  * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/vmSymbols.hpp"
  27 #include "logging/log.hpp"
  28 #include "memory/allocation.inline.hpp"
  29 #include "memory/resourceArea.hpp"
  30 #include "oops/oop.inline.hpp"
  31 #include "os_linux.inline.hpp"
  32 #include "runtime/handles.inline.hpp"
  33 #include "runtime/os.hpp"
  34 #include "runtime/perfMemory.hpp"
  35 #include "services/memTracker.hpp"
  36 #include "utilities/exceptions.hpp"
  37 
  38 // put OS-includes here
  39 # include <sys/types.h>
  40 # include <sys/mman.h>
  41 # include <errno.h>
  42 # include <stdio.h>
  43 # include <unistd.h>
  44 # include <sys/stat.h>
  45 # include <signal.h>
  46 # include <pwd.h>
  47 
// Name of the backing store file for the shared memory region. Starts out
// as NULL and only holds a name if the file was successfully created.
static char* backing_store_file_name = NULL;
  50 
  51 // Standard Memory Implementation Details
  52 
  53 // create the PerfData memory region in standard memory.
  54 //
  55 static char* create_standard_memory(size_t size) {
  56 
  57   // allocate an aligned chuck of memory
  58   char* mapAddress = os::reserve_memory(size);
  59 
  60   if (mapAddress == NULL) {
  61     return NULL;
  62   }
  63 
  64   // commit memory
  65   if (!os::commit_memory(mapAddress, size, !ExecMem)) {
  66     if (PrintMiscellaneous && Verbose) {
  67       warning("Could not commit PerfData memory\n");
  68     }
  69     os::release_memory(mapAddress, size);
  70     return NULL;
  71   }
  72 
  73   return mapAddress;
  74 }
  75 
  76 // delete the PerfData memory region
  77 //
  78 static void delete_standard_memory(char* addr, size_t size) {
  79 
  80   // there are no persistent external resources to cleanup for standard
  81   // memory. since DestroyJavaVM does not support unloading of the JVM,
  82   // cleanup of the memory resource is not performed. The memory will be
  83   // reclaimed by the OS upon termination of the process.
  84   //
  85   return;
  86 }
  87 
  88 // save the specified memory region to the given file
  89 //
  90 // Note: this function might be called from signal handler (by os::abort()),
  91 // don't allocate heap memory.
  92 //
  93 static void save_memory_to_file(char* addr, size_t size) {
  94 
  95  const char* destfile = PerfMemory::get_perfdata_file_path();
  96  assert(destfile[0] != '\0', "invalid PerfData file path");
  97 
  98   int result;
  99 
 100   RESTARTABLE(::open(destfile, O_CREAT|O_WRONLY|O_TRUNC, S_IREAD|S_IWRITE),
 101               result);;
 102   if (result == OS_ERR) {
 103     if (PrintMiscellaneous && Verbose) {
 104       warning("Could not create Perfdata save file: %s: %s\n",
 105               destfile, os::strerror(errno));
 106     }
 107   } else {
 108     int fd = result;
 109 
 110     for (size_t remaining = size; remaining > 0;) {
 111 
 112       RESTARTABLE(::write(fd, addr, remaining), result);
 113       if (result == OS_ERR) {
 114         if (PrintMiscellaneous && Verbose) {
 115           warning("Could not write Perfdata save file: %s: %s\n",
 116                   destfile, os::strerror(errno));
 117         }
 118         break;
 119       }
 120 
 121       remaining -= (size_t)result;
 122       addr += result;
 123     }
 124 
 125     result = ::close(fd);
 126     if (PrintMiscellaneous && Verbose) {
 127       if (result == OS_ERR) {
 128         warning("Could not close %s: %s\n", destfile, os::strerror(errno));
 129       }
 130     }
 131   }
 132   FREE_C_HEAP_ARRAY(char, destfile);
 133 }
 134 
 135 
 136 // Shared Memory Implementation Details
 137 
 138 // Note: the solaris and linux shared memory implementation uses the mmap
 139 // interface with a backing store file to implement named shared memory.
 140 // Using the file system as the name space for shared memory allows a
 141 // common name space to be supported across a variety of platforms. It
 142 // also provides a name space that Java applications can deal with through
 143 // simple file apis.
 144 //
 145 // The solaris and linux implementations store the backing store file in
 146 // a user specific temporary directory located in the /tmp file system,
 147 // which is always a local file system and is sometimes a RAM based file
 148 // system.
 149 
 150 
 151 // return the user specific temporary directory name.
 152 //
 153 // If containerized process, get dirname of
 154 // /proc/{vmid}/root/tmp/{PERFDATA_NAME_user}
 155 // otherwise /tmp/{PERFDATA_NAME_user}
 156 //
 157 // the caller is expected to free the allocated memory.
 158 //
 159 #define TMP_BUFFER_LEN (4+22)
 160 static char* get_user_tmp_dir(const char* user, int vmid, int nspid) {
 161   char buffer[TMP_BUFFER_LEN];
 162   char* tmpdir = (char *)os::get_temp_directory();
 163   assert(strlen(tmpdir) == 4, "No longer using /tmp - update buffer size");
 164 
 165   if (nspid != -1) {
 166     jio_snprintf(buffer, TMP_BUFFER_LEN, "/proc/%d/root%s", vmid, tmpdir);
 167     tmpdir = buffer;
 168   }
 169 
 170   const char* perfdir = PERFDATA_NAME;
 171   size_t nbytes = strlen(tmpdir) + strlen(perfdir) + strlen(user) + 3;
 172   char* dirname = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
 173 
 174   // construct the path name to user specific tmp directory
 175   snprintf(dirname, nbytes, "%s/%s_%s", tmpdir, perfdir, user);
 176 
 177   return dirname;
 178 }
 179 
 180 // convert the given file name into a process id. if the file
 181 // does not meet the file naming constraints, return 0.
 182 //
 183 static pid_t filename_to_pid(const char* filename) {
 184 
 185   // a filename that doesn't begin with a digit is not a
 186   // candidate for conversion.
 187   //
 188   if (!isdigit(*filename)) {
 189     return 0;
 190   }
 191 
 192   // check if file name can be converted to an integer without
 193   // any leftover characters.
 194   //
 195   char* remainder = NULL;
 196   errno = 0;
 197   pid_t pid = (pid_t)strtol(filename, &remainder, 10);
 198 
 199   if (errno != 0) {
 200     return 0;
 201   }
 202 
 203   // check for left over characters. If any, then the filename is
 204   // not a candidate for conversion.
 205   //
 206   if (remainder != NULL && *remainder != '\0') {
 207     return 0;
 208   }
 209 
 210   // successful conversion, return the pid
 211   return pid;
 212 }
 213 
 214 
 215 // Check if the given statbuf is considered a secure directory for
 216 // the backing store files. Returns true if the directory is considered
 217 // a secure location. Returns false if the statbuf is a symbolic link or
 218 // if an error occurred.
 219 //
 220 static bool is_statbuf_secure(struct stat *statp) {
 221   if (S_ISLNK(statp->st_mode) || !S_ISDIR(statp->st_mode)) {
 222     // The path represents a link or some non-directory file type,
 223     // which is not what we expected. Declare it insecure.
 224     //
 225     return false;
 226   }
 227   // We have an existing directory, check if the permissions are safe.
 228   //
 229   if ((statp->st_mode & (S_IWGRP|S_IWOTH)) != 0) {
 230     // The directory is open for writing and could be subjected
 231     // to a symlink or a hard link attack. Declare it insecure.
 232     //
 233     return false;
 234   }
 235   // If user is not root then see if the uid of the directory matches the effective uid of the process.
 236   uid_t euid = geteuid();
 237   if ((euid != 0) && (statp->st_uid != euid)) {
 238     // The directory was not created by this user, declare it insecure.
 239     //
 240     return false;
 241   }
 242   return true;
 243 }
 244 
 245 
 246 // Check if the given path is considered a secure directory for
 247 // the backing store files. Returns true if the directory exists
 248 // and is considered a secure location. Returns false if the path
 249 // is a symbolic link or if an error occurred.
 250 //
 251 static bool is_directory_secure(const char* path) {
 252   struct stat statbuf;
 253   int result = 0;
 254 
 255   RESTARTABLE(::lstat(path, &statbuf), result);
 256   if (result == OS_ERR) {
 257     return false;
 258   }
 259 
 260   // The path exists, see if it is secure.
 261   return is_statbuf_secure(&statbuf);
 262 }
 263 
 264 
 265 // Check if the given directory file descriptor is considered a secure
 266 // directory for the backing store files. Returns true if the directory
 267 // exists and is considered a secure location. Returns false if the path
 268 // is a symbolic link or if an error occurred.
 269 //
 270 static bool is_dirfd_secure(int dir_fd) {
 271   struct stat statbuf;
 272   int result = 0;
 273 
 274   RESTARTABLE(::fstat(dir_fd, &statbuf), result);
 275   if (result == OS_ERR) {
 276     return false;
 277   }
 278 
 279   // The path exists, now check its mode.
 280   return is_statbuf_secure(&statbuf);
 281 }
 282 
 283 
 284 // Check to make sure fd1 and fd2 are referencing the same file system object.
 285 //
 286 static bool is_same_fsobject(int fd1, int fd2) {
 287   struct stat statbuf1;
 288   struct stat statbuf2;
 289   int result = 0;
 290 
 291   RESTARTABLE(::fstat(fd1, &statbuf1), result);
 292   if (result == OS_ERR) {
 293     return false;
 294   }
 295   RESTARTABLE(::fstat(fd2, &statbuf2), result);
 296   if (result == OS_ERR) {
 297     return false;
 298   }
 299 
 300   if ((statbuf1.st_ino == statbuf2.st_ino) &&
 301       (statbuf1.st_dev == statbuf2.st_dev)) {
 302     return true;
 303   } else {
 304     return false;
 305   }
 306 }
 307 
 308 
 309 // Open the directory of the given path and validate it.
 310 // Return a DIR * of the open directory.
 311 //
 312 static DIR *open_directory_secure(const char* dirname) {
 313   // Open the directory using open() so that it can be verified
 314   // to be secure by calling is_dirfd_secure(), opendir() and then check
 315   // to see if they are the same file system object.  This method does not
 316   // introduce a window of opportunity for the directory to be attacked that
 317   // calling opendir() and is_directory_secure() does.
 318   int result;
 319   DIR *dirp = NULL;
 320   RESTARTABLE(::open(dirname, O_RDONLY|O_NOFOLLOW), result);
 321   if (result == OS_ERR) {
 322     if (PrintMiscellaneous && Verbose) {
 323       if (errno == ELOOP) {
 324         warning("directory %s is a symlink and is not secure\n", dirname);
 325       } else {
 326         warning("could not open directory %s: %s\n", dirname, os::strerror(errno));
 327       }
 328     }
 329     return dirp;
 330   }
 331   int fd = result;
 332 
 333   // Determine if the open directory is secure.
 334   if (!is_dirfd_secure(fd)) {
 335     // The directory is not a secure directory.
 336     os::close(fd);
 337     return dirp;
 338   }
 339 
 340   // Open the directory.
 341   dirp = ::opendir(dirname);
 342   if (dirp == NULL) {
 343     // The directory doesn't exist, close fd and return.
 344     os::close(fd);
 345     return dirp;
 346   }
 347 
 348   // Check to make sure fd and dirp are referencing the same file system object.
 349   if (!is_same_fsobject(fd, dirfd(dirp))) {
 350     // The directory is not secure.
 351     os::close(fd);
 352     os::closedir(dirp);
 353     dirp = NULL;
 354     return dirp;
 355   }
 356 
 357   // Close initial open now that we know directory is secure
 358   os::close(fd);
 359 
 360   return dirp;
 361 }
 362 
 363 // NOTE: The code below uses fchdir(), open() and unlink() because
 364 // fdopendir(), openat() and unlinkat() are not supported on all
 365 // versions.  Once the support for fdopendir(), openat() and unlinkat()
 366 // is available on all supported versions the code can be changed
 367 // to use these functions.
 368 
 369 // Open the directory of the given path, validate it and set the
 370 // current working directory to it.
 371 // Return a DIR * of the open directory and the saved cwd fd.
 372 //
 373 static DIR *open_directory_secure_cwd(const char* dirname, int *saved_cwd_fd) {
 374 
 375   // Open the directory.
 376   DIR* dirp = open_directory_secure(dirname);
 377   if (dirp == NULL) {
 378     // Directory doesn't exist or is insecure, so there is nothing to cleanup.
 379     return dirp;
 380   }
 381   int fd = dirfd(dirp);
 382 
 383   // Open a fd to the cwd and save it off.
 384   int result;
 385   RESTARTABLE(::open(".", O_RDONLY), result);
 386   if (result == OS_ERR) {
 387     *saved_cwd_fd = -1;
 388   } else {
 389     *saved_cwd_fd = result;
 390   }
 391 
 392   // Set the current directory to dirname by using the fd of the directory and
 393   // handle errors, otherwise shared memory files will be created in cwd.
 394   result = fchdir(fd);
 395   if (result == OS_ERR) {
 396     if (PrintMiscellaneous && Verbose) {
 397       warning("could not change to directory %s", dirname);
 398     }
 399     if (*saved_cwd_fd != -1) {
 400       ::close(*saved_cwd_fd);
 401       *saved_cwd_fd = -1;
 402     }
 403     // Close the directory.
 404     os::closedir(dirp);
 405     return NULL;
 406   } else {
 407     return dirp;
 408   }
 409 }
 410 
 411 // Close the directory and restore the current working directory.
 412 //
 413 static void close_directory_secure_cwd(DIR* dirp, int saved_cwd_fd) {
 414 
 415   int result;
 416   // If we have a saved cwd change back to it and close the fd.
 417   if (saved_cwd_fd != -1) {
 418     result = fchdir(saved_cwd_fd);
 419     ::close(saved_cwd_fd);
 420   }
 421 
 422   // Close the directory.
 423   os::closedir(dirp);
 424 }
 425 
 426 // Check if the given file descriptor is considered a secure.
 427 //
 428 static bool is_file_secure(int fd, const char *filename) {
 429 
 430   int result;
 431   struct stat statbuf;
 432 
 433   // Determine if the file is secure.
 434   RESTARTABLE(::fstat(fd, &statbuf), result);
 435   if (result == OS_ERR) {
 436     if (PrintMiscellaneous && Verbose) {
 437       warning("fstat failed on %s: %s\n", filename, os::strerror(errno));
 438     }
 439     return false;
 440   }
 441   if (statbuf.st_nlink > 1) {
 442     // A file with multiple links is not expected.
 443     if (PrintMiscellaneous && Verbose) {
 444       warning("file %s has multiple links\n", filename);
 445     }
 446     return false;
 447   }
 448   return true;
 449 }
 450 
 451 
 452 // return the user name for the given user id
 453 //
 454 // the caller is expected to free the allocated memory.
 455 //
 456 static char* get_user_name(uid_t uid) {
 457 
 458   struct passwd pwent;
 459 
 460   // determine the max pwbuf size from sysconf, and hardcode
 461   // a default if this not available through sysconf.
 462   //
 463   long bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
 464   if (bufsize == -1)
 465     bufsize = 1024;
 466 
 467   char* pwbuf = NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);
 468 
 469   // POSIX interface to getpwuid_r is used on LINUX
 470   struct passwd* p;
 471   int result = getpwuid_r(uid, &pwent, pwbuf, (size_t)bufsize, &p);
 472 
 473   if (result != 0 || p == NULL || p->pw_name == NULL || *(p->pw_name) == '\0') {
 474     if (PrintMiscellaneous && Verbose) {
 475       if (result != 0) {
 476         warning("Could not retrieve passwd entry: %s\n",
 477                 os::strerror(result));
 478       }
 479       else if (p == NULL) {
 480         // this check is added to protect against an observed problem
 481         // with getpwuid_r() on RedHat 9 where getpwuid_r returns 0,
 482         // indicating success, but has p == NULL. This was observed when
 483         // inserting a file descriptor exhaustion fault prior to the call
 484         // getpwuid_r() call. In this case, error is set to the appropriate
 485         // error condition, but this is undocumented behavior. This check
 486         // is safe under any condition, but the use of errno in the output
 487         // message may result in an erroneous message.
 488         // Bug Id 89052 was opened with RedHat.
 489         //
 490         warning("Could not retrieve passwd entry: %s\n",
 491                 os::strerror(errno));
 492       }
 493       else {
 494         warning("Could not determine user name: %s\n",
 495                 p->pw_name == NULL ? "pw_name = NULL" :
 496                                      "pw_name zero length");
 497       }
 498     }
 499     FREE_C_HEAP_ARRAY(char, pwbuf);
 500     return NULL;
 501   }
 502 
 503   char* user_name = NEW_C_HEAP_ARRAY(char, strlen(p->pw_name) + 1, mtInternal);
 504   strcpy(user_name, p->pw_name);
 505 
 506   FREE_C_HEAP_ARRAY(char, pwbuf);
 507   return user_name;
 508 }
 509 
 510 // return the name of the user that owns the process identified by vmid.
 511 //
 512 // This method uses a slow directory search algorithm to find the backing
 513 // store file for the specified vmid and returns the user name, as determined
 514 // by the user name suffix of the hsperfdata_<username> directory name.
 515 //
 516 // the caller is expected to free the allocated memory.
 517 //
 518 // If nspid != -1, look in /proc/{vmid}/root/tmp for directories
 519 // containing nspid, otherwise just look for vmid in /tmp
 520 //
 521 static char* get_user_name_slow(int vmid, int nspid, TRAPS) {
 522 
 523   // short circuit the directory search if the process doesn't even exist.
 524   if (kill(vmid, 0) == OS_ERR) {
 525     if (errno == ESRCH) {
 526       THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(),
 527                   "Process not found");
 528     }
 529     else /* EPERM */ {
 530       THROW_MSG_0(vmSymbols::java_io_IOException(), os::strerror(errno));
 531     }
 532   }
 533 
 534   // directory search
 535   char* oldest_user = NULL;
 536   time_t oldest_ctime = 0;
 537   char buffer[TMP_BUFFER_LEN];
 538   int searchpid;
 539   char* tmpdirname = (char *)os::get_temp_directory();
 540   assert(strlen(tmpdirname) == 4, "No longer using /tmp - update buffer size");
 541 
 542   if (nspid == -1) {
 543     searchpid = vmid;
 544   }
 545   else {
 546     jio_snprintf(buffer, MAXPATHLEN, "/proc/%d/root%s", vmid, tmpdirname);
 547     tmpdirname = buffer;
 548     searchpid = nspid;
 549   }
 550 
 551   // open the temp directory
 552   DIR* tmpdirp = os::opendir(tmpdirname);
 553 
 554   if (tmpdirp == NULL) {
 555     // Cannot open the directory to get the user name, return.
 556     return NULL;
 557   }
 558 
 559   // for each entry in the directory that matches the pattern hsperfdata_*,
 560   // open the directory and check if the file for the given vmid or nspid exists.
 561   // The file with the expected name and the latest creation date is used
 562   // to determine the user name for the process id.
 563   //
 564   struct dirent* dentry;
 565   errno = 0;
 566   while ((dentry = os::readdir(tmpdirp, NULL)) != NULL) {
 567 
 568     // check if the directory entry is a hsperfdata file
 569     if (strncmp(dentry->d_name, PERFDATA_NAME, strlen(PERFDATA_NAME)) != 0) {
 570       continue;
 571     }
 572 
 573     char* usrdir_name = NEW_C_HEAP_ARRAY(char,
 574                      strlen(tmpdirname) + strlen(dentry->d_name) + 2, mtInternal);
 575     strcpy(usrdir_name, tmpdirname);
 576     strcat(usrdir_name, "/");
 577     strcat(usrdir_name, dentry->d_name);
 578 
 579     // open the user directory
 580     DIR* subdirp = open_directory_secure(usrdir_name);
 581 
 582     if (subdirp == NULL) {
 583       FREE_C_HEAP_ARRAY(char, usrdir_name);
 584       continue;
 585     }
 586 
 587     // Since we don't create the backing store files in directories
 588     // pointed to by symbolic links, we also don't follow them when
 589     // looking for the files. We check for a symbolic link after the
 590     // call to opendir in order to eliminate a small window where the
 591     // symlink can be exploited.
 592     //
 593     if (!is_directory_secure(usrdir_name)) {
 594       FREE_C_HEAP_ARRAY(char, usrdir_name);
 595       os::closedir(subdirp);
 596       continue;
 597     }
 598 
 599     struct dirent* udentry;
 600     errno = 0;
 601     while ((udentry = os::readdir(subdirp, NULL)) != NULL) {
 602 
 603       if (filename_to_pid(udentry->d_name) == searchpid) {
 604         struct stat statbuf;
 605         int result;
 606 
 607         char* filename = NEW_C_HEAP_ARRAY(char,
 608                    strlen(usrdir_name) + strlen(udentry->d_name) + 2, mtInternal);
 609 
 610         strcpy(filename, usrdir_name);
 611         strcat(filename, "/");
 612         strcat(filename, udentry->d_name);
 613 
 614         // don't follow symbolic links for the file
 615         RESTARTABLE(::lstat(filename, &statbuf), result);
 616         if (result == OS_ERR) {
 617            FREE_C_HEAP_ARRAY(char, filename);
 618            continue;
 619         }
 620 
 621         // skip over files that are not regular files.
 622         if (!S_ISREG(statbuf.st_mode)) {
 623           FREE_C_HEAP_ARRAY(char, filename);
 624           continue;
 625         }
 626 
 627         // compare and save filename with latest creation time
 628         if (statbuf.st_size > 0 && statbuf.st_ctime > oldest_ctime) {
 629 
 630           if (statbuf.st_ctime > oldest_ctime) {
 631             char* user = strchr(dentry->d_name, '_') + 1;
 632 
 633             if (oldest_user != NULL) FREE_C_HEAP_ARRAY(char, oldest_user);
 634             oldest_user = NEW_C_HEAP_ARRAY(char, strlen(user)+1, mtInternal);
 635 
 636             strcpy(oldest_user, user);
 637             oldest_ctime = statbuf.st_ctime;
 638           }
 639         }
 640 
 641         FREE_C_HEAP_ARRAY(char, filename);
 642       }
 643     }
 644     os::closedir(subdirp);
 645     FREE_C_HEAP_ARRAY(char, usrdir_name);
 646   }
 647   os::closedir(tmpdirp);
 648 
 649   return(oldest_user);
 650 }
 651 
 652 // Determine if the vmid is the parent pid
 653 // for a child in a PID namespace.
 654 // return the namespace pid if so, otherwise -1
 655 static int get_namespace_pid(int vmid) {
 656   char fname[24];
 657   int retpid = -1;
 658 
 659   snprintf(fname, sizeof(fname), "/proc/%d/status", vmid);
 660   FILE *fp = fopen(fname, "r");
 661 
 662   if (fp) {
 663     int pid, nspid;
 664     int ret;
 665     while (!feof(fp)) {
 666       ret = fscanf(fp, "NSpid: %d %d", &pid, &nspid);
 667       if (ret == 1) {
 668         break;
 669       }
 670       if (ret == 2) {
 671         retpid = nspid;
 672         break;
 673       }
 674       for (;;) {
 675         int ch = fgetc(fp);
 676         if (ch == EOF || ch == (int)'\n') break;
 677       }
 678     }
 679     fclose(fp);
 680   }
 681   return retpid;
 682 }
 683 
 684 // return the name of the user that owns the JVM indicated by the given vmid.
 685 //
 686 static char* get_user_name(int vmid, int *nspid, TRAPS) {
 687   char *result = get_user_name_slow(vmid, *nspid, THREAD);
 688 
 689   // If we are examining a container process without PID namespaces enabled
 690   // we need to use /proc/{pid}/root/tmp to find hsperfdata files.
 691   if (result == NULL) {
 692     result = get_user_name_slow(vmid, vmid, THREAD);
 693     // Enable nspid logic going forward
 694     if (result != NULL) *nspid = vmid;
 695   }
 696   return result;
 697 }
 698 
 699 // return the file name of the backing store file for the named
 700 // shared memory region for the given user name and vmid.
 701 //
 702 // the caller is expected to free the allocated memory.
 703 //
 704 static char* get_sharedmem_filename(const char* dirname, int vmid, int nspid) {
 705 
 706   int pid = (nspid == -1) ? vmid : nspid;
 707 
 708   // add 2 for the file separator and a null terminator.
 709   size_t nbytes = strlen(dirname) + UINT_CHARS + 2;
 710 
 711   char* name = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
 712   snprintf(name, nbytes, "%s/%d", dirname, pid);
 713 
 714   return name;
 715 }
 716 
 717 
 718 // remove file
 719 //
 720 // this method removes the file specified by the given path
 721 //
 722 static void remove_file(const char* path) {
 723 
 724   int result;
 725 
 726   // if the file is a directory, the following unlink will fail. since
 727   // we don't expect to find directories in the user temp directory, we
 728   // won't try to handle this situation. even if accidentially or
 729   // maliciously planted, the directory's presence won't hurt anything.
 730   //
 731   RESTARTABLE(::unlink(path), result);
 732   if (PrintMiscellaneous && Verbose && result == OS_ERR) {
 733     if (errno != ENOENT) {
 734       warning("Could not unlink shared memory backing"
 735               " store file %s : %s\n", path, os::strerror(errno));
 736     }
 737   }
 738 }
 739 
 740 
 741 // cleanup stale shared memory resources
 742 //
 743 // This method attempts to remove all stale shared memory files in
 744 // the named user temporary directory. It scans the named directory
 745 // for files matching the pattern ^$[0-9]*$. For each file found, the
 746 // process id is extracted from the file name and a test is run to
 747 // determine if the process is alive. If the process is not alive,
 748 // any stale file resources are removed.
 749 //
 750 static void cleanup_sharedmem_resources(const char* dirname) {
 751 
 752   int saved_cwd_fd;
 753   // open the directory
 754   DIR* dirp = open_directory_secure_cwd(dirname, &saved_cwd_fd);
 755   if (dirp == NULL) {
 756     // directory doesn't exist or is insecure, so there is nothing to cleanup
 757     return;
 758   }
 759 
 760   // for each entry in the directory that matches the expected file
 761   // name pattern, determine if the file resources are stale and if
 762   // so, remove the file resources. Note, instrumented HotSpot processes
 763   // for this user may start and/or terminate during this search and
 764   // remove or create new files in this directory. The behavior of this
 765   // loop under these conditions is dependent upon the implementation of
 766   // opendir/readdir.
 767   //
 768   struct dirent* entry;
 769 
 770   errno = 0;
 771   while ((entry = os::readdir(dirp, NULL)) != NULL) {
 772 
 773     pid_t pid = filename_to_pid(entry->d_name);
 774 
 775     if (pid == 0) {
 776 
 777       if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
 778         // attempt to remove all unexpected files, except "." and ".."
 779         unlink(entry->d_name);
 780       }
 781 
 782       errno = 0;
 783       continue;
 784     }
 785 
 786     // we now have a file name that converts to a valid integer
 787     // that could represent a process id . if this process id
 788     // matches the current process id or the process is not running,
 789     // then remove the stale file resources.
 790     //
 791     // process liveness is detected by sending signal number 0 to
 792     // the process id (see kill(2)). if kill determines that the
 793     // process does not exist, then the file resources are removed.
 794     // if kill determines that that we don't have permission to
 795     // signal the process, then the file resources are assumed to
 796     // be stale and are removed because the resources for such a
 797     // process should be in a different user specific directory.
 798     //
 799     if ((pid == os::current_process_id()) ||
 800         (kill(pid, 0) == OS_ERR && (errno == ESRCH || errno == EPERM))) {
 801         unlink(entry->d_name);
 802     }
 803     errno = 0;
 804   }
 805 
 806   // close the directory and reset the current working directory
 807   close_directory_secure_cwd(dirp, saved_cwd_fd);
 808 }
 809 
 810 // make the user specific temporary directory. Returns true if
 811 // the directory exists and is secure upon return. Returns false
 812 // if the directory exists but is either a symlink, is otherwise
 813 // insecure, or if an error occurred.
 814 //
 815 static bool make_user_tmp_dir(const char* dirname) {
 816 
 817   // create the directory with 0755 permissions. note that the directory
 818   // will be owned by euid::egid, which may not be the same as uid::gid.
 819   //
 820   if (mkdir(dirname, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) == OS_ERR) {
 821     if (errno == EEXIST) {
 822       // The directory already exists and was probably created by another
 823       // JVM instance. However, this could also be the result of a
 824       // deliberate symlink. Verify that the existing directory is safe.
 825       //
 826       if (!is_directory_secure(dirname)) {
 827         // directory is not secure
 828         if (PrintMiscellaneous && Verbose) {
 829           warning("%s directory is insecure\n", dirname);
 830         }
 831         return false;
 832       }
 833     }
 834     else {
 835       // we encountered some other failure while attempting
 836       // to create the directory
 837       //
 838       if (PrintMiscellaneous && Verbose) {
 839         warning("could not create directory %s: %s\n",
 840                 dirname, os::strerror(errno));
 841       }
 842       return false;
 843     }
 844   }
 845   return true;
 846 }
 847 
 848 // create the shared memory file resources
 849 //
 850 // This method creates the shared memory file with the given size
 851 // This method also creates the user specific temporary directory, if
 852 // it does not yet exist.
 853 //
 854 static int create_sharedmem_resources(const char* dirname, const char* filename, size_t size) {
 855 
 856   // make the user temporary directory
 857   if (!make_user_tmp_dir(dirname)) {
 858     // could not make/find the directory or the found directory
 859     // was not secure
 860     return -1;
 861   }
 862 
 863   int saved_cwd_fd;
 864   // open the directory and set the current working directory to it
 865   DIR* dirp = open_directory_secure_cwd(dirname, &saved_cwd_fd);
 866   if (dirp == NULL) {
 867     // Directory doesn't exist or is insecure, so cannot create shared
 868     // memory file.
 869     return -1;
 870   }
 871 
 872   // Open the filename in the current directory.
 873   // Cannot use O_TRUNC here; truncation of an existing file has to happen
 874   // after the is_file_secure() check below.
 875   int result;
 876   RESTARTABLE(::open(filename, O_RDWR|O_CREAT|O_NOFOLLOW, S_IREAD|S_IWRITE), result);
 877   if (result == OS_ERR) {
 878     if (PrintMiscellaneous && Verbose) {
 879       if (errno == ELOOP) {
 880         warning("file %s is a symlink and is not secure\n", filename);
 881       } else {
 882         warning("could not create file %s: %s\n", filename, os::strerror(errno));
 883       }
 884     }
 885     // close the directory and reset the current working directory
 886     close_directory_secure_cwd(dirp, saved_cwd_fd);
 887 
 888     return -1;
 889   }
 890   // close the directory and reset the current working directory
 891   close_directory_secure_cwd(dirp, saved_cwd_fd);
 892 
 893   // save the file descriptor
 894   int fd = result;
 895 
 896   // check to see if the file is secure
 897   if (!is_file_secure(fd, filename)) {
 898     ::close(fd);
 899     return -1;
 900   }
 901 
 902   // truncate the file to get rid of any existing data
 903   RESTARTABLE(::ftruncate(fd, (off_t)0), result);
 904   if (result == OS_ERR) {
 905     if (PrintMiscellaneous && Verbose) {
 906       warning("could not truncate shared memory file: %s\n", os::strerror(errno));
 907     }
 908     ::close(fd);
 909     return -1;
 910   }
 911   // set the file size
 912   RESTARTABLE(::ftruncate(fd, (off_t)size), result);
 913   if (result == OS_ERR) {
 914     if (PrintMiscellaneous && Verbose) {
 915       warning("could not set shared memory file size: %s\n", os::strerror(errno));
 916     }
 917     ::close(fd);
 918     return -1;
 919   }
 920 
 921   // Verify that we have enough disk space for this file.
 922   // We'll get random SIGBUS crashes on memory accesses if
 923   // we don't.
 924 
 925   for (size_t seekpos = 0; seekpos < size; seekpos += os::vm_page_size()) {
 926     int zero_int = 0;
 927     result = (int)os::seek_to_file_offset(fd, (jlong)(seekpos));
 928     if (result == -1 ) break;
 929     RESTARTABLE(::write(fd, &zero_int, 1), result);
 930     if (result != 1) {
 931       if (errno == ENOSPC) {
 932         warning("Insufficient space for shared memory file:\n   %s\nTry using the -Djava.io.tmpdir= option to select an alternate temp location.\n", filename);
 933       }
 934       break;
 935     }
 936   }
 937 
 938   if (result != -1) {
 939     return fd;
 940   } else {
 941     ::close(fd);
 942     return -1;
 943   }
 944 }
 945 
 946 // open the shared memory file for the given user and vmid. returns
 947 // the file descriptor for the open file or -1 if the file could not
 948 // be opened.
 949 //
 950 static int open_sharedmem_file(const char* filename, int oflags, TRAPS) {
 951 
 952   // open the file
 953   int result;
 954   RESTARTABLE(::open(filename, oflags), result);
 955   if (result == OS_ERR) {
 956     if (errno == ENOENT) {
 957       THROW_MSG_(vmSymbols::java_lang_IllegalArgumentException(),
 958                  "Process not found", OS_ERR);
 959     }
 960     else if (errno == EACCES) {
 961       THROW_MSG_(vmSymbols::java_lang_IllegalArgumentException(),
 962                  "Permission denied", OS_ERR);
 963     }
 964     else {
 965       THROW_MSG_(vmSymbols::java_io_IOException(),
 966                  os::strerror(errno), OS_ERR);
 967     }
 968   }
 969   int fd = result;
 970 
 971   // check to see if the file is secure
 972   if (!is_file_secure(fd, filename)) {
 973     ::close(fd);
 974     return -1;
 975   }
 976 
 977   return fd;
 978 }
 979 
 980 // create a named shared memory region. returns the address of the
 981 // memory region on success or NULL on failure. A return value of
 982 // NULL will ultimately disable the shared memory feature.
 983 //
 984 // On Linux, the name space for shared memory objects
 985 // is the file system name space.
 986 //
 987 // A monitoring application attaching to a JVM does not need to know
 988 // the file system name of the shared memory object. However, it may
 989 // be convenient for applications to discover the existence of newly
 990 // created and terminating JVMs by watching the file system name space
 991 // for files being created or removed.
 992 //
 993 static char* mmap_create_shared(size_t size) {
 994 
 995   int result;
 996   int fd;
 997   char* mapAddress;
 998 
 999   int vmid = os::current_process_id();
1000 
1001   char* user_name = get_user_name(geteuid());
1002 
1003   if (user_name == NULL)
1004     return NULL;
1005 
1006   char* dirname = get_user_tmp_dir(user_name, vmid, -1);
1007   char* filename = get_sharedmem_filename(dirname, vmid, -1);
1008 
1009   // get the short filename
1010   char* short_filename = strrchr(filename, '/');
1011   if (short_filename == NULL) {
1012     short_filename = filename;
1013   } else {
1014     short_filename++;
1015   }
1016 
1017   // cleanup any stale shared memory files
1018   cleanup_sharedmem_resources(dirname);
1019 
1020   assert(((size > 0) && (size % os::vm_page_size() == 0)),
1021          "unexpected PerfMemory region size");
1022 
1023   fd = create_sharedmem_resources(dirname, short_filename, size);
1024 
1025   FREE_C_HEAP_ARRAY(char, user_name);
1026   FREE_C_HEAP_ARRAY(char, dirname);
1027 
1028   if (fd == -1) {
1029     FREE_C_HEAP_ARRAY(char, filename);
1030     return NULL;
1031   }
1032 
1033   mapAddress = (char*)::mmap((char*)0, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1034 
1035   result = ::close(fd);
1036   assert(result != OS_ERR, "could not close file");
1037 
1038   if (mapAddress == MAP_FAILED) {
1039     if (PrintMiscellaneous && Verbose) {
1040       warning("mmap failed -  %s\n", os::strerror(errno));
1041     }
1042     remove_file(filename);
1043     FREE_C_HEAP_ARRAY(char, filename);
1044     return NULL;
1045   }
1046 
1047   // save the file name for use in delete_shared_memory()
1048   backing_store_file_name = filename;
1049 
1050   // clear the shared memory region
1051   (void)::memset((void*) mapAddress, 0, size);
1052 
1053   // it does not go through os api, the operation has to record from here
1054   MemTracker::record_virtual_memory_reserve_and_commit((address)mapAddress, size, CURRENT_PC, mtInternal);
1055 
1056   return mapAddress;
1057 }
1058 
1059 // release a named shared memory region
1060 //
1061 static void unmap_shared(char* addr, size_t bytes) {
1062   os::release_memory(addr, bytes);
1063 }
1064 
1065 // create the PerfData memory region in shared memory.
1066 //
1067 static char* create_shared_memory(size_t size) {
1068 
1069   // create the shared memory region.
1070   return mmap_create_shared(size);
1071 }
1072 
1073 // delete the shared PerfData memory region
1074 //
1075 static void delete_shared_memory(char* addr, size_t size) {
1076 
1077   // cleanup the persistent shared memory resources. since DestroyJavaVM does
1078   // not support unloading of the JVM, unmapping of the memory resource is
1079   // not performed. The memory will be reclaimed by the OS upon termination of
1080   // the process. The backing store file is deleted from the file system.
1081 
1082   assert(!PerfDisableSharedMem, "shouldn't be here");
1083 
1084   if (backing_store_file_name != NULL) {
1085     remove_file(backing_store_file_name);
1086     // Don't.. Free heap memory could deadlock os::abort() if it is called
1087     // from signal handler. OS will reclaim the heap memory.
1088     // FREE_C_HEAP_ARRAY(char, backing_store_file_name);
1089     backing_store_file_name = NULL;
1090   }
1091 }
1092 
1093 // return the size of the file for the given file descriptor
1094 // or 0 if it is not a valid size for a shared memory file
1095 //
1096 static size_t sharedmem_filesize(int fd, TRAPS) {
1097 
1098   struct stat statbuf;
1099   int result;
1100 
1101   RESTARTABLE(::fstat(fd, &statbuf), result);
1102   if (result == OS_ERR) {
1103     if (PrintMiscellaneous && Verbose) {
1104       warning("fstat failed: %s\n", os::strerror(errno));
1105     }
1106     THROW_MSG_0(vmSymbols::java_io_IOException(),
1107                 "Could not determine PerfMemory size");
1108   }
1109 
1110   if ((statbuf.st_size == 0) ||
1111      ((size_t)statbuf.st_size % os::vm_page_size() != 0)) {
1112     THROW_MSG_0(vmSymbols::java_lang_Exception(),
1113                 "Invalid PerfMemory size");
1114   }
1115 
1116   return (size_t)statbuf.st_size;
1117 }
1118 
1119 // attach to a named shared memory region.
1120 //
1121 static void mmap_attach_shared(const char* user, int vmid, PerfMemory::PerfMemoryMode mode, char** addr, size_t* sizep, TRAPS) {
1122 
1123   char* mapAddress;
1124   int result;
1125   int fd;
1126   size_t size = 0;
1127   const char* luser = NULL;
1128 
1129   int mmap_prot;
1130   int file_flags;
1131 
1132   ResourceMark rm;
1133 
1134   // map the high level access mode to the appropriate permission
1135   // constructs for the file and the shared memory mapping.
1136   if (mode == PerfMemory::PERF_MODE_RO) {
1137     mmap_prot = PROT_READ;
1138     file_flags = O_RDONLY | O_NOFOLLOW;
1139   }
1140   else if (mode == PerfMemory::PERF_MODE_RW) {
1141 #ifdef LATER
1142     mmap_prot = PROT_READ | PROT_WRITE;
1143     file_flags = O_RDWR | O_NOFOLLOW;
1144 #else
1145     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
1146               "Unsupported access mode");
1147 #endif
1148   }
1149   else {
1150     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
1151               "Illegal access mode");
1152   }
1153 
1154   // determine if vmid is for a containerized process
1155   int nspid = get_namespace_pid(vmid);
1156 
1157   if (user == NULL || strlen(user) == 0) {
1158     luser = get_user_name(vmid, &nspid, CHECK);
1159   }
1160   else {
1161     luser = user;
1162   }
1163 
1164   if (luser == NULL) {
1165     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
1166               "Could not map vmid to user Name");
1167   }
1168 
1169   char* dirname = get_user_tmp_dir(luser, vmid, nspid);
1170 
1171   // since we don't follow symbolic links when creating the backing
1172   // store file, we don't follow them when attaching either.
1173   //
1174   if (!is_directory_secure(dirname)) {
1175     FREE_C_HEAP_ARRAY(char, dirname);
1176     if (luser != user) {
1177       FREE_C_HEAP_ARRAY(char, luser);
1178     }
1179     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
1180               "Process not found");
1181   }
1182 
1183   char* filename = get_sharedmem_filename(dirname, vmid, nspid);
1184 
1185   // copy heap memory to resource memory. the open_sharedmem_file
1186   // method below need to use the filename, but could throw an
1187   // exception. using a resource array prevents the leak that
1188   // would otherwise occur.
1189   char* rfilename = NEW_RESOURCE_ARRAY(char, strlen(filename) + 1);
1190   strcpy(rfilename, filename);
1191 
1192   // free the c heap resources that are no longer needed
1193   if (luser != user) FREE_C_HEAP_ARRAY(char, luser);
1194   FREE_C_HEAP_ARRAY(char, dirname);
1195   FREE_C_HEAP_ARRAY(char, filename);
1196 
1197   // open the shared memory file for the give vmid
1198   fd = open_sharedmem_file(rfilename, file_flags, THREAD);
1199 
1200   if (fd == OS_ERR) {
1201     return;
1202   }
1203 
1204   if (HAS_PENDING_EXCEPTION) {
1205     ::close(fd);
1206     return;
1207   }
1208 
1209   if (*sizep == 0) {
1210     size = sharedmem_filesize(fd, CHECK);
1211   } else {
1212     size = *sizep;
1213   }
1214 
1215   assert(size > 0, "unexpected size <= 0");
1216 
1217   mapAddress = (char*)::mmap((char*)0, size, mmap_prot, MAP_SHARED, fd, 0);
1218 
1219   result = ::close(fd);
1220   assert(result != OS_ERR, "could not close file");
1221 
1222   if (mapAddress == MAP_FAILED) {
1223     if (PrintMiscellaneous && Verbose) {
1224       warning("mmap failed: %s\n", os::strerror(errno));
1225     }
1226     THROW_MSG(vmSymbols::java_lang_OutOfMemoryError(),
1227               "Could not map PerfMemory");
1228   }
1229 
1230   // it does not go through os api, the operation has to record from here
1231   MemTracker::record_virtual_memory_reserve_and_commit((address)mapAddress, size, CURRENT_PC, mtInternal);
1232 
1233   *addr = mapAddress;
1234   *sizep = size;
1235 
1236   log_debug(perf, memops)("mapped " SIZE_FORMAT " bytes for vmid %d at "
1237                           INTPTR_FORMAT "\n", size, vmid, p2i((void*)mapAddress));
1238 }
1239 
1240 // create the PerfData memory region
1241 //
1242 // This method creates the memory region used to store performance
1243 // data for the JVM. The memory may be created in standard or
1244 // shared memory.
1245 //
1246 void PerfMemory::create_memory_region(size_t size) {
1247 
1248   if (PerfDisableSharedMem) {
1249     // do not share the memory for the performance data.
1250     _start = create_standard_memory(size);
1251   }
1252   else {
1253     _start = create_shared_memory(size);
1254     if (_start == NULL) {
1255 
1256       // creation of the shared memory region failed, attempt
1257       // to create a contiguous, non-shared memory region instead.
1258       //
1259       if (PrintMiscellaneous && Verbose) {
1260         warning("Reverting to non-shared PerfMemory region.\n");
1261       }
1262       PerfDisableSharedMem = true;
1263       _start = create_standard_memory(size);
1264     }
1265   }
1266 
1267   if (_start != NULL) _capacity = size;
1268 
1269 }
1270 
1271 // delete the PerfData memory region
1272 //
1273 // This method deletes the memory region used to store performance
1274 // data for the JVM. The memory region indicated by the <address, size>
1275 // tuple will be inaccessible after a call to this method.
1276 //
1277 void PerfMemory::delete_memory_region() {
1278 
1279   assert((start() != NULL && capacity() > 0), "verify proper state");
1280 
1281   // If user specifies PerfDataSaveFile, it will save the performance data
1282   // to the specified file name no matter whether PerfDataSaveToFile is specified
1283   // or not. In other word, -XX:PerfDataSaveFile=.. overrides flag
1284   // -XX:+PerfDataSaveToFile.
1285   if (PerfDataSaveToFile || PerfDataSaveFile != NULL) {
1286     save_memory_to_file(start(), capacity());
1287   }
1288 
1289   if (PerfDisableSharedMem) {
1290     delete_standard_memory(start(), capacity());
1291   }
1292   else {
1293     delete_shared_memory(start(), capacity());
1294   }
1295 }
1296 
1297 // attach to the PerfData memory region for another JVM
1298 //
1299 // This method returns an <address, size> tuple that points to
1300 // a memory buffer that is kept reasonably synchronized with
1301 // the PerfData memory region for the indicated JVM. This
1302 // buffer may be kept in synchronization via shared memory
1303 // or some other mechanism that keeps the buffer updated.
1304 //
1305 // If the JVM chooses not to support the attachability feature,
1306 // this method should throw an UnsupportedOperation exception.
1307 //
1308 // This implementation utilizes named shared memory to map
1309 // the indicated process's PerfData memory region into this JVMs
1310 // address space.
1311 //
1312 void PerfMemory::attach(const char* user, int vmid, PerfMemoryMode mode, char** addrp, size_t* sizep, TRAPS) {
1313 
1314   if (vmid == 0 || vmid == os::current_process_id()) {
1315      *addrp = start();
1316      *sizep = capacity();
1317      return;
1318   }
1319 
1320   mmap_attach_shared(user, vmid, mode, addrp, sizep, CHECK);
1321 }
1322 
1323 // detach from the PerfData memory region of another JVM
1324 //
1325 // This method detaches the PerfData memory region of another
1326 // JVM, specified as an <address, size> tuple of a buffer
1327 // in this process's address space. This method may perform
1328 // arbitrary actions to accomplish the detachment. The memory
1329 // region specified by <address, size> will be inaccessible after
1330 // a call to this method.
1331 //
1332 // If the JVM chooses not to support the attachability feature,
1333 // this method should throw an UnsupportedOperation exception.
1334 //
1335 // This implementation utilizes named shared memory to detach
1336 // the indicated process's PerfData memory region from this
1337 // process's address space.
1338 //
1339 void PerfMemory::detach(char* addr, size_t bytes, TRAPS) {
1340 
1341   assert(addr != 0, "address sanity check");
1342   assert(bytes > 0, "capacity sanity check");
1343 
1344   if (PerfMemory::contains(addr) || PerfMemory::contains(addr + bytes - 1)) {
1345     // prevent accidental detachment of this process's PerfMemory region
1346     return;
1347   }
1348 
1349   unmap_shared(addr, bytes);
1350 }