src/os/aix/vm/perfMemory_aix.cpp

Print this page
rev 7076 : 8069590: AIX port of "8050807: Better performing performance data handling"
Reviewed-by: simonis, goetz
Contributed-by: matthias.baesken@sap.com, martin.doerr@sap.com

@@ -29,10 +29,11 @@
 #include "memory/resourceArea.hpp"
 #include "oops/oop.inline.hpp"
 #include "os_aix.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/perfMemory.hpp"
+#include "services/memTracker.hpp"
 #include "utilities/exceptions.hpp"
 
 // put OS-includes here
 # include <sys/types.h>
 # include <sys/mman.h>

@@ -194,57 +195,320 @@
 
   // successful conversion, return the pid
   return pid;
 }
 
+// Check if the given statbuf describes a secure directory for the backing
+// store files. Returns true only if it is a directory (not a symbolic link
+// or other file type), is not writable by group or others, and is owned by
+// the effective uid of this process. Returns false in all other cases.
+static bool is_statbuf_secure(struct stat *statp) {
+  if (S_ISLNK(statp->st_mode) || !S_ISDIR(statp->st_mode)) {
+    // The path represents a link or some non-directory file type,
+    // which is not what we expected. Declare it insecure.
+    //
+    return false;
+  }
+  // We have an existing directory, check if the permissions are safe.
+  if ((statp->st_mode & (S_IWGRP|S_IWOTH)) != 0) {
+    // The directory is writable by group or others and could be subjected
+    // to a symlink or a hard link attack. Declare it insecure.
+    return false;
+  }
+  // See if the uid of the directory matches the effective uid of the process.
+  //
+  if (statp->st_uid != geteuid()) {
+    // The directory is not owned by the effective user, declare it insecure.
+    return false;
+  }
+  return true;
+}
+
 
-// check if the given path is considered a secure directory for
+// Check if the given path is considered a secure directory for
 // the backing store files. Returns true if the directory exists
 // and is considered a secure location. Returns false if the path
 // is a symbolic link or if an error occurred.
-//
 static bool is_directory_secure(const char* path) {
   struct stat statbuf;
   int result = 0;
 
   RESTARTABLE(::lstat(path, &statbuf), result);
   if (result == OS_ERR) {
     return false;
   }
 
-  // the path exists, now check it's mode
-  if (S_ISLNK(statbuf.st_mode) || !S_ISDIR(statbuf.st_mode)) {
-    // the path represents a link or some non-directory file type,
-    // which is not what we expected. declare it insecure.
-    //
+  // The path exists; lstat() reports a link itself, so is_statbuf_secure() can reject it.
+  return is_statbuf_secure(&statbuf);
+}
+
+// (Taken over from Solaris to support the O_NOFOLLOW case on AIX.)
+// Check if the given directory file descriptor is considered a secure
+// directory for the backing store files. Returns true if fstat() on the
+// descriptor succeeds and is_statbuf_secure() accepts the result. Returns
+// false if fstat() fails or the descriptor is not a secure directory.
+static bool is_dirfd_secure(int dir_fd) {
+  struct stat statbuf;
+  int result = 0;
+
+  RESTARTABLE(::fstat(dir_fd, &statbuf), result);
+  if (result == OS_ERR) {
     return false;
   }
-  else {
-    // we have an existing directory, check if the permissions are safe.
-    //
-    if ((statbuf.st_mode & (S_IWGRP|S_IWOTH)) != 0) {
-      // the directory is open for writing and could be subjected
-      // to a symlnk attack. declare it insecure.
-      //
+
+  // fstat() succeeded, now check the file type, permissions and owner.
+  return is_statbuf_secure(&statbuf);
+}
+
+
+// Return true if fd1 and fd2 reference the same file system object (same inode and device); false otherwise or if fstat() fails.
+static bool is_same_fsobject(int fd1, int fd2) {
+  struct stat statbuf1;
+  struct stat statbuf2;
+  int result = 0;
+
+  RESTARTABLE(::fstat(fd1, &statbuf1), result);
+  if (result == OS_ERR) {
       return false;
     }
+  RESTARTABLE(::fstat(fd2, &statbuf2), result);
+  if (result == OS_ERR) {
+    return false;
   }
+
+  if ((statbuf1.st_ino == statbuf2.st_ino) &&
+      (statbuf1.st_dev == statbuf2.st_dev)) {
   return true;
+  } else {
+    return false;
+  }
 }
 
+// Helper functions for open without O_NOFOLLOW which is not present on AIX 5.3/6.1.
+// We use the jdk6 implementation here.
+#ifndef O_NOFOLLOW
+// The O_NOFOLLOW oflag doesn't exist before solaris 5.10; this simulates that behaviour
+// the same way it was done in jdk 5/6 hotspot by Oracle.
+static int open_o_nofollow_impl(const char* path, int oflag, mode_t mode, bool use_mode) {
+  struct stat orig_st;
+  struct stat new_st;
+  bool create;
+  int error;
+  int fd;
+
+  create = false;
+
+  if (lstat(path, &orig_st) != 0) {
+    if (errno == ENOENT && (oflag & O_CREAT) != 0) {
+      // File doesn't exist, but we want to create it, add O_EXCL flag
+      // to make sure no-one creates it (or a symlink) before us.
+      // This works as we expect with symlinks, from posix man page:
+      // 'If O_EXCL  and  O_CREAT  are set, and path names a symbolic
+      // link, open() shall fail and set errno to [EEXIST]'.
+      oflag |= O_EXCL;
+      create = true;
+    } else {
+      // lstat failed and we are not creating the file (or the error is not ENOENT).
+      return OS_ERR;
+    }
+  } else {
+    // Lstat success, check if existing file is a link.
+    if ((orig_st.st_mode & S_IFMT) == S_IFLNK)  {
+      // File is a symlink; report it the way O_NOFOLLOW would (ELOOP).
+      errno = ELOOP;
+      return OS_ERR;
+    }
+  }
+
+  if (use_mode == true) {
+    fd = open(path, oflag, mode);
+  } else {
+    fd = open(path, oflag);
+  }
 
-// return the user name for the given user id
+  if (fd == OS_ERR) {
+    return fd;
+  }
+
+  // Can't do inode checks on before/after if we created the file.
+  if (create == false) {
+    if (fstat(fd, &new_st) != 0) {
+      // Keep errno from fstat, in case close also fails.
+      error = errno;
+      ::close(fd);
+      errno = error;
+      return OS_ERR;
+    }
+
+    if (orig_st.st_dev != new_st.st_dev || orig_st.st_ino != new_st.st_ino) {
+      // The opened file is not the one lstat saw: it was replaced during the race window.
+      ::close(fd);
+      errno = EEXIST;
+      if (PrintMiscellaneous && Verbose) {
+        warning("possible file tampering attempt detected when opening %s", path);
+      }
+      return OS_ERR;
+    }
+  }
+
+  return fd;
+}
+
+static int open_o_nofollow(const char* path, int oflag, mode_t mode) {
+  return open_o_nofollow_impl(path, oflag, mode, true);
+}
+
+static int open_o_nofollow(const char* path, int oflag) {
+  return open_o_nofollow_impl(path, oflag, 0, false);
+}
+#endif
+
+// Open the directory of the given path and validate it.
+// Return a DIR * of the open directory, or NULL if it could not be opened or is not secure.
+static DIR *open_directory_secure(const char* dirname) {
+  // Open the directory using open() so that the resulting descriptor can be
+  // verified by is_dirfd_secure(). Then call opendir() and check that both
+  // refer to the same file system object. Unlike calling opendir() followed
+  // by is_directory_secure() on the path, this does not introduce a window
+  // of opportunity for the directory to be attacked between check and use.
+  int result;
+  DIR *dirp = NULL;
+
+  // O_NOFOLLOW may not be defined at buildtime (it is not present on
+  // AIX 5.3/6.1), so provide a workaround in this case.
+#ifdef O_NOFOLLOW
+  RESTARTABLE(::open(dirname, O_RDONLY|O_NOFOLLOW), result);
+#else
+  // workaround (jdk6 coding)
+  RESTARTABLE(::open_o_nofollow(dirname, O_RDONLY), result);
+#endif
+
+  if (result == OS_ERR) {
+    // Directory could not be opened or is a symlink, so there is nothing to cleanup.
+    if (PrintMiscellaneous && Verbose) {
+      if (errno == ELOOP) {
+        warning("directory %s is a symlink and is not secure\n", dirname);
+      } else {
+        warning("could not open directory %s: %s\n", dirname, strerror(errno));
+      }
+    }
+    return dirp;
+  }
+  int fd = result;
+
+  // Determine if the open directory is secure.
+  if (!is_dirfd_secure(fd)) {
+    // The directory is not a secure directory.
+    os::close(fd);
+    return dirp;
+  }
+
+  // Open the directory.
+  dirp = ::opendir(dirname);
+  if (dirp == NULL) {
+    // opendir() failed, close fd and return.
+    os::close(fd);
+    return dirp;
+  }
+
+  // Check to make sure fd and dirp are referencing the same file system object.
+  if (!is_same_fsobject(fd, dirp->dd_fd)) {
+    // The directory is not secure.
+    os::close(fd);
+    os::closedir(dirp);
+    dirp = NULL;
+    return dirp;
+  }
+
+  // Close initial open now that we know directory is secure
+  os::close(fd);
+
+  return dirp;
+}
+
+// NOTE: The code below uses fchdir(), open() and unlink() because
+// fdopendir(), openat() and unlinkat() are not supported on all
+// versions.  Once the support for fdopendir(), openat() and unlinkat()
+// is available on all supported versions the code can be changed
+// to use these functions.
+
+// Open the directory of the given path, validate it and set the
+// current working directory to it.
+// Return a DIR * of the open directory and the saved cwd fd.
+// Returns NULL if the directory cannot be opened securely or if the
+// current working directory cannot be changed to it; in both cases
+// *saved_cwd_fd is set to -1 and nothing is left open.
 //
-// the caller is expected to free the allocated memory.
+static DIR *open_directory_secure_cwd(const char* dirname, int *saved_cwd_fd) {
+
+  // Never hand back an uninitialized fd, even on the failure paths.
+  *saved_cwd_fd = -1;
+
+  // Open the directory.
+  DIR* dirp = open_directory_secure(dirname);
+  if (dirp == NULL) {
+    // Directory doesn't exist or is insecure, so there is nothing to cleanup.
+    return dirp;
+  }
+  int fd = dirp->dd_fd;
+
+  // Open a fd to the cwd and save it off. If this fails *saved_cwd_fd
+  // stays -1 and close_directory_secure_cwd() will not restore the cwd.
+  int result;
+  RESTARTABLE(::open(".", O_RDONLY), result);
+  if (result != OS_ERR) {
+    *saved_cwd_fd = result;
+  }
+
+  // Set the current directory to dirname by using the fd of the directory.
+  // The result has to be checked: the callers work with relative file names,
+  // so after a failed fchdir() they would create or unlink files in whatever
+  // directory happens to be the current one.
+  result = fchdir(fd);
+  if (result == OS_ERR) {
+    if (PrintMiscellaneous && Verbose) {
+      warning("could not change to directory %s: %s\n", dirname, strerror(errno));
+    }
+    // The cwd was not changed, so only release what was opened here.
+    if (*saved_cwd_fd != -1) {
+      ::close(*saved_cwd_fd);
+      *saved_cwd_fd = -1;
+    }
+    os::closedir(dirp);
+    return NULL;
+  }
+
+  return dirp;
+}
+
+// Close the directory and, if saved_cwd_fd is not -1, change back to that saved working directory and close the fd.
+static void close_directory_secure_cwd(DIR* dirp, int saved_cwd_fd) {
+
+  int result;
+  // If we have a saved cwd change back to it and close the fd.
+  if (saved_cwd_fd != -1) {
+    result = fchdir(saved_cwd_fd);  // NOTE(review): result is never checked; on failure the cwd is not restored
+    ::close(saved_cwd_fd);
+  }
+
+  // Close the directory.
+  os::closedir(dirp);
+}
+
+// Check if the file open on fd is considered secure: fstat() must succeed and st_nlink must not exceed 1.
+static bool is_file_secure(int fd, const char *filename) {
+
+  int result;
+  struct stat statbuf;
+
+  // Determine if the file is secure; filename is only used in the warnings.
+  RESTARTABLE(::fstat(fd, &statbuf), result);
+  if (result == OS_ERR) {
+    if (PrintMiscellaneous && Verbose) {
+      warning("fstat failed on %s: %s\n", filename, strerror(errno));
+    }
+    return false;
+  }
+  if (statbuf.st_nlink > 1) {
+    // A file with multiple hard links is not expected and is rejected.
+    if (PrintMiscellaneous && Verbose) {
+      warning("file %s has multiple links\n", filename);
+    }
+    return false;
+  }
+  return true;
+}
+
+// Return the user name for the given user id.
 //
+// The caller is expected to free the allocated memory.
 static char* get_user_name(uid_t uid) {
 
   struct passwd pwent;
 
-  // determine the max pwbuf size from sysconf, and hardcode
+  // Determine the max pwbuf size from sysconf, and hardcode
   // a default if this not available through sysconf.
-  //
   long bufsize = sysconf(_SC_GETPW_R_SIZE_MAX);
   if (bufsize == -1)
     bufsize = 1024;
 
   char* pwbuf = NEW_C_HEAP_ARRAY(char, bufsize, mtInternal);

@@ -342,11 +606,12 @@
                               strlen(tmpdirname) + strlen(dentry->d_name) + 2, mtInternal);
     strcpy(usrdir_name, tmpdirname);
     strcat(usrdir_name, "/");
     strcat(usrdir_name, dentry->d_name);
 
-    DIR* subdirp = os::opendir(usrdir_name);
+    // Open the user directory.
+    DIR* subdirp = open_directory_secure(usrdir_name);
 
     if (subdirp == NULL) {
       FREE_C_HEAP_ARRAY(char, usrdir_name, mtInternal);
       continue;
     }

@@ -462,124 +727,96 @@
               " store file %s : %s\n", path, strerror(errno));
     }
   }
 }
 
-
-// remove file
-//
-// this method removes the file with the given file name in the
-// named directory.
-//
-static void remove_file(const char* dirname, const char* filename) {
-
-  size_t nbytes = strlen(dirname) + strlen(filename) + 2;
-  char* path = NEW_C_HEAP_ARRAY(char, nbytes, mtInternal);
-
-  strcpy(path, dirname);
-  strcat(path, "/");
-  strcat(path, filename);
-
-  remove_file(path);
-
-  FREE_C_HEAP_ARRAY(char, path, mtInternal);
-}
-
-
-// cleanup stale shared memory resources
+// Cleanup stale shared memory resources.
 //
 // This method attempts to remove all stale shared memory files in
 // the named user temporary directory. It scans the named directory
 // for files matching the pattern ^$[0-9]*$. For each file found, the
 // process id is extracted from the file name and a test is run to
 // determine if the process is alive. If the process is not alive,
 // any stale file resources are removed.
-//
 static void cleanup_sharedmem_resources(const char* dirname) {
 
-  // open the user temp directory
-  DIR* dirp = os::opendir(dirname);
-
+  int saved_cwd_fd;
+  // Open the directory securely and make it the current working directory.
+  DIR* dirp = open_directory_secure_cwd(dirname, &saved_cwd_fd);
   if (dirp == NULL) {
-    // directory doesn't exist, so there is nothing to cleanup
-    return;
-  }
-
-  if (!is_directory_secure(dirname)) {
-    // the directory is not a secure directory
+     // Directory doesn't exist or is insecure, so there is nothing to cleanup.
     return;
   }
 
-  // for each entry in the directory that matches the expected file
+  // For each entry in the directory that matches the expected file
   // name pattern, determine if the file resources are stale and if
   // so, remove the file resources. Note, instrumented HotSpot processes
   // for this user may start and/or terminate during this search and
   // remove or create new files in this directory. The behavior of this
   // loop under these conditions is dependent upon the implementation of
   // opendir/readdir.
-  //
   struct dirent* entry;
   char* dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(dirname), mtInternal);
+
   errno = 0;
   while ((entry = os::readdir(dirp, (struct dirent *)dbuf)) != NULL) {
 
     pid_t pid = filename_to_pid(entry->d_name);
 
     if (pid == 0) {
 
       if (strcmp(entry->d_name, ".") != 0 && strcmp(entry->d_name, "..") != 0) {
 
-        // attempt to remove all unexpected files, except "." and ".."
-        remove_file(dirname, entry->d_name);
+        // Attempt to remove all unexpected files, except "." and "..".
+        unlink(entry->d_name);  // relative name: relies on the cwd having been changed to dirname above
       }
 
       errno = 0;
       continue;
     }
 
-    // we now have a file name that converts to a valid integer
+    // We now have a file name that converts to a valid integer
     // that could represent a process id . if this process id
     // matches the current process id or the process is not running,
     // then remove the stale file resources.
     //
-    // process liveness is detected by sending signal number 0 to
+    // Process liveness is detected by sending signal number 0 to
     // the process id (see kill(2)). if kill determines that the
     // process does not exist, then the file resources are removed.
     // if kill determines that that we don't have permission to
     // signal the process, then the file resources are assumed to
     // be stale and are removed because the resources for such a
     // process should be in a different user specific directory.
-    //
     if ((pid == os::current_process_id()) ||
         (kill(pid, 0) == OS_ERR && (errno == ESRCH || errno == EPERM))) {
 
-        remove_file(dirname, entry->d_name);
+        unlink(entry->d_name);  // relative name: relies on the cwd having been changed to dirname above
     }
     errno = 0;
   }
-  os::closedir(dirp);
+
+  // Close the directory and change back to the saved working directory, if one was saved.
+  close_directory_secure_cwd(dirp, saved_cwd_fd);
+
   FREE_C_HEAP_ARRAY(char, dbuf, mtInternal);
 }
 
-// make the user specific temporary directory. Returns true if
+// Make the user specific temporary directory. Returns true if
 // the directory exists and is secure upon return. Returns false
 // if the directory exists but is either a symlink, is otherwise
 // insecure, or if an error occurred.
-//
 static bool make_user_tmp_dir(const char* dirname) {
 
-  // create the directory with 0755 permissions. note that the directory
+  // Create the directory with 0755 permissions. note that the directory
   // will be owned by euid::egid, which may not be the same as uid::gid.
-  //
   if (mkdir(dirname, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) == OS_ERR) {
     if (errno == EEXIST) {
       // The directory already exists and was probably created by another
       // JVM instance. However, this could also be the result of a
       // deliberate symlink. Verify that the existing directory is safe.
-      //
       if (!is_directory_secure(dirname)) {
-        // directory is not secure
+        // Directory is not secure.
         if (PrintMiscellaneous && Verbose) {
           warning("%s directory is insecure\n", dirname);
         }
         return false;
       }

@@ -611,23 +848,67 @@
     // could not make/find the directory or the found directory
     // was not secure
     return -1;
   }
 
+  int saved_cwd_fd;
+  // Open the directory and set the current working directory to it.
+  DIR* dirp = open_directory_secure_cwd(dirname, &saved_cwd_fd);
+  if (dirp == NULL) {
+    // Directory doesn't exist or is insecure, so cannot create shared
+    // memory file.
+    return -1;
+  }
+
+  // Open the filename in the current directory.
+  // Cannot use O_TRUNC here; truncation of an existing file has to happen
+  // after the is_file_secure() check below.
   int result;
 
-  RESTARTABLE(::open(filename, O_RDWR|O_CREAT|O_TRUNC, S_IREAD|S_IWRITE), result);
+  // No O_NOFOLLOW defined at buildtime, and it is not documented for open;
+  // so provide a workaround in this case.
+#ifdef O_NOFOLLOW
+  RESTARTABLE(::open(filename, O_RDWR|O_CREAT|O_NOFOLLOW, S_IREAD|S_IWRITE), result);
+#else
+  // workaround function (jdk6 code)
+  RESTARTABLE(::open_o_nofollow(filename, O_RDWR|O_CREAT, S_IREAD|S_IWRITE), result);
+#endif
+
   if (result == OS_ERR) {
     if (PrintMiscellaneous && Verbose) {
+      if (errno == ELOOP) {
+        warning("file %s is a symlink and is not secure\n", filename);
+      } else {
       warning("could not create file %s: %s\n", filename, strerror(errno));
     }
+    }
+    // Close the directory and reset the current working directory.
+    close_directory_secure_cwd(dirp, saved_cwd_fd);
+
     return -1;
   }
+  // Close the directory and reset the current working directory.
+  close_directory_secure_cwd(dirp, saved_cwd_fd);
 
   // save the file descriptor
   int fd = result;
 
+  // Check to see if the file is secure.
+  if (!is_file_secure(fd, filename)) {
+    ::close(fd);
+    return -1;
+  }
+
+  // Truncate the file to get rid of any existing data.
+  RESTARTABLE(::ftruncate(fd, (off_t)0), result);
+  if (result == OS_ERR) {
+    if (PrintMiscellaneous && Verbose) {
+      warning("could not truncate shared memory file: %s\n", strerror(errno));
+    }
+    ::close(fd);
+    return -1;
+  }
   // set the file size
   RESTARTABLE(::ftruncate(fd, (off_t)size), result);
   if (result == OS_ERR) {
     if (PrintMiscellaneous && Verbose) {
       warning("could not set shared memory file size: %s\n", strerror(errno));

@@ -645,11 +926,18 @@
 //
 static int open_sharedmem_file(const char* filename, int oflags, TRAPS) {
 
   // open the file
   int result;
+  // No O_NOFOLLOW defined at buildtime, and it is not documented for open;
+  // so provide a workaround in this case
+#ifdef O_NOFOLLOW
   RESTARTABLE(::open(filename, oflags), result);
+#else
+  RESTARTABLE(::open_o_nofollow(filename, oflags), result);
+#endif
+
   if (result == OS_ERR) {
     if (errno == ENOENT) {
       THROW_MSG_0(vmSymbols::java_lang_IllegalArgumentException(),
                   "Process not found");
     }

@@ -659,12 +947,19 @@
     }
     else {
       THROW_MSG_0(vmSymbols::java_io_IOException(), strerror(errno));
     }
   }
+  int fd = result;
 
-  return result;
+  // Check to see if the file is secure.
+  if (!is_file_secure(fd, filename)) {
+    ::close(fd);
+    return -1;
+  }
+
+  return fd;
 }
 
 // create a named shared memory region. returns the address of the
 // memory region on success or NULL on failure. A return value of
 // NULL will ultimately disable the shared memory feature.

@@ -692,17 +987,25 @@
     return NULL;
 
   char* dirname = get_user_tmp_dir(user_name);
   char* filename = get_sharedmem_filename(dirname, vmid);
 
+  // Get the short filename.
+  char* short_filename = strrchr(filename, '/');
+  if (short_filename == NULL) {
+    short_filename = filename;
+  } else {
+    short_filename++;
+  }
+
   // cleanup any stale shared memory files
   cleanup_sharedmem_resources(dirname);
 
   assert(((size > 0) && (size % os::vm_page_size() == 0)),
          "unexpected PerfMemory region size");
 
-  fd = create_sharedmem_resources(dirname, filename, size);
+  fd = create_sharedmem_resources(dirname, short_filename, size);
 
   FREE_C_HEAP_ARRAY(char, user_name, mtInternal);
   FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
 
   if (fd == -1) {

@@ -730,10 +1033,13 @@
   backing_store_file_name = filename;
 
   // clear the shared memory region
   (void)::memset((void*) mapAddress, 0, size);
 
+  // It does not go through os api, the operation has to record from here.
+  MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC, mtInternal);
+
   return mapAddress;
 }
 
 // release a named shared memory region
 //

@@ -804,11 +1110,11 @@
 static void mmap_attach_shared(const char* user, int vmid, PerfMemory::PerfMemoryMode mode, char** addr, size_t* sizep, TRAPS) {
 
   char* mapAddress;
   int result;
   int fd;
-  size_t size;
+  size_t size = 0;
   const char* luser = NULL;
 
   int mmap_prot;
   int file_flags;
 

@@ -816,16 +1122,22 @@
 
   // map the high level access mode to the appropriate permission
   // constructs for the file and the shared memory mapping.
   if (mode == PerfMemory::PERF_MODE_RO) {
     mmap_prot = PROT_READ;
+
+  // No O_NOFOLLOW defined at buildtime, and it is not documented for open.
+#ifdef O_NOFOLLOW
+    file_flags = O_RDONLY | O_NOFOLLOW;
+#else
     file_flags = O_RDONLY;
+#endif
   }
   else if (mode == PerfMemory::PERF_MODE_RW) {
 #ifdef LATER
     mmap_prot = PROT_READ | PROT_WRITE;
-    file_flags = O_RDWR;
+    file_flags = O_RDWR | O_NOFOLLOW;
 #else
     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
               "Unsupported access mode");
 #endif
   }

@@ -851,10 +1163,13 @@
   // since we don't follow symbolic links when creating the backing
   // store file, we don't follow them when attaching either.
   //
   if (!is_directory_secure(dirname)) {
     FREE_C_HEAP_ARRAY(char, dirname, mtInternal);
+    if (luser != user) {
+      FREE_C_HEAP_ARRAY(char, luser, mtInternal);
+    }
     THROW_MSG(vmSymbols::java_lang_IllegalArgumentException(),
               "Process not found");
   }
 
   char* filename = get_sharedmem_filename(dirname, vmid);

@@ -895,10 +1210,13 @@
     }
     THROW_MSG(vmSymbols::java_lang_OutOfMemoryError(),
               "Could not map PerfMemory");
   }
 
+  // It does not go through os api, the operation has to record from here.
+  MemTracker::record_virtual_memory_reserve((address)mapAddress, size, CURRENT_PC, mtInternal);
+
   *addr = mapAddress;
   *sizep = size;
 
   if (PerfTraceMemOps) {
     tty->print("mapped " SIZE_FORMAT " bytes for vmid %d at "