1 /*
   2  * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 #include "precompiled.hpp"
  25 #include "gc/z/zArray.inline.hpp"
  26 #include "gc/z/zBackingFile_linux_x86.hpp"
  27 #include "gc/z/zBackingPath_linux_x86.hpp"
  28 #include "gc/z/zErrno.hpp"
  29 #include "gc/z/zFlags.hpp"
  30 #include "gc/z/zLargePages.inline.hpp"
  31 #include "logging/log.hpp"
  32 #include "runtime/init.hpp"
  33 #include "runtime/os.hpp"
  34 #include "utilities/align.hpp"
  35 #include "utilities/debug.hpp"
  36 
  37 #include <fcntl.h>
  38 #include <sys/mman.h>
  39 #include <sys/stat.h>
  40 #include <sys/statfs.h>
  41 #include <sys/types.h>
  42 #include <unistd.h>
  43 
// Human-readable filesystem names, used in log messages and when
// searching for a suitable mount point.
#define ZFILESYSTEM_TMPFS                "tmpfs"
#define ZFILESYSTEM_HUGETLBFS            "hugetlbfs"

// Sysfs file that controls transparent huge pages on tmpfs. Its presence
// is used as the indicator that the kernel supports this feature.
#define ZFILENAME_SHMEM_ENABLED          "/sys/kernel/mm/transparent_hugepage/shmem_enabled"

// Default mount points, used as the preferred location for each
// filesystem type when looking for a place to create the backing file.
#define ZMOUNTPOINT_TMPFS                "/dev/shm"
#define ZMOUNTPOINT_HUGETLBFS            "/hugepages"

// Base name of the file backing the Java heap
#define ZFILENAME_HEAP                   "java_heap"

// Support for building on older Linux systems, where the system headers
// might not yet define these syscall numbers and flags. The fallback
// values below are only used when the headers do not provide them.
// NOTE(review): 319 is expected to be the x86_64 syscall number - confirm
// before reusing this fallback on another architecture.
#ifndef __NR_memfd_create
#define __NR_memfd_create                319
#endif
#ifndef MFD_CLOEXEC
#define MFD_CLOEXEC                      0x0001U
#endif
#ifndef MFD_HUGETLB
#define MFD_HUGETLB                      0x0004U
#endif
#ifndef O_CLOEXEC
#define O_CLOEXEC                        02000000
#endif
#ifndef O_TMPFILE
#define O_TMPFILE                        (020000000 | O_DIRECTORY)
#endif

// Filesystem type magic numbers, as reported in statfs::f_type, see statfs(2)
#ifndef TMPFS_MAGIC
#define TMPFS_MAGIC                      0x01021994
#endif
#ifndef HUGETLBFS_MAGIC
#define HUGETLBFS_MAGIC                  0x958458f6
#endif
  82 
  83 static int z_memfd_create(const char *name, unsigned int flags) {
  84   return syscall(__NR_memfd_create, name, flags);
  85 }
  86 
  87 ZBackingFile::ZBackingFile() :
  88     _fd(-1),
  89     _filesystem(0),
  90     _initialized(false) {
  91 
  92   // Create backing file
  93   _fd = create_fd(ZFILENAME_HEAP);
  94   if (_fd == -1) {
  95     return;
  96   }
  97 
  98   // Get filesystem type
  99   struct statfs statfs_buf;
 100   if (fstatfs(_fd, &statfs_buf) == -1) {
 101     ZErrno err;
 102     log_error(gc, init)("Failed to determine filesystem type for backing file (%s)", err.to_string());
 103     return;
 104   }
 105   _filesystem = statfs_buf.f_type;
 106 
 107   // Make sure we're on a supported filesystem
 108   if (!is_tmpfs() && !is_hugetlbfs()) {
 109     log_error(gc, init)("Backing file must be located on a %s or a %s filesystem", ZFILESYSTEM_TMPFS, ZFILESYSTEM_HUGETLBFS);
 110     return;
 111   }
 112 
 113   // Make sure the filesystem type matches requested large page type
 114   if (ZLargePages::is_transparent() && !is_tmpfs()) {
 115     log_error(gc, init)("-XX:+UseTransparentHugePages can only be enable when using a %s filesystem", ZFILESYSTEM_TMPFS);
 116     return;
 117   }
 118 
 119   if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) {
 120     log_error(gc, init)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel", ZFILESYSTEM_TMPFS);
 121     return;
 122   }
 123 
 124   if (ZLargePages::is_explicit() && !is_hugetlbfs()) {
 125     log_error(gc, init)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled when using a %s filesystem", ZFILESYSTEM_HUGETLBFS);
 126     return;
 127   }
 128 
 129   if (!ZLargePages::is_explicit() && is_hugetlbfs()) {
 130     log_error(gc, init)("-XX:+UseLargePages must be enabled when using a %s filesystem", ZFILESYSTEM_HUGETLBFS);
 131     return;
 132   }
 133 
 134   // Successfully initialized
 135   _initialized = true;
 136 }
 137 
 138 int ZBackingFile::create_mem_fd(const char* name) const {
 139   // Create file name
 140   char filename[PATH_MAX];
 141   snprintf(filename, sizeof(filename), "%s%s", name, ZLargePages::is_explicit() ? ".hugetlb" : "");
 142 
 143   // Create file
 144   const int extra_flags = ZLargePages::is_explicit() ? MFD_HUGETLB : 0;
 145   const int fd = z_memfd_create(filename, MFD_CLOEXEC | extra_flags);
 146   if (fd == -1) {
 147     ZErrno err;
 148     log_debug(gc, init)("Failed to create memfd file (%s)",
 149                         ((UseLargePages && err == EINVAL) ? "Hugepages not supported" : err.to_string()));
 150     return -1;
 151   }
 152 
 153   log_debug(gc, init)("Heap backed by file /memfd:%s", filename);
 154 
 155   return fd;
 156 }
 157 
 158 int ZBackingFile::create_file_fd(const char* name) const {
 159   const char* const filesystem = ZLargePages::is_explicit() ? ZFILESYSTEM_HUGETLBFS : ZFILESYSTEM_TMPFS;
 160   const char* const mountpoint = ZLargePages::is_explicit() ? ZMOUNTPOINT_HUGETLBFS : ZMOUNTPOINT_TMPFS;
 161 
 162   // Find mountpoint
 163   ZBackingPath path(filesystem, mountpoint);
 164   if (path.get() == NULL) {
 165     log_error(gc, init)("Use -XX:ZPath to specify the path to a %s filesystem", filesystem);
 166     return -1;
 167   }
 168 
 169   // Try to create an anonymous file using the O_TMPFILE flag. Note that this
 170   // flag requires kernel >= 3.11. If this fails we fall back to open/unlink.
 171   const int fd_anon = open(path.get(), O_TMPFILE|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
 172   if (fd_anon == -1) {
 173     ZErrno err;
 174     log_debug(gc, init)("Failed to create anonymouns file in %s (%s)", path.get(),
 175                         (err == EINVAL ? "Not supported" : err.to_string()));
 176   } else {
 177     // Get inode number for anonymous file
 178     struct stat stat_buf;
 179     if (fstat(fd_anon, &stat_buf) == -1) {
 180       ZErrno err;
 181       log_error(gc, init)("Failed to determine inode number for anonymous file (%s)", err.to_string());
 182       return -1;
 183     }
 184 
 185     log_debug(gc, init)("Heap backed by file %s/#" UINT64_FORMAT, path.get(), (uint64_t)stat_buf.st_ino);
 186 
 187     return fd_anon;
 188   }
 189 
 190   log_debug(gc, init)("Falling back to open/unlink");
 191 
 192   // Create file name
 193   char filename[PATH_MAX];
 194   snprintf(filename, sizeof(filename), "%s/%s.%d", path.get(), name, os::current_process_id());
 195 
 196   // Create file
 197   const int fd = open(filename, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
 198   if (fd == -1) {
 199     ZErrno err;
 200     log_error(gc, init)("Failed to create file %s (%s)", filename, err.to_string());
 201     return -1;
 202   }
 203 
 204   // Unlink file
 205   if (unlink(filename) == -1) {
 206     ZErrno err;
 207     log_error(gc, init)("Failed to unlink file %s (%s)", filename, err.to_string());
 208     return -1;
 209   }
 210 
 211   log_debug(gc, init)("Heap backed by file %s", filename);
 212 
 213   return fd;
 214 }
 215 
 216 int ZBackingFile::create_fd(const char* name) const {
 217   if (ZPath == NULL) {
 218     // If the path is not explicitly specified, then we first try to create a memfd file
 219     // instead of looking for a tmpfd/hugetlbfs mount point. Note that memfd_create() might
 220     // not be supported at all (requires kernel >= 3.17), or it might not support large
 221     // pages (requires kernel >= 4.14). If memfd_create() fails, then we try to create a
 222     // file on an accessible tmpfs or hugetlbfs mount point.
 223     const int fd = create_mem_fd(name);
 224     if (fd != -1) {
 225       return fd;
 226     }
 227 
 228     log_debug(gc, init)("Falling back to searching for an accessible moint point");
 229   }
 230 
 231   return create_file_fd(name);
 232 }
 233 
 234 bool ZBackingFile::is_initialized() const {
 235   return _initialized;
 236 }
 237 
 238 int ZBackingFile::fd() const {
 239   return _fd;
 240 }
 241 
 242 bool ZBackingFile::is_tmpfs() const {
 243   return _filesystem == TMPFS_MAGIC;
 244 }
 245 
 246 bool ZBackingFile::is_hugetlbfs() const {
 247   return _filesystem == HUGETLBFS_MAGIC;
 248 }
 249 
 250 bool ZBackingFile::tmpfs_supports_transparent_huge_pages() const {
 251   // If the shmem_enabled file exists and is readable then we
 252   // know the kernel supports transparent huge pages for tmpfs.
 253   return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0;
 254 }
 255 
 256 bool ZBackingFile::try_split_and_expand_tmpfs(size_t offset, size_t length, size_t alignment) const {
 257   // Try first smaller part.
 258   const size_t offset0 = offset;
 259   const size_t length0 = align_up(length / 2, alignment);
 260   if (!try_expand_tmpfs(offset0, length0, alignment)) {
 261     return false;
 262   }
 263 
 264   // Try second smaller part.
 265   const size_t offset1 = offset0 + length0;
 266   const size_t length1 = length - length0;
 267   if (!try_expand_tmpfs(offset1, length1, alignment)) {
 268     return false;
 269   }
 270 
 271   return true;
 272 }
 273 
 274 bool ZBackingFile::try_expand_tmpfs(size_t offset, size_t length, size_t alignment) const {
 275   assert(length > 0, "Invalid length");
 276   assert(is_aligned(length, alignment), "Invalid length");
 277 
 278   ZErrno err = posix_fallocate(_fd, offset, length);
 279 
 280   if (err == EINTR && length > alignment) {
 281     // Calling posix_fallocate() with a large length can take a long
 282     // time to complete. When running profilers, such as VTune, this
 283     // syscall will be constantly interrupted by signals. Expanding
 284     // the file in smaller steps avoids this problem.
 285     return try_split_and_expand_tmpfs(offset, length, alignment);
 286   }
 287 
 288   if (err) {
 289     log_error(gc)("Failed to allocate backing file (%s)", err.to_string());
 290     return false;
 291   }
 292 
 293   return true;
 294 }
 295 
 296 bool ZBackingFile::expand_tmpfs(size_t offset, size_t length) const {
 297   assert(is_tmpfs(), "Wrong filesystem");
 298   return try_expand_tmpfs(offset, length, os::vm_page_size());
 299 }
 300 
 301 bool ZBackingFile::expand_hugetlbfs(size_t offset, size_t length) const {
 302   assert(is_hugetlbfs(), "Wrong filesystem");
 303 
 304   // Prior to kernel 4.3, hugetlbfs did not support posix_fallocate().
 305   // Instead of posix_fallocate() we can use a well-known workaround,
 306   // which involves truncating the file to requested size and then try
 307   // to map it to verify that there are enough huge pages available to
 308   // back it.
 309   while (ftruncate(_fd, offset + length) == -1) {
 310     ZErrno err;
 311     if (err != EINTR) {
 312       log_error(gc)("Failed to truncate backing file (%s)", err.to_string());
 313       return false;
 314     }
 315   }
 316 
 317   // If we fail mapping during initialization, i.e. when we are pre-mapping
 318   // the heap, then we wait and retry a few times before giving up. Otherwise
 319   // there is a risk that running JVMs back-to-back will fail, since there
 320   // is a delay between process termination and the huge pages owned by that
 321   // process being returned to the huge page pool and made available for new
 322   // allocations.
 323   void* addr = MAP_FAILED;
 324   const int max_attempts = 3;
 325   for (int attempt = 1; attempt <= max_attempts; attempt++) {
 326     addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
 327     if (addr != MAP_FAILED || is_init_completed()) {
 328       // Mapping was successful or initialization phase has completed
 329       break;
 330     }
 331 
 332     ZErrno err;
 333     log_debug(gc)("Failed to map backing file (%s), attempt %d of %d",
 334                   err.to_string(), attempt, max_attempts);
 335 
 336     // Wait and retry in one second, in the hope that
 337     // huge pages will be available by then.
 338     sleep(1);
 339   }
 340 
 341   if (addr == MAP_FAILED) {
 342     // Not enough huge pages left
 343     ZErrno err;
 344     log_error(gc)("Failed to map backing file (%s)", err.to_string());
 345     return false;
 346   }
 347 
 348   // Successful mapping, unmap again. From now on the pages we mapped
 349   // will be reserved for this file.
 350   if (munmap(addr, length) == -1) {
 351     ZErrno err;
 352     log_error(gc)("Failed to unmap backing file (%s)", err.to_string());
 353     return false;
 354   }
 355 
 356   return true;
 357 }
 358 
 359 bool ZBackingFile::expand(size_t offset, size_t length) const {
 360   return is_hugetlbfs() ? expand_hugetlbfs(offset, length) : expand_tmpfs(offset, length);
 361 }