1 /*
   2  * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 #include "precompiled.hpp"
  25 #include "gc/z/zArray.inline.hpp"
  26 #include "gc/z/zBackingFile_linux_x86.hpp"
  27 #include "gc/z/zBackingPath_linux_x86.hpp"
  28 #include "gc/z/zErrno.hpp"
  29 #include "gc/z/zLargePages.inline.hpp"
  30 #include "logging/log.hpp"
  31 #include "runtime/os.hpp"
  32 #include "utilities/align.hpp"
  33 #include "utilities/debug.hpp"
  34 
  35 #include <fcntl.h>
  36 #include <sys/mman.h>
  37 #include <sys/stat.h>
  38 #include <sys/statfs.h>
  39 #include <sys/types.h>
  40 #include <unistd.h>
  41 
// Filesystem names, used in log messages and when searching for a
// mount point of the matching filesystem type
#define ZFILESYSTEM_TMPFS                "tmpfs"
#define ZFILESYSTEM_HUGETLBFS            "hugetlbfs"

// Sysfs file for transparent huge page on tmpfs. Being able to read this
// file is used as the indication that the kernel supports the feature.
#define ZFILENAME_SHMEM_ENABLED          "/sys/kernel/mm/transparent_hugepage/shmem_enabled"

// Java heap filename. This is the base name handed to create_fd(); the
// memfd and open/unlink code paths may append a suffix to it.
#define ZFILENAME_HEAP                   "java_heap"

// Support for building on older Linux systems. Each fallback below only
// takes effect when the system headers did not already define the name.
// NOTE(review): 319 is presumably the x86_64 syscall number for
// memfd_create, and the flag values are presumably copied from the kernel
// headers - confirm against the targeted kernel ABI.
#ifndef __NR_memfd_create
#define __NR_memfd_create                319
#endif
#ifndef MFD_CLOEXEC
#define MFD_CLOEXEC                      0x0001U
#endif
#ifndef MFD_HUGETLB
#define MFD_HUGETLB                      0x0004U
#endif
#ifndef O_CLOEXEC
#define O_CLOEXEC                        02000000
#endif
#ifndef O_TMPFILE
#define O_TMPFILE                        (020000000 | O_DIRECTORY)
#endif

// Filesystem types, see statfs(2). These are compared against the f_type
// value reported by fstatfs() for the backing file.
#ifndef TMPFS_MAGIC
#define TMPFS_MAGIC                      0x01021994
#endif
#ifndef HUGETLBFS_MAGIC
#define HUGETLBFS_MAGIC                  0x958458f6
#endif
  76 
  77 // Preferred tmpfs mount points, ordered by priority
  78 static const char* z_preferred_tmpfs_mountpoints[] = {
  79   "/dev/shm",
  80   "/run/shm",
  81   NULL
  82 };
  83 
  84 // Preferred hugetlbfs mount points, ordered by priority
  85 static const char* z_preferred_hugetlbfs_mountpoints[] = {
  86   "/dev/hugepages",
  87   "/hugepages",
  88   NULL
  89 };
  90 
  91 static int z_memfd_create(const char *name, unsigned int flags) {
  92   return syscall(__NR_memfd_create, name, flags);
  93 }
  94 
// Controls whether try_expand_hugetlbfs() waits and retries when mapping the
// backing file fails. Starts out enabled and is switched off once a call to
// try_expand_hugetlbfs() has made its mapping attempt(s).
bool ZBackingFile::_hugetlbfs_mmap_retry = true;
  96 
  97 ZBackingFile::ZBackingFile() :
  98     _fd(-1),
  99     _filesystem(0),
 100     _available(0),
 101     _initialized(false) {
 102 
 103   // Create backing file
 104   _fd = create_fd(ZFILENAME_HEAP);
 105   if (_fd == -1) {
 106     return;
 107   }
 108 
 109   // Get filesystem statistics
 110   struct statfs statfs_buf;
 111   if (fstatfs(_fd, &statfs_buf) == -1) {
 112     ZErrno err;
 113     log_error(gc, init)("Failed to determine filesystem type for backing file (%s)",
 114                         err.to_string());
 115     return;
 116   }
 117 
 118   _filesystem = statfs_buf.f_type;
 119   _available = statfs_buf.f_bavail * statfs_buf.f_bsize;
 120 
 121   // Make sure we're on a supported filesystem
 122   if (!is_tmpfs() && !is_hugetlbfs()) {
 123     log_error(gc, init)("Backing file must be located on a %s or a %s filesystem",
 124                         ZFILESYSTEM_TMPFS, ZFILESYSTEM_HUGETLBFS);
 125     return;
 126   }
 127 
 128   // Make sure the filesystem type matches requested large page type
 129   if (ZLargePages::is_transparent() && !is_tmpfs()) {
 130     log_error(gc, init)("-XX:+UseTransparentHugePages can only be enable when using a %s filesystem",
 131                         ZFILESYSTEM_TMPFS);
 132     return;
 133   }
 134 
 135   if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) {
 136     log_error(gc, init)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel",
 137                         ZFILESYSTEM_TMPFS);
 138     return;
 139   }
 140 
 141   if (ZLargePages::is_explicit() && !is_hugetlbfs()) {
 142     log_error(gc, init)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled when using a %s filesystem",
 143                         ZFILESYSTEM_HUGETLBFS);
 144     return;
 145   }
 146 
 147   if (!ZLargePages::is_explicit() && is_hugetlbfs()) {
 148     log_error(gc, init)("-XX:+UseLargePages must be enabled when using a %s filesystem",
 149                         ZFILESYSTEM_HUGETLBFS);
 150     return;
 151   }
 152 
 153   // Successfully initialized
 154   _initialized = true;
 155 }
 156 
 157 int ZBackingFile::create_mem_fd(const char* name) const {
 158   // Create file name
 159   char filename[PATH_MAX];
 160   snprintf(filename, sizeof(filename), "%s%s", name, ZLargePages::is_explicit() ? ".hugetlb" : "");
 161 
 162   // Create file
 163   const int extra_flags = ZLargePages::is_explicit() ? MFD_HUGETLB : 0;
 164   const int fd = z_memfd_create(filename, MFD_CLOEXEC | extra_flags);
 165   if (fd == -1) {
 166     ZErrno err;
 167     log_debug(gc, init)("Failed to create memfd file (%s)",
 168                         ((UseLargePages && err == EINVAL) ? "Hugepages not supported" : err.to_string()));
 169     return -1;
 170   }
 171 
 172   log_info(gc, init)("Heap backed by file: /memfd:%s", filename);
 173 
 174   return fd;
 175 }
 176 
 177 int ZBackingFile::create_file_fd(const char* name) const {
 178   const char* const filesystem = ZLargePages::is_explicit()
 179                                  ? ZFILESYSTEM_HUGETLBFS
 180                                  : ZFILESYSTEM_TMPFS;
 181   const char** const preferred_mountpoints = ZLargePages::is_explicit()
 182                                              ? z_preferred_hugetlbfs_mountpoints
 183                                              : z_preferred_tmpfs_mountpoints;
 184 
 185   // Find mountpoint
 186   ZBackingPath path(filesystem, preferred_mountpoints);
 187   if (path.get() == NULL) {
 188     log_error(gc, init)("Use -XX:ZPath to specify the path to a %s filesystem", filesystem);
 189     return -1;
 190   }
 191 
 192   // Try to create an anonymous file using the O_TMPFILE flag. Note that this
 193   // flag requires kernel >= 3.11. If this fails we fall back to open/unlink.
 194   const int fd_anon = os::open(path.get(), O_TMPFILE|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
 195   if (fd_anon == -1) {
 196     ZErrno err;
 197     log_debug(gc, init)("Failed to create anonymous file in %s (%s)", path.get(),
 198                         (err == EINVAL ? "Not supported" : err.to_string()));
 199   } else {
 200     // Get inode number for anonymous file
 201     struct stat stat_buf;
 202     if (fstat(fd_anon, &stat_buf) == -1) {
 203       ZErrno err;
 204       log_error(gc, init)("Failed to determine inode number for anonymous file (%s)", err.to_string());
 205       return -1;
 206     }
 207 
 208     log_info(gc, init)("Heap backed by file: %s/#" UINT64_FORMAT, path.get(), (uint64_t)stat_buf.st_ino);
 209 
 210     return fd_anon;
 211   }
 212 
 213   log_debug(gc, init)("Falling back to open/unlink");
 214 
 215   // Create file name
 216   char filename[PATH_MAX];
 217   snprintf(filename, sizeof(filename), "%s/%s.%d", path.get(), name, os::current_process_id());
 218 
 219   // Create file
 220   const int fd = os::open(filename, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
 221   if (fd == -1) {
 222     ZErrno err;
 223     log_error(gc, init)("Failed to create file %s (%s)", filename, err.to_string());
 224     return -1;
 225   }
 226 
 227   // Unlink file
 228   if (unlink(filename) == -1) {
 229     ZErrno err;
 230     log_error(gc, init)("Failed to unlink file %s (%s)", filename, err.to_string());
 231     return -1;
 232   }
 233 
 234   log_info(gc, init)("Heap backed by file: %s", filename);
 235 
 236   return fd;
 237 }
 238 
 239 int ZBackingFile::create_fd(const char* name) const {
 240   if (ZPath == NULL) {
 241     // If the path is not explicitly specified, then we first try to create a memfd file
 242     // instead of looking for a tmpfd/hugetlbfs mount point. Note that memfd_create() might
 243     // not be supported at all (requires kernel >= 3.17), or it might not support large
 244     // pages (requires kernel >= 4.14). If memfd_create() fails, then we try to create a
 245     // file on an accessible tmpfs or hugetlbfs mount point.
 246     const int fd = create_mem_fd(name);
 247     if (fd != -1) {
 248       return fd;
 249     }
 250 
 251     log_debug(gc, init)("Falling back to searching for an accessible mount point");
 252   }
 253 
 254   return create_file_fd(name);
 255 }
 256 
// Returns true if the constructor created the backing file and all of its
// filesystem/large page checks passed.
bool ZBackingFile::is_initialized() const {
  return _initialized;
}
 260 
// Returns the file descriptor of the backing file, or -1 if the
// constructor failed to create it.
int ZBackingFile::fd() const {
  return _fd;
}
 264 
// Returns the number of bytes that were available on the backing filesystem
// when the constructor queried it with fstatfs() (available blocks times
// block size), or 0 if that query was never made.
size_t ZBackingFile::available() const {
  return _available;
}
 268 
// Returns true if the filesystem type recorded by the constructor
// is the tmpfs magic number.
bool ZBackingFile::is_tmpfs() const {
  return _filesystem == TMPFS_MAGIC;
}
 272 
// Returns true if the filesystem type recorded by the constructor
// is the hugetlbfs magic number.
bool ZBackingFile::is_hugetlbfs() const {
  return _filesystem == HUGETLBFS_MAGIC;
}
 276 
 277 bool ZBackingFile::tmpfs_supports_transparent_huge_pages() const {
 278   // If the shmem_enabled file exists and is readable then we
 279   // know the kernel supports transparent huge pages for tmpfs.
 280   return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0;
 281 }
 282 
 283 bool ZBackingFile::try_split_and_expand_tmpfs(size_t offset, size_t length, size_t alignment) const {
 284   // Try first smaller part.
 285   const size_t offset0 = offset;
 286   const size_t length0 = align_up(length / 2, alignment);
 287   if (!try_expand_tmpfs(offset0, length0, alignment)) {
 288     return false;
 289   }
 290 
 291   // Try second smaller part.
 292   const size_t offset1 = offset0 + length0;
 293   const size_t length1 = length - length0;
 294   if (!try_expand_tmpfs(offset1, length1, alignment)) {
 295     return false;
 296   }
 297 
 298   return true;
 299 }
 300 
 301 bool ZBackingFile::try_expand_tmpfs(size_t offset, size_t length, size_t alignment) const {
 302   assert(length > 0, "Invalid length");
 303   assert(is_aligned(length, alignment), "Invalid length");
 304 
 305   ZErrno err = posix_fallocate(_fd, offset, length);
 306 
 307   if (err == EINTR && length > alignment) {
 308     // Calling posix_fallocate() with a large length can take a long
 309     // time to complete. When running profilers, such as VTune, this
 310     // syscall will be constantly interrupted by signals. Expanding
 311     // the file in smaller steps avoids this problem.
 312     return try_split_and_expand_tmpfs(offset, length, alignment);
 313   }
 314 
 315   if (err) {
 316     log_error(gc)("Failed to allocate backing file (%s)", err.to_string());
 317     return false;
 318   }
 319 
 320   return true;
 321 }
 322 
 323 bool ZBackingFile::try_expand_tmpfs(size_t offset, size_t length) const {
 324   assert(is_tmpfs(), "Wrong filesystem");
 325   return try_expand_tmpfs(offset, length, os::vm_page_size());
 326 }
 327 
 328 bool ZBackingFile::try_expand_hugetlbfs(size_t offset, size_t length) const {
 329   assert(is_hugetlbfs(), "Wrong filesystem");
 330 
 331   // Prior to kernel 4.3, hugetlbfs did not support posix_fallocate().
 332   // Instead of posix_fallocate() we can use a well-known workaround,
 333   // which involves truncating the file to requested size and then try
 334   // to map it to verify that there are enough huge pages available to
 335   // back it.
 336   while (ftruncate(_fd, offset + length) == -1) {
 337     ZErrno err;
 338     if (err != EINTR) {
 339       log_error(gc)("Failed to truncate backing file (%s)", err.to_string());
 340       return false;
 341     }
 342   }
 343 
 344   // If we fail mapping during initialization, i.e. when we are pre-mapping
 345   // the heap, then we wait and retry a few times before giving up. Otherwise
 346   // there is a risk that running JVMs back-to-back will fail, since there
 347   // is a delay between process termination and the huge pages owned by that
 348   // process being returned to the huge page pool and made available for new
 349   // allocations.
 350   void* addr = MAP_FAILED;
 351   const int max_attempts = 5;
 352   for (int attempt = 1; attempt <= max_attempts; attempt++) {
 353     addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
 354     if (addr != MAP_FAILED || !_hugetlbfs_mmap_retry) {
 355       // Mapping was successful or mmap retry is disabled
 356       break;
 357     }
 358 
 359     ZErrno err;
 360     log_debug(gc)("Failed to map backing file (%s), attempt %d of %d",
 361                   err.to_string(), attempt, max_attempts);
 362 
 363     // Wait and retry in one second, in the hope that
 364     // huge pages will be available by then.
 365     sleep(1);
 366   }
 367 
 368   // Disable mmap retry from now on
 369   if (_hugetlbfs_mmap_retry) {
 370     _hugetlbfs_mmap_retry = false;
 371   }
 372 
 373   if (addr == MAP_FAILED) {
 374     // Not enough huge pages left
 375     ZErrno err;
 376     log_error(gc)("Failed to map backing file (%s)", err.to_string());
 377     return false;
 378   }
 379 
 380   // Successful mapping, unmap again. From now on the pages we mapped
 381   // will be reserved for this file.
 382   if (munmap(addr, length) == -1) {
 383     ZErrno err;
 384     log_error(gc)("Failed to unmap backing file (%s)", err.to_string());
 385     return false;
 386   }
 387 
 388   return true;
 389 }
 390 
 391 bool ZBackingFile::try_expand_tmpfs_or_hugetlbfs(size_t offset, size_t length, size_t alignment) const {
 392   assert(is_aligned(offset, alignment), "Invalid offset");
 393   assert(is_aligned(length, alignment), "Invalid length");
 394 
 395   log_debug(gc)("Expanding heap from " SIZE_FORMAT "M to " SIZE_FORMAT "M", offset / M, (offset + length) / M);
 396 
 397   return is_hugetlbfs() ? try_expand_hugetlbfs(offset, length) : try_expand_tmpfs(offset, length);
 398 }
 399 
 400 size_t ZBackingFile::try_expand(size_t offset, size_t length, size_t alignment) const {
 401   size_t start = offset;
 402   size_t end = offset + length;
 403 
 404   // Try to expand
 405   if (try_expand_tmpfs_or_hugetlbfs(start, length, alignment)) {
 406     // Success
 407     return end;
 408   }
 409 
 410   // Failed, try to expand as much as possible
 411   for (;;) {
 412     length = align_down((end - start) / 2, alignment);
 413     if (length < alignment) {
 414       // Done, don't expand more
 415       return start;
 416     }
 417 
 418     if (try_expand_tmpfs_or_hugetlbfs(start, length, alignment)) {
 419       // Success, try expand more
 420       start += length;
 421     } else {
 422       // Failed, try expand less
 423       end -= length;
 424     }
 425   }
 426 }