1 /* 2 * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 #include "precompiled.hpp" 25 #include "gc/z/zArray.inline.hpp" 26 #include "gc/z/zBackingFile_linux_x86.hpp" 27 #include "gc/z/zBackingPath_linux_x86.hpp" 28 #include "gc/z/zErrno.hpp" 29 #include "gc/z/zFlags.hpp" 30 #include "gc/z/zLargePages.inline.hpp" 31 #include "logging/log.hpp" 32 #include "runtime/init.hpp" 33 #include "runtime/os.hpp" 34 #include "utilities/align.hpp" 35 #include "utilities/debug.hpp" 36 37 #include <fcntl.h> 38 #include <sys/mman.h> 39 #include <sys/stat.h> 40 #include <sys/statfs.h> 41 #include <sys/types.h> 42 #include <unistd.h> 43 44 // Filesystem names 45 #define ZFILESYSTEM_TMPFS "tmpfs" 46 #define ZFILESYSTEM_HUGETLBFS "hugetlbfs" 47 48 // Sysfs file for transparent huge page on tmpfs 49 #define ZFILENAME_SHMEM_ENABLED "/sys/kernel/mm/transparent_hugepage/shmem_enabled" 50 51 // Default mount points 52 #define ZMOUNTPOINT_TMPFS "/dev/shm" 53 #define ZMOUNTPOINT_HUGETLBFS "/hugepages" 54 55 // Java heap filename 56 #define ZFILENAME_HEAP "java_heap" 57 58 // Support for building on older Linux systems 59 #ifndef __NR_memfd_create 60 #define __NR_memfd_create 319 61 #endif 62 #ifndef MFD_CLOEXEC 63 #define MFD_CLOEXEC 0x0001U 64 #endif 65 #ifndef MFD_HUGETLB 66 #define MFD_HUGETLB 0x0004U 67 #endif 68 #ifndef O_CLOEXEC 69 #define O_CLOEXEC 02000000 70 #endif 71 #ifndef O_TMPFILE 72 #define O_TMPFILE (020000000 | O_DIRECTORY) 73 #endif 74 75 // Filesystem types, see statfs(2) 76 #ifndef TMPFS_MAGIC 77 #define TMPFS_MAGIC 0x01021994 78 #endif 79 #ifndef HUGETLBFS_MAGIC 80 #define HUGETLBFS_MAGIC 0x958458f6 81 #endif 82 83 static int z_memfd_create(const char *name, unsigned int flags) { 84 return syscall(__NR_memfd_create, name, flags); 85 } 86 87 ZBackingFile::ZBackingFile() : 88 _fd(-1), 89 _filesystem(0), 90 _initialized(false) { 91 92 // Create backing file 93 _fd = create_fd(ZFILENAME_HEAP); 94 if (_fd == -1) { 95 return; 96 } 97 98 // Get filesystem type 99 struct statfs statfs_buf; 100 if (fstatfs(_fd, &statfs_buf) == -1) { 101 ZErrno err; 102 log_error(gc, init)("Failed to determine filesystem type for backing file (%s)", err.to_string()); 103 return; 104 } 105 _filesystem = statfs_buf.f_type; 106 107 // Make sure we're on a supported filesystem 108 if (!is_tmpfs() && !is_hugetlbfs()) { 109 log_error(gc, init)("Backing file must be located on a %s or a %s filesystem", ZFILESYSTEM_TMPFS, ZFILESYSTEM_HUGETLBFS); 110 return; 111 } 112 113 // Make sure the filesystem type matches requested large page type 114 if (ZLargePages::is_transparent() && !is_tmpfs()) { 115 log_error(gc, init)("-XX:+UseTransparentHugePages can only be enable when using a %s filesystem", ZFILESYSTEM_TMPFS); 116 return; 117 } 118 119 if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) { 120 log_error(gc, init)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel", ZFILESYSTEM_TMPFS); 121 return; 122 } 123 124 if (ZLargePages::is_explicit() && !is_hugetlbfs()) { 125 log_error(gc, init)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled when using a %s filesystem", ZFILESYSTEM_HUGETLBFS); 126 return; 127 } 128 129 if (!ZLargePages::is_explicit() && is_hugetlbfs()) { 130 log_error(gc, init)("-XX:+UseLargePages must be enabled when using a %s filesystem", ZFILESYSTEM_HUGETLBFS); 131 return; 132 } 133 134 // Successfully initialized 135 _initialized = true; 136 } 137 138 int ZBackingFile::create_mem_fd(const char* name) const { 139 // Create file name 140 char filename[PATH_MAX]; 141 snprintf(filename, sizeof(filename), "%s%s", name, ZLargePages::is_explicit() ? ".hugetlb" : ""); 142 143 // Create file 144 const int extra_flags = ZLargePages::is_explicit() ? MFD_HUGETLB : 0; 145 const int fd = z_memfd_create(filename, MFD_CLOEXEC | extra_flags); 146 if (fd == -1) { 147 ZErrno err; 148 log_debug(gc, init)("Failed to create memfd file (%s)", 149 ((UseLargePages && err == EINVAL) ? "Hugepages not supported" : err.to_string())); 150 return -1; 151 } 152 153 log_debug(gc, init)("Heap backed by file /memfd:%s", filename); 154 155 return fd; 156 } 157 158 int ZBackingFile::create_file_fd(const char* name) const { 159 const char* const filesystem = ZLargePages::is_explicit() ? ZFILESYSTEM_HUGETLBFS : ZFILESYSTEM_TMPFS; 160 const char* const mountpoint = ZLargePages::is_explicit() ? ZMOUNTPOINT_HUGETLBFS : ZMOUNTPOINT_TMPFS; 161 162 // Find mountpoint 163 ZBackingPath path(filesystem, mountpoint); 164 if (path.get() == NULL) { 165 log_error(gc, init)("Use -XX:ZPath to specify the path to a %s filesystem", filesystem); 166 return -1; 167 } 168 169 // Try to create an anonymous file using the O_TMPFILE flag. Note that this 170 // flag requires kernel >= 3.11. If this fails we fall back to open/unlink. 171 const int fd_anon = open(path.get(), O_TMPFILE|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR); 172 if (fd_anon == -1) { 173 ZErrno err; 174 log_debug(gc, init)("Failed to create anonymouns file in %s (%s)", path.get(), 175 (err == EINVAL ? "Not supported" : err.to_string())); 176 } else { 177 // Get inode number for anonymous file 178 struct stat stat_buf; 179 if (fstat(fd_anon, &stat_buf) == -1) { 180 ZErrno err; 181 log_error(gc, init)("Failed to determine inode number for anonymous file (%s)", err.to_string()); 182 return -1; 183 } 184 185 log_debug(gc, init)("Heap backed by file %s/#" UINT64_FORMAT, path.get(), (uint64_t)stat_buf.st_ino); 186 187 return fd_anon; 188 } 189 190 log_debug(gc, init)("Falling back to open/unlink"); 191 192 // Create file name 193 char filename[PATH_MAX]; 194 snprintf(filename, sizeof(filename), "%s/%s.%d", path.get(), name, os::current_process_id()); 195 196 // Create file 197 const int fd = open(filename, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR); 198 if (fd == -1) { 199 ZErrno err; 200 log_error(gc, init)("Failed to create file %s (%s)", filename, err.to_string()); 201 return -1; 202 } 203 204 // Unlink file 205 if (unlink(filename) == -1) { 206 ZErrno err; 207 log_error(gc, init)("Failed to unlink file %s (%s)", filename, err.to_string()); 208 return -1; 209 } 210 211 log_debug(gc, init)("Heap backed by file %s", filename); 212 213 return fd; 214 } 215 216 int ZBackingFile::create_fd(const char* name) const { 217 if (ZPath == NULL) { 218 // If the path is not explicitly specified, then we first try to create a memfd file 219 // instead of looking for a tmpfd/hugetlbfs mount point. Note that memfd_create() might 220 // not be supported at all (requires kernel >= 3.17), or it might not support large 221 // pages (requires kernel >= 4.14). If memfd_create() fails, then we try to create a 222 // file on an accessible tmpfs or hugetlbfs mount point. 223 const int fd = create_mem_fd(name); 224 if (fd != -1) { 225 return fd; 226 } 227 228 log_debug(gc, init)("Falling back to searching for an accessible moint point"); 229 } 230 231 return create_file_fd(name); 232 } 233 234 bool ZBackingFile::is_initialized() const { 235 return _initialized; 236 } 237 238 int ZBackingFile::fd() const { 239 return _fd; 240 } 241 242 bool ZBackingFile::is_tmpfs() const { 243 return _filesystem == TMPFS_MAGIC; 244 } 245 246 bool ZBackingFile::is_hugetlbfs() const { 247 return _filesystem == HUGETLBFS_MAGIC; 248 } 249 250 bool ZBackingFile::tmpfs_supports_transparent_huge_pages() const { 251 // If the shmem_enabled file exists and is readable then we 252 // know the kernel supports transparent huge pages for tmpfs. 253 return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0; 254 } 255 256 bool ZBackingFile::try_split_and_expand_tmpfs(size_t offset, size_t length, size_t alignment) const { 257 // Try first smaller part. 258 const size_t offset0 = offset; 259 const size_t length0 = align_up(length / 2, alignment); 260 if (!try_expand_tmpfs(offset0, length0, alignment)) { 261 return false; 262 } 263 264 // Try second smaller part. 265 const size_t offset1 = offset0 + length0; 266 const size_t length1 = length - length0; 267 if (!try_expand_tmpfs(offset1, length1, alignment)) { 268 return false; 269 } 270 271 return true; 272 } 273 274 bool ZBackingFile::try_expand_tmpfs(size_t offset, size_t length, size_t alignment) const { 275 assert(length > 0, "Invalid length"); 276 assert(is_aligned(length, alignment), "Invalid length"); 277 278 ZErrno err = posix_fallocate(_fd, offset, length); 279 280 if (err == EINTR && length > alignment) { 281 // Calling posix_fallocate() with a large length can take a long 282 // time to complete. When running profilers, such as VTune, this 283 // syscall will be constantly interrupted by signals. Expanding 284 // the file in smaller steps avoids this problem. 285 return try_split_and_expand_tmpfs(offset, length, alignment); 286 } 287 288 if (err) { 289 log_error(gc)("Failed to allocate backing file (%s)", err.to_string()); 290 return false; 291 } 292 293 return true; 294 } 295 296 bool ZBackingFile::expand_tmpfs(size_t offset, size_t length) const { 297 assert(is_tmpfs(), "Wrong filesystem"); 298 return try_expand_tmpfs(offset, length, os::vm_page_size()); 299 } 300 301 bool ZBackingFile::expand_hugetlbfs(size_t offset, size_t length) const { 302 assert(is_hugetlbfs(), "Wrong filesystem"); 303 304 // Prior to kernel 4.3, hugetlbfs did not support posix_fallocate(). 305 // Instead of posix_fallocate() we can use a well-known workaround, 306 // which involves truncating the file to requested size and then try 307 // to map it to verify that there are enough huge pages available to 308 // back it. 309 while (ftruncate(_fd, offset + length) == -1) { 310 ZErrno err; 311 if (err != EINTR) { 312 log_error(gc)("Failed to truncate backing file (%s)", err.to_string()); 313 return false; 314 } 315 } 316 317 // If we fail mapping during initialization, i.e. when we are pre-mapping 318 // the heap, then we wait and retry a few times before giving up. Otherwise 319 // there is a risk that running JVMs back-to-back will fail, since there 320 // is a delay between process termination and the huge pages owned by that 321 // process being returned to the huge page pool and made available for new 322 // allocations. 323 void* addr = MAP_FAILED; 324 const int max_attempts = 3; 325 for (int attempt = 1; attempt <= max_attempts; attempt++) { 326 addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset); 327 if (addr != MAP_FAILED || is_init_completed()) { 328 // Mapping was successful or initialization phase has completed 329 break; 330 } 331 332 ZErrno err; 333 log_debug(gc)("Failed to map backing file (%s), attempt %d of %d", 334 err.to_string(), attempt, max_attempts); 335 336 // Wait and retry in one second, in the hope that 337 // huge pages will be available by then. 338 sleep(1); 339 } 340 341 if (addr == MAP_FAILED) { 342 // Not enough huge pages left 343 ZErrno err; 344 log_error(gc)("Failed to map backing file (%s)", err.to_string()); 345 return false; 346 } 347 348 // Successful mapping, unmap again. From now on the pages we mapped 349 // will be reserved for this file. 350 if (munmap(addr, length) == -1) { 351 ZErrno err; 352 log_error(gc)("Failed to unmap backing file (%s)", err.to_string()); 353 return false; 354 } 355 356 return true; 357 } 358 359 bool ZBackingFile::expand(size_t offset, size_t length) const { 360 return is_hugetlbfs() ? expand_hugetlbfs(offset, length) : expand_tmpfs(offset, length); 361 }