1 /* 2 * Copyright (c) 2015, 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 24 #include "precompiled.hpp" 25 #include "gc/z/zArray.inline.hpp" 26 #include "gc/z/zBackingFile_linux_x86.hpp" 27 #include "gc/z/zBackingPath_linux_x86.hpp" 28 #include "gc/z/zErrno.hpp" 29 #include "gc/z/zLargePages.inline.hpp" 30 #include "logging/log.hpp" 31 #include "runtime/os.hpp" 32 #include "utilities/align.hpp" 33 #include "utilities/debug.hpp" 34 35 #include <fcntl.h> 36 #include <sys/mman.h> 37 #include <sys/stat.h> 38 #include <sys/statfs.h> 39 #include <sys/types.h> 40 #include <unistd.h> 41 42 // Filesystem names 43 #define ZFILESYSTEM_TMPFS "tmpfs" 44 #define ZFILESYSTEM_HUGETLBFS "hugetlbfs" 45 46 // Sysfs file for transparent huge page on tmpfs 47 #define ZFILENAME_SHMEM_ENABLED "/sys/kernel/mm/transparent_hugepage/shmem_enabled" 48 49 // Java heap filename 50 #define ZFILENAME_HEAP "java_heap" 51 52 // Support for building on older Linux systems 53 #ifndef __NR_memfd_create 54 #define __NR_memfd_create 319 55 #endif 56 #ifndef MFD_CLOEXEC 57 #define MFD_CLOEXEC 0x0001U 58 #endif 59 #ifndef MFD_HUGETLB 60 #define MFD_HUGETLB 0x0004U 61 #endif 62 #ifndef O_CLOEXEC 63 #define O_CLOEXEC 02000000 64 #endif 65 #ifndef O_TMPFILE 66 #define O_TMPFILE (020000000 | O_DIRECTORY) 67 #endif 68 69 // Filesystem types, see statfs(2) 70 #ifndef TMPFS_MAGIC 71 #define TMPFS_MAGIC 0x01021994 72 #endif 73 #ifndef HUGETLBFS_MAGIC 74 #define HUGETLBFS_MAGIC 0x958458f6 75 #endif 76 77 // Preferred tmpfs mount points, ordered by priority 78 static const char* z_preferred_tmpfs_mountpoints[] = { 79 "/dev/shm", 80 "/run/shm", 81 NULL 82 }; 83 84 // Preferred hugetlbfs mount points, ordered by priority 85 static const char* z_preferred_hugetlbfs_mountpoints[] = { 86 "/dev/hugepages", 87 "/hugepages", 88 NULL 89 }; 90 91 static int z_memfd_create(const char *name, unsigned int flags) { 92 return syscall(__NR_memfd_create, name, flags); 93 } 94 95 bool ZBackingFile::_hugetlbfs_mmap_retry = true; 96 97 ZBackingFile::ZBackingFile() : 98 _fd(-1), 99 _filesystem(0), 100 _available(0), 101 _initialized(false) { 102 103 // Create backing file 104 _fd = create_fd(ZFILENAME_HEAP); 105 if (_fd == -1) { 106 return; 107 } 108 109 // Get filesystem statistics 110 struct statfs statfs_buf; 111 if (fstatfs(_fd, &statfs_buf) == -1) { 112 ZErrno err; 113 log_error(gc, init)("Failed to determine filesystem type for backing file (%s)", 114 err.to_string()); 115 return; 116 } 117 118 _filesystem = statfs_buf.f_type; 119 _available = statfs_buf.f_bavail * statfs_buf.f_bsize; 120 121 // Make sure we're on a supported filesystem 122 if (!is_tmpfs() && !is_hugetlbfs()) { 123 log_error(gc, init)("Backing file must be located on a %s or a %s filesystem", 124 ZFILESYSTEM_TMPFS, ZFILESYSTEM_HUGETLBFS); 125 return; 126 } 127 128 // Make sure the filesystem type matches requested large page type 129 if (ZLargePages::is_transparent() && !is_tmpfs()) { 130 log_error(gc, init)("-XX:+UseTransparentHugePages can only be enable when using a %s filesystem", 131 ZFILESYSTEM_TMPFS); 132 return; 133 } 134 135 if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) { 136 log_error(gc, init)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel", 137 ZFILESYSTEM_TMPFS); 138 return; 139 } 140 141 if (ZLargePages::is_explicit() && !is_hugetlbfs()) { 142 log_error(gc, init)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled when using a %s filesystem", 143 ZFILESYSTEM_HUGETLBFS); 144 return; 145 } 146 147 if (!ZLargePages::is_explicit() && is_hugetlbfs()) { 148 log_error(gc, init)("-XX:+UseLargePages must be enabled when using a %s filesystem", 149 ZFILESYSTEM_HUGETLBFS); 150 return; 151 } 152 153 // Successfully initialized 154 _initialized = true; 155 } 156 157 int ZBackingFile::create_mem_fd(const char* name) const { 158 // Create file name 159 char filename[PATH_MAX]; 160 snprintf(filename, sizeof(filename), "%s%s", name, ZLargePages::is_explicit() ? ".hugetlb" : ""); 161 162 // Create file 163 const int extra_flags = ZLargePages::is_explicit() ? MFD_HUGETLB : 0; 164 const int fd = z_memfd_create(filename, MFD_CLOEXEC | extra_flags); 165 if (fd == -1) { 166 ZErrno err; 167 log_debug(gc, init)("Failed to create memfd file (%s)", 168 ((UseLargePages && err == EINVAL) ? "Hugepages not supported" : err.to_string())); 169 return -1; 170 } 171 172 log_info(gc, init)("Heap backed by file: /memfd:%s", filename); 173 174 return fd; 175 } 176 177 int ZBackingFile::create_file_fd(const char* name) const { 178 const char* const filesystem = ZLargePages::is_explicit() 179 ? ZFILESYSTEM_HUGETLBFS 180 : ZFILESYSTEM_TMPFS; 181 const char** const preferred_mountpoints = ZLargePages::is_explicit() 182 ? z_preferred_hugetlbfs_mountpoints 183 : z_preferred_tmpfs_mountpoints; 184 185 // Find mountpoint 186 ZBackingPath path(filesystem, preferred_mountpoints); 187 if (path.get() == NULL) { 188 log_error(gc, init)("Use -XX:ZPath to specify the path to a %s filesystem", filesystem); 189 return -1; 190 } 191 192 // Try to create an anonymous file using the O_TMPFILE flag. Note that this 193 // flag requires kernel >= 3.11. If this fails we fall back to open/unlink. 194 const int fd_anon = os::open(path.get(), O_TMPFILE|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR); 195 if (fd_anon == -1) { 196 ZErrno err; 197 log_debug(gc, init)("Failed to create anonymous file in %s (%s)", path.get(), 198 (err == EINVAL ? "Not supported" : err.to_string())); 199 } else { 200 // Get inode number for anonymous file 201 struct stat stat_buf; 202 if (fstat(fd_anon, &stat_buf) == -1) { 203 ZErrno err; 204 log_error(gc, init)("Failed to determine inode number for anonymous file (%s)", err.to_string()); 205 return -1; 206 } 207 208 log_info(gc, init)("Heap backed by file: %s/#" UINT64_FORMAT, path.get(), (uint64_t)stat_buf.st_ino); 209 210 return fd_anon; 211 } 212 213 log_debug(gc, init)("Falling back to open/unlink"); 214 215 // Create file name 216 char filename[PATH_MAX]; 217 snprintf(filename, sizeof(filename), "%s/%s.%d", path.get(), name, os::current_process_id()); 218 219 // Create file 220 const int fd = os::open(filename, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR); 221 if (fd == -1) { 222 ZErrno err; 223 log_error(gc, init)("Failed to create file %s (%s)", filename, err.to_string()); 224 return -1; 225 } 226 227 // Unlink file 228 if (unlink(filename) == -1) { 229 ZErrno err; 230 log_error(gc, init)("Failed to unlink file %s (%s)", filename, err.to_string()); 231 return -1; 232 } 233 234 log_info(gc, init)("Heap backed by file: %s", filename); 235 236 return fd; 237 } 238 239 int ZBackingFile::create_fd(const char* name) const { 240 if (ZPath == NULL) { 241 // If the path is not explicitly specified, then we first try to create a memfd file 242 // instead of looking for a tmpfd/hugetlbfs mount point. Note that memfd_create() might 243 // not be supported at all (requires kernel >= 3.17), or it might not support large 244 // pages (requires kernel >= 4.14). If memfd_create() fails, then we try to create a 245 // file on an accessible tmpfs or hugetlbfs mount point. 246 const int fd = create_mem_fd(name); 247 if (fd != -1) { 248 return fd; 249 } 250 251 log_debug(gc, init)("Falling back to searching for an accessible mount point"); 252 } 253 254 return create_file_fd(name); 255 } 256 257 bool ZBackingFile::is_initialized() const { 258 return _initialized; 259 } 260 261 int ZBackingFile::fd() const { 262 return _fd; 263 } 264 265 size_t ZBackingFile::available() const { 266 return _available; 267 } 268 269 bool ZBackingFile::is_tmpfs() const { 270 return _filesystem == TMPFS_MAGIC; 271 } 272 273 bool ZBackingFile::is_hugetlbfs() const { 274 return _filesystem == HUGETLBFS_MAGIC; 275 } 276 277 bool ZBackingFile::tmpfs_supports_transparent_huge_pages() const { 278 // If the shmem_enabled file exists and is readable then we 279 // know the kernel supports transparent huge pages for tmpfs. 280 return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0; 281 } 282 283 bool ZBackingFile::try_split_and_expand_tmpfs(size_t offset, size_t length, size_t alignment) const { 284 // Try first smaller part. 285 const size_t offset0 = offset; 286 const size_t length0 = align_up(length / 2, alignment); 287 if (!try_expand_tmpfs(offset0, length0, alignment)) { 288 return false; 289 } 290 291 // Try second smaller part. 292 const size_t offset1 = offset0 + length0; 293 const size_t length1 = length - length0; 294 if (!try_expand_tmpfs(offset1, length1, alignment)) { 295 return false; 296 } 297 298 return true; 299 } 300 301 bool ZBackingFile::try_expand_tmpfs(size_t offset, size_t length, size_t alignment) const { 302 assert(length > 0, "Invalid length"); 303 assert(is_aligned(length, alignment), "Invalid length"); 304 305 ZErrno err = posix_fallocate(_fd, offset, length); 306 307 if (err == EINTR && length > alignment) { 308 // Calling posix_fallocate() with a large length can take a long 309 // time to complete. When running profilers, such as VTune, this 310 // syscall will be constantly interrupted by signals. Expanding 311 // the file in smaller steps avoids this problem. 312 return try_split_and_expand_tmpfs(offset, length, alignment); 313 } 314 315 if (err) { 316 log_error(gc)("Failed to allocate backing file (%s)", err.to_string()); 317 return false; 318 } 319 320 return true; 321 } 322 323 bool ZBackingFile::try_expand_tmpfs(size_t offset, size_t length) const { 324 assert(is_tmpfs(), "Wrong filesystem"); 325 return try_expand_tmpfs(offset, length, os::vm_page_size()); 326 } 327 328 bool ZBackingFile::try_expand_hugetlbfs(size_t offset, size_t length) const { 329 assert(is_hugetlbfs(), "Wrong filesystem"); 330 331 // Prior to kernel 4.3, hugetlbfs did not support posix_fallocate(). 332 // Instead of posix_fallocate() we can use a well-known workaround, 333 // which involves truncating the file to requested size and then try 334 // to map it to verify that there are enough huge pages available to 335 // back it. 336 while (ftruncate(_fd, offset + length) == -1) { 337 ZErrno err; 338 if (err != EINTR) { 339 log_error(gc)("Failed to truncate backing file (%s)", err.to_string()); 340 return false; 341 } 342 } 343 344 // If we fail mapping during initialization, i.e. when we are pre-mapping 345 // the heap, then we wait and retry a few times before giving up. Otherwise 346 // there is a risk that running JVMs back-to-back will fail, since there 347 // is a delay between process termination and the huge pages owned by that 348 // process being returned to the huge page pool and made available for new 349 // allocations. 350 void* addr = MAP_FAILED; 351 const int max_attempts = 5; 352 for (int attempt = 1; attempt <= max_attempts; attempt++) { 353 addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset); 354 if (addr != MAP_FAILED || !_hugetlbfs_mmap_retry) { 355 // Mapping was successful or mmap retry is disabled 356 break; 357 } 358 359 ZErrno err; 360 log_debug(gc)("Failed to map backing file (%s), attempt %d of %d", 361 err.to_string(), attempt, max_attempts); 362 363 // Wait and retry in one second, in the hope that 364 // huge pages will be available by then. 365 sleep(1); 366 } 367 368 // Disable mmap retry from now on 369 if (_hugetlbfs_mmap_retry) { 370 _hugetlbfs_mmap_retry = false; 371 } 372 373 if (addr == MAP_FAILED) { 374 // Not enough huge pages left 375 ZErrno err; 376 log_error(gc)("Failed to map backing file (%s)", err.to_string()); 377 return false; 378 } 379 380 // Successful mapping, unmap again. From now on the pages we mapped 381 // will be reserved for this file. 382 if (munmap(addr, length) == -1) { 383 ZErrno err; 384 log_error(gc)("Failed to unmap backing file (%s)", err.to_string()); 385 return false; 386 } 387 388 return true; 389 } 390 391 bool ZBackingFile::try_expand_tmpfs_or_hugetlbfs(size_t offset, size_t length, size_t alignment) const { 392 assert(is_aligned(offset, alignment), "Invalid offset"); 393 assert(is_aligned(length, alignment), "Invalid length"); 394 395 log_debug(gc)("Expanding heap from " SIZE_FORMAT "M to " SIZE_FORMAT "M", offset / M, (offset + length) / M); 396 397 return is_hugetlbfs() ? try_expand_hugetlbfs(offset, length) : try_expand_tmpfs(offset, length); 398 } 399 400 size_t ZBackingFile::try_expand(size_t offset, size_t length, size_t alignment) const { 401 size_t start = offset; 402 size_t end = offset + length; 403 404 // Try to expand 405 if (try_expand_tmpfs_or_hugetlbfs(start, length, alignment)) { 406 // Success 407 return end; 408 } 409 410 // Failed, try to expand as much as possible 411 for (;;) { 412 length = align_down((end - start) / 2, alignment); 413 if (length < alignment) { 414 // Done, don't expand more 415 return start; 416 } 417 418 if (try_expand_tmpfs_or_hugetlbfs(start, length, alignment)) { 419 // Success, try expand more 420 start += length; 421 } else { 422 // Failed, try expand less 423 end -= length; 424 } 425 } 426 }