1 /* 2 * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 
22 */ 23 24 #include "precompiled.hpp" 25 #include "gc/z/zArray.inline.hpp" 26 #include "gc/z/zErrno.hpp" 27 #include "gc/z/zGlobals.hpp" 28 #include "gc/z/zLargePages.inline.hpp" 29 #include "gc/z/zMountPoint_linux.hpp" 30 #include "gc/z/zNUMA.inline.hpp" 31 #include "gc/z/zPhysicalMemoryBacking_linux.hpp" 32 #include "gc/z/zSyscall_linux.hpp" 33 #include "logging/log.hpp" 34 #include "runtime/init.hpp" 35 #include "runtime/os.hpp" 36 #include "runtime/stubRoutines.hpp" 37 #include "utilities/align.hpp" 38 #include "utilities/debug.hpp" 39 #include "utilities/growableArray.hpp" 40 41 #include <fcntl.h> 42 #include <stdio.h> 43 #include <sys/mman.h> 44 #include <sys/stat.h> 45 #include <sys/statfs.h> 46 #include <sys/types.h> 47 #include <unistd.h> 48 49 // 50 // Support for building on older Linux systems 51 // 52 53 // memfd_create(2) flags 54 #ifndef MFD_CLOEXEC 55 #define MFD_CLOEXEC 0x0001U 56 #endif 57 #ifndef MFD_HUGETLB 58 #define MFD_HUGETLB 0x0004U 59 #endif 60 61 // open(2) flags 62 #ifndef O_CLOEXEC 63 #define O_CLOEXEC 02000000 64 #endif 65 #ifndef O_TMPFILE 66 #define O_TMPFILE (020000000 | O_DIRECTORY) 67 #endif 68 69 // fallocate(2) flags 70 #ifndef FALLOC_FL_KEEP_SIZE 71 #define FALLOC_FL_KEEP_SIZE 0x01 72 #endif 73 #ifndef FALLOC_FL_PUNCH_HOLE 74 #define FALLOC_FL_PUNCH_HOLE 0x02 75 #endif 76 77 // Filesystem types, see statfs(2) 78 #ifndef TMPFS_MAGIC 79 #define TMPFS_MAGIC 0x01021994 80 #endif 81 #ifndef HUGETLBFS_MAGIC 82 #define HUGETLBFS_MAGIC 0x958458f6 83 #endif 84 85 // Filesystem names 86 #define ZFILESYSTEM_TMPFS "tmpfs" 87 #define ZFILESYSTEM_HUGETLBFS "hugetlbfs" 88 89 // Proc file entry for max map count 90 #define ZFILENAME_PROC_MAX_MAP_COUNT "/proc/sys/vm/max_map_count" 91 92 // Sysfs file for transparent huge page on tmpfs 93 #define ZFILENAME_SHMEM_ENABLED "/sys/kernel/mm/transparent_hugepage/shmem_enabled" 94 95 // Java heap filename 96 #define ZFILENAME_HEAP "java_heap" 97 98 // Preferred tmpfs mount points, ordered by priority
99 static const char* z_preferred_tmpfs_mountpoints[] = { 100 "/dev/shm", 101 "/run/shm", 102 NULL 103 }; 104 105 // Preferred hugetlbfs mount points, ordered by priority 106 static const char* z_preferred_hugetlbfs_mountpoints[] = { 107 "/dev/hugepages", 108 "/hugepages", 109 NULL 110 }; 111 112 static int z_fallocate_hugetlbfs_attempts = 3; 113 static bool z_fallocate_supported = true; 114 115 ZPhysicalMemoryBacking::ZPhysicalMemoryBacking() : 116 _fd(-1), 117 _size(0), 118 _filesystem(0), 119 _block_size(0), 120 _available(0), 121 _initialized(false) { 122 123 // Create backing file 124 _fd = create_fd(ZFILENAME_HEAP); 125 if (_fd == -1) { 126 return; 127 } 128 129 // Get filesystem statistics 130 struct statfs buf; 131 if (fstatfs(_fd, &buf) == -1) { 132 ZErrno err; 133 log_error(gc)("Failed to determine filesystem type for backing file (%s)", err.to_string()); 134 return; 135 } 136 137 _filesystem = buf.f_type; 138 _block_size = buf.f_bsize; 139 _available = buf.f_bavail * _block_size; 140 141 log_info(gc, init)("Heap Backing Filesystem: %s (0x" UINT64_FORMAT_X ")", 142 is_tmpfs() ? ZFILESYSTEM_TMPFS : is_hugetlbfs() ? 
ZFILESYSTEM_HUGETLBFS : "other", _filesystem); 143 144 // Make sure the filesystem type matches requested large page type 145 if (ZLargePages::is_transparent() && !is_tmpfs()) { 146 log_error(gc)("-XX:+UseTransparentHugePages can only be enabled when using a %s filesystem", 147 ZFILESYSTEM_TMPFS); 148 return; 149 } 150 151 if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) { 152 log_error(gc)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel", 153 ZFILESYSTEM_TMPFS); 154 return; 155 } 156 157 if (ZLargePages::is_explicit() && !is_hugetlbfs()) { 158 log_error(gc)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled " 159 "when using a %s filesystem", ZFILESYSTEM_HUGETLBFS); 160 return; 161 } 162 163 if (!ZLargePages::is_explicit() && is_hugetlbfs()) { 164 log_error(gc)("-XX:+UseLargePages must be enabled when using a %s filesystem", 165 ZFILESYSTEM_HUGETLBFS); 166 return; 167 } 168 169 if (ZLargePages::is_explicit() && os::large_page_size() != ZGranuleSize) { 170 log_error(gc)("Incompatible large page size configured " SIZE_FORMAT " (expected " SIZE_FORMAT ")", 171 os::large_page_size(), ZGranuleSize); 172 return; 173 } 174 175 // Make sure the filesystem block size is compatible 176 if (ZGranuleSize % _block_size != 0) { 177 log_error(gc)("Filesystem backing the heap has incompatible block size (" SIZE_FORMAT ")", 178 _block_size); 179 return; 180 } 181 182 if (is_hugetlbfs() && _block_size != ZGranuleSize) { 183 log_error(gc)("%s filesystem has unexpected block size " SIZE_FORMAT " (expected " SIZE_FORMAT ")", 184 ZFILESYSTEM_HUGETLBFS, _block_size, ZGranuleSize); 185 return; 186 } 187 188 // Successfully initialized 189 _initialized = true; 190 } 191 192 int ZPhysicalMemoryBacking::create_mem_fd(const char* name) const { 193 // Create file name 194 char filename[PATH_MAX]; 195 snprintf(filename, sizeof(filename), "%s%s", name, ZLargePages::is_explicit() ? 
".hugetlb" : ""); 196 197 // Create file 198 const int extra_flags = ZLargePages::is_explicit() ? MFD_HUGETLB : 0; 199 const int fd = ZSyscall::memfd_create(filename, MFD_CLOEXEC | extra_flags); 200 if (fd == -1) { 201 ZErrno err; 202 log_debug(gc, init)("Failed to create memfd file (%s)", 203 ((ZLargePages::is_explicit() && err == EINVAL) ? "Hugepages not supported" : err.to_string())); 204 return -1; 205 } 206 207 log_info(gc, init)("Heap Backing File: /memfd:%s", filename); 208 209 return fd; 210 } 211 212 int ZPhysicalMemoryBacking::create_file_fd(const char* name) const { 213 const char* const filesystem = ZLargePages::is_explicit() 214 ? ZFILESYSTEM_HUGETLBFS 215 : ZFILESYSTEM_TMPFS; 216 const char** const preferred_mountpoints = ZLargePages::is_explicit() 217 ? z_preferred_hugetlbfs_mountpoints 218 : z_preferred_tmpfs_mountpoints; 219 220 // Find mountpoint 221 ZMountPoint mountpoint(filesystem, preferred_mountpoints); 222 if (mountpoint.get() == NULL) { 223 log_error(gc)("Use -XX:AllocateHeapAt to specify the path to a %s filesystem", filesystem); 224 return -1; 225 } 226 227 // Try to create an anonymous file using the O_TMPFILE flag. Note that this 228 // flag requires kernel >= 3.11. If this fails we fall back to open/unlink. 229 const int fd_anon = os::open(mountpoint.get(), O_TMPFILE|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR); 230 if (fd_anon == -1) { 231 ZErrno err; 232 log_debug(gc, init)("Failed to create anonymous file in %s (%s)", mountpoint.get(), 233 (err == EINVAL ? 
"Not supported" : err.to_string())); 234 } else { 235 // Get inode number for anonymous file 236 struct stat stat_buf; 237 if (fstat(fd_anon, &stat_buf) == -1) { 238 ZErrno err; 239 log_error(gc)("Failed to determine inode number for anonymous file (%s)", err.to_string()); 240 return -1; 241 } 242 243 log_info(gc, init)("Heap Backing File: %s/#" UINT64_FORMAT, mountpoint.get(), (uint64_t)stat_buf.st_ino); 244 245 return fd_anon; 246 } 247 248 log_debug(gc, init)("Falling back to open/unlink"); 249 250 // Create file name 251 char filename[PATH_MAX]; 252 snprintf(filename, sizeof(filename), "%s/%s.%d", mountpoint.get(), name, os::current_process_id()); 253 254 // Create file 255 const int fd = os::open(filename, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR); 256 if (fd == -1) { 257 ZErrno err; 258 log_error(gc)("Failed to create file %s (%s)", filename, err.to_string()); 259 return -1; 260 } 261 262 // Unlink file 263 if (unlink(filename) == -1) { 264 ZErrno err; 265 log_error(gc)("Failed to unlink file %s (%s)", filename, err.to_string()); 266 return -1; 267 } 268 269 log_info(gc, init)("Heap Backing File: %s", filename); 270 271 return fd; 272 } 273 274 int ZPhysicalMemoryBacking::create_fd(const char* name) const { 275 if (AllocateHeapAt == NULL) { 276 // If the path is not explicitly specified, then we first try to create a memfd file 277 // instead of looking for a tmpfd/hugetlbfs mount point. Note that memfd_create() might 278 // not be supported at all (requires kernel >= 3.17), or it might not support large 279 // pages (requires kernel >= 4.14). If memfd_create() fails, then we try to create a 280 // file on an accessible tmpfs or hugetlbfs mount point. 
281 const int fd = create_mem_fd(name); 282 if (fd != -1) { 283 return fd; 284 } 285 286 log_debug(gc, init)("Falling back to searching for an accessible mount point"); 287 } 288 289 return create_file_fd(name); 290 } 291 292 bool ZPhysicalMemoryBacking::is_initialized() const { 293 return _initialized; 294 } 295 296 void ZPhysicalMemoryBacking::warn_available_space(size_t max) const { 297 // Note that the available space on a tmpfs or a hugetlbfs filesystem 298 // will be zero if no size limit was specified when it was mounted. 299 if (_available == 0) { 300 // No size limit set, skip check 301 log_info(gc, init)("Available space on backing filesystem: N/A"); 302 return; 303 } 304 305 log_info(gc, init)("Available space on backing filesystem: " SIZE_FORMAT "M", _available / M); 306 307 // Warn if the filesystem doesn't currently have enough space available to hold 308 // the max heap size. The max heap size will be capped if we later hit this limit 309 // when trying to expand the heap. 310 if (_available < max) { 311 log_warning(gc)("***** WARNING! INCORRECT SYSTEM CONFIGURATION DETECTED! *****"); 312 log_warning(gc)("Not enough space available on the backing filesystem to hold the current max Java heap"); 313 log_warning(gc)("size (" SIZE_FORMAT "M). Please adjust the size of the backing filesystem accordingly " 314 "(available", max / M); 315 log_warning(gc)("space is currently " SIZE_FORMAT "M). 
Continuing execution with the current filesystem " 316 "size could", _available / M); 317 log_warning(gc)("lead to a premature OutOfMemoryError being thrown, due to failure to map memory."); 318 } 319 } 320 321 void ZPhysicalMemoryBacking::warn_max_map_count(size_t max) const { 322 const char* const filename = ZFILENAME_PROC_MAX_MAP_COUNT; 323 FILE* const file = fopen(filename, "r"); 324 if (file == NULL) { 325 // Failed to open file, skip check 326 log_debug(gc, init)("Failed to open %s", filename); 327 return; 328 } 329 330 size_t actual_max_map_count = 0; 331 const int result = fscanf(file, SIZE_FORMAT, &actual_max_map_count); 332 fclose(file); 333 if (result != 1) { 334 // Failed to read file, skip check 335 log_debug(gc, init)("Failed to read %s", filename); 336 return; 337 } 338 339 // The required max map count is impossible to calculate exactly since subsystems 340 // other than ZGC are also creating memory mappings, and we have no control over that. 341 // However, ZGC tends to create the most mappings and dominate the total count. 342 // In the worst cases, ZGC will map each granule three times, i.e. once per heap view. 343 // We speculate that we need another 20% to allow for non-ZGC subsystems to map memory. 344 const size_t required_max_map_count = (max / ZGranuleSize) * 3 * 1.2; 345 if (actual_max_map_count < required_max_map_count) { 346 log_warning(gc)("***** WARNING! INCORRECT SYSTEM CONFIGURATION DETECTED! *****"); 347 log_warning(gc)("The system limit on number of memory mappings per process might be too low for the given"); 348 log_warning(gc)("max Java heap size (" SIZE_FORMAT "M). Please adjust %s to allow for at", 349 max / M, filename); 350 log_warning(gc)("least " SIZE_FORMAT " mappings (current limit is " SIZE_FORMAT "). 
Continuing execution " 351 "with the current", required_max_map_count, actual_max_map_count); 352 log_warning(gc)("limit could lead to a fatal error, due to failure to map memory."); 353 } 354 } 355 356 void ZPhysicalMemoryBacking::warn_commit_limits(size_t max) const { 357 // Warn if available space is too low 358 warn_available_space(max); 359 360 // Warn if max map count is too low 361 warn_max_map_count(max); 362 } 363 364 size_t ZPhysicalMemoryBacking::size() const { 365 return _size; 366 } 367 368 bool ZPhysicalMemoryBacking::is_tmpfs() const { 369 return _filesystem == TMPFS_MAGIC; 370 } 371 372 bool ZPhysicalMemoryBacking::is_hugetlbfs() const { 373 return _filesystem == HUGETLBFS_MAGIC; 374 } 375 376 bool ZPhysicalMemoryBacking::tmpfs_supports_transparent_huge_pages() const { 377 // If the shmem_enabled file exists and is readable then we 378 // know the kernel supports transparent huge pages for tmpfs. 379 return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0; 380 } 381 382 ZErrno ZPhysicalMemoryBacking::fallocate_compat_ftruncate(size_t size) const { 383 while (ftruncate(_fd, size) == -1) { 384 if (errno != EINTR) { 385 // Failed 386 return errno; 387 } 388 } 389 390 // Success 391 return 0; 392 } 393 394 ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_hugetlbfs(size_t offset, size_t length, bool touch) const { 395 // On hugetlbfs, mapping a file segment will fail immediately, without 396 // the need to touch the mapped pages first, if there aren't enough huge 397 // pages available to back the mapping. 398 void* const addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset); 399 if (addr == MAP_FAILED) { 400 // Failed 401 return errno; 402 } 403 404 // Once mapped, the huge pages are only reserved. We need to touch them 405 // to associate them with the file segment. Note that we can not punch 406 // hole in file segments which only have reserved pages. 
407 if (touch) { 408 char* const start = (char*)addr; 409 char* const end = start + length; 410 os::pretouch_memory(start, end, _block_size); 411 } 412 413 // Unmap again. From now on, the huge pages that were mapped are allocated 414 // to this file. There's no risk of getting a SIGBUS when mapping and 415 // touching these pages again. 416 if (munmap(addr, length) == -1) { 417 // Failed 418 return errno; 419 } 420 421 // Success 422 return 0; 423 } 424 425 static bool safe_touch_mapping(void* addr, size_t length, size_t page_size) { 426 char* const start = (char*)addr; 427 char* const end = start + length; 428 429 // Touching a mapping that can't be backed by memory will generate a 430 // SIGBUS. By using SafeFetch32 any SIGBUS will be safely caught and 431 // handled. On tmpfs, doing a fetch (rather than a store) is enough 432 // to cause backing pages to be allocated (there's no zero-page to 433 // worry about). 434 for (char *p = start; p < end; p += page_size) { 435 if (SafeFetch32((int*)p, -1) == -1) { 436 // Failed 437 return false; 438 } 439 } 440 441 // Success 442 return true; 443 } 444 445 ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_tmpfs(size_t offset, size_t length) const { 446 // On tmpfs, we need to touch the mapped pages to figure out 447 // if there are enough pages available to back the mapping. 448 void* const addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset); 449 if (addr == MAP_FAILED) { 450 // Failed 451 return errno; 452 } 453 454 // Advise mapping to use transparent huge pages 455 os::realign_memory((char*)addr, length, os::large_page_size()); 456 457 // Touch the mapping (safely) to make sure it's backed by memory 458 const bool backed = safe_touch_mapping(addr, length, _block_size); 459 460 // Unmap again. If successfully touched, the backing memory will 461 // be allocated to this file. There's no risk of getting a SIGBUS 462 // when mapping and touching these pages again. 
463 if (munmap(addr, length) == -1) { 464 // Failed 465 return errno; 466 } 467 468 // Success 469 return backed ? 0 : ENOMEM; 470 } 471 472 ZErrno ZPhysicalMemoryBacking::fallocate_compat_pwrite(size_t offset, size_t length) const { 473 uint8_t data = 0; 474 475 // Allocate backing memory by writing to each block 476 for (size_t pos = offset; pos < offset + length; pos += _block_size) { 477 if (pwrite(_fd, &data, sizeof(data), pos) == -1) { 478 // Failed 479 return errno; 480 } 481 } 482 483 // Success 484 return 0; 485 } 486 487 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_compat(size_t offset, size_t length) { 488 // fallocate(2) is only supported by tmpfs since Linux 3.5, and by hugetlbfs 489 // since Linux 4.3. When fallocate(2) is not supported we emulate it using 490 // mmap/munmap (for hugetlbfs and tmpfs with transparent huge pages) or pwrite 491 // (for tmpfs without transparent huge pages and other filesystem types). 492 493 const size_t end = offset + length; 494 if (end > _size) { 495 // Increase file size 496 const ZErrno err = fallocate_compat_ftruncate(end); 497 if (err) { 498 // Failed 499 return err; 500 } 501 } 502 503 // Allocate backing memory 504 const ZErrno err = ZLargePages::is_explicit() 505 ? fallocate_compat_mmap_hugetlbfs(offset, length, false /* touch */) 506 : (ZLargePages::is_transparent() 507 ? 
fallocate_compat_mmap_tmpfs(offset, length) 508 : fallocate_compat_pwrite(offset, length)); 509 510 if (err) { 511 if (end > _size) { 512 // Restore file size 513 fallocate_compat_ftruncate(_size); 514 } 515 516 // Failed 517 return err; 518 } 519 520 if (end > _size) { 521 // Record new file size 522 _size = end; 523 } 524 525 // Success 526 return 0; 527 } 528 529 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_syscall(size_t offset, size_t length) { 530 const int mode = 0; // Allocate 531 const int res = ZSyscall::fallocate(_fd, mode, offset, length); 532 if (res == -1) { 533 // Failed 534 return errno; 535 } 536 537 const size_t end = offset + length; 538 if (end > _size) { 539 // Record new file size 540 _size = end; 541 } 542 543 // Success 544 return 0; 545 } 546 547 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole(size_t offset, size_t length) { 548 // Using compat mode is more efficient when allocating space on hugetlbfs. 549 // Note that allocating huge pages this way will only reserve them, and not 550 // associate them with segments of the file. We must guarantee that we at 551 // some point touch these segments, otherwise we can not punch hole in them. 552 // Also note that we need to use compat mode when using transparent huge pages, 553 // since we need to use madvise(2) on the mapping before the page is allocated. 
554 if (z_fallocate_supported && !ZLargePages::is_enabled()) { 555 const ZErrno err = fallocate_fill_hole_syscall(offset, length); 556 if (!err) { 557 // Success 558 return 0; 559 } 560 561 if (err != ENOSYS && err != EOPNOTSUPP) { 562 // Failed 563 return err; 564 } 565 566 // Not supported 567 log_debug(gc)("Falling back to fallocate() compatibility mode"); 568 z_fallocate_supported = false; 569 } 570 571 return fallocate_fill_hole_compat(offset, length); 572 } 573 574 ZErrno ZPhysicalMemoryBacking::fallocate_punch_hole(size_t offset, size_t length) { 575 if (ZLargePages::is_explicit()) { 576 // We can only punch hole in pages that have been touched. Non-touched 577 // pages are only reserved, and not associated with any specific file 578 // segment. We don't know which pages have been previously touched, so 579 // we always touch them here to guarantee that we can punch hole. 580 const ZErrno err = fallocate_compat_mmap_hugetlbfs(offset, length, true /* touch */); 581 if (err) { 582 // Failed 583 return err; 584 } 585 } 586 587 const int mode = FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE; 588 if (ZSyscall::fallocate(_fd, mode, offset, length) == -1) { 589 // Failed 590 return errno; 591 } 592 593 // Success 594 return 0; 595 } 596 597 ZErrno ZPhysicalMemoryBacking::split_and_fallocate(bool punch_hole, size_t offset, size_t length) { 598 // Try first half 599 const size_t offset0 = offset; 600 const size_t length0 = align_up(length / 2, _block_size); 601 const ZErrno err0 = fallocate(punch_hole, offset0, length0); 602 if (err0) { 603 return err0; 604 } 605 606 // Try second half 607 const size_t offset1 = offset0 + length0; 608 const size_t length1 = length - length0; 609 const ZErrno err1 = fallocate(punch_hole, offset1, length1); 610 if (err1) { 611 return err1; 612 } 613 614 // Success 615 return 0; 616 } 617 618 ZErrno ZPhysicalMemoryBacking::fallocate(bool punch_hole, size_t offset, size_t length) { 619 assert(is_aligned(offset, _block_size), "Invalid offset"); 
620 assert(is_aligned(length, _block_size), "Invalid length"); 621 622 const ZErrno err = punch_hole ? fallocate_punch_hole(offset, length) : fallocate_fill_hole(offset, length); 623 if (err == EINTR && length > _block_size) { 624 // Calling fallocate(2) with a large length can take a long time to 625 // complete. When running profilers, such as VTune, this syscall will 626 // be constantly interrupted by signals. Expanding the file in smaller 627 // steps avoids this problem. 628 return split_and_fallocate(punch_hole, offset, length); 629 } 630 631 return err; 632 } 633 634 bool ZPhysicalMemoryBacking::commit_inner(size_t offset, size_t length) { 635 log_trace(gc, heap)("Committing memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)", 636 offset / M, (offset + length) / M, length / M); 637 638 retry: 639 const ZErrno err = fallocate(false /* punch_hole */, offset, length); 640 if (err) { 641 if (err == ENOSPC && !is_init_completed() && ZLargePages::is_explicit() && z_fallocate_hugetlbfs_attempts-- > 0) { 642 // If we fail to allocate during initialization, due to lack of space on 643 // the hugetlbfs filesystem, then we wait and retry a few times before 644 // giving up. Otherwise there is a risk that running JVMs back-to-back 645 // will fail, since there is a delay between process termination and the 646 // huge pages owned by that process being returned to the huge page pool 647 // and made available for new allocations. 648 log_debug(gc, init)("Failed to commit memory (%s), retrying", err.to_string()); 649 650 // Wait and retry in one second, in the hope that huge pages will be 651 // available by then. 
652 sleep(1); 653 goto retry; 654 } 655 656 // Failed 657 log_error(gc)("Failed to commit memory (%s)", err.to_string()); 658 return false; 659 } 660 661 // Success 662 return true; 663 } 664 665 static int offset_to_node(size_t offset) { 666 const GrowableArray<int>* mapping = os::Linux::numa_nindex_to_node(); 667 const size_t nindex = (offset >> ZGranuleSizeShift) % mapping->length(); 668 return mapping->at((int)nindex); 669 } 670 671 size_t ZPhysicalMemoryBacking::commit_numa_interleaved(size_t offset, size_t length) { 672 size_t committed = 0; 673 674 // Commit one granule at a time, so that each granule 675 // can be allocated from a different preferred node. 676 while (committed < length) { 677 const size_t granule_offset = offset + committed; 678 679 // Setup NUMA policy to allocate memory from a preferred node 680 os::Linux::numa_set_preferred(offset_to_node(granule_offset)); 681 682 if (!commit_inner(granule_offset, ZGranuleSize)) { 683 // Failed 684 break; 685 } 686 687 committed += ZGranuleSize; 688 } 689 690 // Restore NUMA policy 691 os::Linux::numa_set_preferred(-1); 692 693 return committed; 694 } 695 696 size_t ZPhysicalMemoryBacking::commit_default(size_t offset, size_t length) { 697 // Try to commit the whole region 698 if (commit_inner(offset, length)) { 699 // Success 700 return length; 701 } 702 703 // Failed, try to commit as much as possible 704 size_t start = offset; 705 size_t end = offset + length; 706 707 for (;;) { 708 length = align_down((end - start) / 2, ZGranuleSize); 709 if (length < ZGranuleSize) { 710 // Done, don't commit more 711 return start - offset; 712 } 713 714 if (commit_inner(start, length)) { 715 // Success, try commit more 716 start += length; 717 } else { 718 // Failed, try commit less 719 end -= length; 720 } 721 } 722 } 723 724 size_t ZPhysicalMemoryBacking::commit(size_t offset, size_t length) { 725 if (ZNUMA::is_enabled() && !ZLargePages::is_explicit()) { 726 // To get granule-level NUMA interleaving when using 
non-large pages, 727 // we must explicitly interleave the memory at commit/fallocate time. 728 return commit_numa_interleaved(offset, length); 729 } 730 731 return commit_default(offset, length); 732 } 733 734 size_t ZPhysicalMemoryBacking::uncommit(size_t offset, size_t length) { 735 log_trace(gc, heap)("Uncommitting memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)", 736 offset / M, (offset + length) / M, length / M); 737 738 const ZErrno err = fallocate(true /* punch_hole */, offset, length); 739 if (err) { 740 log_error(gc)("Failed to uncommit memory (%s)", err.to_string()); 741 return 0; 742 } 743 744 return length; 745 } 746 747 void ZPhysicalMemoryBacking::map(uintptr_t addr, size_t size, uintptr_t offset) const { 748 const void* const res = mmap((void*)addr, size, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, _fd, offset); 749 if (res == MAP_FAILED) { 750 ZErrno err; 751 fatal("Failed to map memory (%s)", err.to_string()); 752 } 753 } 754 755 void ZPhysicalMemoryBacking::unmap(uintptr_t addr, size_t size) const { 756 // Note that we must keep the address space reservation intact and just detach 757 // the backing memory. For this reason we map a new anonymous, non-accessible 758 // and non-reserved page over the mapping instead of actually unmapping. 759 const void* const res = mmap((void*)addr, size, PROT_NONE, MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0); 760 if (res == MAP_FAILED) { 761 ZErrno err; 762 fatal("Failed to map memory (%s)", err.to_string()); 763 } 764 }