New src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking

   1 /*
   2  * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 #include "precompiled.hpp"
  25 #include "gc/z/zArray.inline.hpp"
  26 #include "gc/z/zErrno.hpp"
  27 #include "gc/z/zGlobals.hpp"
  28 #include "gc/z/zLargePages.inline.hpp"
  29 #include "gc/z/zMountPoint_linux.hpp"
  30 #include "gc/z/zNUMA.inline.hpp"
  31 #include "gc/z/zPhysicalMemoryBacking_linux.hpp"
  32 #include "gc/z/zSyscall_linux.hpp"
  33 #include "logging/log.hpp"
  34 #include "runtime/init.hpp"
  35 #include "runtime/os.hpp"
  36 #include "utilities/align.hpp"
  37 #include "utilities/debug.hpp"
  38 #include "utilities/growableArray.hpp"
  39 
  40 #include <fcntl.h>
  41 #include <stdio.h>
  42 #include <sys/mman.h>
  43 #include <sys/stat.h>
  44 #include <sys/statfs.h>
  45 #include <sys/types.h>
  46 #include <unistd.h>
  47 
//
// Support for building on older Linux systems
//
// Each constant below is only defined here when the system headers
// used for the build do not already provide it.
//

// memfd_create(2) flags
#ifndef MFD_CLOEXEC
#define MFD_CLOEXEC                      0x0001U
#endif
#ifndef MFD_HUGETLB
#define MFD_HUGETLB                      0x0004U
#endif

// open(2) flags
#ifndef O_CLOEXEC
#define O_CLOEXEC                        02000000
#endif
#ifndef O_TMPFILE
#define O_TMPFILE                        (020000000 | O_DIRECTORY)
#endif

// fallocate(2) flags
#ifndef FALLOC_FL_KEEP_SIZE
#define FALLOC_FL_KEEP_SIZE              0x01
#endif
#ifndef FALLOC_FL_PUNCH_HOLE
#define FALLOC_FL_PUNCH_HOLE             0x02
#endif

// Filesystem types, see statfs(2)
#ifndef TMPFS_MAGIC
#define TMPFS_MAGIC                      0x01021994
#endif
#ifndef HUGETLBFS_MAGIC
#define HUGETLBFS_MAGIC                  0x958458f6
#endif

// Filesystem names
#define ZFILESYSTEM_TMPFS                "tmpfs"
#define ZFILESYSTEM_HUGETLBFS            "hugetlbfs"

// Proc file entry for max map count
#define ZFILENAME_PROC_MAX_MAP_COUNT     "/proc/sys/vm/max_map_count"

// Sysfs file for transparent huge page on tmpfs
#define ZFILENAME_SHMEM_ENABLED          "/sys/kernel/mm/transparent_hugepage/shmem_enabled"

// Java heap filename
#define ZFILENAME_HEAP                   "java_heap"

// Preferred tmpfs mount points, ordered by priority
// (NULL-terminated list)
static const char* z_preferred_tmpfs_mountpoints[] = {
  "/dev/shm",
  "/run/shm",
  NULL
};

// Preferred hugetlbfs mount points, ordered by priority
// (NULL-terminated list)
static const char* z_preferred_hugetlbfs_mountpoints[] = {
  "/dev/hugepages",
  "/hugepages",
  NULL
};

// Remaining number of times commit_inner() may sleep and retry when a
// commit on hugetlbfs fails with ENOSPC before initialization has completed.
static int z_fallocate_hugetlbfs_attempts = 3;

// Cleared by fallocate_fill_hole() the first time fallocate(2) reports
// ENOSYS or EOPNOTSUPP; from then on only the compatibility path is used.
static bool z_fallocate_supported = true;
 113 
 114 ZPhysicalMemoryBacking::ZPhysicalMemoryBacking() :
 115     _fd(-1),
 116     _size(0),
 117     _filesystem(0),
 118     _block_size(0),
 119     _available(0),
 120     _initialized(false) {
 121 
 122   // Create backing file
 123   _fd = create_fd(ZFILENAME_HEAP);
 124   if (_fd == -1) {
 125     return;
 126   }
 127 
 128   // Get filesystem statistics
 129   struct statfs buf;
 130   if (fstatfs(_fd, &buf) == -1) {
 131     ZErrno err;
 132     log_error(gc)("Failed to determine filesystem type for backing file (%s)", err.to_string());
 133     return;
 134   }
 135 
 136   _filesystem = buf.f_type;
 137   _block_size = buf.f_bsize;
 138   _available = buf.f_bavail * _block_size;
 139 
 140   // Make sure we're on a supported filesystem
 141   if (!ZAllowHeapOnFileSystem && !is_tmpfs() && !is_hugetlbfs()) {
 142     log_error(gc)("Backing file must be located on a %s or a %s filesystem",
 143                   ZFILESYSTEM_TMPFS, ZFILESYSTEM_HUGETLBFS);
 144     return;
 145   }
 146 
 147   // Make sure the filesystem type matches requested large page type
 148   if (!ZAllowHeapOnFileSystem && ZLargePages::is_transparent() && !is_tmpfs()) {
 149     log_error(gc)("-XX:+UseTransparentHugePages can only be enable when using a %s filesystem",
 150                   ZFILESYSTEM_TMPFS);
 151     return;
 152   }
 153 
 154   if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) {
 155     log_error(gc)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel",
 156                   ZFILESYSTEM_TMPFS);
 157     return;
 158   }
 159 
 160   if (ZLargePages::is_explicit() && !is_hugetlbfs()) {
 161     log_error(gc)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled "
 162                   "when using a %s filesystem", ZFILESYSTEM_HUGETLBFS);
 163     return;
 164   }
 165 
 166   if (!ZLargePages::is_explicit() && is_hugetlbfs()) {
 167     log_error(gc)("-XX:+UseLargePages must be enabled when using a %s filesystem",
 168                   ZFILESYSTEM_HUGETLBFS);
 169     return;
 170   }
 171 
 172   const size_t expected_block_size = is_tmpfs() ? os::vm_page_size() : os::large_page_size();
 173   if (!ZAllowHeapOnFileSystem && (expected_block_size != _block_size)) {
 174     log_error(gc)("%s filesystem has unexpected block size " SIZE_FORMAT " (expected " SIZE_FORMAT ")",
 175                   is_tmpfs() ? ZFILESYSTEM_TMPFS : ZFILESYSTEM_HUGETLBFS, _block_size, expected_block_size);
 176     return;
 177   }
 178 
 179   // Successfully initialized
 180   _initialized = true;
 181 }
 182 
 183 int ZPhysicalMemoryBacking::create_mem_fd(const char* name) const {
 184   // Create file name
 185   char filename[PATH_MAX];
 186   snprintf(filename, sizeof(filename), "%s%s", name, ZLargePages::is_explicit() ? ".hugetlb" : "");
 187 
 188   // Create file
 189   const int extra_flags = ZLargePages::is_explicit() ? MFD_HUGETLB : 0;
 190   const int fd = ZSyscall::memfd_create(filename, MFD_CLOEXEC | extra_flags);
 191   if (fd == -1) {
 192     ZErrno err;
 193     log_debug(gc, init)("Failed to create memfd file (%s)",
 194                         ((ZLargePages::is_explicit() && err == EINVAL) ? "Hugepages not supported" : err.to_string()));
 195     return -1;
 196   }
 197 
 198   log_info(gc, init)("Heap backed by file: /memfd:%s", filename);
 199 
 200   return fd;
 201 }
 202 
 203 int ZPhysicalMemoryBacking::create_file_fd(const char* name) const {
 204   if (ZAllowHeapOnFileSystem && (AllocateHeapAt == NULL)) {
 205     log_error(gc)("-XX:AllocateHeapAt is needed when ZAllowHeapOnFileSystem is specified");
 206     return -1;
 207   }
 208 
 209   const char* const filesystem = ZLargePages::is_explicit()
 210                                  ? ZFILESYSTEM_HUGETLBFS
 211                                  : ZFILESYSTEM_TMPFS;
 212   const char** const preferred_mountpoints = ZLargePages::is_explicit()
 213                                              ? z_preferred_hugetlbfs_mountpoints
 214                                              : z_preferred_tmpfs_mountpoints;
 215 
 216   // Find mountpoint
 217   ZMountPoint mountpoint(filesystem, preferred_mountpoints);
 218   if (mountpoint.get() == NULL) {
 219     log_error(gc)("Use -XX:AllocateHeapAt to specify the path to a %s filesystem", filesystem);
 220     return -1;
 221   }
 222 
 223   // Try to create an anonymous file using the O_TMPFILE flag. Note that this
 224   // flag requires kernel >= 3.11. If this fails we fall back to open/unlink.
 225   const int fd_anon = os::open(mountpoint.get(), O_TMPFILE|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
 226   if (fd_anon == -1) {
 227     ZErrno err;
 228     log_debug(gc, init)("Failed to create anonymous file in %s (%s)", mountpoint.get(),
 229                         (err == EINVAL ? "Not supported" : err.to_string()));
 230   } else {
 231     // Get inode number for anonymous file
 232     struct stat stat_buf;
 233     if (fstat(fd_anon, &stat_buf) == -1) {
 234       ZErrno err;
 235       log_error(gc)("Failed to determine inode number for anonymous file (%s)", err.to_string());
 236       return -1;
 237     }
 238 
 239     log_info(gc, init)("Heap backed by file: %s/#" UINT64_FORMAT, mountpoint.get(), (uint64_t)stat_buf.st_ino);
 240 
 241     return fd_anon;
 242   }
 243 
 244   log_debug(gc, init)("Falling back to open/unlink");
 245 
 246   // Create file name
 247   char filename[PATH_MAX];
 248   snprintf(filename, sizeof(filename), "%s/%s.%d", mountpoint.get(), name, os::current_process_id());
 249 
 250   // Create file
 251   const int fd = os::open(filename, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
 252   if (fd == -1) {
 253     ZErrno err;
 254     log_error(gc)("Failed to create file %s (%s)", filename, err.to_string());
 255     return -1;
 256   }
 257 
 258   // Unlink file
 259   if (unlink(filename) == -1) {
 260     ZErrno err;
 261     log_error(gc)("Failed to unlink file %s (%s)", filename, err.to_string());
 262     return -1;
 263   }
 264 
 265   log_info(gc, init)("Heap backed by file: %s", filename);
 266 
 267   return fd;
 268 }
 269 
 270 int ZPhysicalMemoryBacking::create_fd(const char* name) const {
 271   if (AllocateHeapAt == NULL) {
 272     // If the path is not explicitly specified, then we first try to create a memfd file
 273     // instead of looking for a tmpfd/hugetlbfs mount point. Note that memfd_create() might
 274     // not be supported at all (requires kernel >= 3.17), or it might not support large
 275     // pages (requires kernel >= 4.14). If memfd_create() fails, then we try to create a
 276     // file on an accessible tmpfs or hugetlbfs mount point.
 277     const int fd = create_mem_fd(name);
 278     if (fd != -1) {
 279       return fd;
 280     }
 281 
 282     log_debug(gc, init)("Falling back to searching for an accessible mount point");
 283   }
 284 
 285   return create_file_fd(name);
 286 }
 287 
 288 bool ZPhysicalMemoryBacking::is_initialized() const {
 289   return _initialized;
 290 }
 291 
 292 void ZPhysicalMemoryBacking::warn_available_space(size_t max) const {
 293   // Note that the available space on a tmpfs or a hugetlbfs filesystem
 294   // will be zero if no size limit was specified when it was mounted.
 295   if (_available == 0) {
 296     // No size limit set, skip check
 297     log_info(gc, init)("Available space on backing filesystem: N/A");
 298     return;
 299   }
 300 
 301   log_info(gc, init)("Available space on backing filesystem: " SIZE_FORMAT "M", _available / M);
 302 
 303   // Warn if the filesystem doesn't currently have enough space available to hold
 304   // the max heap size. The max heap size will be capped if we later hit this limit
 305   // when trying to expand the heap.
 306   if (_available < max) {
 307     log_warning(gc)("***** WARNING! INCORRECT SYSTEM CONFIGURATION DETECTED! *****");
 308     log_warning(gc)("Not enough space available on the backing filesystem to hold the current max Java heap");
 309     log_warning(gc)("size (" SIZE_FORMAT "M). Please adjust the size of the backing filesystem accordingly "
 310                     "(available", max / M);
 311     log_warning(gc)("space is currently " SIZE_FORMAT "M). Continuing execution with the current filesystem "
 312                     "size could", _available / M);
 313     log_warning(gc)("lead to a premature OutOfMemoryError being thrown, due to failure to map memory.");
 314   }
 315 }
 316 
 317 void ZPhysicalMemoryBacking::warn_max_map_count(size_t max) const {
 318   const char* const filename = ZFILENAME_PROC_MAX_MAP_COUNT;
 319   FILE* const file = fopen(filename, "r");
 320   if (file == NULL) {
 321     // Failed to open file, skip check
 322     log_debug(gc, init)("Failed to open %s", filename);
 323     return;
 324   }
 325 
 326   size_t actual_max_map_count = 0;
 327   const int result = fscanf(file, SIZE_FORMAT, &actual_max_map_count);
 328   fclose(file);
 329   if (result != 1) {
 330     // Failed to read file, skip check
 331     log_debug(gc, init)("Failed to read %s", filename);
 332     return;
 333   }
 334 
 335   // The required max map count is impossible to calculate exactly since subsystems
 336   // other than ZGC are also creating memory mappings, and we have no control over that.
 337   // However, ZGC tends to create the most mappings and dominate the total count.
 338   // In the worst cases, ZGC will map each granule three times, i.e. once per heap view.
 339   // We speculate that we need another 20% to allow for non-ZGC subsystems to map memory.
 340   const size_t required_max_map_count = (max / ZGranuleSize) * 3 * 1.2;
 341   if (actual_max_map_count < required_max_map_count) {
 342     log_warning(gc)("***** WARNING! INCORRECT SYSTEM CONFIGURATION DETECTED! *****");
 343     log_warning(gc)("The system limit on number of memory mappings per process might be too low for the given");
 344     log_warning(gc)("max Java heap size (" SIZE_FORMAT "M). Please adjust %s to allow for at",
 345                     max / M, filename);
 346     log_warning(gc)("least " SIZE_FORMAT " mappings (current limit is " SIZE_FORMAT "). Continuing execution "
 347                     "with the current", required_max_map_count, actual_max_map_count);
 348     log_warning(gc)("limit could lead to a fatal error, due to failure to map memory.");
 349   }
 350 }
 351 
 352 void ZPhysicalMemoryBacking::warn_commit_limits(size_t max) const {
 353   // Warn if available space is too low
 354   warn_available_space(max);
 355 
 356   // Warn if max map count is too low
 357   warn_max_map_count(max);
 358 }
 359 
 360 size_t ZPhysicalMemoryBacking::size() const {
 361   return _size;
 362 }
 363 
 364 bool ZPhysicalMemoryBacking::is_tmpfs() const {
 365   return _filesystem == TMPFS_MAGIC;
 366 }
 367 
 368 bool ZPhysicalMemoryBacking::is_hugetlbfs() const {
 369   return _filesystem == HUGETLBFS_MAGIC;
 370 }
 371 
 372 bool ZPhysicalMemoryBacking::tmpfs_supports_transparent_huge_pages() const {
 373   // If the shmem_enabled file exists and is readable then we
 374   // know the kernel supports transparent huge pages for tmpfs.
 375   return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0;
 376 }
 377 
 378 ZErrno ZPhysicalMemoryBacking::fallocate_compat_ftruncate(size_t size) const {
 379   while (ftruncate(_fd, size) == -1) {
 380     if (errno != EINTR) {
 381       // Failed
 382       return errno;
 383     }
 384   }
 385 
 386   // Success
 387   return 0;
 388 }
 389 
 390 ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap(size_t offset, size_t length, bool touch) const {
 391   // On hugetlbfs, mapping a file segment will fail immediately, without
 392   // the need to touch the mapped pages first, if there aren't enough huge
 393   // pages available to back the mapping.
 394   void* const addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
 395   if (addr == MAP_FAILED) {
 396     // Failed
 397     return errno;
 398   }
 399 
 400   // Once mapped, the huge pages are only reserved. We need to touch them
 401   // to associate them with the file segment. Note that we can not punch
 402   // hole in file segments which only have reserved pages.
 403   if (touch) {
 404     char* const start = (char*)addr;
 405     char* const end = start + length;
 406     os::pretouch_memory(start, end, _block_size);
 407   }
 408 
 409   // Unmap again. From now on, the huge pages that were mapped are allocated
 410   // to this file. There's no risk in getting SIGBUS when touching them.
 411   if (munmap(addr, length) == -1) {
 412     // Failed
 413     return errno;
 414   }
 415 
 416   // Success
 417   return 0;
 418 }
 419 
 420 ZErrno ZPhysicalMemoryBacking::fallocate_compat_pwrite(size_t offset, size_t length) const {
 421   uint8_t data = 0;
 422 
 423   // Allocate backing memory by writing to each block
 424   for (size_t pos = offset; pos < offset + length; pos += _block_size) {
 425     if (pwrite(_fd, &data, sizeof(data), pos) == -1) {
 426       // Failed
 427       return errno;
 428     }
 429   }
 430 
 431   // Success
 432   return 0;
 433 }
 434 
 435 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_compat(size_t offset, size_t length) {
 436   // fallocate(2) is only supported by tmpfs since Linux 3.5, and by hugetlbfs
 437   // since Linux 4.3. When fallocate(2) is not supported we emulate it using
 438   // ftruncate/pwrite (for tmpfs) or ftruncate/mmap/munmap (for hugetlbfs).
 439 
 440   const size_t end = offset + length;
 441   if (end > _size) {
 442     // Increase file size
 443     const ZErrno err = fallocate_compat_ftruncate(end);
 444     if (err) {
 445       // Failed
 446       return err;
 447     }
 448   }
 449 
 450   // Allocate backing memory
 451   const ZErrno err = is_hugetlbfs() ? fallocate_compat_mmap(offset, length, false /* touch */)
 452                                     : fallocate_compat_pwrite(offset, length);
 453   if (err) {
 454     if (end > _size) {
 455       // Restore file size
 456       fallocate_compat_ftruncate(_size);
 457     }
 458 
 459     // Failed
 460     return err;
 461   }
 462 
 463   if (end > _size) {
 464     // Record new file size
 465     _size = end;
 466   }
 467 
 468   // Success
 469   return 0;
 470 }
 471 
 472 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_syscall(size_t offset, size_t length) {
 473   const int mode = 0; // Allocate
 474   const int res = ZSyscall::fallocate(_fd, mode, offset, length);
 475   if (res == -1) {
 476     // Failed
 477     return errno;
 478   }
 479 
 480   const size_t end = offset + length;
 481   if (end > _size) {
 482     // Record new file size
 483     _size = end;
 484   }
 485 
 486   // Success
 487   return 0;
 488 }
 489 
 490 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole(size_t offset, size_t length) {
 491   // Using compat mode is more efficient when allocating space on hugetlbfs.
 492   // Note that allocating huge pages this way will only reserve them, and not
 493   // associate them with segments of the file. We must guarantee that we at
 494   // some point touch these segments, otherwise we can not punch hole in them.
 495   if (z_fallocate_supported && !is_hugetlbfs()) {
 496      const ZErrno err = fallocate_fill_hole_syscall(offset, length);
 497      if (!err) {
 498        // Success
 499        return 0;
 500      }
 501 
 502      if (err != ENOSYS && err != EOPNOTSUPP) {
 503        // Failed
 504        return err;
 505      }
 506 
 507      // Not supported
 508      log_debug(gc)("Falling back to fallocate() compatibility mode");
 509      z_fallocate_supported = false;
 510   }
 511 
 512   return fallocate_fill_hole_compat(offset, length);
 513 }
 514 
 515 ZErrno ZPhysicalMemoryBacking::fallocate_punch_hole(size_t offset, size_t length) {
 516   if (is_hugetlbfs()) {
 517     // We can only punch hole in pages that have been touched. Non-touched
 518     // pages are only reserved, and not associated with any specific file
 519     // segment. We don't know which pages have been previously touched, so
 520     // we always touch them here to guarantee that we can punch hole.
 521     const ZErrno err = fallocate_compat_mmap(offset, length, true /* touch */);
 522     if (err) {
 523       // Failed
 524       return err;
 525     }
 526   }
 527 
 528   const int mode = FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE;
 529   if (ZSyscall::fallocate(_fd, mode, offset, length) == -1) {
 530     // Failed
 531     return errno;
 532   }
 533 
 534   // Success
 535   return 0;
 536 }
 537 
 538 ZErrno ZPhysicalMemoryBacking::split_and_fallocate(bool punch_hole, size_t offset, size_t length) {
 539   // Try first half
 540   const size_t offset0 = offset;
 541   const size_t length0 = align_up(length / 2, _block_size);
 542   const ZErrno err0 = fallocate(punch_hole, offset0, length0);
 543   if (err0) {
 544     return err0;
 545   }
 546 
 547   // Try second half
 548   const size_t offset1 = offset0 + length0;
 549   const size_t length1 = length - length0;
 550   const ZErrno err1 = fallocate(punch_hole, offset1, length1);
 551   if (err1) {
 552     return err1;
 553   }
 554 
 555   // Success
 556   return 0;
 557 }
 558 
 559 ZErrno ZPhysicalMemoryBacking::fallocate(bool punch_hole, size_t offset, size_t length) {
 560   assert(is_aligned(offset, _block_size), "Invalid offset");
 561   assert(is_aligned(length, _block_size), "Invalid length");
 562 
 563   const ZErrno err = punch_hole ? fallocate_punch_hole(offset, length) : fallocate_fill_hole(offset, length);
 564   if (err == EINTR && length > _block_size) {
 565     // Calling fallocate(2) with a large length can take a long time to
 566     // complete. When running profilers, such as VTune, this syscall will
 567     // be constantly interrupted by signals. Expanding the file in smaller
 568     // steps avoids this problem.
 569     return split_and_fallocate(punch_hole, offset, length);
 570   }
 571 
 572   return err;
 573 }
 574 
 575 bool ZPhysicalMemoryBacking::commit_inner(size_t offset, size_t length) {
 576   log_trace(gc, heap)("Committing memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)",
 577                       offset / M, (offset + length) / M, length / M);
 578 
 579 retry:
 580   const ZErrno err = fallocate(false /* punch_hole */, offset, length);
 581   if (err) {
 582     if (err == ENOSPC && !is_init_completed() && is_hugetlbfs() && z_fallocate_hugetlbfs_attempts-- > 0) {
 583       // If we fail to allocate during initialization, due to lack of space on
 584       // the hugetlbfs filesystem, then we wait and retry a few times before
 585       // giving up. Otherwise there is a risk that running JVMs back-to-back
 586       // will fail, since there is a delay between process termination and the
 587       // huge pages owned by that process being returned to the huge page pool
 588       // and made available for new allocations.
 589       log_debug(gc, init)("Failed to commit memory (%s), retrying", err.to_string());
 590 
 591       // Wait and retry in one second, in the hope that huge pages will be
 592       // available by then.
 593       sleep(1);
 594       goto retry;
 595     }
 596 
 597     // Failed
 598     log_error(gc)("Failed to commit memory (%s)", err.to_string());
 599     return false;
 600   }
 601 
 602   // Success
 603   return true;
 604 }
 605 
 606 static int offset_to_node(size_t offset) {
 607   const GrowableArray<int>* mapping = os::Linux::numa_nindex_to_node();
 608   const size_t nindex = (offset >> ZGranuleSizeShift) % mapping->length();
 609   return mapping->at((int)nindex);
 610 }
 611 
 612 size_t ZPhysicalMemoryBacking::commit_numa_interleaved(size_t offset, size_t length) {
 613   size_t committed = 0;
 614 
 615   // Commit one granule at a time, so that each granule
 616   // can be allocated from a different preferred node.
 617   while (committed < length) {
 618     const size_t granule_offset = offset + committed;
 619 
 620     // Setup NUMA policy to allocate memory from a preferred node
 621     os::Linux::numa_set_preferred(offset_to_node(granule_offset));
 622 
 623     if (!commit_inner(granule_offset, ZGranuleSize)) {
 624       // Failed
 625       break;
 626     }
 627 
 628     committed += ZGranuleSize;
 629   }
 630 
 631   // Restore NUMA policy
 632   os::Linux::numa_set_preferred(-1);
 633 
 634   return committed;
 635 }
 636 
 637 size_t ZPhysicalMemoryBacking::commit_default(size_t offset, size_t length) {
 638   // Try to commit the whole region
 639   if (commit_inner(offset, length)) {
 640     // Success
 641     return length;
 642   }
 643 
 644   // Failed, try to commit as much as possible
 645   size_t start = offset;
 646   size_t end = offset + length;
 647 
 648   for (;;) {
 649     length = align_down((end - start) / 2, ZGranuleSize);
 650     if (length < ZGranuleSize) {
 651       // Done, don't commit more
 652       return start - offset;
 653     }
 654 
 655     if (commit_inner(start, length)) {
 656       // Success, try commit more
 657       start += length;
 658     } else {
 659       // Failed, try commit less
 660       end -= length;
 661     }
 662   }
 663 }
 664 
 665 size_t ZPhysicalMemoryBacking::commit(size_t offset, size_t length) {
 666   if (ZNUMA::is_enabled() && !ZLargePages::is_explicit()) {
 667     // To get granule-level NUMA interleaving when using non-large pages,
 668     // we must explicitly interleave the memory at commit/fallocate time.
 669     return commit_numa_interleaved(offset, length);
 670   }
 671 
 672   return commit_default(offset, length);
 673 }
 674 
 675 size_t ZPhysicalMemoryBacking::uncommit(size_t offset, size_t length) {
 676   log_trace(gc, heap)("Uncommitting memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)",
 677                       offset / M, (offset + length) / M, length / M);
 678 
 679   const ZErrno err = fallocate(true /* punch_hole */, offset, length);
 680   if (err) {
 681     log_error(gc)("Failed to uncommit memory (%s)", err.to_string());
 682     return 0;
 683   }
 684 
 685   return length;
 686 }
 687 
 688 void ZPhysicalMemoryBacking::map(uintptr_t addr, size_t size, uintptr_t offset) const {
 689   const void* const res = mmap((void*)addr, size, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, _fd, offset);
 690   if (res == MAP_FAILED) {
 691     ZErrno err;
 692     fatal("Failed to map memory (%s)", err.to_string());
 693   }
 694 }
 695 
 696 void ZPhysicalMemoryBacking::unmap(uintptr_t addr, size_t size) const {
 697   // Note that we must keep the address space reservation intact and just detach
 698   // the backing memory. For this reason we map a new anonymous, non-accessible
 699   // and non-reserved page over the mapping instead of actually unmapping.
 700   const void* const res = mmap((void*)addr, size, PROT_NONE, MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
 701   if (res == MAP_FAILED) {
 702     ZErrno err;
 703     fatal("Failed to map memory (%s)", err.to_string());
 704   }
 705 }