
src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp (old version)





  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 #include "precompiled.hpp"
  25 #include "gc/z/zArray.inline.hpp"
  26 #include "gc/z/zErrno.hpp"
  27 #include "gc/z/zGlobals.hpp"
  28 #include "gc/z/zLargePages.inline.hpp"
  29 #include "gc/z/zMountPoint_linux.hpp"
  30 #include "gc/z/zNUMA.inline.hpp"
  31 #include "gc/z/zPhysicalMemoryBacking_linux.hpp"
  32 #include "gc/z/zSyscall_linux.hpp"
  33 #include "logging/log.hpp"
  34 #include "runtime/init.hpp"
  35 #include "runtime/os.hpp"

  36 #include "utilities/align.hpp"
  37 #include "utilities/debug.hpp"
  38 #include "utilities/growableArray.hpp"
  39 
  40 #include <fcntl.h>
  41 #include <stdio.h>
  42 #include <sys/mman.h>
  43 #include <sys/stat.h>
  44 #include <sys/statfs.h>
  45 #include <sys/types.h>
  46 #include <unistd.h>
  47 
  48 //
  49 // Support for building on older Linux systems
  50 //
  51 
  52 // memfd_create(2) flags
  53 #ifndef MFD_CLOEXEC
  54 #define MFD_CLOEXEC                      0x0001U
  55 #endif
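
The MFD_CLOEXEC fallback definition above only matters when compiling against headers that predate memfd_create(2). For illustration, a compat path on such systems typically invokes the system call directly; a minimal sketch (hypothetical helper, not the ZSyscall wrapper this file actually uses):

#include <sys/syscall.h>
#include <unistd.h>

// Hypothetical fallback: call memfd_create(2) through syscall(2) when the
// libc wrapper is missing. Assumes <sys/syscall.h> defines SYS_memfd_create;
// even older headers would need the raw __NR_memfd_create number instead.
// Returns a file descriptor, or -1 with errno set.
static int compat_memfd_create(const char* name, unsigned int flags) {
  return (int)syscall(SYS_memfd_create, name, flags);
}

// Example: int fd = compat_memfd_create("java_heap", MFD_CLOEXEC);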


 373 }
 374 
 375 bool ZPhysicalMemoryBacking::tmpfs_supports_transparent_huge_pages() const {
 376   // If the shmem_enabled file exists and is readable then we
 377   // know the kernel supports transparent huge pages for tmpfs.
 378   return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0;
 379 }
 380 
 381 ZErrno ZPhysicalMemoryBacking::fallocate_compat_ftruncate(size_t size) const {
 382   while (ftruncate(_fd, size) == -1) {
 383     if (errno != EINTR) {
 384       // Failed
 385       return errno;
 386     }
 387   }
 388 
 389   // Success
 390   return 0;
 391 }
 392 
 393 ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap(size_t offset, size_t length, bool touch) const {
 394   // On hugetlbfs, mapping a file segment will fail immediately, without
 395   // the need to touch the mapped pages first, if there aren't enough huge
 396   // pages available to back the mapping.
 397   void* const addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
 398   if (addr == MAP_FAILED) {
 399     // Failed
 400     return errno;
 401   }
 402 
 403   // Once mapped, the huge pages are only reserved. We need to touch them
 404   // to associate them with the file segment. Note that we cannot punch
 405   // holes in file segments that only have reserved pages.
 406   if (touch) {
 407     char* const start = (char*)addr;
 408     char* const end = start + length;
 409     os::pretouch_memory(start, end, _block_size);
 410   }
 411 
 412   // Unmap again. From now on, the huge pages that were mapped are allocated
 413   // to this file. There's no risk of getting SIGBUS when touching them.
 414   if (munmap(addr, length) == -1) {
 415     // Failed
 416     return errno;
 417   }
 418 
 419   // Success
 420   return 0;
 421 }
 422 

 423 ZErrno ZPhysicalMemoryBacking::fallocate_compat_pwrite(size_t offset, size_t length) const {
 424   uint8_t data = 0;
 425 
 426   // Allocate backing memory by writing to each block
 427   for (size_t pos = offset; pos < offset + length; pos += _block_size) {
 428     if (pwrite(_fd, &data, sizeof(data), pos) == -1) {
 429       // Failed
 430       return errno;
 431     }
 432   }
 433 
 434   // Success
 435   return 0;
 436 }
 437 
 438 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_compat(size_t offset, size_t length) {
 439   // fallocate(2) is only supported by tmpfs since Linux 3.5, and by hugetlbfs
 440   // since Linux 4.3. When fallocate(2) is not supported we emulate it using
 441   // ftruncate/pwrite (for tmpfs) or ftruncate/mmap/munmap (for hugetlbfs).

 442 
 443   const size_t end = offset + length;
 444   if (end > _size) {
 445     // Increase file size
 446     const ZErrno err = fallocate_compat_ftruncate(end);
 447     if (err) {
 448       // Failed
 449       return err;
 450     }
 451   }
 452 
 453   // Allocate backing memory
 454   const ZErrno err = is_hugetlbfs() ? fallocate_compat_mmap(offset, length, false /* touch */)
 455                                     : fallocate_compat_pwrite(offset, length);




 456   if (err) {
 457     if (end > _size) {
 458       // Restore file size
 459       fallocate_compat_ftruncate(_size);
 460     }
 461 
 462     // Failed
 463     return err;
 464   }
 465 
 466   if (end > _size) {
 467     // Record new file size
 468     _size = end;
 469   }
 470 
 471   // Success
 472   return 0;
 473 }
 474 
 475 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_syscall(size_t offset, size_t length) {


 478   if (res == -1) {
 479     // Failed
 480     return errno;
 481   }
 482 
 483   const size_t end = offset + length;
 484   if (end > _size) {
 485     // Record new file size
 486     _size = end;
 487   }
 488 
 489   // Success
 490   return 0;
 491 }
 492 
 493 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole(size_t offset, size_t length) {
 494   // Using compat mode is more efficient when allocating space on hugetlbfs.
 495   // Note that allocating huge pages this way will only reserve them, and not
 496   // associate them with segments of the file. We must guarantee that we at
 497   // some point touch these segments, otherwise we cannot punch holes in them.
 498   if (z_fallocate_supported && !is_hugetlbfs()) {
 499      const ZErrno err = fallocate_fill_hole_syscall(offset, length);
 500      if (!err) {
 501        // Success
 502        return 0;
 503      }
 504 
 505      if (err != ENOSYS && err != EOPNOTSUPP) {
 506        // Failed
 507        return err;
 508      }
 509 
 510      // Not supported
 511      log_debug(gc)("Falling back to fallocate() compatibility mode");
 512      z_fallocate_supported = false;
 513   }
 514 
 515   return fallocate_fill_hole_compat(offset, length);
 516 }
 517 
 518 ZErrno ZPhysicalMemoryBacking::fallocate_punch_hole(size_t offset, size_t length) {
 519   if (is_hugetlbfs()) {
 520     // We can only punch holes in pages that have been touched. Non-touched
 521     // pages are only reserved, and not associated with any specific file
 522     // segment. We don't know which pages have been previously touched, so
 523     // we always touch them here to guarantee that we can punch holes.
 524     const ZErrno err = fallocate_compat_mmap(offset, length, true /* touch */);
 525     if (err) {
 526       // Failed
 527       return err;
 528     }
 529   }
 530 
 531   const int mode = FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE;
 532   if (ZSyscall::fallocate(_fd, mode, offset, length) == -1) {
 533     // Failed
 534     return errno;
 535   }
 536 
 537   // Success
 538   return 0;
 539 }
 540 
 541 ZErrno ZPhysicalMemoryBacking::split_and_fallocate(bool punch_hole, size_t offset, size_t length) {
 542   // Try first half
 543   const size_t offset0 = offset;
 544   const size_t length0 = align_up(length / 2, _block_size);


 565 
 566   const ZErrno err = punch_hole ? fallocate_punch_hole(offset, length) : fallocate_fill_hole(offset, length);
 567   if (err == EINTR && length > _block_size) {
 568     // Calling fallocate(2) with a large length can take a long time to
 569     // complete. When running profilers, such as VTune, this syscall will
 570     // be constantly interrupted by signals. Expanding the file in smaller
 571     // steps avoids this problem.
 572     return split_and_fallocate(punch_hole, offset, length);
 573   }
 574 
 575   return err;
 576 }
 577 
 578 bool ZPhysicalMemoryBacking::commit_inner(size_t offset, size_t length) {
 579   log_trace(gc, heap)("Committing memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)",
 580                       offset / M, (offset + length) / M, length / M);
 581 
 582 retry:
 583   const ZErrno err = fallocate(false /* punch_hole */, offset, length);
 584   if (err) {
 585     if (err == ENOSPC && !is_init_completed() && is_hugetlbfs() && z_fallocate_hugetlbfs_attempts-- > 0) {
 586       // If we fail to allocate during initialization, due to lack of space on
 587       // the hugetlbfs filesystem, then we wait and retry a few times before
 588       // giving up. Otherwise there is a risk that running JVMs back-to-back
 589       // will fail, since there is a delay between process termination and the
 590       // huge pages owned by that process being returned to the huge page pool
 591       // and made available for new allocations.
 592       log_debug(gc, init)("Failed to commit memory (%s), retrying", err.to_string());
 593 
 594       // Wait and retry in one second, in the hope that huge pages will be
 595       // available by then.
 596       sleep(1);
 597       goto retry;
 598     }
 599 
 600     // Failed
 601     log_error(gc)("Failed to commit memory (%s)", err.to_string());
 602     return false;
 603   }
 604 
 605   // Success

src/hotspot/os/linux/gc/z/zPhysicalMemoryBacking_linux.cpp (new version)

  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  */
  23 
  24 #include "precompiled.hpp"
  25 #include "gc/z/zArray.inline.hpp"
  26 #include "gc/z/zErrno.hpp"
  27 #include "gc/z/zGlobals.hpp"
  28 #include "gc/z/zLargePages.inline.hpp"
  29 #include "gc/z/zMountPoint_linux.hpp"
  30 #include "gc/z/zNUMA.inline.hpp"
  31 #include "gc/z/zPhysicalMemoryBacking_linux.hpp"
  32 #include "gc/z/zSyscall_linux.hpp"
  33 #include "logging/log.hpp"
  34 #include "runtime/init.hpp"
  35 #include "runtime/os.hpp"
  36 #include "runtime/stubRoutines.hpp"
  37 #include "utilities/align.hpp"
  38 #include "utilities/debug.hpp"
  39 #include "utilities/growableArray.hpp"
  40 
  41 #include <fcntl.h>
  42 #include <stdio.h>
  43 #include <sys/mman.h>
  44 #include <sys/stat.h>
  45 #include <sys/statfs.h>
  46 #include <sys/types.h>
  47 #include <unistd.h>
  48 
  49 //
  50 // Support for building on older Linux systems
  51 //
  52 
  53 // memfd_create(2) flags
  54 #ifndef MFD_CLOEXEC
  55 #define MFD_CLOEXEC                      0x0001U
  56 #endif


 374 }
 375 
 376 bool ZPhysicalMemoryBacking::tmpfs_supports_transparent_huge_pages() const {
 377   // If the shmem_enabled file exists and is readable then we
 378   // know the kernel supports transparent huge pages for tmpfs.
 379   return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0;
 380 }
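ZFILENAME_SHMEM_ENABLED is defined earlier in the file (not shown in this excerpt); the standard sysfs location is /sys/kernel/mm/transparent_hugepage/shmem_enabled. A small illustrative helper, assuming that path, for inspecting the active policy by hand:

#include <cstdio>

// Hypothetical helper: print the kernel's shmem THP policy. The file lists
// the available policies with the active one in brackets, for example
// "always within_size advise [never] deny force".
static void print_shmem_thp_policy() {
  FILE* f = fopen("/sys/kernel/mm/transparent_hugepage/shmem_enabled", "r");
  if (f == NULL) {
    printf("shmem THP not supported by this kernel\n");
    return;
  }
  char line[256];
  if (fgets(line, sizeof(line), f) != NULL) {
    printf("shmem_enabled: %s", line);
  }
  fclose(f);
}
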
 381 
 382 ZErrno ZPhysicalMemoryBacking::fallocate_compat_ftruncate(size_t size) const {
 383   while (ftruncate(_fd, size) == -1) {
 384     if (errno != EINTR) {
 385       // Failed
 386       return errno;
 387     }
 388   }
 389 
 390   // Success
 391   return 0;
 392 }
 393 
 394 ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_hugetlbfs(size_t offset, size_t length, bool touch) const {
 395   // On hugetlbfs, mapping a file segment will fail immediately, without
 396   // the need to touch the mapped pages first, if there aren't enough huge
 397   // pages available to back the mapping.
 398   void* const addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
 399   if (addr == MAP_FAILED) {
 400     // Failed
 401     return errno;
 402   }
 403 
 404   // Once mapped, the huge pages are only reserved. We need to touch them
 405   // to associate them with the file segment. Note that we cannot punch
 406   // holes in file segments that only have reserved pages.
 407   if (touch) {
 408     char* const start = (char*)addr;
 409     char* const end = start + length;
 410     os::pretouch_memory(start, end, _block_size);
 411   }
 412 
 413   // Unmap again. From now on, the huge pages that were mapped are allocated
 414   // to this file. There's no risk of getting SIGBUS when touching them.
 415   if (munmap(addr, length) == -1) {
 416     // Failed
 417     return errno;
 418   }
 419 
 420   // Success
 421   return 0;
 422 }
 423 
 424 static bool is_mapping_backed_by_memory(void* addr, size_t length, size_t page_size) {
 425   char* const start = (char*)addr;
 426   char* const end = start + length;
 427 
 428   // Touch pages to make sure the mapping is backed. If the mapping can't
 429   // be backed we'll get a SIGBUS, which is why we're using SafeFetch32.
 430   // On tmpfs, doing a fetch (as opposed to a store) is enough to cause
 431   // the backing pages to be allocated.
 432   for (char *p = start; p < end; p += page_size) {
 433     if (SafeFetch32((int*)p, -1) == -1) {
 434       return false;
 435     }
 436   }
 437 
 438   return true;
 439 }
 440 
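SafeFetch32 (declared in stubRoutines.hpp, which this version of the file now includes) is a HotSpot stub that performs a load the VM's signal handler can recover from, so a failed touch reports an error instead of killing the process. Outside HotSpot the same idea can be approximated with a SIGBUS handler and sigsetjmp; a rough standalone sketch, for illustration only (not the HotSpot mechanism, and not robust enough for production use):

#include <csetjmp>
#include <csignal>

static sigjmp_buf probe_env;

static void probe_sigbus_handler(int) {
  siglongjmp(probe_env, 1);
}

// Returns true if reading *addr completed without a SIGBUS. A plain load from
// a tmpfs/hugetlbfs mapping that cannot be backed would otherwise crash.
static bool probe_readable(const volatile int* addr) {
  struct sigaction sa = {};
  struct sigaction old_sa = {};
  sa.sa_handler = probe_sigbus_handler;
  sigemptyset(&sa.sa_mask);
  sigaction(SIGBUS, &sa, &old_sa);

  bool ok = false;
  if (sigsetjmp(probe_env, 1) == 0) {
    (void)*addr;  // Forces the read; may fault if the page cannot be backed
    ok = true;
  }

  sigaction(SIGBUS, &old_sa, NULL);
  return ok;
}
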
 441 ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_tmpfs(size_t offset, size_t length) const {
 442   // On tmpfs, we need to touch the mapped pages to figure out
 443   // if there are enough pages available to back the mapping.
 444   void* const addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
 445   if (addr == MAP_FAILED) {
 446     // Failed
 447     return errno;
 448   }
 449 
 450   // Back the mapping with transparent huge pages
 451   os::realign_memory((char*)addr, length, os::large_page_size());
 452 
 453   // Check if the mapping can be backed by memory
 454   const bool backed = is_mapping_backed_by_memory(addr, length, _block_size);
 455 
 456   // Unmap again. From now on, if the mapping was backed, the pages that
 457   // were mapped are allocated to this file. There's no risk of getting
 458   // SIGBUS when touching them.
 459   if (munmap(addr, length) == -1) {
 460     // Failed
 461     return errno;
 462   }
 463 
 464   // Success
 465   return backed ? 0 : ENOMEM;
 466 }
 467 
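os::realign_memory is the HotSpot abstraction used above to request transparent huge page backing for the new shmem mapping; its implementation is not shown here. On Linux, the usual way to ask for THP on a mapped range is madvise(MADV_HUGEPAGE), which pairs with the "advise" policy in shmem_enabled. A hedged sketch of that request (hypothetical helper, alignment to the huge page size assumed):

#include <sys/mman.h>
#include <cstddef>

// Hypothetical helper: advise the kernel to back [addr, addr + length) with
// transparent huge pages. addr and length are assumed to be aligned to the
// huge page size. Returns true if the advice was accepted.
static bool advise_transparent_huge_pages(void* addr, size_t length) {
  return madvise(addr, length, MADV_HUGEPAGE) == 0;
}
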
 468 ZErrno ZPhysicalMemoryBacking::fallocate_compat_pwrite(size_t offset, size_t length) const {
 469   uint8_t data = 0;
 470 
 471   // Allocate backing memory by writing to each block
 472   for (size_t pos = offset; pos < offset + length; pos += _block_size) {
 473     if (pwrite(_fd, &data, sizeof(data), pos) == -1) {
 474       // Failed
 475       return errno;
 476     }
 477   }
 478 
 479   // Success
 480   return 0;
 481 }
 482 
 483 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_compat(size_t offset, size_t length) {
 484   // fallocate(2) is only supported by tmpfs since Linux 3.5, and by hugetlbfs
 485   // since Linux 4.3. When fallocate(2) is not supported we emulate it using
 486   // mmap/munmap (for hugetlbfs and tmpfs with transparent huge pages) or pwrite
 487   // (for tmpfs without transparent huge pages and other filesystem types).
 488 
 489   const size_t end = offset + length;
 490   if (end > _size) {
 491     // Increase file size
 492     const ZErrno err = fallocate_compat_ftruncate(end);
 493     if (err) {
 494       // Failed
 495       return err;
 496     }
 497   }
 498 
 499   // Allocate backing memory
 500   const ZErrno err = ZLargePages::is_explicit() ?
 501                      fallocate_compat_mmap_hugetlbfs(offset, length, false /* touch */) :
 502                      (ZLargePages::is_transparent() ?
 503                      fallocate_compat_mmap_tmpfs(offset, length) :
 504                      fallocate_compat_pwrite(offset, length));
 505 
 506   if (err) {
 507     if (end > _size) {
 508       // Restore file size
 509       fallocate_compat_ftruncate(_size);
 510     }
 511 
 512     // Failed
 513     return err;
 514   }
 515 
 516   if (end > _size) {
 517     // Record new file size
 518     _size = end;
 519   }
 520 
 521   // Success
 522   return 0;
 523 }
 524 
 525 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_syscall(size_t offset, size_t length) {


 528   if (res == -1) {
 529     // Failed
 530     return errno;
 531   }
 532 
 533   const size_t end = offset + length;
 534   if (end > _size) {
 535     // Record new file size
 536     _size = end;
 537   }
 538 
 539   // Success
 540   return 0;
 541 }
 542 
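The body of fallocate_fill_hole_syscall is collapsed in this view. For reference, fallocate(2) with mode 0 allocates backing blocks for the given range and extends the file if the range ends beyond the current size, which matches what the function's name implies. A minimal standalone illustration (plain libc call, not the ZSyscall wrapper; fallocate() may need _GNU_SOURCE with some toolchains):

#include <sys/types.h>
#include <fcntl.h>
#include <cerrno>

// Hypothetical illustration: allocate backing for [offset, offset + length)
// using fallocate(2) with mode 0 (allocate, growing the file if needed).
// Returns 0 on success or the failing errno, e.g. ENOSYS/EOPNOTSUPP when the
// kernel or filesystem does not support the call.
static int fill_hole_with_fallocate(int fd, off_t offset, off_t length) {
  if (fallocate(fd, 0 /* mode: allocate */, offset, length) == -1) {
    return errno;
  }
  return 0;
}
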
 543 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole(size_t offset, size_t length) {
 544   // Using compat mode is more efficient when allocating space on hugetlbfs.
 545   // Note that allocating huge pages this way will only reserve them, and not
 546   // associate them with segments of the file. We must guarantee that we at
 547   // some point touch these segments, otherwise we cannot punch holes in them.
 548   if (z_fallocate_supported && !ZLargePages::is_enabled()) {
 549      const ZErrno err = fallocate_fill_hole_syscall(offset, length);
 550      if (!err) {
 551        // Success
 552        return 0;
 553      }
 554 
 555      if (err != ENOSYS && err != EOPNOTSUPP) {
 556        // Failed
 557        return err;
 558      }
 559 
 560      // Not supported
 561      log_debug(gc)("Falling back to fallocate() compatibility mode");
 562      z_fallocate_supported = false;
 563   }
 564 
 565   return fallocate_fill_hole_compat(offset, length);
 566 }
 567 
 568 ZErrno ZPhysicalMemoryBacking::fallocate_punch_hole(size_t offset, size_t length) {
 569   if (ZLargePages::is_explicit()) {
 570     // We can only punch holes in pages that have been touched. Non-touched
 571     // pages are only reserved, and not associated with any specific file
 572     // segment. We don't know which pages have been previously touched, so
 573     // we always touch them here to guarantee that we can punch holes.
 574     const ZErrno err = fallocate_compat_mmap_hugetlbfs(offset, length, true /* touch */);
 575     if (err) {
 576       // Failed
 577       return err;
 578     }
 579   }
 580 
 581   const int mode = FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE;
 582   if (ZSyscall::fallocate(_fd, mode, offset, length) == -1) {
 583     // Failed
 584     return errno;
 585   }
 586 
 587   // Success
 588   return 0;
 589 }
 590 
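To see the effect of FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE in isolation: the file size stays the same while the punched range stops consuming backing blocks, which fstat(2) makes visible through st_blocks. A hypothetical demo, not part of this patch (assumes a filesystem that supports hole punching, e.g. tmpfs):

#include <fcntl.h>
#include <sys/stat.h>
#include <cstdio>

// Punch a hole and report the allocated block count before and after.
// FALLOC_FL_* come from <fcntl.h> with _GNU_SOURCE (or <linux/falloc.h>).
static void punch_hole_demo(int fd, off_t offset, off_t length) {
  struct stat st;
  fstat(fd, &st);
  printf("blocks before: %ld, size: %ld\n", (long)st.st_blocks, (long)st.st_size);

  if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, length) == -1) {
    perror("fallocate(PUNCH_HOLE)");
    return;
  }

  fstat(fd, &st);
  printf("blocks after:  %ld, size: %ld (unchanged)\n", (long)st.st_blocks, (long)st.st_size);
}
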
 591 ZErrno ZPhysicalMemoryBacking::split_and_fallocate(bool punch_hole, size_t offset, size_t length) {
 592   // Try first half
 593   const size_t offset0 = offset;
 594   const size_t length0 = align_up(length / 2, _block_size);


 615 
 616   const ZErrno err = punch_hole ? fallocate_punch_hole(offset, length) : fallocate_fill_hole(offset, length);
 617   if (err == EINTR && length > _block_size) {
 618     // Calling fallocate(2) with a large length can take a long time to
 619     // complete. When running profilers, such as VTune, this syscall will
 620     // be constantly interrupted by signals. Expanding the file in smaller
 621     // steps avoids this problem.
 622     return split_and_fallocate(punch_hole, offset, length);
 623   }
 624 
 625   return err;
 626 }
 627 
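The body of split_and_fallocate is collapsed above; its visible first lines suggest the range is halved on a _block_size boundary and each half issued separately, so that no single fallocate(2) call runs long enough to be interrupted over and over by profiler signals. A hedged sketch of that splitting pattern, using a generic do_fallocate callback (illustration only, not the actual JDK code):

#include <cstddef>

// Hypothetical sketch: split a large request into two block-aligned halves and
// issue them separately. do_fallocate(offset, length) is any callable that
// returns 0 on success or an errno value on failure.
template <typename Fallocate>
static int fallocate_in_halves(Fallocate do_fallocate,
                               size_t offset, size_t length, size_t block_size) {
  // First half, rounded up to a whole number of blocks
  const size_t half = length / 2;
  const size_t length0 = ((half + block_size - 1) / block_size) * block_size;

  const int err0 = do_fallocate(offset, length0);
  if (err0 != 0) {
    return err0;
  }

  // Second half covers the remainder
  return do_fallocate(offset + length0, length - length0);
}
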
 628 bool ZPhysicalMemoryBacking::commit_inner(size_t offset, size_t length) {
 629   log_trace(gc, heap)("Committing memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)",
 630                       offset / M, (offset + length) / M, length / M);
 631 
 632 retry:
 633   const ZErrno err = fallocate(false /* punch_hole */, offset, length);
 634   if (err) {
 635     if (err == ENOSPC && !is_init_completed() && ZLargePages::is_explicit() && z_fallocate_hugetlbfs_attempts-- > 0) {
 636       // If we fail to allocate during initialization, due to lack of space on
 637       // the hugetlbfs filesystem, then we wait and retry a few times before
 638       // giving up. Otherwise there is a risk that running JVMs back-to-back
 639       // will fail, since there is a delay between process termination and the
 640       // huge pages owned by that process being returned to the huge page pool
 641       // and made available for new allocations.
 642       log_debug(gc, init)("Failed to commit memory (%s), retrying", err.to_string());
 643 
 644       // Wait and retry in one second, in the hope that huge pages will be
 645       // available by then.
 646       sleep(1);
 647       goto retry;
 648     }
 649 
 650     // Failed
 651     log_error(gc)("Failed to commit memory (%s)", err.to_string());
 652     return false;
 653   }
 654 
 655   // Success


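The retry loop in commit_inner above depends on the huge page pool refilling shortly after a previous JVM exits. Whether that has happened can be observed directly in sysfs; a hedged sketch, assuming the default 2 MB huge page size (other sizes have their own hugepages-<size>kB directories):

#include <cstdio>

// Hypothetical helper: number of currently free huge pages in the default
// 2 MB pool, or -1 if the file cannot be read (no hugetlbfs support, or a
// different default huge page size).
static long free_huge_pages_2m() {
  FILE* f = fopen("/sys/kernel/mm/hugepages/hugepages-2048kB/free_hugepages", "r");
  if (f == NULL) {
    return -1;
  }
  long n = -1;
  if (fscanf(f, "%ld", &n) != 1) {
    n = -1;
  }
  fclose(f);
  return n;
}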