16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 #include "precompiled.hpp"
25 #include "gc/z/zArray.inline.hpp"
26 #include "gc/z/zErrno.hpp"
27 #include "gc/z/zGlobals.hpp"
28 #include "gc/z/zLargePages.inline.hpp"
29 #include "gc/z/zMountPoint_linux.hpp"
30 #include "gc/z/zNUMA.inline.hpp"
31 #include "gc/z/zPhysicalMemoryBacking_linux.hpp"
32 #include "gc/z/zSyscall_linux.hpp"
33 #include "logging/log.hpp"
34 #include "runtime/init.hpp"
35 #include "runtime/os.hpp"
36 #include "utilities/align.hpp"
37 #include "utilities/debug.hpp"
38 #include "utilities/growableArray.hpp"
39
40 #include <fcntl.h>
41 #include <stdio.h>
42 #include <sys/mman.h>
43 #include <sys/stat.h>
44 #include <sys/statfs.h>
45 #include <sys/types.h>
46 #include <unistd.h>
47
48 //
49 // Support for building on older Linux systems
50 //
51
52 // memfd_create(2) flags
53 #ifndef MFD_CLOEXEC
54 #define MFD_CLOEXEC 0x0001U
55 #endif
373 }
374
375 bool ZPhysicalMemoryBacking::tmpfs_supports_transparent_huge_pages() const {
376 // If the shmem_enabled file exists and is readable then we
377 // know the kernel supports transparent huge pages for tmpfs.
378 return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0;
379 }
380
381 ZErrno ZPhysicalMemoryBacking::fallocate_compat_ftruncate(size_t size) const {
382 while (ftruncate(_fd, size) == -1) {
383 if (errno != EINTR) {
384 // Failed
385 return errno;
386 }
387 }
388
389 // Success
390 return 0;
391 }
392
393 ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap(size_t offset, size_t length, bool touch) const {
394 // On hugetlbfs, mapping a file segment will fail immediately, without
395 // the need to touch the mapped pages first, if there aren't enough huge
396 // pages available to back the mapping.
397 void* const addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
398 if (addr == MAP_FAILED) {
399 // Failed
400 return errno;
401 }
402
403 // Once mapped, the huge pages are only reserved. We need to touch them
404 // to associate them with the file segment. Note that we can not punch
405 // hole in file segments which only have reserved pages.
406 if (touch) {
407 char* const start = (char*)addr;
408 char* const end = start + length;
409 os::pretouch_memory(start, end, _block_size);
410 }
411
412 // Unmap again. From now on, the huge pages that were mapped are allocated
413 // to this file. There's no risk in getting SIGBUS when touching them.
414 if (munmap(addr, length) == -1) {
415 // Failed
416 return errno;
417 }
418
419 // Success
420 return 0;
421 }
422
423 ZErrno ZPhysicalMemoryBacking::fallocate_compat_pwrite(size_t offset, size_t length) const {
424 uint8_t data = 0;
425
426 // Allocate backing memory by writing to each block
427 for (size_t pos = offset; pos < offset + length; pos += _block_size) {
428 if (pwrite(_fd, &data, sizeof(data), pos) == -1) {
429 // Failed
430 return errno;
431 }
432 }
433
434 // Success
435 return 0;
436 }
437
438 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_compat(size_t offset, size_t length) {
439 // fallocate(2) is only supported by tmpfs since Linux 3.5, and by hugetlbfs
440 // since Linux 4.3. When fallocate(2) is not supported we emulate it using
441 // ftruncate/pwrite (for tmpfs) or ftruncate/mmap/munmap (for hugetlbfs).
442
443 const size_t end = offset + length;
444 if (end > _size) {
445 // Increase file size
446 const ZErrno err = fallocate_compat_ftruncate(end);
447 if (err) {
448 // Failed
449 return err;
450 }
451 }
452
453 // Allocate backing memory
454 const ZErrno err = is_hugetlbfs() ? fallocate_compat_mmap(offset, length, false /* touch */)
455 : fallocate_compat_pwrite(offset, length);
456 if (err) {
457 if (end > _size) {
458 // Restore file size
459 fallocate_compat_ftruncate(_size);
460 }
461
462 // Failed
463 return err;
464 }
465
466 if (end > _size) {
467 // Record new file size
468 _size = end;
469 }
470
471 // Success
472 return 0;
473 }
474
475 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_syscall(size_t offset, size_t length) {
478 if (res == -1) {
479 // Failed
480 return errno;
481 }
482
483 const size_t end = offset + length;
484 if (end > _size) {
485 // Record new file size
486 _size = end;
487 }
488
489 // Success
490 return 0;
491 }
492
493 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole(size_t offset, size_t length) {
494 // Using compat mode is more efficient when allocating space on hugetlbfs.
495 // Note that allocating huge pages this way will only reserve them, and not
496 // associate them with segments of the file. We must guarantee that we at
497 // some point touch these segments, otherwise we can not punch hole in them.
498 if (z_fallocate_supported && !is_hugetlbfs()) {
499 const ZErrno err = fallocate_fill_hole_syscall(offset, length);
500 if (!err) {
501 // Success
502 return 0;
503 }
504
505 if (err != ENOSYS && err != EOPNOTSUPP) {
506 // Failed
507 return err;
508 }
509
510 // Not supported
511 log_debug(gc)("Falling back to fallocate() compatibility mode");
512 z_fallocate_supported = false;
513 }
514
515 return fallocate_fill_hole_compat(offset, length);
516 }
517
518 ZErrno ZPhysicalMemoryBacking::fallocate_punch_hole(size_t offset, size_t length) {
519 if (is_hugetlbfs()) {
520 // We can only punch hole in pages that have been touched. Non-touched
521 // pages are only reserved, and not associated with any specific file
522 // segment. We don't know which pages have been previously touched, so
523 // we always touch them here to guarantee that we can punch hole.
524 const ZErrno err = fallocate_compat_mmap(offset, length, true /* touch */);
525 if (err) {
526 // Failed
527 return err;
528 }
529 }
530
531 const int mode = FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE;
532 if (ZSyscall::fallocate(_fd, mode, offset, length) == -1) {
533 // Failed
534 return errno;
535 }
536
537 // Success
538 return 0;
539 }
540
541 ZErrno ZPhysicalMemoryBacking::split_and_fallocate(bool punch_hole, size_t offset, size_t length) {
542 // Try first half
543 const size_t offset0 = offset;
544 const size_t length0 = align_up(length / 2, _block_size);
565
566 const ZErrno err = punch_hole ? fallocate_punch_hole(offset, length) : fallocate_fill_hole(offset, length);
567 if (err == EINTR && length > _block_size) {
568 // Calling fallocate(2) with a large length can take a long time to
569 // complete. When running profilers, such as VTune, this syscall will
570 // be constantly interrupted by signals. Expanding the file in smaller
571 // steps avoids this problem.
572 return split_and_fallocate(punch_hole, offset, length);
573 }
574
575 return err;
576 }
577
578 bool ZPhysicalMemoryBacking::commit_inner(size_t offset, size_t length) {
579 log_trace(gc, heap)("Committing memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)",
580 offset / M, (offset + length) / M, length / M);
581
582 retry:
583 const ZErrno err = fallocate(false /* punch_hole */, offset, length);
584 if (err) {
585 if (err == ENOSPC && !is_init_completed() && is_hugetlbfs() && z_fallocate_hugetlbfs_attempts-- > 0) {
586 // If we fail to allocate during initialization, due to lack of space on
587 // the hugetlbfs filesystem, then we wait and retry a few times before
588 // giving up. Otherwise there is a risk that running JVMs back-to-back
589 // will fail, since there is a delay between process termination and the
590 // huge pages owned by that process being returned to the huge page pool
591 // and made available for new allocations.
592 log_debug(gc, init)("Failed to commit memory (%s), retrying", err.to_string());
593
594 // Wait and retry in one second, in the hope that huge pages will be
595 // available by then.
596 sleep(1);
597 goto retry;
598 }
599
600 // Failed
601 log_error(gc)("Failed to commit memory (%s)", err.to_string());
602 return false;
603 }
604
605 // Success
|
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 #include "precompiled.hpp"
25 #include "gc/z/zArray.inline.hpp"
26 #include "gc/z/zErrno.hpp"
27 #include "gc/z/zGlobals.hpp"
28 #include "gc/z/zLargePages.inline.hpp"
29 #include "gc/z/zMountPoint_linux.hpp"
30 #include "gc/z/zNUMA.inline.hpp"
31 #include "gc/z/zPhysicalMemoryBacking_linux.hpp"
32 #include "gc/z/zSyscall_linux.hpp"
33 #include "logging/log.hpp"
34 #include "runtime/init.hpp"
35 #include "runtime/os.hpp"
36 #include "runtime/stubRoutines.hpp"
37 #include "utilities/align.hpp"
38 #include "utilities/debug.hpp"
39 #include "utilities/growableArray.hpp"
40
41 #include <fcntl.h>
42 #include <stdio.h>
43 #include <sys/mman.h>
44 #include <sys/stat.h>
45 #include <sys/statfs.h>
46 #include <sys/types.h>
47 #include <unistd.h>
48
49 //
50 // Support for building on older Linux systems
51 //
52
53 // memfd_create(2) flags
54 #ifndef MFD_CLOEXEC
55 #define MFD_CLOEXEC 0x0001U
56 #endif
374 }
375
376 bool ZPhysicalMemoryBacking::tmpfs_supports_transparent_huge_pages() const {
377 // If the shmem_enabled file exists and is readable then we
378 // know the kernel supports transparent huge pages for tmpfs.
379 return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0;
380 }
381
382 ZErrno ZPhysicalMemoryBacking::fallocate_compat_ftruncate(size_t size) const {
383 while (ftruncate(_fd, size) == -1) {
384 if (errno != EINTR) {
385 // Failed
386 return errno;
387 }
388 }
389
390 // Success
391 return 0;
392 }
393
394 ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_hugetlbfs(size_t offset, size_t length, bool touch) const {
395 // On hugetlbfs, mapping a file segment will fail immediately, without
396 // the need to touch the mapped pages first, if there aren't enough huge
397 // pages available to back the mapping.
398 void* const addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
399 if (addr == MAP_FAILED) {
400 // Failed
401 return errno;
402 }
403
404 // Once mapped, the huge pages are only reserved. We need to touch them
405 // to associate them with the file segment. Note that we can not punch
406 // hole in file segments which only have reserved pages.
407 if (touch) {
408 char* const start = (char*)addr;
409 char* const end = start + length;
410 os::pretouch_memory(start, end, _block_size);
411 }
412
413 // Unmap again. From now on, the huge pages that were mapped are allocated
414 // to this file. There's no risk in getting SIGBUS when touching them.
415 if (munmap(addr, length) == -1) {
416 // Failed
417 return errno;
418 }
419
420 // Success
421 return 0;
422 }
423
424 static bool is_mapping_backed_by_memory(void* addr, size_t length, size_t page_size) {
425 char* const start = (char*)addr;
426 char* const end = start + length;
427
428 // Touch pages to make sure the mapping is backed. If the mapping can't
429 // be backed we'll get a SIGBUS, which is why we're using SafeFetch32.
430 // On tmpfs, doing a fetch (as opposed to a store) is enough to cause
431 // the backing pages to be allocated.
432 for (char *p = start; p < end; p += page_size) {
433 if (SafeFetch32((int*)p, -1) == -1) {
434 return false;
435 }
436 }
437
438 return true;
439 }
440
441 ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap_tmpfs(size_t offset, size_t length) const {
442 // On tmpfs, we need to touch the mapped pages to figure out
443 // if there are enough pages available to back the mapping.
444 void* const addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
445 if (addr == MAP_FAILED) {
446 // Failed
447 return errno;
448 }
449
450 // Back the mapping with transparent huge pages
451 os::realign_memory((char*)addr, length, os::large_page_size());
452
453 // Check if the mapping can be backed by memory
454 const bool backed = is_mapping_backed_by_memory(addr, length, _block_size);
455
456 // Unmap again. From now on, if the mapping was backed, the pages that
457 // were mapped are allocated to this file. There's no risk in getting
458 // SIGBUS when touching them.
459 if (munmap(addr, length) == -1) {
460 // Failed
461 return errno;
462 }
463
464 // Success
465 return backed ? 0 : ENOMEM;
466 }
467
468 ZErrno ZPhysicalMemoryBacking::fallocate_compat_pwrite(size_t offset, size_t length) const {
469 uint8_t data = 0;
470
471 // Allocate backing memory by writing to each block
472 for (size_t pos = offset; pos < offset + length; pos += _block_size) {
473 if (pwrite(_fd, &data, sizeof(data), pos) == -1) {
474 // Failed
475 return errno;
476 }
477 }
478
479 // Success
480 return 0;
481 }
482
483 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_compat(size_t offset, size_t length) {
484 // fallocate(2) is only supported by tmpfs since Linux 3.5, and by hugetlbfs
485 // since Linux 4.3. When fallocate(2) is not supported we emulate it using
486 // mmap/munmap (for hugetlbfs and tmpfs with transparent huge pages) or pwrite
487 // (for tmpfs without transparent huge pages and other filesystem types).
488
489 const size_t end = offset + length;
490 if (end > _size) {
491 // Increase file size
492 const ZErrno err = fallocate_compat_ftruncate(end);
493 if (err) {
494 // Failed
495 return err;
496 }
497 }
498
499 // Allocate backing memory
500 const ZErrno err = ZLargePages::is_explicit() ?
501 fallocate_compat_mmap_hugetlbfs(offset, length, false /* touch */) :
502 (ZLargePages::is_transparent() ?
503 fallocate_compat_mmap_tmpfs(offset, length) :
504 fallocate_compat_pwrite(offset, length));
505
506 if (err) {
507 if (end > _size) {
508 // Restore file size
509 fallocate_compat_ftruncate(_size);
510 }
511
512 // Failed
513 return err;
514 }
515
516 if (end > _size) {
517 // Record new file size
518 _size = end;
519 }
520
521 // Success
522 return 0;
523 }
524
525 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_syscall(size_t offset, size_t length) {
528 if (res == -1) {
529 // Failed
530 return errno;
531 }
532
533 const size_t end = offset + length;
534 if (end > _size) {
535 // Record new file size
536 _size = end;
537 }
538
539 // Success
540 return 0;
541 }
542
543 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole(size_t offset, size_t length) {
544 // Using compat mode is more efficient when allocating space on hugetlbfs.
545 // Note that allocating huge pages this way will only reserve them, and not
546 // associate them with segments of the file. We must guarantee that we at
547 // some point touch these segments, otherwise we can not punch hole in them.
548 if (z_fallocate_supported && !ZLargePages::is_enabled()) {
549 const ZErrno err = fallocate_fill_hole_syscall(offset, length);
550 if (!err) {
551 // Success
552 return 0;
553 }
554
555 if (err != ENOSYS && err != EOPNOTSUPP) {
556 // Failed
557 return err;
558 }
559
560 // Not supported
561 log_debug(gc)("Falling back to fallocate() compatibility mode");
562 z_fallocate_supported = false;
563 }
564
565 return fallocate_fill_hole_compat(offset, length);
566 }
567
568 ZErrno ZPhysicalMemoryBacking::fallocate_punch_hole(size_t offset, size_t length) {
569 if (ZLargePages::is_explicit()) {
570 // We can only punch hole in pages that have been touched. Non-touched
571 // pages are only reserved, and not associated with any specific file
572 // segment. We don't know which pages have been previously touched, so
573 // we always touch them here to guarantee that we can punch hole.
574 const ZErrno err = fallocate_compat_mmap_hugetlbfs(offset, length, true /* touch */);
575 if (err) {
576 // Failed
577 return err;
578 }
579 }
580
581 const int mode = FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE;
582 if (ZSyscall::fallocate(_fd, mode, offset, length) == -1) {
583 // Failed
584 return errno;
585 }
586
587 // Success
588 return 0;
589 }
590
591 ZErrno ZPhysicalMemoryBacking::split_and_fallocate(bool punch_hole, size_t offset, size_t length) {
592 // Try first half
593 const size_t offset0 = offset;
594 const size_t length0 = align_up(length / 2, _block_size);
615
616 const ZErrno err = punch_hole ? fallocate_punch_hole(offset, length) : fallocate_fill_hole(offset, length);
617 if (err == EINTR && length > _block_size) {
618 // Calling fallocate(2) with a large length can take a long time to
619 // complete. When running profilers, such as VTune, this syscall will
620 // be constantly interrupted by signals. Expanding the file in smaller
621 // steps avoids this problem.
622 return split_and_fallocate(punch_hole, offset, length);
623 }
624
625 return err;
626 }
627
628 bool ZPhysicalMemoryBacking::commit_inner(size_t offset, size_t length) {
629 log_trace(gc, heap)("Committing memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)",
630 offset / M, (offset + length) / M, length / M);
631
632 retry:
633 const ZErrno err = fallocate(false /* punch_hole */, offset, length);
634 if (err) {
635 if (err == ENOSPC && !is_init_completed() && ZLargePages::is_explicit() && z_fallocate_hugetlbfs_attempts-- > 0) {
636 // If we fail to allocate during initialization, due to lack of space on
637 // the hugetlbfs filesystem, then we wait and retry a few times before
638 // giving up. Otherwise there is a risk that running JVMs back-to-back
639 // will fail, since there is a delay between process termination and the
640 // huge pages owned by that process being returned to the huge page pool
641 // and made available for new allocations.
642 log_debug(gc, init)("Failed to commit memory (%s), retrying", err.to_string());
643
644 // Wait and retry in one second, in the hope that huge pages will be
645 // available by then.
646 sleep(1);
647 goto retry;
648 }
649
650 // Failed
651 log_error(gc)("Failed to commit memory (%s)", err.to_string());
652 return false;
653 }
654
655 // Success
|