< prev index next >

src/os/posix/vm/os_posix.cpp

Print this page

        

@@ -36,10 +36,11 @@
 #include <sys/resource.h>
 #include <sys/utsname.h>
 #include <pthread.h>
 #include <semaphore.h>
 #include <signal.h>
+#include <sys/mman.h>
 
 // Todo: provide a os::get_max_process_id() or similar. Number of processes
 // may have been configured, can be read more accurately from proc fs etc.
 #ifndef MAX_PID
 #define MAX_PID INT_MAX

@@ -137,22 +138,190 @@
 void os::wait_for_keypress_at_exit(void) {
   // don't do anything on posix platforms
   return;
 }
 
+// Helper function to create a temp file in the given directory.
+//
+// The file is created with mkstemp() using the template "<dir>/jvmheap.XXXXXX",
+// restricted to owner read/write, and then unlinked, so that only the returned
+// descriptor keeps it alive.
+//
+//   dir  - directory in which the file is created.
+//   size - not used by this function.
+//
+// Returns the open file descriptor, or -1 if the file could not be created,
+// restricted or unlinked. Exits the VM during initialization if the buffer for
+// the file name cannot be allocated.
+int os::create_file_for_heap(const char* dir, size_t size) {
+
+  const char name_template[] = "/jvmheap.XXXXXX";
+
+  // sizeof(name_template) counts the terminating NUL, so the buffer holds
+  // exactly 'dir' followed by the template.
+  char *fullname = (char*)::malloc(strlen(dir) + sizeof(name_template));
+  if (fullname == NULL) {
+    vm_exit_during_initialization(err_msg("malloc failed"));
+    return -1;
+  }
+  (void)strcpy(fullname, dir);
+  (void)strcat(fullname, name_template);
+
+  sigset_t set, oldset;
+  int ret = sigfillset(&set);
+  assert(ret == 0, "sigfillset error");
+
+  // block all signals while we do the file operation.
+  ret = pthread_sigmask(SIG_BLOCK, &set, &oldset);
+  assert(ret == 0, "pthread_sigmask error");
+
+  // create a new file; mkstemp() replaces the XXXXXX suffix in 'fullname' in place.
+  int fd = mkstemp(fullname);
+
+  if (fd < 0) {
+    warning("Could not create file for heap with template %s (%s)", fullname, os::strerror(errno));
+  } else {
+    // These calls used to be checked by assert only, which is compiled out of
+    // product builds. A failure there must not go unnoticed: the file could stay
+    // accessible to other users or never be removed from the filesystem.
+    bool ok = true;
+
+    // change file permissions; mkstemp creates file with permissions 0600 (glibc versions after 2.06) or 0666 (2.06 and earlier versions)
+    if (fchmod(fd, S_IRUSR | S_IWUSR) != 0) {
+      warning("Could not restrict permissions of heap file %s (%s)", fullname, os::strerror(errno));
+      ok = false;
+    }
+
+    // delete the name from the filesystem. When 'fd' is closed, the file (and space) will be deleted.
+    if (unlink(fullname) != 0) {
+      warning("Could not unlink heap file %s (%s)", fullname, os::strerror(errno));
+      ok = false;
+    }
+
+    if (!ok) {
+      ::close(fd);
+      fd = -1;
+    }
+  }
+
+  // reset the signal mask (single exit path for both success and failure).
+  ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+  assert(ret == 0, "pthread_sigmask error");
+
+  ::free(fullname);
+  return fd;
+}
+
+// Reserves 'bytes' of address space with a private, anonymous, PROT_NONE mapping.
+//
+//   bytes          - length of the range to reserve.
+//   requested_addr - if non-NULL, the page-aligned address at which the range is
+//                    mapped using MAP_FIXED; if NULL, mmap() picks the address.
+//
+// Returns the start of the reserved range, or NULL if mmap() failed. A
+// successful reservation is recorded with MemTracker.
+static char* reserve_mmaped_memory(size_t bytes, char* requested_addr) {
+  int flags = MAP_PRIVATE | MAP_NORESERVE | MAP_ANONYMOUS;
+  if (requested_addr != NULL) {
+    // This file is shared by all POSIX ports, so query the page size through the
+    // platform-independent os::vm_page_size() instead of os::Linux::page_size().
+    assert((uintptr_t)requested_addr % os::vm_page_size() == 0, "unaligned address");
+    flags |= MAP_FIXED;
+  }
+
+  // Map reserved/uncommitted pages PROT_NONE so we fail early if we
+  // touch an uncommitted page. Otherwise, the read/write might
+  // succeed if we have enough swap space to back the physical page.
+  char* addr = (char*)::mmap(requested_addr, bytes, PROT_NONE,
+    flags, -1, 0);
+
+  if (addr != MAP_FAILED) {
+    MemTracker::record_virtual_memory_reserve((address)addr, bytes, CALLER_PC);
+    return addr;
+  }
+  return NULL;
+}
+
+// Makes sure that disk space is allocated for the file referred to by 'fd'.
+//
+//   fd     - open file descriptor.
+//   offset - start of the range to allocate. Not used on the __APPLE__ path,
+//            which always allocates 'len' bytes and truncates the file to 'len'.
+//   len    - number of bytes to allocate.
+//
+// Returns 0 on success. On failure returns -1 with errno describing the error,
+// on every platform, so callers can report os::strerror(errno).
+static int util_posix_fallocate(int fd, off_t offset, off_t len) {
+#ifdef __APPLE__
+  fstore_t store = { F_ALLOCATECONTIG, F_PEOFPOSMODE, 0, len };
+  // First we try to get a contiguous chunk of disk space
+  int ret = fcntl(fd, F_PREALLOCATE, &store);
+  if (ret == -1) {
+    // Maybe we are too fragmented, try to allocate non-contiguous range
+    store.fst_flags = F_ALLOCATEALL;
+    ret = fcntl(fd, F_PREALLOCATE, &store);
+    if (ret == -1) {
+      return -1;
+    }
+  }
+  return ftruncate(fd, len);
+#else
+  // posix_fallocate() reports failure through its return value and does not
+  // set errno. Store the error number in errno so that this path behaves like
+  // the fcntl()/ftruncate() path above.
+  const int err = posix_fallocate(fd, offset, len);
+  if (err != 0) {
+    errno = err;
+    return -1;
+  }
+  return 0;
+#endif
+}
+
+// Maps 'size' bytes of the file 'fd' (starting at file offset 0) read/write and
+// shared into the address space.
+//
+//   base - if non-NULL, the mapping is placed exactly there using MAP_FIXED;
+//          if NULL, mmap() picks the address.
+//   size - length of the mapping; the file is first given this much space.
+//   fd   - descriptor of the backing file, must not be -1.
+//
+// Returns the mapped address, or NULL if the mapping failed. Exits the VM
+// during initialization if space for the file cannot be allocated.
+char* os::map_memory_to_dax_file(char* base, size_t size, int fd) {
+  assert(fd != -1, "File descriptor is not valid");
+
+  // Give the file backing space for the whole range before it is mapped.
+  const int alloc_result = util_posix_fallocate(fd, 0, (off_t)size);
+  if (alloc_result != 0) {
+    vm_exit_during_initialization(err_msg("Error in mapping Java heap at the given filesystem directory (%s)", os::strerror(errno)));
+    return NULL;
+  }
+
+  // A caller-supplied base has to be honoured exactly, hence MAP_FIXED.
+  const int map_flags = (base != NULL) ? (MAP_SHARED | MAP_FIXED) : MAP_SHARED;
+  char* const mapped = (char*)mmap(base, size, PROT_READ | PROT_WRITE, map_flags, fd, 0);
+
+  if (mapped == MAP_FAILED) {
+    return NULL;
+  }
+
+  if (base != NULL && mapped != base) {
+    // Mapped somewhere other than requested: undo the mapping and report failure.
+    if (!os::release_memory(mapped, size)) {
+      warning("Could not release memory on unsuccessful file mapping");
+    }
+    return NULL;
+  }
+
+  return mapped;
+}
+
+// Maps the file 'fd' at the already reserved range [base, base + size) by
+// delegating to map_memory_to_dax_file() with a non-NULL base.
+// Returns what map_memory_to_dax_file() returns.
+char* os::replace_existing_mapping_with_dax_file_mapping(char* base, size_t size, int fd) {
+  assert(fd != -1, "File descriptor is not valid");
+  assert(base != NULL, "base cannot be NULL");
+
+  char* const file_mapping = map_memory_to_dax_file(base, size, fd);
+  return file_mapping;
+}
+
+// Tries to reserve 'bytes' of memory at 'addr'. If 'file_desc' is not -1, the
+// reserved range is additionally backed by that file.
+//
+// Returns the reserved address, or NULL if the reservation failed. Exits the VM
+// during initialization if the range was reserved but could not be mapped to
+// the file.
+char* os::attempt_reserve_memory_at(size_t bytes, char* addr, int file_desc) {
+
+  // The platform-dependent pd_attempt_reserve_memory_at() contains complex
+  // logic (especially on Linux) that we want to reuse. So it is called to purely
+  // reserve memory, and the anonymous mapping is then replaced with a file mapping.
+  // On AIX an extra bool parameter tells pd_attempt_reserve_memory_at() whether
+  // it may use SHM; it must not when a file is going to be mapped.
+#if defined(AIX)
+  char* result = pd_attempt_reserve_memory_at(bytes, addr, file_desc == -1 /*can use SHM*/);
+#else
+  char* result = pd_attempt_reserve_memory_at(bytes, addr);
+#endif
+
+  if (result == NULL) {
+    return NULL;
+  }
+
+  if (file_desc == -1) {
+    // Plain reservation without a backing file.
+    MemTracker::record_virtual_memory_reserve((address)result, bytes, CALLER_PC);
+    return result;
+  }
+
+  // Swap the anonymous reservation for a mapping of the file.
+  if (replace_existing_mapping_with_dax_file_mapping(result, bytes, file_desc) == NULL) {
+    vm_exit_during_initialization(err_msg("Error in mapping Java heap at the given filesystem directory"));
+  }
+  MemTracker::record_virtual_memory_reserve_and_commit((address)result, bytes, CALLER_PC);
+  return result;
+}
+
 // Multiple threads can race in this code, and can remap over each other with MAP_FIXED,
 // so on posix, unmap the section at the start and at the end of the chunk that we mapped
 // rather than unmapping and remapping the whole chunk to get requested alignment.
-char* os::reserve_memory_aligned(size_t size, size_t alignment) {
+char* os::reserve_memory_aligned(size_t size, size_t alignment, int file_desc) {
   assert((alignment & (os::vm_allocation_granularity() - 1)) == 0,
       "Alignment must be a multiple of allocation granularity (page size)");
   assert((size & (alignment -1)) == 0, "size must be 'alignment' aligned");
 
   size_t extra_size = size + alignment;
   assert(extra_size >= size, "overflow, size is too large to allow alignment");
 
-  char* extra_base = os::reserve_memory(extra_size, NULL, alignment);
+  char* extra_base;
+  if (file_desc != -1) {
+    // For file mapping, we do not call os::reserve_memory(extra_size, NULL, alignment, file_desc) because
+    // we need to deal with shrinking of the file space later when we release extra memory after alignment.
+    // We also cannot call os::reserve_memory() with file_desc set to -1 because on AIX we might get SHM memory.
+    // So here we call a helper function to reserve memory for us. After we have an aligned base,
+    // we will replace the anonymous mapping with a file mapping.
+    extra_base = reserve_mmaped_memory(extra_size, NULL);
+    if (extra_base != NULL) {
+      MemTracker::record_virtual_memory_reserve((address)extra_base, extra_size, CALLER_PC);
+    }
+  }
+  else {
+    extra_base = os::reserve_memory(extra_size, NULL, alignment);
+  }
 
   if (extra_base == NULL) {
     return NULL;
   }
 

@@ -175,10 +344,17 @@
 
   if (end_offset > 0) {
       os::release_memory(extra_base + begin_offset + size, end_offset);
   }
 
+  if (file_desc != -1) {
+    // After we have an aligned address, we can replace the anonymous mapping with a file mapping
+    if (replace_existing_mapping_with_dax_file_mapping(aligned_base, size, file_desc) == NULL) {
+      vm_exit_during_initialization(err_msg("Error in mapping Java heap at the given filesystem directory"));
+    }
+    MemTracker::record_virtual_memory_commit((address)aligned_base, size, CALLER_PC);
+  }
   return aligned_base;
 }
 
 int os::log_vsnprintf(char* buf, size_t len, const char* fmt, va_list args) {
     return vsnprintf(buf, len, fmt, args);
< prev index next >