/* * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. * */ #include #include #include "utilities/globalDefinitions.hpp" #include "memory/allocation.hpp" #include "runtime/os.hpp" #include "logging/log.hpp" #include "osContainer_linux.hpp" /* * Warning: Some linux distros use 0x7FFFFFFFFFFFF000 * and others use 0x7FFFFFFFFFFFFFFF for unlimited. */ #define UNLIMITED_MEM CONST64(0x7FFFFFFFFFFFF000) #define PER_CPU_SHARES 1024 bool OSContainer::_is_initialized = false; bool OSContainer::_is_containerized = false; class CgroupSubsystem: CHeapObj { friend class OSContainer; private: /* mountinfo contents */ char *_root; char *_mount_point; /* Constructed subsystem directory */ char *_path; public: CgroupSubsystem(char *root, char *mountpoint) { _root = os::strdup(root); _mount_point = os::strdup(mountpoint); _path = NULL; } /* * Set directory to subsystem specific files based * on the contents of the mountinfo and cgroup files. 
*/ void set_subsystem_path(char *cgroup_path) { char buf[MAXPATHLEN+1]; if (_root != NULL && cgroup_path != NULL) { if (strcmp(_root, "/") == 0) { strncpy(buf, _mount_point, MAXPATHLEN); buf[MAXPATHLEN-1] = '\0'; strncat(buf, cgroup_path, MAXPATHLEN-strlen(buf)); buf[MAXPATHLEN-1] = '\0'; _path = os::strdup(buf); } else { if (strcmp(_root, cgroup_path) == 0) { strncpy(buf, _mount_point, MAXPATHLEN); buf[MAXPATHLEN-1] = '\0'; _path = os::strdup(buf); } else { char *p = strstr(_root, cgroup_path); if (p != NULL && p == _root) { if (strlen(cgroup_path) > strlen(_root)) { strncpy(buf, _mount_point, MAXPATHLEN); buf[MAXPATHLEN-1] = '\0'; strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-strlen(buf)); buf[MAXPATHLEN-1] = '\0'; _path = os::strdup(buf); } } } } } } char *subsystem_path() { return _path; } }; // CgroupSubsystem *cgroupv2; CgroupSubsystem* memory = NULL; CgroupSubsystem* cpuset = NULL; CgroupSubsystem* cpu = NULL; CgroupSubsystem* cpuacct = NULL; typedef char * cptr; #define GEN_CONTAINER_GET_INFO(return_type, scan_fmt, isstr) \ int subsystem_file_contents_##return_type(CgroupSubsystem* c, \ char *filename, \ return_type *returnval) { \ FILE *fp = NULL; \ char *p; \ char buf[MAXPATHLEN+1]; \ \ if (c != NULL && c->subsystem_path() != NULL) { \ strncpy(buf, c->subsystem_path(), MAXPATHLEN); \ buf[MAXPATHLEN-1] = '\0'; \ strncat(buf, filename, MAXPATHLEN-strlen(buf)); \ log_trace(os, container)("Path to %s is %s\n", filename, buf); \ fp = fopen(buf, "r"); \ if (fp != NULL) { \ p = fgets(buf, MAXPATHLEN, fp); \ if (p != NULL) { \ if (isstr) { \ *(char **)returnval = os::strdup(p); \ fclose(fp); \ return 0; \ } else { \ return_type value; \ int matched = sscanf(p, scan_fmt, &value); \ if (matched == 1) { \ *returnval = value; \ fclose(fp); \ return 0; \ } else { \ log_debug(os, container)("Type %s not found in file %s\n", \ scan_fmt , buf); \ } \ } \ } else { \ log_debug(os, container)("Empty file %s\n", buf); \ } \ } else { \ log_debug(os, container)("file 
not found %s\n", buf); \ } \ } \ if (fp != NULL) \ fclose(fp); \ return OSCONTAINER_ERROR; \ } GEN_CONTAINER_GET_INFO(int, "%d", false) GEN_CONTAINER_GET_INFO(jlong, JLONG_FORMAT, false) GEN_CONTAINER_GET_INFO(cptr, "%p", true) #define GET_CONTAINER_INFO(return_type, isstring, subsystem, \ filename, logstring, variable) \ return_type variable; \ { \ int err; \ err = subsystem_file_contents_##return_type(subsystem, \ filename, \ &variable); \ if (err != 0) { \ log_debug(os, container)("Error reading %s", filename); \ return isstring ? (return_type) NULL : \ (return_type) OSCONTAINER_ERROR; \ } \ log_trace(os, container)(logstring, variable); \ } /* init * * Initialize the container support and determine if * we are running under cgroup control. */ void OSContainer::init() { int mountid; int parentid; int major; int minor; FILE *mntinfo = NULL; FILE *cgroup = NULL; char buf[MAXPATHLEN+1]; char tmproot[MAXPATHLEN+1]; char tmpmount[MAXPATHLEN+1]; char tmpbase[MAXPATHLEN+1]; char *p; jlong mem_limit; assert(!_is_initialized, "Initializing OSContainer more than once"); _is_initialized = true; _is_containerized = false; log_trace(os, container)("OSContainer::init: Initializing Container Support"); if (!UseContainerSupport) { log_trace(os, container)("Container Support not enabled"); return; } /* * Find the cgroup mount point for memory and cpuset * by reading /proc/self/mountinfo * * Example for docker: * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory * * Example for host: * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory */ mntinfo = fopen("/proc/self/mountinfo", "r"); if (mntinfo == NULL) { log_debug(os, container)("Can't locate /proc/self/mountinfo\n"); return; } while ( (p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { // Look for the filesystem type and see if it's cgroup char 
fstype[MAXPATHLEN+1]; fstype[0] = '\0'; char *s = strstr(p, " - "); if (s != NULL && sscanf(s, " - %s", fstype) == 1 && strcmp(fstype, "cgroup") == 0) { if (strstr(p, "memory") != NULL) { int matched = sscanf(p, "%d %d %d:%d %s %s", &mountid, &parentid, &major, &minor, tmproot, tmpmount); if (matched == 6) { memory = new CgroupSubsystem(tmproot, tmpmount); } else log_debug(os, container)("Incompatible str containing cgroup and memory: %s\n", p); } else if (strstr(p, "cpuset") != NULL) { int matched = sscanf(p, "%d %d %d:%d %s %s", &mountid, &parentid, &major, &minor, tmproot, tmpmount); if (matched == 6) { cpuset = new CgroupSubsystem(tmproot, tmpmount); } else { log_debug(os, container)("Incompatible str containing cgroup and cpuset: %s\n", p); } } else if (strstr(p, "cpu,cpuacct") != NULL) { int matched = sscanf(p, "%d %d %d:%d %s %s", &mountid, &parentid, &major, &minor, tmproot, tmpmount); if (matched == 6) { cpu = new CgroupSubsystem(tmproot, tmpmount); cpuacct = new CgroupSubsystem(tmproot, tmpmount); } else { log_debug(os, container)("Incompatible str containing cgroup and cpu,cpuacct: %s\n", p); } } else if (strstr(p, "cpuacct") != NULL) { int matched = sscanf(p, "%d %d %d:%d %s %s", &mountid, &parentid, &major, &minor, tmproot, tmpmount); if (matched == 6) { cpuacct = new CgroupSubsystem(tmproot, tmpmount); } else { log_debug(os, container)("Incompatible str containing cgroup and cpuacct: %s\n", p); } } else if (strstr(p, "cpu") != NULL) { int matched = sscanf(p, "%d %d %d:%d %s %s", &mountid, &parentid, &major, &minor, tmproot, tmpmount); if (matched == 6) { cpu = new CgroupSubsystem(tmproot, tmpmount); } else { log_debug(os, container)("Incompatible str containing cgroup and cpu: %s\n", p); } } } } if (mntinfo != NULL) fclose(mntinfo); /* * Read /proc/self/cgroup and map host mount point to * local one via /proc/self/mountinfo content above * * Docker example: * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 * * Host 
example: * 5:memory:/user.slice * * Construct a path to the process specific memory and cpuset * cgroup directory. * * For a container running under Docker from memory example above * the paths would be: * * /sys/fs/cgroup/memory * * For a Host from memory example above the path would be: * * /sys/fs/cgroup/memory/user.slice * */ cgroup = fopen("/proc/self/cgroup", "r"); if (cgroup == NULL) { log_debug(os, container)("Can't locate /proc/self/cgroup\n"); return; } while ( (p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { int cgno; int matched; char *controller; char *base; /* Skip cgroup number */ strsep(&p, ":"); /* Get controller and base */ controller = strsep(&p, ":"); base = strsep(&p, "\n"); if (controller != NULL) { if (strstr(controller, "memory") != NULL) { memory->set_subsystem_path(base); } else if (strstr(controller, "cpuset") != NULL) { cpuset->set_subsystem_path(base); } else if (strstr(controller, "cpu,cpuacct") != NULL) { cpu->set_subsystem_path(base); cpuacct->set_subsystem_path(base); } else if (strstr(controller, "cpuacct") != NULL) { cpuacct->set_subsystem_path(base); } else if (strstr(controller, "cpu") != NULL) { cpu->set_subsystem_path(base); } } } if (cgroup != NULL) fclose(cgroup); if (memory == NULL || cpuset == NULL || cpu == NULL) { log_debug(os, container)("Required cgroup subsystems not found"); return; } // We need to update the amount of physical memory now that // command line arguments have been processed. if ((mem_limit = memory_limit_in_bytes()) > 0) { os::Linux::set_physical_memory(mem_limit); } _is_containerized = true; } char * OSContainer::container_type() { if (is_containerized()) { return (char *)"cgroupv1"; } else { return NULL; } } /* memory_limit_in_bytes * * Return the limit of available memory for this process. 
*
 * return:
 *    memory limit in bytes or
 *    -1 for unlimited
 *    OSCONTAINER_ERROR for not supported
 */
jlong OSContainer::memory_limit_in_bytes() {
  GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.limit_in_bytes",
                     "Memory Limit is: " JLONG_FORMAT "\n", memlimit);

  // Any value at or above UNLIMITED_MEM means no limit was configured.
  if (memlimit >= UNLIMITED_MEM) {
    log_trace(os, container)("Memory Limit is: Unlimited\n");
    return (jlong)-1;
  } else {
    return memlimit;
  }
}

/* memory_and_swap_limit_in_bytes
 *
 * Return the value of memory.memsw.limit_in_bytes for this process.
 *
 * return:
 *    memory and swap limit in bytes or
 *    -1 for unlimited
 *    OSCONTAINER_ERROR for not supported
 */
jlong OSContainer::memory_and_swap_limit_in_bytes() {
  GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.memsw.limit_in_bytes",
                     "Memory and Swap Limit is: " JLONG_FORMAT "\n", memswlimit);

  if (memswlimit >= UNLIMITED_MEM) {
    log_trace(os, container)("Memory and Swap Limit is: Unlimited\n");
    return (jlong)-1;
  } else {
    return memswlimit;
  }
}

/* memory_soft_limit_in_bytes
 *
 * Return the value of memory.soft_limit_in_bytes for this process.
 *
 * return:
 *    memory soft limit in bytes or
 *    -1 for unlimited
 *    OSCONTAINER_ERROR for not supported
 */
jlong OSContainer::memory_soft_limit_in_bytes() {
  GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.soft_limit_in_bytes",
                     "Memory Soft Limit is: " JLONG_FORMAT "\n", memsoftlimit);

  if (memsoftlimit >= UNLIMITED_MEM) {
    log_trace(os, container)("Memory Soft Limit is: Unlimited\n");
    return (jlong)-1;
  } else {
    return memsoftlimit;
  }
}

/* memory_usage_in_bytes
 *
 * Return the amount of used memory for this process.
 *
 * return:
 *    memory usage in bytes or
 *    OSCONTAINER_ERROR for not supported
 */
jlong OSContainer::memory_usage_in_bytes() {
  GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.usage_in_bytes",
                     "Memory Usage is: " JLONG_FORMAT "\n", memusage);
  return memusage;
}

/* memory_max_usage_in_bytes
 *
 * Return the maximum amount of used memory for this process.
 *
 * return:
 *    max memory usage in bytes or
 *    OSCONTAINER_ERROR for not supported
 */
jlong OSContainer::memory_max_usage_in_bytes() {
  GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.max_usage_in_bytes",
                     "Maximum Memory Usage is: " JLONG_FORMAT "\n", memmaxusage);
  return memmaxusage;
}

/* active_processor_count
 *
 * Calculate an appropriate number of active processors for the
 * VM to use based on these three cgroup options.
* * cpu affinity * cpu quota & cpu period * cpu shares * * Algorithm: * * Determine the number of available CPUs from sched_getaffinity * * If user specified a quota (quota != -1), calculate the number of * required CPUs by dividing quota by period. * * If shares are in effect (shares != -1), calculate the number * of cpus required for the shares by dividing the share value * by PER_CPU_SHARES. * * All results of division are rounded up to the next whole number. * * Return the smaller number from the three different settings. * * return: * number of cpus * OSCONTAINER_ERROR if failure occured during extract of cpuset info */ int OSContainer::active_processor_count() { int cpu_count, share_count, quota_count; int share, quota, period; int result; cpu_count = os::Linux::active_processor_count(); share = cpu_shares(); if (share > -1) { share_count = ceilf((float)share / (float)PER_CPU_SHARES); log_trace(os, container)("cpu_share count: %d", share_count); } else { share_count = cpu_count; } quota = cpu_quota(); period = cpu_period(); if (quota > -1 && period > 0) { quota_count = ceilf((float)quota / (float)period); log_trace(os, container)("quota_count: %d", quota_count); } else { quota_count = cpu_count; } result = MIN2(cpu_count, MIN2(share_count, quota_count)); log_trace(os, container)("OSContainer::active_processor_count: %d", result); return result; } char * OSContainer::cpu_cpuset_cpus() { GET_CONTAINER_INFO(cptr, true, cpuset, (char *)"/cpuset.cpus", "cpuset.cpus is: %s\n", cpus); return cpus; } char * OSContainer::cpu_cpuset_memory_nodes() { GET_CONTAINER_INFO(cptr, true, cpuset, (char *)"/cpuset.mems", "cpuset.mems is: %s\n", mems); return mems; } /* cpu_quota * * Return the number of milliseconds per period * process is guaranteed to run. 
* * return: * quota time in milliseconds * -1 for no quota * OSCONTAINER_ERROR for not supported */ int OSContainer::cpu_quota() { GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.cfs_quota_us", "CPU Quota is: %d\n", quota); return quota; } int OSContainer::cpu_period() { GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.cfs_period_us", "CPU Period is: %d\n", period); return period; } /* cpu_shares * * Return the amount of cpu shares available to the process * * return: * Share number (typically a number relative to 1024) * (2048 typically expresses 2 CPUs worth of processing) * -1 for no share setup * OSCONTAINER_ERROR for not supported */ int OSContainer::cpu_shares() { GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.shares", "CPU Shares is: %d\n", shares); // Convert 1024 to no shares setup if (shares == 1024) return -1; return shares; }