--- /dev/null Thu Jun 21 18:15:34 2018 +++ new/src/os/linux/vm/osContainer_linux.cpp Thu Jun 21 18:15:33 2018 @@ -0,0 +1,680 @@ +/* + * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include +#include +#include +#include "utilities/globalDefinitions.hpp" +#include "memory/allocation.hpp" +#include "runtime/os.hpp" +#include "osContainer_linux.hpp" + +#define PER_CPU_SHARES 1024 + +bool OSContainer::_is_initialized = false; +bool OSContainer::_is_containerized = false; +julong _unlimited_memory; + +class CgroupSubsystem: CHeapObj { + friend class OSContainer; + + private: + /* mountinfo contents */ + char *_root; + char *_mount_point; + + /* Constructed subsystem directory */ + char *_path; + + public: + CgroupSubsystem(char *root, char *mountpoint) { + _root = os::strdup(root); + _mount_point = os::strdup(mountpoint); + _path = NULL; + } + + /* + * Set directory to subsystem specific files based + * on the contents of the mountinfo and cgroup files. + */ + void set_subsystem_path(char *cgroup_path) { + char buf[MAXPATHLEN+1]; + if (_root != NULL && cgroup_path != NULL) { + if (strcmp(_root, "/") == 0) { + int buflen; + strncpy(buf, _mount_point, MAXPATHLEN); + buf[MAXPATHLEN-1] = '\0'; + if (strcmp(cgroup_path,"/") != 0) { + buflen = strlen(buf); + if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { + return; + } + strncat(buf, cgroup_path, MAXPATHLEN-buflen); + buf[MAXPATHLEN-1] = '\0'; + } + _path = os::strdup(buf); + } else { + if (strcmp(_root, cgroup_path) == 0) { + strncpy(buf, _mount_point, MAXPATHLEN); + buf[MAXPATHLEN-1] = '\0'; + _path = os::strdup(buf); + } else { + char *p = strstr(_root, cgroup_path); + if (p != NULL && p == _root) { + if (strlen(cgroup_path) > strlen(_root)) { + int buflen; + strncpy(buf, _mount_point, MAXPATHLEN); + buf[MAXPATHLEN-1] = '\0'; + buflen = strlen(buf); + if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { + return; + } + strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen); + buf[MAXPATHLEN-1] = '\0'; + _path = os::strdup(buf); + } + } + } + } + } + } + + char *subsystem_path() { return _path; } +}; + +CgroupSubsystem* memory = NULL; +CgroupSubsystem* cpuset = NULL; +CgroupSubsystem* cpu = NULL; +CgroupSubsystem* cpuacct = NULL; + +typedef char * cptr; + +PRAGMA_DIAG_PUSH +PRAGMA_FORMAT_NONLITERAL_IGNORED +template int subsystem_file_contents(CgroupSubsystem* c, + const char *filename, + const char *scan_fmt, + T returnval) { + FILE *fp = NULL; + char *p; + char file[MAXPATHLEN+1]; + char buf[MAXPATHLEN+1]; + + if (c == NULL) { + if (PrintContainerInfo) { + tty->print_cr("subsystem_file_contents: CgroupSubsytem* is NULL"); + } + return OSCONTAINER_ERROR; + } + if (c->subsystem_path() == NULL) { + if (PrintContainerInfo) { + tty->print_cr("subsystem_file_contents: subsystem path is NULL"); + } + return OSCONTAINER_ERROR; + } + + strncpy(file, c->subsystem_path(), MAXPATHLEN); + file[MAXPATHLEN-1] = '\0'; + int filelen = strlen(file); + if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) { + if (PrintContainerInfo) { + tty->print_cr("File path too long %s, %s", file, filename); + } + return OSCONTAINER_ERROR; + } + strncat(file, filename, MAXPATHLEN-filelen); + if (PrintContainerInfo) { + tty->print_cr("Path to %s is %s", filename, file); + } + fp = fopen(file, "r"); + if (fp != NULL) { + p = fgets(buf, MAXPATHLEN, fp); + if (p != NULL) { + int matched = sscanf(p, scan_fmt, returnval); + if (matched == 1) { + fclose(fp); + return 0; + } else { + if (PrintContainerInfo) { + tty->print_cr("Type %s not found in file %s", scan_fmt, file); + } + } + } else { + if (PrintContainerInfo) { + tty->print_cr("Empty file %s", file); + } + } + } else { + if (PrintContainerInfo) { + tty->print_cr("Open of file %s failed, %s", file, strerror(errno)); + } + } + if (fp != NULL) + fclose(fp); + return OSCONTAINER_ERROR; +} +PRAGMA_DIAG_POP + +#define GET_CONTAINER_INFO(return_type, subsystem, filename, \ + logstring, scan_fmt, variable) \ + return_type variable; \ +{ \ + int err; \ + err = subsystem_file_contents(subsystem, \ + filename, \ + scan_fmt, \ + &variable); \ + if (err != 0) \ + return (return_type) OSCONTAINER_ERROR; \ + \ + if (PrintContainerInfo) \ + tty->print_cr(logstring, variable); \ +} + +#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename, \ + logstring, scan_fmt, variable, bufsize) \ + char variable[bufsize]; \ +{ \ + int err; \ + err = subsystem_file_contents(subsystem, \ + filename, \ + scan_fmt, \ + variable); \ + if (err != 0) \ + return (return_type) NULL; \ + \ + if (PrintContainerInfo) \ + tty->print_cr(logstring, variable); \ +} + +/* init + * + * Initialize the container support and determine if + * we are running under cgroup control. + */ +void OSContainer::init() { + int mountid; + int parentid; + int major; + int minor; + FILE *mntinfo = NULL; + FILE *cgroup = NULL; + char buf[MAXPATHLEN+1]; + char tmproot[MAXPATHLEN+1]; + char tmpmount[MAXPATHLEN+1]; + char tmpbase[MAXPATHLEN+1]; + char *p; + jlong mem_limit; + + assert(!_is_initialized, "Initializing OSContainer more than once"); + + _is_initialized = true; + _is_containerized = false; + + _unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size(); + + if (PrintContainerInfo) { + tty->print_cr("OSContainer::init: Initializing Container Support"); + } + if (!UseContainerSupport) { + if (PrintContainerInfo) { + tty->print_cr("Container Support not enabled"); + } + return; + } + + /* + * Find the cgroup mount point for memory and cpuset + * by reading /proc/self/mountinfo + * + * Example for docker: + * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory + * + * Example for host: + * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory + */ + mntinfo = fopen("/proc/self/mountinfo", "r"); + if (mntinfo == NULL) { + if (PrintContainerInfo) { + tty->print_cr("Can't open /proc/self/mountinfo, %s", + strerror(errno)); + } + return; + } + + while ( (p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { + // Look for the filesystem type and see if it's cgroup + char fstype[MAXPATHLEN+1]; + fstype[0] = '\0'; + char *s = strstr(p, " - "); + if (s != NULL && + sscanf(s, " - %s", fstype) == 1 && + strcmp(fstype, "cgroup") == 0) { + + if (strstr(p, "memory") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + memory = new CgroupSubsystem(tmproot, tmpmount); + } + else + if (PrintContainerInfo) { + tty->print_cr("Incompatible str containing cgroup and memory: %s", p); + } + } else if (strstr(p, "cpuset") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpuset = new CgroupSubsystem(tmproot, tmpmount); + } + else { + if (PrintContainerInfo) { + tty->print_cr("Incompatible str containing cgroup and cpuset: %s", p); + } + } + } else if (strstr(p, "cpu,cpuacct") != NULL || strstr(p, "cpuacct,cpu") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpu = new CgroupSubsystem(tmproot, tmpmount); + cpuacct = new CgroupSubsystem(tmproot, tmpmount); + } + else { + if (PrintContainerInfo) { + tty->print_cr("Incompatible str containing cgroup and cpu,cpuacct: %s", p); + } + } + } else if (strstr(p, "cpuacct") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpuacct = new CgroupSubsystem(tmproot, tmpmount); + } + else { + if (PrintContainerInfo) { + tty->print_cr("Incompatible str containing cgroup and cpuacct: %s", p); + } + } + } else if (strstr(p, "cpu") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpu = new CgroupSubsystem(tmproot, tmpmount); + } + else { + if (PrintContainerInfo) { + tty->print_cr("Incompatible str containing cgroup and cpu: %s", p); + } + } + } + } + } + + fclose(mntinfo); + + if (memory == NULL) { + if (PrintContainerInfo) { + tty->print_cr("Required cgroup memory subsystem not found"); + } + return; + } + if (cpuset == NULL) { + if (PrintContainerInfo) { + tty->print_cr("Required cgroup cpuset subsystem not found"); + } + return; + } + if (cpu == NULL) { + if (PrintContainerInfo) { + tty->print_cr("Required cgroup cpu subsystem not found"); + } + return; + } + if (cpuacct == NULL) { + if (PrintContainerInfo) { + tty->print_cr("Required cgroup cpuacct subsystem not found"); + } + return; + } + + /* + * Read /proc/self/cgroup and map host mount point to + * local one via /proc/self/mountinfo content above + * + * Docker example: + * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 + * + * Host example: + * 5:memory:/user.slice + * + * Construct a path to the process specific memory and cpuset + * cgroup directory. + * + * For a container running under Docker from memory example above + * the paths would be: + * + * /sys/fs/cgroup/memory + * + * For a Host from memory example above the path would be: + * + * /sys/fs/cgroup/memory/user.slice + * + */ + cgroup = fopen("/proc/self/cgroup", "r"); + if (cgroup == NULL) { + if (PrintContainerInfo) { + tty->print_cr("Can't open /proc/self/cgroup, %s", + strerror(errno)); + } + return; + } + + while ( (p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { + int cgno; + int matched; + char *controller; + char *base; + + /* Skip cgroup number */ + strsep(&p, ":"); + /* Get controller and base */ + controller = strsep(&p, ":"); + base = strsep(&p, "\n"); + + if (controller != NULL) { + if (strstr(controller, "memory") != NULL) { + memory->set_subsystem_path(base); + } else if (strstr(controller, "cpuset") != NULL) { + cpuset->set_subsystem_path(base); + } else if (strstr(controller, "cpu,cpuacct") != NULL || strstr(controller, "cpuacct,cpu") != NULL) { + cpu->set_subsystem_path(base); + cpuacct->set_subsystem_path(base); + } else if (strstr(controller, "cpuacct") != NULL) { + cpuacct->set_subsystem_path(base); + } else if (strstr(controller, "cpu") != NULL) { + cpu->set_subsystem_path(base); + } + } + } + + fclose(cgroup); + + // We need to update the amount of physical memory now that + // command line arguments have been processed. + if ((mem_limit = memory_limit_in_bytes()) > 0) { + os::Linux::set_physical_memory(mem_limit); + } + + _is_containerized = true; + +} + +const char * OSContainer::container_type() { + if (is_containerized()) { + return "cgroupv1"; + } else { + return NULL; + } +} + + +/* memory_limit_in_bytes + * + * Return the limit of available memory for this process. + * + * return: + * memory limit in bytes or + * -1 for unlimited + * OSCONTAINER_ERROR for not supported + */ +jlong OSContainer::memory_limit_in_bytes() { + GET_CONTAINER_INFO(julong, memory, "/memory.limit_in_bytes", + "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit); + + if (memlimit >= _unlimited_memory) { + if (PrintContainerInfo) { + tty->print_cr("Memory Limit is: Unlimited"); + } + return (jlong)-1; + } + else { + return (jlong)memlimit; + } +} + +jlong OSContainer::memory_and_swap_limit_in_bytes() { + GET_CONTAINER_INFO(julong, memory, "/memory.memsw.limit_in_bytes", + "Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit); + if (memswlimit >= _unlimited_memory) { + if (PrintContainerInfo) { + tty->print_cr("Memory and Swap Limit is: Unlimited"); + } + return (jlong)-1; + } else { + return (jlong)memswlimit; + } +} + +jlong OSContainer::memory_soft_limit_in_bytes() { + GET_CONTAINER_INFO(julong, memory, "/memory.soft_limit_in_bytes", + "Memory Soft Limit is: " JULONG_FORMAT, JULONG_FORMAT, memsoftlimit); + if (memsoftlimit >= _unlimited_memory) { + if (PrintContainerInfo) { + tty->print_cr("Memory Soft Limit is: Unlimited"); + } + return (jlong)-1; + } else { + return (jlong)memsoftlimit; + } +} + +/* memory_usage_in_bytes + * + * Return the amount of used memory for this process. + * + * return: + * memory usage in bytes or + * -1 for unlimited + * OSCONTAINER_ERROR for not supported + */ +jlong OSContainer::memory_usage_in_bytes() { + GET_CONTAINER_INFO(jlong, memory, "/memory.usage_in_bytes", + "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage); + return memusage; +} + +/* memory_max_usage_in_bytes + * + * Return the maximum amount of used memory for this process. + * + * return: + * max memory usage in bytes or + * OSCONTAINER_ERROR for not supported + */ +jlong OSContainer::memory_max_usage_in_bytes() { + GET_CONTAINER_INFO(jlong, memory, "/memory.max_usage_in_bytes", + "Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage); + return memmaxusage; +} + +/* active_processor_count + * + * Calculate an appropriate number of active processors for the + * VM to use based on these three inputs. + * + * cpu affinity + * cgroup cpu quota & cpu period + * cgroup cpu shares + * + * Algorithm: + * + * Determine the number of available CPUs from sched_getaffinity + * + * If user specified a quota (quota != -1), calculate the number of + * required CPUs by dividing quota by period. + * + * If shares are in effect (shares != -1), calculate the number + * of CPUs required for the shares by dividing the share value + * by PER_CPU_SHARES. + * + * All results of division are rounded up to the next whole number. + * + * If neither shares or quotas have been specified, return the + * number of active processors in the system. + * + * If both shares and quotas have been specified, the results are + * based on the flag PreferContainerQuotaForCPUCount. If true, + * return the quota value. If false return the smallest value + * between shares or quotas. + * + * If shares and/or quotas have been specified, the resulting number + * returned will never exceed the number of active processors. + * + * return: + * number of CPUs + */ +int OSContainer::active_processor_count() { + int quota_count = 0, share_count = 0; + int cpu_count, limit_count; + int result; + + cpu_count = limit_count = os::Linux::active_processor_count(); + int quota = cpu_quota(); + int period = cpu_period(); + int share = cpu_shares(); + + if (quota > -1 && period > 0) { + quota_count = ceilf((float)quota / (float)period); + if (PrintContainerInfo) { + tty->print_cr("CPU Quota count based on quota/period: %d", quota_count); + } + } + if (share > -1) { + share_count = ceilf((float)share / (float)PER_CPU_SHARES); + if (PrintContainerInfo) { + tty->print_cr("CPU Share count based on shares: %d", share_count); + } + } + + // If both shares and quotas are setup results depend + // on flag PreferContainerQuotaForCPUCount. + // If true, limit CPU count to quota + // If false, use minimum of shares and quotas + if (quota_count !=0 && share_count != 0) { + if (PreferContainerQuotaForCPUCount) { + limit_count = quota_count; + } else { + limit_count = MIN2(quota_count, share_count); + } + } else if (quota_count != 0) { + limit_count = quota_count; + } else if (share_count != 0) { + limit_count = share_count; + } + + result = MIN2(cpu_count, limit_count); + if (PrintContainerInfo) { + tty->print_cr("OSContainer::active_processor_count: %d", result); + } + return result; +} + +char * OSContainer::cpu_cpuset_cpus() { + GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.cpus", + "cpuset.cpus is: %s", "%1023s", cpus, 1024); + return os::strdup(cpus); +} + +char * OSContainer::cpu_cpuset_memory_nodes() { + GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.mems", + "cpuset.mems is: %s", "%1023s", mems, 1024); + return os::strdup(mems); +} + +/* cpu_quota + * + * Return the number of milliseconds per period + * process is guaranteed to run. + * + * return: + * quota time in milliseconds + * -1 for no quota + * OSCONTAINER_ERROR for not supported + */ +int OSContainer::cpu_quota() { + GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_quota_us", + "CPU Quota is: %d", "%d", quota); + return quota; +} + +int OSContainer::cpu_period() { + GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_period_us", + "CPU Period is: %d", "%d", period); + return period; +} + +/* cpu_shares + * + * Return the amount of cpu shares available to the process + * + * return: + * Share number (typically a number relative to 1024) + * (2048 typically expresses 2 CPUs worth of processing) + * -1 for no share setup + * OSCONTAINER_ERROR for not supported + */ +int OSContainer::cpu_shares() { + GET_CONTAINER_INFO(int, cpu, "/cpu.shares", + "CPU Shares is: %d", "%d", shares); + // Convert 1024 to no shares setup + if (shares == 1024) return -1; + + return shares; +} +