--- /dev/null 2016-12-30 14:52:30.584485998 -0500 +++ new/src/os/linux/vm/osContainer_linux.cpp 2017-09-21 13:56:36.141153758 -0400 @@ -0,0 +1,625 @@ +/* + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include +#include +#include "memory/allocation.hpp" +#include "runtime/osContainer.hpp" +#include "logging/log.hpp" + +#define MAXBUF 256 +#define CONTAINER_ERROR -2 + +class CgroupSubsystem: CHeapObj { + friend OSContainer; + private: + /* mountinfo contents */ + char *_root; + char *_mount_point; + + /* Constructed subsystem directory */ + char *_path; + + protected: + CgroupSubsystem(char *root, char *mountpoint) { + _root = strdup(root); + _mount_point = strdup(mountpoint); + _path = NULL; + } + + ~CgroupSubsystem() { + if (_root != NULL) free(_root); + if ( _mount_point != NULL) free(_mount_point); + if (_path != NULL) free(_path); + } + + public: + /* + * Set directory to subsystem specific files based + * on the contents of the mountinfo and cgroup files. + */ + void set_subsystem_path(char *cgroup_path) { + char buf[MAXBUF]; + if (_root != NULL && cgroup_path != NULL) { + if (strcmp(_root, "/") == 0) { + strncpy(buf, _mount_point, MAXBUF); + buf[MAXBUF-1] = '\0'; + strncat(buf, cgroup_path, MAXBUF-strlen(buf)); + buf[MAXBUF-1] = '\0'; + _path = strdup(buf); + } + else { + if (strcmp(_root, cgroup_path) == 0) { + strncpy(buf, _mount_point, MAXBUF); + buf[MAXBUF-1] = '\0'; + _path = strdup(buf); + } + else { + char *p = strstr(_root, cgroup_path); + if (p != NULL && p == _root) { + if (strlen(cgroup_path) > strlen(_root)) { + strncpy(buf, _mount_point, MAXBUF); + buf[MAXBUF-1] = '\0'; + strncat(buf, cgroup_path + strlen(_root), MAXBUF-strlen(buf)); + buf[MAXBUF-1] = '\0'; + _path = strdup(buf); + } + } + } + } + } + } + + char *get_subsystem_path() { return _path; } +}; + +// CgroupSubsystem *cgroupv2; +CgroupSubsystem* memory = NULL; +CgroupSubsystem* cpuset = NULL; +CgroupSubsystem* cpu = NULL; +CgroupSubsystem* cpuacct = NULL; + +#define GEN_CONTAINER_GET_INFO(return_name, return_type, scan_fmt) \ +int get_subsystem_file_contents_##return_name(CgroupSubsystem* c, \ + char *filename, \ + return_type *returnval) { \ + FILE *fp = NULL; \ + char *p; \ + char buf[MAXBUF]; \ + \ + if (c != NULL && c->get_subsystem_path() != NULL) { \ + strncpy(buf, c->get_subsystem_path(), MAXBUF); \ + buf[MAXBUF-1] = '\0'; \ + strncat(buf, filename, MAXBUF-strlen(buf)); \ + log_trace(os, container)("Path to %s is %s\n", filename, buf); \ + fp = fopen(buf, "r"); \ + if (fp != NULL) { \ + p = fgets(buf, MAXBUF, fp); \ + return_type value; \ + int matched; \ + if (p != NULL) { \ + matched = sscanf(p, scan_fmt, &value); \ + if (matched == 1) { \ + fclose(fp); \ + *returnval = value; \ + return 0; \ + } \ + else log_error(os, container)("Type %s not found in file %s\n", scan_fmt , buf); \ + } \ + else log_error(os, container)("Empty file %s\n", buf); \ + } \ + else log_error(os, container)("file not found %s\n", buf); \ + } \ + if (fp != NULL) \ + fclose(fp); \ + return CONTAINER_ERROR; \ +} + +#define GEN_CONTAINER_GET_INFO_STR(return_name, return_type) \ +int get_subsystem_file_contents_##return_name(CgroupSubsystem* c, \ + char *filename, \ + return_type *returnval) { \ + FILE *fp = NULL; \ + char *p; \ + char buf[MAXBUF]; \ + \ + if (c != NULL && c->get_subsystem_path() != NULL) { \ + strncpy(buf, c->get_subsystem_path(), MAXBUF); \ + buf[MAXBUF-1] = '\0'; \ + strncat(buf, filename, MAXBUF-strlen(buf)); \ + log_trace(os, container)("Path to %s is %s\n", filename, buf); \ + fp = fopen(buf, "r"); \ + if (fp != NULL) { \ + p = fgets(buf, MAXBUF, fp); \ + if (p != NULL) { \ + *returnval = strdup(p); \ + fclose(fp); \ + return 0; \ + } \ + else log_error(os, container)("Empty file %s\n", buf); \ + } \ + else log_error(os, container)("File not found %s\n", buf); \ + } \ + if (fp != NULL) \ + fclose(fp); \ + return CONTAINER_ERROR; \ +} + +GEN_CONTAINER_GET_INFO(int, int, "%d") +GEN_CONTAINER_GET_INFO(jlong, jlong, "%ld") +GEN_CONTAINER_GET_INFO_STR(cptr, char *) + +#define GET_CONTAINER_INFO(return_name, return_type, subsystem, \ + filename, logstring, variable) \ + int err; \ + return_type variable; \ + err = get_subsystem_file_contents_##return_name(subsystem, \ + filename, \ + &variable); \ + if (err != 0) { \ + log_error(os, container)("Error reading %s", filename); \ + return (return_type)CONTAINER_ERROR; \ + } \ + log_info(os, container)(logstring, variable); + +static int cpuset_cpus_to_count(char *cpus); + +/* pd_init + * + * Initialize the container support and return true if the + * container support is enabled and we are running under cgroup control. + */ +bool OSContainer::pd_init() { + int mountid; + int parentid; + int major; + int minor; + FILE *mntinfo = NULL; + FILE *cgroup = NULL; + char buf[MAXBUF]; + char tmproot[MAXBUF]; + char tmpmount[MAXBUF]; + char tmpbase[MAXBUF]; + char *p; + jlong mem_limit; + + log_trace(os, container)("OSContainer::pd_init: Initializing Container Support"); + if (!UseContainerSupport) { + log_trace(os, container)("Container Support not enabled"); + return false; + } + + /* + * Find the cgroup mount point for memory and cpuset + * by reading /proc/self/mountinfo + * + * Example for docker: + * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory + * + * Example for host: + * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory + */ + mntinfo = fopen("/proc/self/mountinfo", "r"); + if (mntinfo == NULL) { + log_error(os, container)("Can't locate /proc/self/mountinfo\n"); + return false; + } + + while ( (p = fgets(buf, MAXBUF, mntinfo)) != NULL) { + // Look for the filesystem type and see if it's cgroup + char fstype[MAXBUF]; + fstype[0] = '\0'; + char *s = strstr(p, " - "); + if (s != NULL && + sscanf(s, " - %s", fstype) == 1 && + strcmp(fstype, "cgroup") == 0) { + + if (strstr(p, "memory") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + memory = new CgroupSubsystem(tmproot, tmpmount); + } + else log_error(os, container)("Incompatible str containing cgroup and memory: %s\n", p); + } + else if (strstr(p, "cpuset") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpuset = new CgroupSubsystem(tmproot, tmpmount); + } + else log_error(os, container)("Incompatible str containing cgroup and cpuset: %s\n", p); + } + else if (strstr(p, "cpu,cpuacct") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpu = new CgroupSubsystem(tmproot, tmpmount); + cpuacct = new CgroupSubsystem(tmproot, tmpmount); + } + else log_error(os, container)("Incompatible str containing cgroup and cpu,cpuacct: %s\n", p); + } + else if (strstr(p, "cpuacct") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpuacct = new CgroupSubsystem(tmproot, tmpmount); + } + else log_error(os, container)("Incompatible str containing cgroup and cpuacct: %s\n", p); + } + else if (strstr(p, "cpu") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpu = new CgroupSubsystem(tmproot, tmpmount); + } + else log_error(os, container)("Incompatible str containing cgroup and cpu: %s\n", p); + } + } + } + + if (mntinfo != NULL) fclose(mntinfo); + + /* + * Read /proc/self/cgroup and map host mount point to + * local one via /proc/self/mountinfo content above + * + * Docker example: + * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 + * + * Host example: + * 5:memory:/user.slice + * + * Construct a path to the process specific memory and cpuset + * cgroup directory. + * + * For a container running under Docker from memory example above + * the paths would be: + * + * /sys/fs/cgroup/memory + * + * For a Host from memory example above the path would be: + * + * /sys/fs/cgroup/memory/user.slice + * + */ + cgroup = fopen("/proc/self/cgroup", "r"); + if (cgroup == NULL) { + log_error(os, container)("Can't locate /proc/self/cgroup\n"); + return false; + } + + while ( (p = fgets(buf, MAXBUF, cgroup)) != NULL) { + int cgno; + int matched; + char *controller; + char *base; + + /* Skip cgroup number */ + strsep(&p, ":"); + /* Get controller and base */ + controller = strsep(&p, ":"); + base = strsep(&p, "\n"); + + if (controller != NULL) { + if (strstr(controller, "memory") != NULL) { + memory->set_subsystem_path(base); + } + else if (strstr(controller, "cpuset") != NULL) { + cpuset->set_subsystem_path(base); + } + else if (strstr(controller, "cpu,cpuacct") != NULL) { + cpu->set_subsystem_path(base); + cpuacct->set_subsystem_path(base); + } + else if (strstr(controller, "cpuacct") != NULL) { + cpuacct->set_subsystem_path(base); + } + else if (strstr(controller, "cpu") != NULL) { + cpu->set_subsystem_path(base); + } + } + } + + if (cgroup != NULL) fclose(cgroup); + + if (memory == NULL || cpuset == NULL || cpu == NULL) { + log_warning(os, container)("Required cgroup subsystems not found"); + return false; + } + + // We need to update the amount of physical memory now that + // command line arguments have been processed. + if ((mem_limit = OSContainer::memory_limit_in_bytes()) > 0) { + os::Linux::set_physical_memory(mem_limit); + } + +#if 0 + // Test Container functions + OSContainer::memory_usage_in_bytes(); + OSContainer::memory_and_swap_limit_in_bytes(); + OSContainer::memory_soft_limit_in_bytes(); + OSContainer::cpu_cpuset_memory_nodes(); + OSContainer::cpu_cpuset_cpus(); + OSContainer::cpu_quota(); + OSContainer::cpu_period(); + OSContainer::cpu_shares(); + OSContainer::active_processor_count(); +#endif + + return true; +} + +char * OSContainer::get_container_type() { + return (char *)0; +} + + +/* pd_memory_limit_in_bytes + * + * Return the limit of available memory for this process. + * + * return: + * memory limit in bytes or + * -1 for unlimited + * CONTAINER_ERROR for not supported + */ +jlong OSContainer::pd_memory_limit_in_bytes() { + GET_CONTAINER_INFO(jlong, jlong, memory, (char *)"/memory.limit_in_bytes", + "Memory Limit is: %ld\n", memlimit); + + if (memlimit == 9223372036854771712) { + log_info(os, container)("Memory Limit is: Unlimited\n"); + return (jlong)-1; + } + else + return memlimit; +} + +jlong OSContainer::pd_memory_and_swap_limit_in_bytes() { + GET_CONTAINER_INFO(jlong, jlong, memory, (char *)"/memory.memsw.limit_in_bytes", + "Memory and Swap Limit is: %ld\n", memswlimit); + if (memswlimit == 9223372036854771712) { + log_info(os, container)("Memory and Swap Limit is: Unlimited\n"); + return (jlong)-1; + } + else + return memswlimit; +} + +jlong OSContainer::pd_memory_soft_limit_in_bytes() { + GET_CONTAINER_INFO(jlong, jlong, memory, (char *)"/memory.soft_limit_in_bytes", + "Memory Soft Limit is: %ld\n", memsoftlimit); + if (memsoftlimit == 9223372036854771712) { + log_info(os, container)("Memory Soft Limit is: Unlimited\n"); + return (jlong)-1; + } + else + return memsoftlimit; +} + +/* pd_memory_usage_in_bytes + * + * Return the amount of used memory for this process. + * + * return: + * memory usage in bytes or + * -1 for unlimited + * CONTAINER_ERROR for not supported + */ +jlong OSContainer::pd_memory_usage_in_bytes() { + GET_CONTAINER_INFO(jlong, jlong, memory, (char *)"/memory.usage_in_bytes", + "Memory Usage is: %ld\n", memusage); + return memusage; +} + +/* pd_memory_max_usage_in_bytes + * + * Return the maximum amount of used memory for this process. + * + * return: + * max memory usage in bytes or + * CONTAINER_ERROR for not supported + */ +jlong OSContainer::pd_memory_max_usage_in_bytes() { + GET_CONTAINER_INFO(jlong, jlong, memory, (char *)"/memory.max_usage_in_bytes", + "Maximu, Memory Usage is: %ld\n", memmaxusage); + return memmaxusage; +} + +/* pd_active_processor_count + * + * Calculate an appropriate number of active processors for the + * VM to use based on these three cgroup options. + * + * cpu sets + * cpu quota & cpu period + * cpu shares + * + * Algorythm: + * + * Determine the number of available CPUs from cpu_sets + * + * If user specified a quota (quota != -1), calculate the number of + * required CPUs by dividing quota by period. + * + * If shares are in effect (shares != -1), calculate the number + * of cpus required for the shares by dividing the share value by 1024. + * + * All results of division are rounded up to the next whole number. + * + * Return the smaller number from the three different settings. + * + * return: + * number of cpus + * CONTAINER_ERROR if failure occured during extract of cpuset info + */ +int OSContainer::pd_active_processor_count() { + int cpu_count, share_count, quota_count; + int share, quota, period; + int result; + char *cpus; + + cpus = OSContainer::cpu_cpuset_cpus(); + if (cpus != (char *)CONTAINER_ERROR) { + cpu_count = cpuset_cpus_to_count(cpus); + log_info(os,container)("cpuset_cpu count is %d\n", cpu_count); + free(cpus); + } + else { + log_error(os,container)("Error getting cpuset_cpucount"); + return CONTAINER_ERROR; + } + + share = OSContainer::cpu_shares(); + if (share > -1) { + share_count = ceilf((float)share / 1024.0f); + log_trace(os, container)("cpu_share count: %d", share_count); + } + else share_count = cpu_count; + + + quota = OSContainer::cpu_quota(); + period = OSContainer::cpu_period(); + if (quota > -1 && period > 0) { + quota_count = ceilf((float)quota / (float)period); + log_trace(os, container)("quota_count: %d", quota_count); + } + else quota_count = cpu_count; + + result = MIN2(cpu_count, MIN2(share_count, quota_count)); + log_trace(os, container)("OSContainer::active_processor_count: %d", result); + return result; +} + +char * OSContainer::pd_cpu_cpuset_cpus() { + GET_CONTAINER_INFO(cptr, char *, cpuset, (char *)"/cpuset.cpus", + "cpuset.cpus is: %s\n", cpus); + return cpus; +} + +char * OSContainer::pd_cpu_cpuset_memory_nodes() { + GET_CONTAINER_INFO(cptr, char *, cpuset, (char *)"/cpuset.mems", + "cpuset.mems is: %s\n", mems); + return mems; +} + +/* pd_cpu_quota + * + * Return the number of miliseconds per period + * process is guaranteed to run. + * + * return: + * quota time in milliseconds + * -1 for no quota + * CONTAINER_ERROR for not supported + */ +int OSContainer::pd_cpu_quota() { + GET_CONTAINER_INFO(int, int, cpu, (char *)"/cpu.cfs_quota_us", + "CPU Quota is: %d\n", quota); + return quota; +} + +int OSContainer::pd_cpu_period() { + GET_CONTAINER_INFO(int, int, cpu, (char *)"/cpu.cfs_period_us", + "CPU Period is: %d\n", period); + return period; +} + +/* pd_cpu_shares + * + * Return the amount of cpu shares available to the process + * + * return: + * Share number (typically a number relative to 1024) + * (2048 typically expresses 2 CPUs worth of processing) + * -1 for no share setup + * CONTAINER_ERROR for not supported + */ +int OSContainer::pd_cpu_shares() { + GET_CONTAINER_INFO(int, int, cpu, (char *)"/cpu.shares", + "CPU Shares is: %d\n", shares); + // Convert 1024 to no shares setup + if (shares == 1024) return -1; + + return shares; +} + +/* + * Convert cpuset.cpus comma separated string to a + * count of cpus + */ +static int cpuset_cpus_to_count(char *cpus) +{ + int cpu_count = 0; + char *token, *string, *tofree; + + tofree = string = strdup(cpus); + while ((token = strsep(&string, ",")) != NULL) { + /* Range x-z format */ + if (index(token, '-') != 0) { + int low, hi; + char *rtoken; + char *range, *tofree2; + tofree2 = range = strdup(token); + rtoken = strsep(&range, "-"); + low = atoi(rtoken); + hi = atoi(range); + free(tofree2); + cpu_count += ((++hi) - (low)); + } + /* single number */ + else { + int cpu = atoi(token); + cpu_count++; + } + } + free(tofree); + return cpu_count; +}