--- /dev/null 2016-12-30 14:52:30.584485998 -0500 +++ new/src/hotspot/os/linux/osContainer_linux.cpp 2017-10-11 14:35:02.650067422 -0400 @@ -0,0 +1,585 @@ +/* + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include +#include +#include "utilities/globalDefinitions.hpp" +#include "memory/allocation.hpp" +#include "runtime/os.hpp" +#include "logging/log.hpp" +#include "osContainer_linux.hpp" + +/* + * Warning: Some linux distros use 0x7FFFFFFFFFFFF000 + * and others use 0x7FFFFFFFFFFFFFFF for unlimited. + */ +#define UNLIMITED_MEM CONST64(0x7FFFFFFFFFFFF000) + +#define PER_CPU_SHARES 1024 + +bool OSContainer::_is_initialized = false; +bool OSContainer::_is_containerized = false; + +class CgroupSubsystem: CHeapObj { + friend class OSContainer; + + private: + /* mountinfo contents */ + char *_root; + char *_mount_point; + + /* Constructed subsystem directory */ + char *_path; + + public: + CgroupSubsystem(char *root, char *mountpoint) { + _root = os::strdup(root); + _mount_point = os::strdup(mountpoint); + _path = NULL; + } + + /* + * Set directory to subsystem specific files based + * on the contents of the mountinfo and cgroup files. + */ + void set_subsystem_path(char *cgroup_path) { + char buf[MAXPATHLEN+1]; + if (_root != NULL && cgroup_path != NULL) { + if (strcmp(_root, "/") == 0) { + strncpy(buf, _mount_point, MAXPATHLEN); + buf[MAXPATHLEN-1] = '\0'; + strncat(buf, cgroup_path, MAXPATHLEN-strlen(buf)); + buf[MAXPATHLEN-1] = '\0'; + _path = os::strdup(buf); + } else { + if (strcmp(_root, cgroup_path) == 0) { + strncpy(buf, _mount_point, MAXPATHLEN); + buf[MAXPATHLEN-1] = '\0'; + _path = os::strdup(buf); + } else { + char *p = strstr(_root, cgroup_path); + if (p != NULL && p == _root) { + if (strlen(cgroup_path) > strlen(_root)) { + strncpy(buf, _mount_point, MAXPATHLEN); + buf[MAXPATHLEN-1] = '\0'; + strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-strlen(buf)); + buf[MAXPATHLEN-1] = '\0'; + _path = os::strdup(buf); + } + } + } + } + } + } + + char *subsystem_path() { return _path; } +}; + +// CgroupSubsystem *cgroupv2; +CgroupSubsystem* memory = NULL; +CgroupSubsystem* cpuset = NULL; +CgroupSubsystem* cpu = NULL; +CgroupSubsystem* cpuacct = NULL; + +typedef char * cptr; + +#define GEN_CONTAINER_GET_INFO(return_type, scan_fmt, isstr) \ +int subsystem_file_contents_##return_type(CgroupSubsystem* c, \ + char *filename, \ + return_type *returnval) { \ + FILE *fp = NULL; \ + char *p; \ + char buf[MAXPATHLEN+1]; \ + \ + if (c != NULL && c->subsystem_path() != NULL) { \ + strncpy(buf, c->subsystem_path(), MAXPATHLEN); \ + buf[MAXPATHLEN-1] = '\0'; \ + strncat(buf, filename, MAXPATHLEN-strlen(buf)); \ + log_trace(os, container)("Path to %s is %s\n", filename, buf); \ + fp = fopen(buf, "r"); \ + if (fp != NULL) { \ + p = fgets(buf, MAXPATHLEN, fp); \ + if (p != NULL) { \ + if (isstr) { \ + *(char **)returnval = os::strdup(p); \ + fclose(fp); \ + return 0; \ + } else { \ + return_type value; \ + int matched = sscanf(p, scan_fmt, &value); \ + if (matched == 1) { \ + *returnval = value; \ + fclose(fp); \ + return 0; \ + } else { \ + log_debug(os, container)("Type %s not found in file %s\n", \ + scan_fmt , buf); \ + } \ + } \ + } else { \ + log_debug(os, container)("Empty file %s\n", buf); \ + } \ + } else { \ + log_debug(os, container)("file not found %s\n", buf); \ + } \ + } \ + if (fp != NULL) \ + fclose(fp); \ + return OSCONTAINER_ERROR; \ +} + + +GEN_CONTAINER_GET_INFO(int, "%d", false) +GEN_CONTAINER_GET_INFO(jlong, JLONG_FORMAT, false) +GEN_CONTAINER_GET_INFO(cptr, "%p", true) + +#define GET_CONTAINER_INFO(return_type, isstring, subsystem, \ + filename, logstring, variable) \ + return_type variable; \ +{ \ + int err; \ + err = subsystem_file_contents_##return_type(subsystem, \ + filename, \ + &variable); \ + if (err != 0) { \ + log_debug(os, container)("Error reading %s", filename); \ + return isstring ? (return_type) NULL : \ + (return_type) OSCONTAINER_ERROR; \ + } \ + log_trace(os, container)(logstring, variable); \ +} + +/* init + * + * Initialize the container support and determine if + * we are running under cgroup control. + */ +void OSContainer::init() { + int mountid; + int parentid; + int major; + int minor; + FILE *mntinfo = NULL; + FILE *cgroup = NULL; + char buf[MAXPATHLEN+1]; + char tmproot[MAXPATHLEN+1]; + char tmpmount[MAXPATHLEN+1]; + char tmpbase[MAXPATHLEN+1]; + char *p; + jlong mem_limit; + + assert(!_is_initialized, "Initializing OSContainer more than once"); + + _is_initialized = true; + _is_containerized = false; + + log_trace(os, container)("OSContainer::init: Initializing Container Support"); + if (!UseContainerSupport) { + log_trace(os, container)("Container Support not enabled"); + return; + } + + /* + * Find the cgroup mount point for memory and cpuset + * by reading /proc/self/mountinfo + * + * Example for docker: + * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory + * + * Example for host: + * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory + */ + mntinfo = fopen("/proc/self/mountinfo", "r"); + if (mntinfo == NULL) { + log_debug(os, container)("Can't locate /proc/self/mountinfo\n"); + return; + } + + while ( (p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { + // Look for the filesystem type and see if it's cgroup + char fstype[MAXPATHLEN+1]; + fstype[0] = '\0'; + char *s = strstr(p, " - "); + if (s != NULL && + sscanf(s, " - %s", fstype) == 1 && + strcmp(fstype, "cgroup") == 0) { + + if (strstr(p, "memory") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + memory = new CgroupSubsystem(tmproot, tmpmount); + } + else + log_debug(os, container)("Incompatible str containing cgroup and memory: %s\n", p); + } else if (strstr(p, "cpuset") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpuset = new CgroupSubsystem(tmproot, tmpmount); + } + else { + log_debug(os, container)("Incompatible str containing cgroup and cpuset: %s\n", p); + } + } else if (strstr(p, "cpu,cpuacct") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpu = new CgroupSubsystem(tmproot, tmpmount); + cpuacct = new CgroupSubsystem(tmproot, tmpmount); + } + else { + log_debug(os, container)("Incompatible str containing cgroup and cpu,cpuacct: %s\n", p); + } + } else if (strstr(p, "cpuacct") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpuacct = new CgroupSubsystem(tmproot, tmpmount); + } + else { + log_debug(os, container)("Incompatible str containing cgroup and cpuacct: %s\n", p); + } + } else if (strstr(p, "cpu") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpu = new CgroupSubsystem(tmproot, tmpmount); + } + else { + log_debug(os, container)("Incompatible str containing cgroup and cpu: %s\n", p); + } + } + } + } + + if (mntinfo != NULL) fclose(mntinfo); + + /* + * Read /proc/self/cgroup and map host mount point to + * local one via /proc/self/mountinfo content above + * + * Docker example: + * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 + * + * Host example: + * 5:memory:/user.slice + * + * Construct a path to the process specific memory and cpuset + * cgroup directory. + * + * For a container running under Docker from memory example above + * the paths would be: + * + * /sys/fs/cgroup/memory + * + * For a Host from memory example above the path would be: + * + * /sys/fs/cgroup/memory/user.slice + * + */ + cgroup = fopen("/proc/self/cgroup", "r"); + if (cgroup == NULL) { + log_debug(os, container)("Can't locate /proc/self/cgroup\n"); + return; + } + + while ( (p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { + int cgno; + int matched; + char *controller; + char *base; + + /* Skip cgroup number */ + strsep(&p, ":"); + /* Get controller and base */ + controller = strsep(&p, ":"); + base = strsep(&p, "\n"); + + if (controller != NULL) { + if (strstr(controller, "memory") != NULL) { + memory->set_subsystem_path(base); + } else if (strstr(controller, "cpuset") != NULL) { + cpuset->set_subsystem_path(base); + } else if (strstr(controller, "cpu,cpuacct") != NULL) { + cpu->set_subsystem_path(base); + cpuacct->set_subsystem_path(base); + } else if (strstr(controller, "cpuacct") != NULL) { + cpuacct->set_subsystem_path(base); + } else if (strstr(controller, "cpu") != NULL) { + cpu->set_subsystem_path(base); + } + } + } + + if (cgroup != NULL) fclose(cgroup); + + if (memory == NULL || cpuset == NULL || cpu == NULL) { + log_debug(os, container)("Required cgroup subsystems not found"); + return; + } + + // We need to update the amount of physical memory now that + // command line arguments have been processed. + if ((mem_limit = memory_limit_in_bytes()) > 0) { + os::Linux::set_physical_memory(mem_limit); + } + + _is_containerized = true; + +#if 0 + os::Linux::print_container_info(tty); + // Test Container functions + memory_limit_in_bytes(); + memory_usage_in_bytes(); + os::available_memory(); + memory_and_swap_limit_in_bytes(); + memory_soft_limit_in_bytes(); + cpu_cpuset_memory_nodes(); + cpu_cpuset_cpus(); + cpu_quota(); + cpu_period(); + cpu_shares(); + active_processor_count(); +#endif + +} + +char * OSContainer::container_type() { + if (is_containerized()) { + return (char *)"cgroupv1"; + } else { + return NULL; + } +} + + +/* memory_limit_in_bytes + * + * Return the limit of available memory for this process. + * + * return: + * memory limit in bytes or + * -1 for unlimited + * OSCONTAINER_ERROR for not supported + */ +jlong OSContainer::memory_limit_in_bytes() { + GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.limit_in_bytes", + "Memory Limit is: " JLONG_FORMAT "\n", memlimit); + + if (memlimit >= UNLIMITED_MEM) { + log_trace(os, container)("Memory Limit is: Unlimited\n"); + return (jlong)-1; + } + else { + return memlimit; + } +} + +jlong OSContainer::memory_and_swap_limit_in_bytes() { + GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.memsw.limit_in_bytes", + "Memory and Swap Limit is: " JLONG_FORMAT "\n", memswlimit); + if (memswlimit >= UNLIMITED_MEM) { + log_trace(os, container)("Memory and Swap Limit is: Unlimited\n"); + return (jlong)-1; + } else { + return memswlimit; + } +} + +jlong OSContainer::memory_soft_limit_in_bytes() { + GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.soft_limit_in_bytes", + "Memory Soft Limit is: " JLONG_FORMAT "\n", memsoftlimit); + if (memsoftlimit >= UNLIMITED_MEM) { + log_trace(os, container)("Memory Soft Limit is: Unlimited\n"); + return (jlong)-1; + } else { + return memsoftlimit; + } +} + +/* memory_usage_in_bytes + * + * Return the amount of used memory for this process. + * + * return: + * memory usage in bytes or + * -1 for unlimited + * OSCONTAINER_ERROR for not supported + */ +jlong OSContainer::memory_usage_in_bytes() { + GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.usage_in_bytes", + "Memory Usage is: " JLONG_FORMAT "\n", memusage); + return memusage; +} + +/* memory_max_usage_in_bytes + * + * Return the maximum amount of used memory for this process. + * + * return: + * max memory usage in bytes or + * OSCONTAINER_ERROR for not supported + */ +jlong OSContainer::memory_max_usage_in_bytes() { + GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.max_usage_in_bytes", + "Maximu, Memory Usage is: " JLONG_FORMAT "\n", memmaxusage); + return memmaxusage; +} + +/* active_processor_count + * + * Calculate an appropriate number of active processors for the + * VM to use based on these three cgroup options. + * + * cpu affinity + * cpu quota & cpu period + * cpu shares + * + * Algorithm: + * + * Determine the number of available CPUs from sched_getaffinity + * + * If user specified a quota (quota != -1), calculate the number of + * required CPUs by dividing quota by period. + * + * If shares are in effect (shares != -1), calculate the number + * of cpus required for the shares by dividing the share value + * by PER_CPU_SHARES. + * + * All results of division are rounded up to the next whole number. + * + * Return the smaller number from the three different settings. + * + * return: + * number of cpus + * OSCONTAINER_ERROR if failure occured during extract of cpuset info + */ +int OSContainer::active_processor_count() { + int cpu_count, share_count, quota_count; + int share, quota, period; + int result; + + cpu_count = os::Linux::active_processor_count(); + + share = cpu_shares(); + if (share > -1) { + share_count = ceilf((float)share / (float)PER_CPU_SHARES); + log_trace(os, container)("cpu_share count: %d", share_count); + } else { + share_count = cpu_count; + } + + quota = cpu_quota(); + period = cpu_period(); + if (quota > -1 && period > 0) { + quota_count = ceilf((float)quota / (float)period); + log_trace(os, container)("quota_count: %d", quota_count); + } else { + quota_count = cpu_count; + } + + result = MIN2(cpu_count, MIN2(share_count, quota_count)); + log_trace(os, container)("OSContainer::active_processor_count: %d", result); + return result; +} + +char * OSContainer::cpu_cpuset_cpus() { + GET_CONTAINER_INFO(cptr, true, cpuset, (char *)"/cpuset.cpus", + "cpuset.cpus is: %s\n", cpus); + return cpus; +} + +char * OSContainer::cpu_cpuset_memory_nodes() { + GET_CONTAINER_INFO(cptr, true, cpuset, (char *)"/cpuset.mems", + "cpuset.mems is: %s\n", mems); + return mems; +} + +/* cpu_quota + * + * Return the number of milliseconds per period + * process is guaranteed to run. + * + * return: + * quota time in milliseconds + * -1 for no quota + * OSCONTAINER_ERROR for not supported + */ +int OSContainer::cpu_quota() { + GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.cfs_quota_us", + "CPU Quota is: %d\n", quota); + return quota; +} + +int OSContainer::cpu_period() { + GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.cfs_period_us", + "CPU Period is: %d\n", period); + return period; +} + +/* cpu_shares + * + * Return the amount of cpu shares available to the process + * + * return: + * Share number (typically a number relative to 1024) + * (2048 typically expresses 2 CPUs worth of processing) + * -1 for no share setup + * OSCONTAINER_ERROR for not supported + */ +int OSContainer::cpu_shares() { + GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.shares", + "CPU Shares is: %d\n", shares); + // Convert 1024 to no shares setup + if (shares == 1024) return -1; + + return shares; +} +