--- old/src/hotspot/os/linux/osContainer_linux.cpp 2019-11-08 15:01:33.159141454 +0100 +++ new/src/hotspot/os/linux/osContainer_linux.cpp 2019-11-08 15:01:32.930140946 +0100 @@ -25,275 +25,16 @@ #include #include #include -#include "utilities/globalDefinitions.hpp" -#include "memory/allocation.hpp" #include "runtime/globals.hpp" #include "runtime/os.hpp" #include "logging/log.hpp" #include "osContainer_linux.hpp" +#include "cgroupSubsystem_linux.hpp" -/* - * PER_CPU_SHARES has been set to 1024 because CPU shares' quota - * is commonly used in cloud frameworks like Kubernetes[1], - * AWS[2] and Mesos[3] in a similar way. They spawn containers with - * --cpu-shares option values scaled by PER_CPU_SHARES. Thus, we do - * the inverse for determining the number of possible available - * CPUs to the JVM inside a container. See JDK-8216366. - * - * [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu - * In particular: - * When using Docker: - * The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially - * fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the - * --cpu-shares flag in the docker run command. - * [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html - * [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L648 - * https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L30 - */ -#define PER_CPU_SHARES 1024 bool OSContainer::_is_initialized = false; bool OSContainer::_is_containerized = false; -int OSContainer::_active_processor_count = 1; -julong _unlimited_memory; - -class CgroupSubsystem: CHeapObj { - friend class OSContainer; - - - private: - volatile jlong _next_check_counter; - - /* mountinfo contents */ - char *_root; - char *_mount_point; - - /* Constructed subsystem directory */ - char *_path; - - public: - CgroupSubsystem(char *root, char *mountpoint) { - _root = os::strdup(root); - _mount_point = os::strdup(mountpoint); - _path = NULL; - _next_check_counter = min_jlong; - } - - /* - * Set directory to subsystem specific files based - * on the contents of the mountinfo and cgroup files. - */ - void set_subsystem_path(char *cgroup_path) { - char buf[MAXPATHLEN+1]; - if (_root != NULL && cgroup_path != NULL) { - if (strcmp(_root, "/") == 0) { - int buflen; - strncpy(buf, _mount_point, MAXPATHLEN); - buf[MAXPATHLEN-1] = '\0'; - if (strcmp(cgroup_path,"/") != 0) { - buflen = strlen(buf); - if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { - return; - } - strncat(buf, cgroup_path, MAXPATHLEN-buflen); - buf[MAXPATHLEN-1] = '\0'; - } - _path = os::strdup(buf); - } else { - if (strcmp(_root, cgroup_path) == 0) { - strncpy(buf, _mount_point, MAXPATHLEN); - buf[MAXPATHLEN-1] = '\0'; - _path = os::strdup(buf); - } else { - char *p = strstr(cgroup_path, _root); - if (p != NULL && p == _root) { - if (strlen(cgroup_path) > strlen(_root)) { - int buflen; - strncpy(buf, _mount_point, MAXPATHLEN); - buf[MAXPATHLEN-1] = '\0'; - buflen = strlen(buf); - if ((buflen + strlen(cgroup_path) - strlen(_root)) > (MAXPATHLEN-1)) { - return; - } - strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen); - buf[MAXPATHLEN-1] = '\0'; - _path = os::strdup(buf); - } - } - } - } - } - } - - char *subsystem_path() { return _path; } - - bool cache_has_expired() { - return os::elapsed_counter() > _next_check_counter; - } - - void set_cache_expiry_time(jlong timeout) { - _next_check_counter = os::elapsed_counter() + timeout; - } -}; - -class CgroupMemorySubsystem: CgroupSubsystem { - friend class OSContainer; - - private: - /* Some container runtimes set limits via cgroup - * hierarchy. If set to true consider also memory.stat - * file if everything else seems unlimited */ - bool _uses_mem_hierarchy; - volatile jlong _memory_limit_in_bytes; - - public: - CgroupMemorySubsystem(char *root, char *mountpoint) : CgroupSubsystem::CgroupSubsystem(root, mountpoint) { - _uses_mem_hierarchy = false; - _memory_limit_in_bytes = -1; - - } - - bool is_hierarchical() { return _uses_mem_hierarchy; } - void set_hierarchical(bool value) { _uses_mem_hierarchy = value; } - - jlong memory_limit_in_bytes() { return _memory_limit_in_bytes; } - void set_memory_limit_in_bytes(jlong value) { - _memory_limit_in_bytes = value; - // max memory limit is unlikely to change, but we want to remain - // responsive to configuration changes. A very short grace time - // between re-read avoids excessive overhead during startup without - // significantly reducing the VMs ability to promptly react to reduced - // memory availability - set_cache_expiry_time(OSCONTAINER_CACHE_TIMEOUT); - } - -}; - -CgroupMemorySubsystem* memory = NULL; -CgroupSubsystem* cpuset = NULL; -CgroupSubsystem* cpu = NULL; -CgroupSubsystem* cpuacct = NULL; - -typedef char * cptr; - -PRAGMA_DIAG_PUSH -PRAGMA_FORMAT_NONLITERAL_IGNORED -template int subsystem_file_line_contents(CgroupSubsystem* c, - const char *filename, - const char *matchline, - const char *scan_fmt, - T returnval) { - FILE *fp = NULL; - char *p; - char file[MAXPATHLEN+1]; - char buf[MAXPATHLEN+1]; - char discard[MAXPATHLEN+1]; - bool found_match = false; - - if (c == NULL) { - log_debug(os, container)("subsystem_file_line_contents: CgroupSubsytem* is NULL"); - return OSCONTAINER_ERROR; - } - if (c->subsystem_path() == NULL) { - log_debug(os, container)("subsystem_file_line_contents: subsystem path is NULL"); - return OSCONTAINER_ERROR; - } - - strncpy(file, c->subsystem_path(), MAXPATHLEN); - file[MAXPATHLEN-1] = '\0'; - int filelen = strlen(file); - if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) { - log_debug(os, container)("File path too long %s, %s", file, filename); - return OSCONTAINER_ERROR; - } - strncat(file, filename, MAXPATHLEN-filelen); - log_trace(os, container)("Path to %s is %s", filename, file); - fp = fopen(file, "r"); - if (fp != NULL) { - int err = 0; - while ((p = fgets(buf, MAXPATHLEN, fp)) != NULL) { - found_match = false; - if (matchline == NULL) { - // single-line file case - int matched = sscanf(p, scan_fmt, returnval); - found_match = (matched == 1); - } else { - // multi-line file case - if (strstr(p, matchline) != NULL) { - // discard matchline string prefix - int matched = sscanf(p, scan_fmt, discard, returnval); - found_match = (matched == 2); - } else { - continue; // substring not found - } - } - if (found_match) { - fclose(fp); - return 0; - } else { - err = 1; - log_debug(os, container)("Type %s not found in file %s", scan_fmt, file); - } - } - if (err == 0) { - log_debug(os, container)("Empty file %s", file); - } - } else { - log_debug(os, container)("Open of file %s failed, %s", file, os::strerror(errno)); - } - if (fp != NULL) - fclose(fp); - return OSCONTAINER_ERROR; -} -PRAGMA_DIAG_POP - -#define GET_CONTAINER_INFO(return_type, subsystem, filename, \ - logstring, scan_fmt, variable) \ - return_type variable; \ -{ \ - int err; \ - err = subsystem_file_line_contents(subsystem, \ - filename, \ - NULL, \ - scan_fmt, \ - &variable); \ - if (err != 0) \ - return (return_type) OSCONTAINER_ERROR; \ - \ - log_trace(os, container)(logstring, variable); \ -} - -#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename, \ - logstring, scan_fmt, variable, bufsize) \ - char variable[bufsize]; \ -{ \ - int err; \ - err = subsystem_file_line_contents(subsystem, \ - filename, \ - NULL, \ - scan_fmt, \ - variable); \ - if (err != 0) \ - return (return_type) NULL; \ - \ - log_trace(os, container)(logstring, variable); \ -} - -#define GET_CONTAINER_INFO_LINE(return_type, subsystem, filename, \ - matchline, logstring, scan_fmt, variable) \ - return_type variable; \ -{ \ - int err; \ - err = subsystem_file_line_contents(subsystem, \ - filename, \ - matchline, \ - scan_fmt, \ - &variable); \ - if (err != 0) \ - return (return_type) OSCONTAINER_ERROR; \ - \ - log_trace(os, container)(logstring, variable); \ -} +CgroupSubsystem* cgroup_subsystem; /* init * @@ -301,12 +42,6 @@ * we are running under cgroup control. */ void OSContainer::init() { - FILE *mntinfo = NULL; - FILE *cgroup = NULL; - char buf[MAXPATHLEN+1]; - char tmproot[MAXPATHLEN+1]; - char tmpmount[MAXPATHLEN+1]; - char *p; jlong mem_limit; assert(!_is_initialized, "Initializing OSContainer more than once"); @@ -314,139 +49,19 @@ _is_initialized = true; _is_containerized = false; - _unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size(); - log_trace(os, container)("OSContainer::init: Initializing Container Support"); if (!UseContainerSupport) { log_trace(os, container)("Container Support not enabled"); return; } - /* - * Find the cgroup mount point for memory and cpuset - * by reading /proc/self/mountinfo - * - * Example for docker: - * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory - * - * Example for host: - * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory - */ - mntinfo = fopen("/proc/self/mountinfo", "r"); - if (mntinfo == NULL) { - log_debug(os, container)("Can't open /proc/self/mountinfo, %s", - os::strerror(errno)); - return; - } - - while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { - char tmpcgroups[MAXPATHLEN+1]; - char *cptr = tmpcgroups; - char *token; - - // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt - if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- cgroup %*s %s", tmproot, tmpmount, tmpcgroups) != 3) { - continue; - } - while ((token = strsep(&cptr, ",")) != NULL) { - if (strcmp(token, "memory") == 0) { - memory = new CgroupMemorySubsystem(tmproot, tmpmount); - } else if (strcmp(token, "cpuset") == 0) { - cpuset = new CgroupSubsystem(tmproot, tmpmount); - } else if (strcmp(token, "cpu") == 0) { - cpu = new CgroupSubsystem(tmproot, tmpmount); - } else if (strcmp(token, "cpuacct") == 0) { - cpuacct= new CgroupSubsystem(tmproot, tmpmount); - } - } - } - - fclose(mntinfo); - - if (memory == NULL) { - log_debug(os, container)("Required cgroup memory subsystem not found"); - return; - } - if (cpuset == NULL) { - log_debug(os, container)("Required cgroup cpuset subsystem not found"); - return; - } - if (cpu == NULL) { - log_debug(os, container)("Required cgroup cpu subsystem not found"); - return; + cgroup_subsystem = CgroupSubsystemFactory::create(); + if (cgroup_subsystem == NULL) { + return; // Required subsystem files not found or other error } - if (cpuacct == NULL) { - log_debug(os, container)("Required cgroup cpuacct subsystem not found"); - return; - } - - /* - * Read /proc/self/cgroup and map host mount point to - * local one via /proc/self/mountinfo content above - * - * Docker example: - * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 - * - * Host example: - * 5:memory:/user.slice - * - * Construct a path to the process specific memory and cpuset - * cgroup directory. - * - * For a container running under Docker from memory example above - * the paths would be: - * - * /sys/fs/cgroup/memory - * - * For a Host from memory example above the path would be: - * - * /sys/fs/cgroup/memory/user.slice - * - */ - cgroup = fopen("/proc/self/cgroup", "r"); - if (cgroup == NULL) { - log_debug(os, container)("Can't open /proc/self/cgroup, %s", - os::strerror(errno)); - return; - } - - while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { - char *controllers; - char *token; - char *base; - - /* Skip cgroup number */ - strsep(&p, ":"); - /* Get controllers and base */ - controllers = strsep(&p, ":"); - base = strsep(&p, "\n"); - - if (controllers == NULL) { - continue; - } - - while ((token = strsep(&controllers, ",")) != NULL) { - if (strcmp(token, "memory") == 0) { - memory->set_subsystem_path(base); - jlong hierarchy = uses_mem_hierarchy(); - if (hierarchy > 0) { - memory->set_hierarchical(true); - } - } else if (strcmp(token, "cpuset") == 0) { - cpuset->set_subsystem_path(base); - } else if (strcmp(token, "cpu") == 0) { - cpu->set_subsystem_path(base); - } else if (strcmp(token, "cpuacct") == 0) { - cpuacct->set_subsystem_path(base); - } - } - } - - fclose(cgroup); - // We need to update the amount of physical memory now that - // command line arguments have been processed. - if ((mem_limit = memory_limit_in_bytes()) > 0) { + // cgroup subsystem files have been processed. + if ((mem_limit = cgroup_subsystem->memory_limit_in_bytes()) > 0) { os::Linux::set_physical_memory(mem_limit); log_info(os, container)("Memory Limit is: " JLONG_FORMAT, mem_limit); } @@ -456,272 +71,61 @@ } const char * OSContainer::container_type() { - if (is_containerized()) { - return "cgroupv1"; - } else { - return NULL; - } + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->container_type(); } -/* uses_mem_hierarchy - * - * Return whether or not hierarchical cgroup accounting is being - * done. - * - * return: - * A number > 0 if true, or - * OSCONTAINER_ERROR for not supported - */ -jlong OSContainer::uses_mem_hierarchy() { - GET_CONTAINER_INFO(jlong, memory, "/memory.use_hierarchy", - "Use Hierarchy is: " JLONG_FORMAT, JLONG_FORMAT, use_hierarchy); - return use_hierarchy; -} - - -/* memory_limit_in_bytes - * - * Return the limit of available memory for this process. - * - * return: - * memory limit in bytes or - * -1 for unlimited - * OSCONTAINER_ERROR for not supported - */ jlong OSContainer::memory_limit_in_bytes() { - if (!memory->cache_has_expired()) { - return memory->memory_limit_in_bytes(); - } - jlong memory_limit = read_memory_limit_in_bytes(); - // Update CgroupMemorySubsystem to avoid re-reading container settings too often - memory->set_memory_limit_in_bytes(memory_limit); - return memory_limit; -} - -jlong OSContainer::read_memory_limit_in_bytes() { - GET_CONTAINER_INFO(julong, memory, "/memory.limit_in_bytes", - "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit); - - if (memlimit >= _unlimited_memory) { - log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited"); - if (memory->is_hierarchical()) { - const char* matchline = "hierarchical_memory_limit"; - const char* format = "%s " JULONG_FORMAT; - GET_CONTAINER_INFO_LINE(julong, memory, "/memory.stat", matchline, - "Hierarchical Memory Limit is: " JULONG_FORMAT, format, hier_memlimit) - if (hier_memlimit >= _unlimited_memory) { - log_trace(os, container)("Hierarchical Memory Limit is: Unlimited"); - } else { - return (jlong)hier_memlimit; - } - } - return (jlong)-1; - } - else { - return (jlong)memlimit; - } + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->memory_limit_in_bytes(); } jlong OSContainer::memory_and_swap_limit_in_bytes() { - GET_CONTAINER_INFO(julong, memory, "/memory.memsw.limit_in_bytes", - "Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit); - if (memswlimit >= _unlimited_memory) { - log_trace(os, container)("Non-Hierarchical Memory and Swap Limit is: Unlimited"); - if (memory->is_hierarchical()) { - const char* matchline = "hierarchical_memsw_limit"; - const char* format = "%s " JULONG_FORMAT; - GET_CONTAINER_INFO_LINE(julong, memory, "/memory.stat", matchline, - "Hierarchical Memory and Swap Limit is : " JULONG_FORMAT, format, hier_memlimit) - if (hier_memlimit >= _unlimited_memory) { - log_trace(os, container)("Hierarchical Memory and Swap Limit is: Unlimited"); - } else { - return (jlong)hier_memlimit; - } - } - return (jlong)-1; - } else { - return (jlong)memswlimit; - } + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->memory_and_swap_limit_in_bytes(); } jlong OSContainer::memory_soft_limit_in_bytes() { - GET_CONTAINER_INFO(julong, memory, "/memory.soft_limit_in_bytes", - "Memory Soft Limit is: " JULONG_FORMAT, JULONG_FORMAT, memsoftlimit); - if (memsoftlimit >= _unlimited_memory) { - log_trace(os, container)("Memory Soft Limit is: Unlimited"); - return (jlong)-1; - } else { - return (jlong)memsoftlimit; - } + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->memory_soft_limit_in_bytes(); } -/* memory_usage_in_bytes - * - * Return the amount of used memory for this process. - * - * return: - * memory usage in bytes or - * -1 for unlimited - * OSCONTAINER_ERROR for not supported - */ jlong OSContainer::memory_usage_in_bytes() { - GET_CONTAINER_INFO(jlong, memory, "/memory.usage_in_bytes", - "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage); - return memusage; + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->memory_usage_in_bytes(); } -/* memory_max_usage_in_bytes - * - * Return the maximum amount of used memory for this process. - * - * return: - * max memory usage in bytes or - * OSCONTAINER_ERROR for not supported - */ jlong OSContainer::memory_max_usage_in_bytes() { - GET_CONTAINER_INFO(jlong, memory, "/memory.max_usage_in_bytes", - "Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage); - return memmaxusage; -} - -/* active_processor_count - * - * Calculate an appropriate number of active processors for the - * VM to use based on these three inputs. - * - * cpu affinity - * cgroup cpu quota & cpu period - * cgroup cpu shares - * - * Algorithm: - * - * Determine the number of available CPUs from sched_getaffinity - * - * If user specified a quota (quota != -1), calculate the number of - * required CPUs by dividing quota by period. - * - * If shares are in effect (shares != -1), calculate the number - * of CPUs required for the shares by dividing the share value - * by PER_CPU_SHARES. - * - * All results of division are rounded up to the next whole number. - * - * If neither shares or quotas have been specified, return the - * number of active processors in the system. - * - * If both shares and quotas have been specified, the results are - * based on the flag PreferContainerQuotaForCPUCount. If true, - * return the quota value. If false return the smallest value - * between shares or quotas. - * - * If shares and/or quotas have been specified, the resulting number - * returned will never exceed the number of active processors. - * - * return: - * number of CPUs - */ -int OSContainer::active_processor_count() { - int quota_count = 0, share_count = 0; - int cpu_count, limit_count; - int result; - - // We use a cache with a timeout to avoid performing expensive - // computations in the event this function is called frequently. - // [See 8227006]. - if (!cpu->cache_has_expired()) { - log_trace(os, container)("OSContainer::active_processor_count (cached): %d", OSContainer::_active_processor_count); - return OSContainer::_active_processor_count; - } - - cpu_count = limit_count = os::Linux::active_processor_count(); - int quota = cpu_quota(); - int period = cpu_period(); - int share = cpu_shares(); - - if (quota > -1 && period > 0) { - quota_count = ceilf((float)quota / (float)period); - log_trace(os, container)("CPU Quota count based on quota/period: %d", quota_count); - } - if (share > -1) { - share_count = ceilf((float)share / (float)PER_CPU_SHARES); - log_trace(os, container)("CPU Share count based on shares: %d", share_count); - } - - // If both shares and quotas are setup results depend - // on flag PreferContainerQuotaForCPUCount. - // If true, limit CPU count to quota - // If false, use minimum of shares and quotas - if (quota_count !=0 && share_count != 0) { - if (PreferContainerQuotaForCPUCount) { - limit_count = quota_count; - } else { - limit_count = MIN2(quota_count, share_count); - } - } else if (quota_count != 0) { - limit_count = quota_count; - } else if (share_count != 0) { - limit_count = share_count; - } - - result = MIN2(cpu_count, limit_count); - log_trace(os, container)("OSContainer::active_processor_count: %d", result); - - // Update the value and reset the cache timeout - OSContainer::_active_processor_count = result; - cpu->set_cache_expiry_time(OSCONTAINER_CACHE_TIMEOUT); - - return result; + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->memory_max_usage_in_bytes(); } char * OSContainer::cpu_cpuset_cpus() { - GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.cpus", - "cpuset.cpus is: %s", "%1023s", cpus, 1024); - return os::strdup(cpus); + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->cpu_cpuset_cpus(); } char * OSContainer::cpu_cpuset_memory_nodes() { - GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.mems", - "cpuset.mems is: %s", "%1023s", mems, 1024); - return os::strdup(mems); + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->cpu_cpuset_memory_nodes(); +} + +int OSContainer::active_processor_count() { + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->active_processor_count(); } -/* cpu_quota - * - * Return the number of milliseconds per period - * process is guaranteed to run. - * - * return: - * quota time in milliseconds - * -1 for no quota - * OSCONTAINER_ERROR for not supported - */ int OSContainer::cpu_quota() { - GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_quota_us", - "CPU Quota is: %d", "%d", quota); - return quota; + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->cpu_quota(); } int OSContainer::cpu_period() { - GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_period_us", - "CPU Period is: %d", "%d", period); - return period; + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->cpu_period(); } -/* cpu_shares - * - * Return the amount of cpu shares available to the process - * - * return: - * Share number (typically a number relative to 1024) - * (2048 typically expresses 2 CPUs worth of processing) - * -1 for no share setup - * OSCONTAINER_ERROR for not supported - */ int OSContainer::cpu_shares() { - GET_CONTAINER_INFO(int, cpu, "/cpu.shares", - "CPU Shares is: %d", "%d", shares); - // Convert 1024 to no shares setup - if (shares == 1024) return -1; - - return shares; + assert(cgroup_subsystem != NULL, "cgroup subsystem not available"); + return cgroup_subsystem->cpu_shares(); } --- old/src/hotspot/os/linux/osContainer_linux.hpp 2019-11-08 15:01:34.506144436 +0100 +++ new/src/hotspot/os/linux/osContainer_linux.hpp 2019-11-08 15:01:34.267143907 +0100 @@ -48,7 +48,6 @@ static inline bool is_containerized(); static const char * container_type(); - static jlong uses_mem_hierarchy(); static jlong memory_limit_in_bytes(); static jlong memory_and_swap_limit_in_bytes(); static jlong memory_soft_limit_in_bytes(); --- old/src/hotspot/os/linux/os_linux.hpp 2019-11-08 15:01:35.931147592 +0100 +++ new/src/hotspot/os/linux/os_linux.hpp 2019-11-08 15:01:35.681147038 +0100 @@ -31,6 +31,7 @@ static bool zero_page_read_protected() { return true; } class Linux { + friend class CgroupSubsystem; friend class os; friend class OSContainer; friend class TestReserveMemorySpecial; --- old/test/hotspot/jtreg/containers/docker/TestCPUAwareness.java 2019-11-08 15:01:37.073150121 +0100 +++ new/test/hotspot/jtreg/containers/docker/TestCPUAwareness.java 2019-11-08 15:01:36.871149673 +0100 @@ -33,6 +33,7 @@ * @run driver TestCPUAwareness */ import java.util.List; +import jdk.test.lib.process.OutputAnalyzer; import jdk.test.lib.containers.docker.Common; import jdk.test.lib.containers.docker.DockerRunOptions; import jdk.test.lib.containers.docker.DockerTestUtils; @@ -202,8 +203,20 @@ DockerRunOptions opts = Common.newOpts(imageName) .addDockerOpts("--cpu-shares=" + shares); - Common.run(opts) - .shouldMatch("CPU Shares is.*" + shares) - .shouldMatch("active_processor_count.*" + expectedAPC); + OutputAnalyzer out = Common.run(opts); + // Cgroups v2 needs to do some scaling of raw shares values. Hence, + // 256 CPU shares come back as 264. Raw value written to cpu.weight + // is 10. The reason this works for >= 1024 shares value is because + // post-scaling the closest multiple of 1024 is found and returned. + // + // For values < 1024, this doesn't happen so loosen the match to a + // 3-digit number and ensure the active_processor_count is as + // expected. + if (shares < 1024) { + out.shouldMatch("CPU Shares is.*\\d{3}"); + } else { + out.shouldMatch("CPU Shares is.*" + shares); + } + out.shouldMatch("active_processor_count.*" + expectedAPC); } } --- /dev/null 2019-11-08 09:42:42.340406263 +0100 +++ new/src/hotspot/os/linux/cgroupSubsystem_linux.cpp 2019-11-08 15:01:37.810151753 +0100 @@ -0,0 +1,421 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include +#include +#include +#include "cgroupSubsystem_linux.hpp" +#include "cgroupV1Subsystem_linux.hpp" +#include "cgroupV2Subsystem_linux.hpp" +#include "logging/log.hpp" +#include "memory/allocation.hpp" +#include "runtime/globals.hpp" +#include "runtime/os.hpp" +#include "utilities/globalDefinitions.hpp" + +CgroupSubsystem* CgroupSubsystemFactory::create() { + CgroupV1MemoryController* memory = NULL; + CgroupV1Controller* cpuset = NULL; + CgroupV1Controller* cpu = NULL; + CgroupV1Controller* cpuacct = NULL; + FILE *mntinfo = NULL; + FILE *cgroups = NULL; + FILE *cgroup = NULL; + char buf[MAXPATHLEN+1]; + char tmproot[MAXPATHLEN+1]; + char tmpmount[MAXPATHLEN+1]; + char *p; + bool is_cgroupsV2; + // true iff all controllers, memory, cpu, cpuset, cpuacct are enabled + // at the kernel level. + bool all_controllers_enabled; + + CgroupInfo cg_infos[CG_INFO_LENGTH]; + int cpuset_idx = 0; + int cpu_idx = 1; + int cpuacct_idx = 2; + int memory_idx = 3; + + /* + * Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1. + * + * For cgroups v1 unified hierarchy, cpu, cpuacct, cpuset, memory controllers + * must have non-zero for the hierarchy ID field. + */ + cgroups = fopen("/proc/cgroups", "r"); + if (cgroups == NULL) { + log_debug(os, container)("Can't open /proc/cgroups, %s", + os::strerror(errno)); + return NULL; + } + + while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) { + char name[MAXPATHLEN+1]; + int hierarchy_id; + int enabled; + + // Format of /proc/cgroups documented via man 7 cgroups + if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) { + continue; + } + if (strcmp(name, "memory") == 0) { + cg_infos[memory_idx]._name = os::strdup(name); + cg_infos[memory_idx]._hierarchy_id = hierarchy_id; + cg_infos[memory_idx]._enabled = (enabled == 1); + } else if (strcmp(name, "cpuset") == 0) { + cg_infos[cpuset_idx]._name = os::strdup(name); + cg_infos[cpuset_idx]._hierarchy_id = hierarchy_id; + cg_infos[cpuset_idx]._enabled = (enabled == 1); + } else if (strcmp(name, "cpu") == 0) { + cg_infos[cpu_idx]._name = os::strdup(name); + cg_infos[cpu_idx]._hierarchy_id = hierarchy_id; + cg_infos[cpu_idx]._enabled = (enabled == 1); + } else if (strcmp(name, "cpuacct") == 0) { + cg_infos[cpuacct_idx]._name = os::strdup(name); + cg_infos[cpuacct_idx]._hierarchy_id = hierarchy_id; + cg_infos[cpuacct_idx]._enabled = (enabled == 1); + } + } + fclose(cgroups); + + is_cgroupsV2 = true; + all_controllers_enabled = true; + for (int i = 0; i < CG_INFO_LENGTH; i++) { + is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0; + all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled; + } + + if (!all_controllers_enabled) { + // one or more controllers disabled, disable container support + log_debug(os, container)("One or more required controllers disabled at kernel level."); + return NULL; + } + + /* + * Read /proc/self/cgroup and determine: + * - the cgroup path for cgroups v2 or + * - on a cgroups v1 system, collect info for mapping + * the host mount point to the local one via /proc/self/mountinfo below. + */ + cgroup = fopen("/proc/self/cgroup", "r"); + if (cgroup == NULL) { + log_debug(os, container)("Can't open /proc/self/cgroup, %s", + os::strerror(errno)); + return NULL; + } + + while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { + char *controllers; + char *token; + char *hierarchy_id_str; + int hierarchy_id; + char *cgroup_path; + + hierarchy_id_str = strsep(&p, ":"); + hierarchy_id = atoi(hierarchy_id_str); + /* Get controllers and base */ + controllers = strsep(&p, ":"); + cgroup_path = strsep(&p, "\n"); + + if (controllers == NULL) { + continue; + } + + while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) { + if (strcmp(token, "memory") == 0) { + assert(hierarchy_id == cg_infos[memory_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); + cg_infos[memory_idx]._cgroup_path = os::strdup(cgroup_path); + } else if (strcmp(token, "cpuset") == 0) { + assert(hierarchy_id == cg_infos[cpuset_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); + cg_infos[cpuset_idx]._cgroup_path = os::strdup(cgroup_path); + } else if (strcmp(token, "cpu") == 0) { + assert(hierarchy_id == cg_infos[cpu_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); + cg_infos[cpu_idx]._cgroup_path = os::strdup(cgroup_path); + } else if (strcmp(token, "cpuacct") == 0) { + assert(hierarchy_id == cg_infos[cpuacct_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); + cg_infos[cpuacct_idx]._cgroup_path = os::strdup(cgroup_path); + } + } + if (is_cgroupsV2) { + for (int i = 0; i < CG_INFO_LENGTH; i++) { + cg_infos[i]._cgroup_path = os::strdup(cgroup_path); + } + } + } + fclose(cgroup); + + if (is_cgroupsV2) { + // Find the cgroup2 mount point by reading /proc/self/mountinfo + mntinfo = fopen("/proc/self/mountinfo", "r"); + if (mntinfo == NULL) { + log_debug(os, container)("Can't open /proc/self/mountinfo, %s", + os::strerror(errno)); + return NULL; + } + + char cgroupv2_mount[MAXPATHLEN+1]; + char fstype[MAXPATHLEN+1]; + bool mount_point_found = false; + while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { + char *tmp_mount_point = cgroupv2_mount; + char *tmp_fs_type = fstype; + + // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt + if (sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s cgroup2 %*s", tmp_mount_point, tmp_fs_type) == 2) { + // we likely have an early match return, be sure we have cgroup2 as fstype + if (strcmp("cgroup2", tmp_fs_type) == 0) { + mount_point_found = true; + break; + } + } + } + fclose(mntinfo); + if (!mount_point_found) { + log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo"); + return NULL; + } + // Cgroups v2 case, we have all the info we need. + // Construct the subsystem, free resources and return + // Note: any index in cg_infos will do as the path is the same for + // all controllers. + CgroupController* unified = new CgroupV2Controller(cgroupv2_mount, cg_infos[memory_idx]._cgroup_path); + for (int i = 0; i < CG_INFO_LENGTH; i++) { + os::free(cg_infos[i]._name); + os::free(cg_infos[i]._cgroup_path); + } + log_debug(os, container)("Detected cgroups v2 unified hierarchy"); + return new CgroupV2Subsystem(unified); + } + + // What follows is cgroups v1 + log_debug(os, container)("Detected cgroups hybrid or legacy hierarchy, using cgroups v1 controllers"); + + /* + * Find the cgroup mount point for memory and cpuset + * by reading /proc/self/mountinfo + * + * Example for docker: + * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory + * + * Example for host: + * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory + */ + mntinfo = fopen("/proc/self/mountinfo", "r"); + if (mntinfo == NULL) { + log_debug(os, container)("Can't open /proc/self/mountinfo, %s", + os::strerror(errno)); + return NULL; + } + + while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { + char tmpcgroups[MAXPATHLEN+1]; + char *cptr = tmpcgroups; + char *token; + + // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt + if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- cgroup %*s %s", tmproot, tmpmount, tmpcgroups) != 3) { + continue; + } + while ((token = strsep(&cptr, ",")) != NULL) { + if (strcmp(token, "memory") == 0) { + memory = new CgroupV1MemoryController(tmproot, tmpmount); + } else if (strcmp(token, "cpuset") == 0) { + cpuset = new CgroupV1Controller(tmproot, tmpmount); + } else if (strcmp(token, "cpu") == 0) { + cpu = new CgroupV1Controller(tmproot, tmpmount); + } else if (strcmp(token, "cpuacct") == 0) { + cpuacct= new CgroupV1Controller(tmproot, tmpmount); + } + } + } + + fclose(mntinfo); + + if (memory == NULL) { + log_debug(os, container)("Required cgroup v1 memory subsystem not found"); + return NULL; + } + if (cpuset == NULL) { + log_debug(os, container)("Required cgroup v1 cpuset subsystem not found"); + return NULL; + } + if (cpu == NULL) { + log_debug(os, container)("Required cgroup v1 cpu subsystem not found"); + return NULL; + } + if (cpuacct == NULL) { + log_debug(os, container)("Required cgroup v1 cpuacct subsystem not found"); + return NULL; + } + + /* + * Use info gathered previously from /proc/self/cgroup + * and map host mount point to + * local one via /proc/self/mountinfo content above + * + * Docker example: + * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 + * + * Host example: + * 5:memory:/user.slice + * + * Construct a path to the process specific memory and cpuset + * cgroup directory. + * + * For a container running under Docker from memory example above + * the paths would be: + * + * /sys/fs/cgroup/memory + * + * For a Host from memory example above the path would be: + * + * /sys/fs/cgroup/memory/user.slice + * + */ + for (int i = 0; i < CG_INFO_LENGTH; i++) { + CgroupInfo info = cg_infos[i]; + if (strcmp(info._name, "memory") == 0) { + memory->set_subsystem_path(info._cgroup_path); + } else if (strcmp(info._name, "cpuset") == 0) { + cpuset->set_subsystem_path(info._cgroup_path); + } else if (strcmp(info._name, "cpu") == 0) { + cpu->set_subsystem_path(info._cgroup_path); + } else if (strcmp(info._name, "cpuacct") == 0) { + cpuacct->set_subsystem_path(info._cgroup_path); + } + } + return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory); +} + +/* active_processor_count + * + * Calculate an appropriate number of active processors for the + * VM to use based on these three inputs. + * + * cpu affinity + * cgroup cpu quota & cpu period + * cgroup cpu shares + * + * Algorithm: + * + * Determine the number of available CPUs from sched_getaffinity + * + * If user specified a quota (quota != -1), calculate the number of + * required CPUs by dividing quota by period. + * + * If shares are in effect (shares != -1), calculate the number + * of CPUs required for the shares by dividing the share value + * by PER_CPU_SHARES. + * + * All results of division are rounded up to the next whole number. + * + * If neither shares or quotas have been specified, return the + * number of active processors in the system. + * + * If both shares and quotas have been specified, the results are + * based on the flag PreferContainerQuotaForCPUCount. If true, + * return the quota value. If false return the smallest value + * between shares or quotas. + * + * If shares and/or quotas have been specified, the resulting number + * returned will never exceed the number of active processors. + * + * return: + * number of CPUs + */ +int CgroupSubsystem::active_processor_count() { + int quota_count = 0, share_count = 0; + int cpu_count, limit_count; + int result; + + // We use a cache with a timeout to avoid performing expensive + // computations in the event this function is called frequently. + // [See 8227006]. + CachingCgroupController* contrl = cpu_controller(); + CachedMetric* cpu_limit = contrl->metrics_cache(); + if (!cpu_limit->should_check_metric()) { + int val = (int)cpu_limit->value(); + log_trace(os, container)("CgroupSubsystem::active_processor_count (cached): %d", val); + return val; + } + + cpu_count = limit_count = os::Linux::active_processor_count(); + int quota = cpu_quota(); + int period = cpu_period(); + int share = cpu_shares(); + + if (quota > -1 && period > 0) { + quota_count = ceilf((float)quota / (float)period); + log_trace(os, container)("CPU Quota count based on quota/period: %d", quota_count); + } + if (share > -1) { + share_count = ceilf((float)share / (float)PER_CPU_SHARES); + log_trace(os, container)("CPU Share count based on shares: %d", share_count); + } + + // If both shares and quotas are setup results depend + // on flag PreferContainerQuotaForCPUCount. + // If true, limit CPU count to quota + // If false, use minimum of shares and quotas + if (quota_count !=0 && share_count != 0) { + if (PreferContainerQuotaForCPUCount) { + limit_count = quota_count; + } else { + limit_count = MIN2(quota_count, share_count); + } + } else if (quota_count != 0) { + limit_count = quota_count; + } else if (share_count != 0) { + limit_count = share_count; + } + + result = MIN2(cpu_count, limit_count); + log_trace(os, container)("OSContainer::active_processor_count: %d", result); + + // Update cached metric to avoid re-reading container settings too often + cpu_limit->set_value(result, OSCONTAINER_CACHE_TIMEOUT); + + return result; +} + +/* memory_limit_in_bytes + * + * Return the limit of available memory for this process. + * + * return: + * memory limit in bytes or + * -1 for unlimited + * OSCONTAINER_ERROR for not supported + */ +jlong CgroupSubsystem::memory_limit_in_bytes() { + CachingCgroupController* contrl = memory_controller(); + CachedMetric* memory_limit = contrl->metrics_cache(); + if (!memory_limit->should_check_metric()) { + return memory_limit->value(); + } + jlong mem_limit = read_memory_limit_in_bytes(); + // Update cached metric to avoid re-reading container settings too often + memory_limit->set_value(mem_limit, OSCONTAINER_CACHE_TIMEOUT); + return mem_limit; +} --- /dev/null 2019-11-08 09:42:42.340406263 +0100 +++ new/src/hotspot/os/linux/cgroupSubsystem_linux.hpp 2019-11-08 15:01:38.987154359 +0100 @@ -0,0 +1,264 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CGROUP_SUBSYSTEM_LINUX_HPP +#define CGROUP_SUBSYSTEM_LINUX_HPP + +#include "memory/allocation.hpp" +#include "runtime/os.hpp" +#include "logging/log.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" +#include "osContainer_linux.hpp" + +// Shared cgroups code (used by cgroup version 1 and version 2) + +/* + * PER_CPU_SHARES has been set to 1024 because CPU shares' quota + * is commonly used in cloud frameworks like Kubernetes[1], + * AWS[2] and Mesos[3] in a similar way. They spawn containers with + * --cpu-shares option values scaled by PER_CPU_SHARES. Thus, we do + * the inverse for determining the number of possible available + * CPUs to the JVM inside a container. See JDK-8216366. + * + * [1] https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-cpu + * In particular: + * When using Docker: + * The spec.containers[].resources.requests.cpu is converted to its core value, which is potentially + * fractional, and multiplied by 1024. The greater of this number or 2 is used as the value of the + * --cpu-shares flag in the docker run command. + * [2] https://docs.aws.amazon.com/AmazonECS/latest/APIReference/API_ContainerDefinition.html + * [3] https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/docker/docker.cpp#L648 + * https://github.com/apache/mesos/blob/3478e344fb77d931f6122980c6e94cd3913c441d/src/slave/containerizer/mesos/isolators/cgroups/constants.hpp#L30 + */ +#define PER_CPU_SHARES 1024 + +typedef char * cptr; + +class CgroupController: public CHeapObj { + public: + virtual char *subsystem_path(); +}; + +PRAGMA_DIAG_PUSH +PRAGMA_FORMAT_NONLITERAL_IGNORED +template int subsystem_file_line_contents(CgroupController* c, + const char *filename, + const char *matchline, + const char *scan_fmt, + T returnval) { + FILE *fp = NULL; + char *p; + char file[MAXPATHLEN+1]; + char buf[MAXPATHLEN+1]; + char discard[MAXPATHLEN+1]; + bool found_match = false; + + if (c == NULL) { + log_debug(os, container)("subsystem_file_line_contents: CgroupController* is NULL"); + return OSCONTAINER_ERROR; + } + if (c->subsystem_path() == NULL) { + log_debug(os, container)("subsystem_file_line_contents: subsystem path is NULL"); + return OSCONTAINER_ERROR; + } + + strncpy(file, c->subsystem_path(), MAXPATHLEN); + file[MAXPATHLEN-1] = '\0'; + int filelen = strlen(file); + if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) { + log_debug(os, container)("File path too long %s, %s", file, filename); + return OSCONTAINER_ERROR; + } + strncat(file, filename, MAXPATHLEN-filelen); + log_trace(os, container)("Path to %s is %s", filename, file); + fp = fopen(file, "r"); + if (fp != NULL) { + int err = 0; + while ((p = fgets(buf, MAXPATHLEN, fp)) != NULL) { + found_match = false; + if (matchline == NULL) { + // single-line file case + int matched = sscanf(p, scan_fmt, returnval); + found_match = (matched == 1); + } else { + // multi-line file case + if (strstr(p, matchline) != NULL) { + // discard matchline string prefix + int matched = sscanf(p, scan_fmt, discard, returnval); + found_match = (matched == 2); + } else { + continue; // substring not found + } + } + if (found_match) { + fclose(fp); + return 0; + } else { + err = 1; + log_debug(os, container)("Type %s not found in file %s", scan_fmt, file); + } + } + if (err == 0) { + log_debug(os, container)("Empty file %s", file); + } + } else { + log_debug(os, container)("Open of file %s failed, %s", file, os::strerror(errno)); + } + if (fp != NULL) + fclose(fp); + return OSCONTAINER_ERROR; +} +PRAGMA_DIAG_POP + +#define GET_CONTAINER_INFO(return_type, subsystem, filename, \ + logstring, scan_fmt, variable) \ + return_type variable; \ +{ \ + int err; \ + err = subsystem_file_line_contents(subsystem, \ + filename, \ + NULL, \ + scan_fmt, \ + &variable); \ + if (err != 0) \ + return (return_type) OSCONTAINER_ERROR; \ + \ + log_trace(os, container)(logstring, variable); \ +} + +#define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename, \ + logstring, scan_fmt, variable, bufsize) \ + char variable[bufsize]; \ +{ \ + int err; \ + err = subsystem_file_line_contents(subsystem, \ + filename, \ + NULL, \ + scan_fmt, \ + variable); \ + if (err != 0) \ + return (return_type) NULL; \ + \ + log_trace(os, container)(logstring, variable); \ +} + +#define GET_CONTAINER_INFO_LINE(return_type, controller, filename, \ + matchline, logstring, scan_fmt, variable) \ + return_type variable; \ +{ \ + int err; \ + err = subsystem_file_line_contents(controller, \ + filename, \ + matchline, \ + scan_fmt, \ + &variable); \ + if (err != 0) \ + return (return_type) OSCONTAINER_ERROR; \ + \ + log_trace(os, container)(logstring, variable); \ +} + +// Four controllers: cpu, cpuset, cpuacct, memory +#define CG_INFO_LENGTH 4 + +class CachedMetric : public CHeapObj{ + private: + volatile jlong _metric; + volatile jlong _next_check_counter; + public: + CachedMetric() { + _metric = -1; + _next_check_counter = min_jlong; + } + bool should_check_metric() { + return os::elapsed_counter() > _next_check_counter; + } + jlong value() { return _metric; } + void set_value(jlong value, jlong timeout) { + _metric = value; + // Metric is unlikely to change, but we want to remain + // responsive to configuration changes. A very short grace time + // between re-read avoids excessive overhead during startup without + // significantly reducing the VMs ability to promptly react to changed + // metric config + _next_check_counter = os::elapsed_counter() + timeout; + } +}; + +class CachingCgroupController : public CHeapObj { + private: + CgroupController* _controller; + CachedMetric* _metrics_cache; + + public: + CachingCgroupController(CgroupController* cont) { + _controller = cont; + _metrics_cache = new CachedMetric(); + } + + CachedMetric* metrics_cache() { return _metrics_cache; } + CgroupController* controller() { return _controller; } +}; + +class CgroupSubsystem: public CHeapObj { + public: + jlong memory_limit_in_bytes(); + int active_processor_count(); + + virtual int cpu_quota(); + virtual int cpu_period(); + virtual int cpu_shares(); + virtual jlong memory_usage_in_bytes(); + virtual jlong memory_and_swap_limit_in_bytes(); + virtual jlong memory_soft_limit_in_bytes(); + virtual jlong memory_max_usage_in_bytes(); + virtual char * cpu_cpuset_cpus(); + virtual char * cpu_cpuset_memory_nodes(); + virtual jlong read_memory_limit_in_bytes(); + virtual const char * container_type(); + virtual CachingCgroupController* memory_controller(); + virtual CachingCgroupController* cpu_controller(); +}; + +class CgroupSubsystemFactory: AllStatic { + public: + static CgroupSubsystem* create(); +}; + +// Class representing info in /proc/self/cgroup. +// See man 7 cgroups +class CgroupInfo : public StackObj { + friend class CgroupSubsystemFactory; + + private: + char* _name; + int _hierarchy_id; + bool _enabled; + char* _cgroup_path; + +}; + + +#endif // CGROUP_SUBSYSTEM_LINUX_HPP --- /dev/null 2019-11-08 09:42:42.340406263 +0100 +++ new/src/hotspot/os/linux/cgroupV1Subsystem_linux.cpp 2019-11-08 15:01:40.071156759 +0100 @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include +#include +#include +#include "cgroupV1Subsystem_linux.hpp" +#include "logging/log.hpp" +#include "memory/allocation.hpp" +#include "runtime/globals.hpp" +#include "runtime/os.hpp" +#include "utilities/globalDefinitions.hpp" + +/* + * Set directory to subsystem specific files based + * on the contents of the mountinfo and cgroup files. + */ +void CgroupV1Controller::set_subsystem_path(char *cgroup_path) { + char buf[MAXPATHLEN+1]; + if (_root != NULL && cgroup_path != NULL) { + if (strcmp(_root, "/") == 0) { + int buflen; + strncpy(buf, _mount_point, MAXPATHLEN); + buf[MAXPATHLEN-1] = '\0'; + if (strcmp(cgroup_path,"/") != 0) { + buflen = strlen(buf); + if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { + return; + } + strncat(buf, cgroup_path, MAXPATHLEN-buflen); + buf[MAXPATHLEN-1] = '\0'; + } + _path = os::strdup(buf); + } else { + if (strcmp(_root, cgroup_path) == 0) { + strncpy(buf, _mount_point, MAXPATHLEN); + buf[MAXPATHLEN-1] = '\0'; + _path = os::strdup(buf); + } else { + char *p = strstr(cgroup_path, _root); + if (p != NULL && p == _root) { + if (strlen(cgroup_path) > strlen(_root)) { + int buflen; + strncpy(buf, _mount_point, MAXPATHLEN); + buf[MAXPATHLEN-1] = '\0'; + buflen = strlen(buf); + if ((buflen + strlen(cgroup_path) - strlen(_root)) > (MAXPATHLEN-1)) { + return; + } + strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen); + buf[MAXPATHLEN-1] = '\0'; + _path = os::strdup(buf); + } + } + } + } + } +} + +/* uses_mem_hierarchy + * + * Return whether or not hierarchical cgroup accounting is being + * done. + * + * return: + * A number > 0 if true, or + * OSCONTAINER_ERROR for not supported + */ +jlong CgroupV1MemoryController::uses_mem_hierarchy() { + GET_CONTAINER_INFO(jlong, this, "/memory.use_hierarchy", + "Use Hierarchy is: " JLONG_FORMAT, JLONG_FORMAT, use_hierarchy); + return use_hierarchy; +} + +void CgroupV1MemoryController::set_subsystem_path(char *cgroup_path) { + CgroupV1Controller::set_subsystem_path(cgroup_path); + jlong hierarchy = uses_mem_hierarchy(); + if (hierarchy > 0) { + set_hierarchical(true); + } +} + +jlong CgroupV1Subsystem::read_memory_limit_in_bytes() { + GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.limit_in_bytes", + "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit); + + if (memlimit >= _unlimited_memory) { + log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited"); + CgroupV1MemoryController* mem_controller = reinterpret_cast(_memory->controller()); + if (mem_controller->is_hierarchical()) { + const char* matchline = "hierarchical_memory_limit"; + const char* format = "%s " JULONG_FORMAT; + GET_CONTAINER_INFO_LINE(julong, _memory->controller(), "/memory.stat", matchline, + "Hierarchical Memory Limit is: " JULONG_FORMAT, format, hier_memlimit) + if (hier_memlimit >= _unlimited_memory) { + log_trace(os, container)("Hierarchical Memory Limit is: Unlimited"); + } else { + return (jlong)hier_memlimit; + } + } + return (jlong)-1; + } + else { + return (jlong)memlimit; + } +} + +jlong CgroupV1Subsystem::memory_and_swap_limit_in_bytes() { + GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.memsw.limit_in_bytes", + "Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit); + if (memswlimit >= _unlimited_memory) { + log_trace(os, container)("Non-Hierarchical Memory and Swap Limit is: Unlimited"); + CgroupV1MemoryController* mem_controller = reinterpret_cast(_memory->controller()); + if (mem_controller->is_hierarchical()) { + const char* matchline = "hierarchical_memsw_limit"; + const char* format = "%s " JULONG_FORMAT; + GET_CONTAINER_INFO_LINE(julong, _memory->controller(), "/memory.stat", matchline, + "Hierarchical Memory and Swap Limit is : " JULONG_FORMAT, format, hier_memlimit) + if (hier_memlimit >= _unlimited_memory) { + log_trace(os, container)("Hierarchical Memory and Swap Limit is: Unlimited"); + } else { + return (jlong)hier_memlimit; + } + } + return (jlong)-1; + } else { + return (jlong)memswlimit; + } +} + +jlong CgroupV1Subsystem::memory_soft_limit_in_bytes() { + GET_CONTAINER_INFO(julong, _memory->controller(), "/memory.soft_limit_in_bytes", + "Memory Soft Limit is: " JULONG_FORMAT, JULONG_FORMAT, memsoftlimit); + if (memsoftlimit >= _unlimited_memory) { + log_trace(os, container)("Memory Soft Limit is: Unlimited"); + return (jlong)-1; + } else { + return (jlong)memsoftlimit; + } +} + +/* memory_usage_in_bytes + * + * Return the amount of used memory for this process. + * + * return: + * memory usage in bytes or + * -1 for unlimited + * OSCONTAINER_ERROR for not supported + */ +jlong CgroupV1Subsystem::memory_usage_in_bytes() { + GET_CONTAINER_INFO(jlong, _memory->controller(), "/memory.usage_in_bytes", + "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage); + return memusage; +} + +/* memory_max_usage_in_bytes + * + * Return the maximum amount of used memory for this process. + * + * return: + * max memory usage in bytes or + * OSCONTAINER_ERROR for not supported + */ +jlong CgroupV1Subsystem::memory_max_usage_in_bytes() { + GET_CONTAINER_INFO(jlong, _memory->controller(), "/memory.max_usage_in_bytes", + "Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage); + return memmaxusage; +} + +char * CgroupV1Subsystem::cpu_cpuset_cpus() { + GET_CONTAINER_INFO_CPTR(cptr, _cpuset, "/cpuset.cpus", + "cpuset.cpus is: %s", "%1023s", cpus, 1024); + return os::strdup(cpus); +} + +char * CgroupV1Subsystem::cpu_cpuset_memory_nodes() { + GET_CONTAINER_INFO_CPTR(cptr, _cpuset, "/cpuset.mems", + "cpuset.mems is: %s", "%1023s", mems, 1024); + return os::strdup(mems); +} + +/* cpu_quota + * + * Return the number of milliseconds per period + * process is guaranteed to run. + * + * return: + * quota time in milliseconds + * -1 for no quota + * OSCONTAINER_ERROR for not supported + */ +int CgroupV1Subsystem::cpu_quota() { + GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.cfs_quota_us", + "CPU Quota is: %d", "%d", quota); + return quota; +} + +int CgroupV1Subsystem::cpu_period() { + GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.cfs_period_us", + "CPU Period is: %d", "%d", period); + return period; +} + +/* cpu_shares + * + * Return the amount of cpu shares available to the process + * + * return: + * Share number (typically a number relative to 1024) + * (2048 typically expresses 2 CPUs worth of processing) + * -1 for no share setup + * OSCONTAINER_ERROR for not supported + */ +int CgroupV1Subsystem::cpu_shares() { + GET_CONTAINER_INFO(int, _cpu->controller(), "/cpu.shares", + "CPU Shares is: %d", "%d", shares); + // Convert 1024 to no shares setup + if (shares == 1024) return -1; + + return shares; +} --- /dev/null 2019-11-08 09:42:42.340406263 +0100 +++ new/src/hotspot/os/linux/cgroupV1Subsystem_linux.hpp 2019-11-08 15:01:41.184159224 +0100 @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CGROUP_V1_SUBSYSTEM_LINUX_HPP +#define CGROUP_V1_SUBSYSTEM_LINUX_HPP + +#include "runtime/os.hpp" +#include "memory/allocation.hpp" +#include "cgroupSubsystem_linux.hpp" + +// Cgroups version 1 specific implementation + +class CgroupV1Controller: public CgroupController { + private: + /* mountinfo contents */ + char *_root; + char *_mount_point; + + /* Constructed subsystem directory */ + char *_path; + + public: + CgroupV1Controller(char *root, char *mountpoint) { + _root = os::strdup(root); + _mount_point = os::strdup(mountpoint); + _path = NULL; + } + + virtual void set_subsystem_path(char *cgroup_path); + char *subsystem_path() { return _path; } +}; + +class CgroupV1MemoryController: public CgroupV1Controller { + + public: + bool is_hierarchical() { return _uses_mem_hierarchy; } + void set_subsystem_path(char *cgroup_path); + private: + /* Some container runtimes set limits via cgroup + * hierarchy. If set to true consider also memory.stat + * file if everything else seems unlimited */ + bool _uses_mem_hierarchy; + jlong uses_mem_hierarchy(); + void set_hierarchical(bool value) { _uses_mem_hierarchy = value; } + + public: + CgroupV1MemoryController(char *root, char *mountpoint) : CgroupV1Controller(root, mountpoint) { + _uses_mem_hierarchy = false; + } + +}; + +class CgroupV1Subsystem: public CgroupSubsystem { + + public: + jlong read_memory_limit_in_bytes(); + jlong memory_and_swap_limit_in_bytes(); + jlong memory_soft_limit_in_bytes(); + jlong memory_usage_in_bytes(); + jlong memory_max_usage_in_bytes(); + char * cpu_cpuset_cpus(); + char * cpu_cpuset_memory_nodes(); + + int cpu_quota(); + int cpu_period(); + + int cpu_shares(); + + const char * container_type() { + return "cgroupv1"; + } + CachingCgroupController * memory_controller() { return _memory; } + CachingCgroupController * cpu_controller() { return _cpu; } + + private: + julong _unlimited_memory; + + /* controllers */ + CachingCgroupController* _memory = NULL; + CgroupV1Controller* _cpuset = NULL; + CachingCgroupController* _cpu = NULL; + CgroupV1Controller* _cpuacct = NULL; + + public: + CgroupV1Subsystem(CgroupV1Controller* cpuset, + CgroupV1Controller* cpu, + CgroupV1Controller* cpuacct, + CgroupV1MemoryController* memory) { + _cpuset = cpuset; + _cpu = new CachingCgroupController(cpu); + _cpuacct = cpuacct; + _memory = new CachingCgroupController(memory); + _unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size(); + } +}; + +#endif // CGROUP_V1_SUBSYSTEM_LINUX_HPP --- /dev/null 2019-11-08 09:42:42.340406263 +0100 +++ new/src/hotspot/os/linux/cgroupV2Subsystem_linux.cpp 2019-11-08 15:01:42.227161534 +0100 @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2019, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "cgroupV2Subsystem_linux.hpp" + +/* cpu_shares + * + * Return the amount of cpu shares available to the process + * + * return: + * Share number (typically a number relative to 1024) + * (2048 typically expresses 2 CPUs worth of processing) + * -1 for no share setup + * OSCONTAINER_ERROR for not supported + */ +int CgroupV2Subsystem::cpu_shares() { + GET_CONTAINER_INFO(int, _unified, "/cpu.weight", + "Raw value for CPU shares is: %d", "%d", shares); + // Convert default value of 100 to no shares setup + if (shares == 100) { + log_debug(os, container)("CPU Shares is: %d", -1); + return -1; + } + + // CPU shares (OCI) value needs to get translated into + // a proper Cgroups v2 value. See: + // https://github.com/containers/crun/blob/master/crun.1.md#cpu-controller + // + // Use the inverse of (x == OCI value, y == cgroupsv2 value): + // ((262142 * y - 1)/9999) + 2 = x + // + int x = 262142 * shares - 1; + double frac = x/9999.0; + x = ((int)frac) + 2; + log_trace(os, container)("Scaled CPU shares value is: %d", x); + // Since the scaled value is not precise, return the closest + // multiple of PER_CPU_SHARES for a more conservative mapping + if ( x <= PER_CPU_SHARES ) { + // will always map to 1 CPU + log_debug(os, container)("CPU Shares is: %d", x); + return x; + } + int f = x/PER_CPU_SHARES; + int lower_multiple = f * PER_CPU_SHARES; + int upper_multiple = (f + 1) * PER_CPU_SHARES; + int distance_lower = MAX2(lower_multiple, x) - MIN2(lower_multiple, x); + int distance_upper = MAX2(upper_multiple, x) - MIN2(upper_multiple, x); + x = distance_lower <= distance_upper ? lower_multiple : upper_multiple; + log_trace(os, container)("Closest multiple of %d of the CPU Shares value is: %d", PER_CPU_SHARES, x); + log_debug(os, container)("CPU Shares is: %d", x); + return x; +} + +/* cpu_quota + * + * Return the number of milliseconds per period + * process is guaranteed to run. + * + * return: + * quota time in milliseconds + * -1 for no quota + * OSCONTAINER_ERROR for not supported + */ +int CgroupV2Subsystem::cpu_quota() { + char * cpu_quota_str = cpu_quota_val(); + int limit = (int)limit_from_str(cpu_quota_str); + log_trace(os, container)("CPU Quota is: %d", limit); + return limit; +} + +char * CgroupV2Subsystem::cpu_cpuset_cpus() { + GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpuset.cpus", + "cpuset.cpus is: %s", "%1023s", cpus, 1024); + if (cpus == NULL) { + return NULL; + } + return os::strdup(cpus); +} + +char* CgroupV2Subsystem::cpu_quota_val() { + GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpu.max", + "Raw value for CPU quota is: %s", "%s %*d", quota, 1024); + if (quota == NULL) { + return NULL; + } + return os::strdup(quota); +} + +char * CgroupV2Subsystem::cpu_cpuset_memory_nodes() { + GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpuset.mems", + "cpuset.mems is: %s", "%1023s", mems, 1024); + if (mems == NULL) { + return NULL; + } + return os::strdup(mems); +} + +int CgroupV2Subsystem::cpu_period() { + GET_CONTAINER_INFO(int, _unified, "/cpu.max", + "CPU Period is: %d", "%*s %d", period); + return period; +} + +/* memory_usage_in_bytes + * + * Return the amount of used memory used by this cgroup and decendents + * + * return: + * memory usage in bytes or + * -1 for unlimited + * OSCONTAINER_ERROR for not supported + */ +jlong CgroupV2Subsystem::memory_usage_in_bytes() { + GET_CONTAINER_INFO(jlong, _unified, "/memory.current", + "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage); + // Keep track of max seen memory usage so as to be able to synthesize + // memory_max_usage_in_bytes metric. + if (memusage > _memory_max_usage) { + _memory_max_usage = memusage; + } + return memusage; +} + +jlong CgroupV2Subsystem::memory_soft_limit_in_bytes() { + char* mem_soft_limit_str = mem_soft_limit_val(); + return limit_from_str(mem_soft_limit_str); +} + +jlong CgroupV2Subsystem::memory_max_usage_in_bytes() { + jlong max_usage = 0; + if (_memory_max_usage > 0) { + max_usage = _memory_max_usage; + } + log_trace(os, container)("Maximum Memory Usage is: " JLONG_FORMAT, max_usage); + return max_usage; +} + +char* CgroupV2Subsystem::mem_soft_limit_val() { + GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.high", + "Memory Soft Limit is: %s", "%s", mem_soft_limit_str, 1024); + if (mem_soft_limit_str == NULL) { + return NULL; + } + return os::strdup(mem_soft_limit_str); +} + +jlong CgroupV2Subsystem::memory_and_swap_limit_in_bytes() { + char* mem_swp_limit_str = mem_swp_limit_val(); + return limit_from_str(mem_swp_limit_str); +} + +char* CgroupV2Subsystem::mem_swp_limit_val() { + GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.swap.max", + "Memory and Swap Limit is: %s", "%s", mem_swp_limit_str, 1024); + if (mem_swp_limit_str == NULL) { + return NULL; + } + return os::strdup(mem_swp_limit_str); +} + +/* memory_limit_in_bytes + * + * Return the limit of available memory for this process. + * + * return: + * memory limit in bytes or + * -1 for unlimited, OSCONTAINER_ERROR for an error + */ +jlong CgroupV2Subsystem::read_memory_limit_in_bytes() { + char * mem_limit_str = mem_limit_val(); + jlong limit = limit_from_str(mem_limit_str); + if (log_is_enabled(Trace, os, container)) { + if (limit == -1) { + log_trace(os, container)("Memory Limit is: Unlimited"); + } else { + log_trace(os, container)("Memory Limit is: " JLONG_FORMAT, limit); + } + } + return limit; +} + +jlong CgroupV2Subsystem::limit_from_str(char* limit_str) { + if (limit_str == NULL) { + return OSCONTAINER_ERROR; + } + // Unlimited memory in Cgroups V2 is the literal string 'max' + if (strcmp("max", limit_str) == 0) { + os::free(limit_str); + return (jlong)-1; + } + julong limit; + if (sscanf(limit_str, JULONG_FORMAT, &limit) != 1) { + os::free(limit_str); + return OSCONTAINER_ERROR; + } + os::free(limit_str); + return (jlong)limit; +} + +char* CgroupV2Subsystem::mem_limit_val() { + GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.max", + "Raw value for memory limit is: %s", "%s", mem_limit_str, 1024); + if (mem_limit_str == NULL) { + return NULL; + } + return os::strdup(mem_limit_str); +} + +char* CgroupV2Controller::construct_path(char* mount_path, char *cgroup_path) { + char buf[MAXPATHLEN+1]; + int buflen; + strncpy(buf, mount_path, MAXPATHLEN); + buf[MAXPATHLEN] = '\0'; + buflen = strlen(buf); + if ((buflen + strlen(cgroup_path)) > MAXPATHLEN) { + return NULL; + } + strncat(buf, cgroup_path, MAXPATHLEN-buflen); + buf[MAXPATHLEN] = '\0'; + return os::strdup(buf); +} + --- /dev/null 2019-11-08 09:42:42.340406263 +0100 +++ new/src/hotspot/os/linux/cgroupV2Subsystem_linux.hpp 2019-11-08 15:01:43.363164049 +0100 @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2019, Red Hat Inc. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CGROUP_V2_SUBSYSTEM_LINUX_HPP +#define CGROUP_V2_SUBSYSTEM_LINUX_HPP + +#include "cgroupSubsystem_linux.hpp" + +class CgroupV2Controller: public CgroupController { + private: + /* the mount path of the cgroup v2 hierarchy */ + char *_mount_path; + /* The cgroup path for the controller */ + char *_cgroup_path; + + /* Constructed full path to the subsystem directory */ + char *_path; + static char* construct_path(char* mount_path, char *cgroup_path); + + public: + CgroupV2Controller(char * mount_path, char *cgroup_path) { + _mount_path = mount_path; + _cgroup_path = os::strdup(cgroup_path); + _path = construct_path(mount_path, cgroup_path); + } + + char *subsystem_path() { return _path; } +}; + +class CgroupV2Subsystem: public CgroupSubsystem { + private: + /* One unified controller */ + CgroupController* _unified = NULL; + /* Caching wrappers for cpu/memory metrics */ + CachingCgroupController* _memory = NULL; + CachingCgroupController* _cpu = NULL; + /* synthesized memory max usage counter */ + volatile jlong _memory_max_usage; + + char *mem_limit_val(); + char *mem_swp_limit_val(); + char *mem_soft_limit_val(); + char *cpu_quota_val(); + jlong limit_from_str(char* limit_str); + + public: + CgroupV2Subsystem(CgroupController * unified) { + _unified = unified; + _memory = new CachingCgroupController(unified); + _cpu = new CachingCgroupController(unified); + _memory_max_usage = min_jlong; + } + + jlong read_memory_limit_in_bytes(); + int cpu_quota(); + int cpu_period(); + int cpu_shares(); + jlong memory_and_swap_limit_in_bytes(); + jlong memory_soft_limit_in_bytes(); + jlong memory_usage_in_bytes(); + jlong memory_max_usage_in_bytes(); + char * cpu_cpuset_cpus(); + char * cpu_cpuset_memory_nodes(); + const char * container_type() { + return "cgroupv2"; + } + CachingCgroupController * memory_controller() { return _memory; } + CachingCgroupController * cpu_controller() { return _cpu; } +}; + +#endif // CGROUP_V2_SUBSYSTEM_LINUX_HPP