--- old/src/hotspot/os/aix/os_aix.cpp 2017-10-24 13:04:40.843268651 -0400 +++ new/src/hotspot/os/aix/os_aix.cpp 2017-10-24 13:04:39.855212276 -0400 @@ -3615,6 +3615,14 @@ }; int os::active_processor_count() { + // User has overridden the number of active processors + if (ActiveProcessorCount > 0) { + log_trace(os)("active_processor_count: " + "active processor count set by user : %d", + ActiveProcessorCount); + return ActiveProcessorCount; + } + int online_cpus = ::sysconf(_SC_NPROCESSORS_ONLN); assert(online_cpus > 0 && online_cpus <= processor_count(), "sanity check"); return online_cpus; --- old/src/hotspot/os/bsd/os_bsd.cpp 2017-10-24 13:04:44.195459868 -0400 +++ new/src/hotspot/os/bsd/os_bsd.cpp 2017-10-24 13:04:42.687373870 -0400 @@ -3492,6 +3492,14 @@ } int os::active_processor_count() { + // User has overridden the number of active processors + if (ActiveProcessorCount > 0) { + log_trace(os)("active_processor_count: " + "active processor count set by user : %d", + ActiveProcessorCount); + return ActiveProcessorCount; + } + return _processor_count; } --- old/src/hotspot/os/linux/globals_linux.hpp 2017-10-24 13:04:47.787664681 -0400 +++ new/src/hotspot/os/linux/globals_linux.hpp 2017-10-24 13:04:46.799608347 -0400 @@ -59,6 +59,9 @@ product(bool, UseSHM, false, \ "Use SYSV shared memory for large pages") \ \ + product(bool, UseContainerSupport, true, \ + "Enable detection and runtime container configuration support") \ + \ diagnostic(bool, UseCpuAllocPath, false, \ "Use CPU_ALLOC code path in os::active_processor_count ") --- old/src/hotspot/os/linux/os_linux.cpp 2017-10-24 13:04:50.583824108 -0400 +++ new/src/hotspot/os/linux/os_linux.cpp 2017-10-24 13:04:49.591767544 -0400 @@ -38,6 +38,7 @@ #include "oops/oop.inline.hpp" #include "os_linux.inline.hpp" #include "os_share_linux.hpp" +#include "osContainer_linux.hpp" #include "prims/jniFastGetField.hpp" #include "prims/jvm.h" #include "prims/jvm_misc.hpp" @@ -172,13 +173,52 @@ julong 
os::Linux::available_memory() { // values in struct sysinfo are "unsigned long" struct sysinfo si; - sysinfo(&si); + julong avail_mem; - return (julong)si.freeram * si.mem_unit; + if (OSContainer::is_containerized()) { + jlong mem_limit, mem_usage; + if ((mem_limit = OSContainer::memory_limit_in_bytes()) > 0) { + if ((mem_usage = OSContainer::memory_usage_in_bytes()) > 0) { + if (mem_limit > mem_usage) { + avail_mem = (julong)mem_limit - (julong)mem_usage; + } else { + avail_mem = 0; + } + log_trace(os)("available container memory: " JULONG_FORMAT, avail_mem); + return avail_mem; + } else { + log_debug(os,container)("container memory usage call failed: " JLONG_FORMAT, mem_usage); + } + } else { + log_debug(os,container)("container memory unlimited or failed: " JLONG_FORMAT, mem_limit); + } + } + + sysinfo(&si); + avail_mem = (julong)si.freeram * si.mem_unit; + log_trace(os)("available memory: " JULONG_FORMAT, avail_mem); + return avail_mem; } julong os::physical_memory() { - return Linux::physical_memory(); + if (OSContainer::is_containerized()) { + jlong mem_limit; + if ((mem_limit = OSContainer::memory_limit_in_bytes()) > 0) { + log_trace(os)("total container memory: " JLONG_FORMAT, mem_limit); + return (julong)mem_limit; + } else { + if (mem_limit == OSCONTAINER_ERROR) { + log_debug(os,container)("container memory limit call failed"); + } + if (mem_limit == -1) { + log_debug(os,container)("container memory unlimited, using host value"); + } + } + } + + jlong phys_mem = Linux::physical_memory(); + log_trace(os)("total system memory: " JLONG_FORMAT, phys_mem); + return phys_mem; } // Return true if user is running as root. @@ -1951,6 +1991,8 @@ os::Posix::print_load_average(st); os::Linux::print_full_memory_info(st); + + os::Linux::print_container_info(st); } // Try to identify popular distros. 
@@ -2088,6 +2130,66 @@ st->cr(); } +void os::Linux::print_container_info(outputStream* st) { + if (OSContainer::is_containerized()) { + st->print("container (cgroup) information:\n"); + + char *p = OSContainer::container_type(); + if (p == NULL) + st->print("container_type() failed\n"); + else { + st->print("container_type: %s\n", p); + } + + p = OSContainer::cpu_cpuset_cpus(); + if (p == NULL) + st->print("cpu_cpuset_cpus() failed\n"); + else { + st->print("cpu_cpuset_cpus: %s", p); + os::free(p); // string was allocated with os::strdup + } + + p = OSContainer::cpu_cpuset_memory_nodes(); + if (p == NULL) // string getters report failure as NULL + st->print("cpu_memory_nodes() failed\n"); + else { + st->print("cpu_memory_nodes: %s", p); + os::free(p); // string was allocated with os::strdup + } + + int i = OSContainer::active_processor_count(); + if (i < 0) + st->print("active_processor_count() failed\n"); + else + st->print("active_processor_count: %d\n", i); + + i = OSContainer::cpu_quota(); + st->print("cpu_quota: %d\n", i); + + i = OSContainer::cpu_period(); + st->print("cpu_period: %d\n", i); + + i = OSContainer::cpu_shares(); + st->print("cpu_shares: %d\n", i); + + jlong j = OSContainer::memory_limit_in_bytes(); + st->print("memory_limit_in_bytes: " JLONG_FORMAT "\n", j); + + j = OSContainer::memory_and_swap_limit_in_bytes(); + st->print("memory_and_swap_limit_in_bytes: " JLONG_FORMAT "\n", j); + + j = OSContainer::memory_soft_limit_in_bytes(); + st->print("memory_soft_limit_in_bytes: " JLONG_FORMAT "\n", j); + + j = OSContainer::memory_usage_in_bytes(); + st->print("memory_usage_in_bytes: " JLONG_FORMAT "\n", j); + + j = OSContainer::memory_max_usage_in_bytes(); + st->print("memory_max_usage_in_bytes: " JLONG_FORMAT "\n", j); + st->cr(); + } +} + void os::print_memory_info(outputStream* st) { st->print("Memory:"); @@ -4799,6 +4901,10 @@ } } +void os::pd_init_container_support() { + OSContainer::init(); +} + // this is called _after_ the global arguments have been parsed jint os::init_2(void) { @@ -4961,12 +5067,12 @@ // dynamic check - see 6515172 for details. 
// If anything goes wrong we fallback to returning the number of online // processors - which can be greater than the number available to the process. -int os::active_processor_count() { +int os::Linux::active_processor_count() { cpu_set_t cpus; // can represent at most 1024 (CPU_SETSIZE) processors cpu_set_t* cpus_p = &cpus; int cpus_size = sizeof(cpu_set_t); - int configured_cpus = processor_count(); // upper bound on available cpus + int configured_cpus = os::processor_count(); // upper bound on available cpus int cpu_count = 0; // old build platforms may not support dynamic cpu sets @@ -5029,10 +5135,44 @@ CPU_FREE(cpus_p); } - assert(cpu_count > 0 && cpu_count <= processor_count(), "sanity check"); + assert(cpu_count > 0 && cpu_count <= os::processor_count(), "sanity check"); return cpu_count; } +// Determine the active processor count from one of +// three different sources: +// +// 1. User option -XX:ActiveProcessorCount +// 2. kernel os calls (sched_getaffinity or sysconf(_SC_NPROCESSORS_ONLN) +// 3. extracted from cgroup cpu subsystem (shares and quotas) +// +// Option 1, if specified, will always override. +// If the cgroup subsystem is active and configured, we +// will return the min of the cgroup and option 2 results. +// This is required since tools, such as numactl, that +// alter cpu affinity do not update cgroup subsystem +// cpuset configuration files. 
+int os::active_processor_count() { + // User has overridden the number of active processors + if (ActiveProcessorCount > 0) { + log_trace(os)("active_processor_count: " + "active processor count set by user : %d", + ActiveProcessorCount); + return ActiveProcessorCount; + } + + int active_cpus; + if (OSContainer::is_containerized()) { + active_cpus = OSContainer::active_processor_count(); + log_trace(os)("active_processor_count: determined by OSContainer: %d", + active_cpus); + } else { + active_cpus = os::Linux::active_processor_count(); + } + + return active_cpus; +} + void os::set_native_thread_name(const char *name) { if (Linux::_pthread_setname_np) { char buf [16]; // according to glibc manpage, 16 chars incl. '/0' --- old/src/hotspot/os/linux/os_linux.hpp 2017-10-24 13:04:53.415985586 -0400 +++ new/src/hotspot/os/linux/os_linux.hpp 2017-10-24 13:04:52.427929252 -0400 @@ -32,6 +32,7 @@ class Linux { friend class os; + friend class OSContainer; friend class TestReserveMemorySpecial; static bool libjsig_is_loaded; // libjsig that interposes sigaction(), @@ -75,6 +76,9 @@ static julong available_memory(); static julong physical_memory() { return _physical_memory; } + static void set_physical_memory(julong phys_mem) { _physical_memory = phys_mem; } + static int active_processor_count(); + static void initialize_system_info(); static int commit_memory_impl(char* addr, size_t bytes, bool exec); @@ -106,6 +110,7 @@ static bool release_memory_special_huge_tlbfs(char* base, size_t bytes); static void print_full_memory_info(outputStream* st); + static void print_container_info(outputStream* st); static void print_distro_info(outputStream* st); static void print_libversion_info(outputStream* st); --- old/src/hotspot/os/solaris/os_solaris.cpp 2017-10-24 13:04:56.132140451 -0400 +++ new/src/hotspot/os/solaris/os_solaris.cpp 2017-10-24 13:04:55.140083886 -0400 @@ -291,6 +291,14 @@ } int os::active_processor_count() { + // User has overridden the number of active processors 
+ if (ActiveProcessorCount > 0) { + log_trace(os)("active_processor_count: " + "active processor count set by user : %d", + ActiveProcessorCount); + return ActiveProcessorCount; + } + int online_cpus = sysconf(_SC_NPROCESSORS_ONLN); pid_t pid = getpid(); psetid_t pset = PS_NONE; --- old/src/hotspot/os/windows/os_windows.cpp 2017-10-24 13:04:58.932300105 -0400 +++ new/src/hotspot/os/windows/os_windows.cpp 2017-10-24 13:04:57.936243318 -0400 @@ -720,6 +720,14 @@ } int os::active_processor_count() { + // User has overridden the number of active processors + if (ActiveProcessorCount > 0) { + log_trace(os)("active_processor_count: " + "active processor count set by user : %d", + ActiveProcessorCount); + return ActiveProcessorCount; + } + DWORD_PTR lpProcessAffinityMask = 0; DWORD_PTR lpSystemAffinityMask = 0; int proc_count = processor_count(); --- old/src/hotspot/share/logging/logTag.hpp 2017-10-24 13:05:01.756461128 -0400 +++ new/src/hotspot/share/logging/logTag.hpp 2017-10-24 13:05:00.768404796 -0400 @@ -54,6 +54,7 @@ LOG_TAG(compilation) \ LOG_TAG(constraints) \ LOG_TAG(constantpool) \ + LOG_TAG(container) \ LOG_TAG(coops) \ LOG_TAG(cpu) \ LOG_TAG(cset) \ --- old/src/hotspot/share/runtime/arguments.cpp 2017-10-24 13:05:04.468615764 -0400 +++ new/src/hotspot/share/runtime/arguments.cpp 2017-10-24 13:05:03.476559203 -0400 @@ -382,6 +382,7 @@ { "MaxRAMFraction", JDK_Version::jdk(10), JDK_Version::undefined(), JDK_Version::undefined() }, { "MinRAMFraction", JDK_Version::jdk(10), JDK_Version::undefined(), JDK_Version::undefined() }, { "InitialRAMFraction", JDK_Version::jdk(10), JDK_Version::undefined(), JDK_Version::undefined() }, + { "UseCGroupMemoryLimitForHeap", JDK_Version::jdk(10), JDK_Version::undefined(), JDK_Version::jdk(11) }, // --- Deprecated alias flags (see also aliased_jvm_flags) - sorted by obsolete_in then expired_in: { "DefaultMaxRAMFraction", JDK_Version::jdk(8), JDK_Version::undefined(), JDK_Version::undefined() }, @@ -2680,6 +2681,14 @@ return result; 
} + // We need to ensure processor and memory resources have been properly + // configured - which may rely on arguments we just processed - before + // doing the final argument processing. Any argument processing that + // needs to know about processor and memory resources must occur after + // this point. + + os::init_container_support(); + // Do final processing now that all arguments have been parsed result = finalize_vm_init_args(patch_mod_javabase); if (result != JNI_OK) { @@ -3355,12 +3364,6 @@ _exit_hook = CAST_TO_FN_PTR(exit_hook_t, option->extraInfo); } else if (match_option(option, "abort")) { _abort_hook = CAST_TO_FN_PTR(abort_hook_t, option->extraInfo); - // -XX:+AggressiveHeap - } else if (match_option(option, "-XX:+AggressiveHeap")) { - jint result = set_aggressive_heap_flags(); - if (result != JNI_OK) { - return result; - } // Need to keep consistency of MaxTenuringThreshold and AlwaysTenure/NeverTenure; // and the last option wins. } else if (match_option(option, "-XX:+NeverTenure")) { @@ -3642,6 +3645,16 @@ return JNI_ERR; } + // This must be done after all arguments have been processed + // and the container support has been initialized since AggressiveHeap + // relies on the amount of total memory available. + if (AggressiveHeap) { + jint result = set_aggressive_heap_flags(); + if (result != JNI_OK) { + return result; + } + } + // This must be done after all arguments have been processed. // java_compiler() true means set to "NONE" or empty. 
if (java_compiler() && !xdebug_mode()) { --- old/src/hotspot/share/runtime/globals.hpp 2017-10-24 13:05:07.288776559 -0400 +++ new/src/hotspot/share/runtime/globals.hpp 2017-10-24 13:05:06.296719995 -0400 @@ -2035,6 +2035,9 @@ "Real memory size (in bytes) used to set maximum heap size") \ range(0, 0XFFFFFFFFFFFFFFFF) \ \ + product(bool, AggressiveHeap, false, \ + "Optimize heap options for long-running memory intensive apps") \ + \ product(size_t, ErgoHeapSizeLimit, 0, \ "Maximum ergonomically set heap size (in bytes); zero means use " \ "MaxRAM * MaxRAMPercentage / 100") \ @@ -2042,7 +2045,8 @@ \ experimental(bool, UseCGroupMemoryLimitForHeap, false, \ "Use CGroup memory limit as physical memory limit for heap " \ - "sizing") \ + "sizing. " \ + "Deprecated, replaced by container support") \ \ product(uintx, MaxRAMFraction, 4, \ "Maximum fraction (1/n) of real memory used for maximum heap " \ @@ -2074,6 +2078,9 @@ "Percentage of real memory used for initial heap size") \ range(0.0, 100.0) \ \ + product(int, ActiveProcessorCount, -1, \ + "Specify the CPU count the VM should use and report as active") \ + \ develop(uintx, MaxVirtMemFraction, 2, \ "Maximum fraction (1/n) of virtual memory used for ergonomically "\ "determining maximum heap size") \ --- old/src/hotspot/share/runtime/os.hpp 2017-10-24 13:05:10.088936213 -0400 +++ new/src/hotspot/share/runtime/os.hpp 2017-10-24 13:05:09.104880104 -0400 @@ -142,8 +142,16 @@ static void get_summary_os_info(char* buf, size_t buflen); static void initialize_initial_active_processor_count(); + + LINUX_ONLY(static void pd_init_container_support();) + public: static void init(void); // Called before command line parsing + + static void init_container_support() { // Called during command line parsing. + LINUX_ONLY(pd_init_container_support();) + } + static void init_before_ergo(void); // Called after command line parsing // before VM ergonomics processing. 
static jint init_2(void); // Called after command line parsing --- old/src/hotspot/share/runtime/thread.cpp 2017-10-24 13:05:12.821091989 -0400 +++ new/src/hotspot/share/runtime/thread.cpp 2017-10-24 13:05:11.829035424 -0400 @@ -3521,6 +3521,7 @@ LogConfiguration::initialize(create_vm_timer.begin_time()); // Parse arguments + // Note: this internally calls os::init_container_support() jint parse_result = Arguments::parse(args); if (parse_result != JNI_OK) return parse_result; --- old/test/hotspot/jtreg/runtime/CommandLine/VMDeprecatedOptions.java 2017-10-24 13:05:15.617251415 -0400 +++ new/test/hotspot/jtreg/runtime/CommandLine/VMDeprecatedOptions.java 2017-10-24 13:05:14.605193711 -0400 @@ -88,8 +88,22 @@ output.shouldMatch(match); } + // Deprecated experimental command line options need to be preceded on the + // command line by -XX:+UnlockExperimentalVMOption. + static void testDeprecatedExperimental(String option, String value) throws Throwable { + String XXoption = CommandLineOptionTest.prepareFlag(option, value); + ProcessBuilder processBuilder = ProcessTools.createJavaProcessBuilder( + CommandLineOptionTest.UNLOCK_EXPERIMENTAL_VM_OPTIONS, XXoption, "-version"); + OutputAnalyzer output = new OutputAnalyzer(processBuilder.start()); + // check for option deprecation message: + output.shouldHaveExitValue(0); + String match = getDeprecationString(option); + output.shouldMatch(match); + } + public static void main(String[] args) throws Throwable { testDeprecated(DEPRECATED_OPTIONS); // Make sure that each deprecated option is mentioned in the output. testDeprecatedDiagnostic("UnsyncloadClass", "false"); + testDeprecatedExperimental("UseCGroupMemoryLimitForHeap", "true"); } } --- /dev/null 2016-12-30 14:52:30.584485998 -0500 +++ new/src/hotspot/os/linux/osContainer_linux.cpp 2017-10-24 13:05:17.333349260 -0400 @@ -0,0 +1,582 @@ +/* + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include <string.h> +#include <math.h> +#include "utilities/globalDefinitions.hpp" +#include "memory/allocation.hpp" +#include "runtime/os.hpp" +#include "logging/log.hpp" +#include "osContainer_linux.hpp" + +/* + * Warning: Some linux distros use 0x7FFFFFFFFFFFF000 + * and others use 0x7FFFFFFFFFFFFFFF for unlimited. + */ +#define UNLIMITED_MEM CONST64(0x7FFFFFFFFFFFF000) + +#define PER_CPU_SHARES 1024 + +bool OSContainer::_is_initialized = false; +bool OSContainer::_is_containerized = false; + +class CgroupSubsystem: CHeapObj<mtInternal> { + friend class OSContainer; + + private: + /* mountinfo contents */ + char *_root; + char *_mount_point; + + /* Constructed subsystem directory */ + char *_path; + + public: + CgroupSubsystem(char *root, char *mountpoint) { + _root = os::strdup(root); + _mount_point = os::strdup(mountpoint); + _path = NULL; + } + + /* + * Set directory to subsystem specific files based + * on the contents of the mountinfo and cgroup files. 
+ */ + void set_subsystem_path(char *cgroup_path) { + char buf[MAXPATHLEN+1]; + if (_root != NULL && cgroup_path != NULL) { + if (strcmp(_root, "/") == 0) { + strncpy(buf, _mount_point, MAXPATHLEN); + buf[MAXPATHLEN-1] = '\0'; + if (strcmp(cgroup_path,"/") != 0) { + int buflen = strlen(buf); + if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { + return; + } + strncat(buf, cgroup_path, MAXPATHLEN-buflen); + buf[MAXPATHLEN-1] = '\0'; + } + _path = os::strdup(buf); + } else { + if (strcmp(_root, cgroup_path) == 0) { + strncpy(buf, _mount_point, MAXPATHLEN); + buf[MAXPATHLEN-1] = '\0'; + _path = os::strdup(buf); + } else { + char *p = strstr(cgroup_path, _root); // _root must be a leading prefix of cgroup_path + if (p != NULL && p == cgroup_path) { + if (strlen(cgroup_path) > strlen(_root)) { + strncpy(buf, _mount_point, MAXPATHLEN); + buf[MAXPATHLEN-1] = '\0'; + int buflen = strlen(buf); + if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { + return; + } + strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen); + buf[MAXPATHLEN-1] = '\0'; + _path = os::strdup(buf); + } + } + } + } + } + } + + char *subsystem_path() { return _path; } +}; + +// CgroupSubsystem *cgroupv2; +CgroupSubsystem* memory = NULL; +CgroupSubsystem* cpuset = NULL; +CgroupSubsystem* cpu = NULL; +CgroupSubsystem* cpuacct = NULL; + +typedef char * cptr; + +#define GEN_CONTAINER_GET_INFO(return_type, scan_fmt, isstr) \ +int subsystem_file_contents_##return_type(CgroupSubsystem* c, \ + const char *filename, \ + return_type *returnval) { \ + FILE *fp = NULL; \ + char *p; \ + char buf[MAXPATHLEN+1]; \ + \ + if (c != NULL && c->subsystem_path() != NULL) { \ + strncpy(buf, c->subsystem_path(), MAXPATHLEN); \ + buf[MAXPATHLEN-1] = '\0'; \ + int buflen = strlen(buf); \ + if ((buflen + strlen(filename)) > (MAXPATHLEN-1)) { \ + return OSCONTAINER_ERROR; \ + } \ + strncat(buf, filename, MAXPATHLEN-buflen); \ + log_trace(os, container)("Path to %s is %s", filename, buf); \ + fp = fopen(buf, "r"); \ + if (fp != NULL) { \ + p = fgets(buf, MAXPATHLEN, fp); 
\ + if (p != NULL) { \ + if (isstr) { \ + *(char **)returnval = os::strdup(p); \ + fclose(fp); \ + return 0; \ + } else { \ + return_type value; \ + int matched = sscanf(p, scan_fmt, &value); \ + if (matched == 1) { \ + *returnval = value; \ + fclose(fp); \ + return 0; \ + } else { \ + log_debug(os, container)("Type %s not found in file %s", \ + scan_fmt , buf); \ + } \ + } \ + } else { \ + log_debug(os, container)("Empty file %s", buf); \ + } \ + } else { \ + log_debug(os, container)("file not found %s", buf); \ + } \ + } \ + if (fp != NULL) \ + fclose(fp); \ + return OSCONTAINER_ERROR; \ +} + + +GEN_CONTAINER_GET_INFO(int, "%d", false) +GEN_CONTAINER_GET_INFO(jlong, JLONG_FORMAT, false) +GEN_CONTAINER_GET_INFO(cptr, "%p", true) + +#define GET_CONTAINER_INFO(return_type, isstring, subsystem, \ + filename, logstring, variable) \ + return_type variable; \ +{ \ + int err; \ + err = subsystem_file_contents_##return_type(subsystem, \ + filename, \ + &variable); \ + if (err != 0) { \ + log_debug(os, container)("Error reading %s", filename); \ + return isstring ? (return_type) NULL : \ + (return_type) OSCONTAINER_ERROR; \ + } \ + log_trace(os, container)(logstring, variable); \ +} + +/* init + * + * Initialize the container support and determine if + * we are running under cgroup control. 
+ */ +void OSContainer::init() { + int mountid; + int parentid; + int major; + int minor; + FILE *mntinfo = NULL; + FILE *cgroup = NULL; + char buf[MAXPATHLEN+1]; + char tmproot[MAXPATHLEN+1]; + char tmpmount[MAXPATHLEN+1]; + char tmpbase[MAXPATHLEN+1]; + char *p; + jlong mem_limit; + + assert(!_is_initialized, "Initializing OSContainer more than once"); + + _is_initialized = true; + _is_containerized = false; + + log_trace(os, container)("OSContainer::init: Initializing Container Support"); + if (!UseContainerSupport) { + log_trace(os, container)("Container Support not enabled"); + return; + } + + /* + * Find the cgroup mount point for memory and cpuset + * by reading /proc/self/mountinfo + * + * Example for docker: + * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory + * + * Example for host: + * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory + */ + mntinfo = fopen("/proc/self/mountinfo", "r"); + if (mntinfo == NULL) { + log_debug(os, container)("Can't locate /proc/self/mountinfo"); + return; + } + + while ( (p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { + // Look for the filesystem type and see if it's cgroup + char fstype[MAXPATHLEN+1]; + fstype[0] = '\0'; + char *s = strstr(p, " - "); + if (s != NULL && + sscanf(s, " - %s", fstype) == 1 && + strcmp(fstype, "cgroup") == 0) { + + if (strstr(p, "memory") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + memory = new CgroupSubsystem(tmproot, tmpmount); + } + else + log_debug(os, container)("Incompatible str containing cgroup and memory: %s", p); + } else if (strstr(p, "cpuset") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpuset = new 
CgroupSubsystem(tmproot, tmpmount); + } + else { + log_debug(os, container)("Incompatible str containing cgroup and cpuset: %s", p); + } + } else if (strstr(p, "cpu,cpuacct") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpu = new CgroupSubsystem(tmproot, tmpmount); + cpuacct = new CgroupSubsystem(tmproot, tmpmount); + } + else { + log_debug(os, container)("Incompatible str containing cgroup and cpu,cpuacct: %s", p); + } + } else if (strstr(p, "cpuacct") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpuacct = new CgroupSubsystem(tmproot, tmpmount); + } + else { + log_debug(os, container)("Incompatible str containing cgroup and cpuacct: %s", p); + } + } else if (strstr(p, "cpu") != NULL) { + int matched = sscanf(p, "%d %d %d:%d %s %s", + &mountid, + &parentid, + &major, + &minor, + tmproot, + tmpmount); + if (matched == 6) { + cpu = new CgroupSubsystem(tmproot, tmpmount); + } + else { + log_debug(os, container)("Incompatible str containing cgroup and cpu: %s", p); + } + } + } + } + + if (mntinfo != NULL) fclose(mntinfo); + + /* + * Read /proc/self/cgroup and map host mount point to + * local one via /proc/self/mountinfo content above + * + * Docker example: + * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 + * + * Host example: + * 5:memory:/user.slice + * + * Construct a path to the process specific memory and cpuset + * cgroup directory. 
+ * + * For a container running under Docker from memory example above + * the paths would be: + * + * /sys/fs/cgroup/memory + * + * For a Host from memory example above the path would be: + * + * /sys/fs/cgroup/memory/user.slice + * + */ + cgroup = fopen("/proc/self/cgroup", "r"); + if (cgroup == NULL) { + log_debug(os, container)("Can't locate /proc/self/cgroup"); + return; + } + + while ( (p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { + int cgno; + int matched; + char *controller; + char *base; + + /* Skip cgroup number */ + strsep(&p, ":"); + /* Get controller and base */ + controller = strsep(&p, ":"); + base = strsep(&p, "\n"); + + if (controller != NULL) { // a subsystem is still NULL here if no matching cgroup mount was parsed above + if (strstr(controller, "memory") != NULL) { + if (memory != NULL) memory->set_subsystem_path(base); + } else if (strstr(controller, "cpuset") != NULL) { + if (cpuset != NULL) cpuset->set_subsystem_path(base); + } else if (strstr(controller, "cpu,cpuacct") != NULL) { + if (cpu != NULL) cpu->set_subsystem_path(base); + if (cpuacct != NULL) cpuacct->set_subsystem_path(base); + } else if (strstr(controller, "cpuacct") != NULL) { + if (cpuacct != NULL) cpuacct->set_subsystem_path(base); + } else if (strstr(controller, "cpu") != NULL) { + if (cpu != NULL) cpu->set_subsystem_path(base); + } + } + } + + if (cgroup != NULL) fclose(cgroup); + + if (memory == NULL || cpuset == NULL || cpu == NULL) { + log_debug(os, container)("Required cgroup subsystems not found"); + return; + } + + // We need to update the amount of physical memory now that + // command line arguments have been processed. + if ((mem_limit = memory_limit_in_bytes()) > 0) { + os::Linux::set_physical_memory(mem_limit); + } + + _is_containerized = true; +} + +char * OSContainer::container_type() { + if (is_containerized()) { + return (char *)"cgroupv1"; + } else { + return NULL; + } +} + + +/* memory_limit_in_bytes + * + * Return the limit of available memory for this process. 
+ * + * return: + * memory limit in bytes or + * -1 for unlimited + * OSCONTAINER_ERROR for not supported + */ +jlong OSContainer::memory_limit_in_bytes() { + GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.limit_in_bytes", + "Memory Limit is: " JLONG_FORMAT, memlimit); + + if (memlimit >= UNLIMITED_MEM) { + log_trace(os, container)("Memory Limit is: Unlimited"); + return (jlong)-1; + } + else { + return memlimit; + } +} + +jlong OSContainer::memory_and_swap_limit_in_bytes() { + GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.memsw.limit_in_bytes", + "Memory and Swap Limit is: " JLONG_FORMAT, memswlimit); + if (memswlimit >= UNLIMITED_MEM) { + log_trace(os, container)("Memory and Swap Limit is: Unlimited"); + return (jlong)-1; + } else { + return memswlimit; + } +} + +jlong OSContainer::memory_soft_limit_in_bytes() { + GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.soft_limit_in_bytes", + "Memory Soft Limit is: " JLONG_FORMAT, memsoftlimit); + if (memsoftlimit >= UNLIMITED_MEM) { + log_trace(os, container)("Memory Soft Limit is: Unlimited"); + return (jlong)-1; + } else { + return memsoftlimit; + } +} + +/* memory_usage_in_bytes + * + * Return the amount of used memory for this process. + * + * return: + * memory usage in bytes or + * -1 for unlimited + * OSCONTAINER_ERROR for not supported + */ +jlong OSContainer::memory_usage_in_bytes() { + GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.usage_in_bytes", + "Memory Usage is: " JLONG_FORMAT, memusage); + return memusage; +} + +/* memory_max_usage_in_bytes + * + * Return the maximum amount of used memory for this process. 
+ * + * return: + * max memory usage in bytes or + * OSCONTAINER_ERROR for not supported + */ +jlong OSContainer::memory_max_usage_in_bytes() { + GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.max_usage_in_bytes", + "Maximum Memory Usage is: " JLONG_FORMAT, memmaxusage); + return memmaxusage; +} + +/* active_processor_count + * + * Calculate an appropriate number of active processors for the + * VM to use based on these three cgroup options. + * + * cpu affinity + * cpu quota & cpu period + * cpu shares + * + * Algorithm: + * + * Determine the number of available CPUs from sched_getaffinity + * + * If user specified a quota (quota != -1), calculate the number of + * required CPUs by dividing quota by period. + * + * If shares are in effect (shares != -1), calculate the number + * of cpus required for the shares by dividing the share value + * by PER_CPU_SHARES. + * + * All results of division are rounded up to the next whole number. + * + * Return the smaller number from the three different settings. 
+ * + * return: + * number of cpus + * OSCONTAINER_ERROR if failure occured during extract of cpuset info + */ +int OSContainer::active_processor_count() { + int cpu_count, share_count, quota_count; + int share, quota, period; + int result; + + cpu_count = os::Linux::active_processor_count(); + + share = cpu_shares(); + if (share > -1) { + share_count = ceilf((float)share / (float)PER_CPU_SHARES); + log_trace(os, container)("cpu_share count: %d", share_count); + } else { + share_count = cpu_count; + } + + quota = cpu_quota(); + period = cpu_period(); + if (quota > -1 && period > 0) { + quota_count = ceilf((float)quota / (float)period); + log_trace(os, container)("quota_count: %d", quota_count); + } else { + quota_count = cpu_count; + } + + result = MIN2(cpu_count, MIN2(share_count, quota_count)); + log_trace(os, container)("OSContainer::active_processor_count: %d", result); + return result; +} + +char * OSContainer::cpu_cpuset_cpus() { + GET_CONTAINER_INFO(cptr, true, cpuset, (char *)"/cpuset.cpus", + "cpuset.cpus is: %s", cpus); + return cpus; +} + +char * OSContainer::cpu_cpuset_memory_nodes() { + GET_CONTAINER_INFO(cptr, true, cpuset, (char *)"/cpuset.mems", + "cpuset.mems is: %s", mems); + return mems; +} + +/* cpu_quota + * + * Return the number of milliseconds per period + * process is guaranteed to run. 
+ * + * return: + * quota time in milliseconds + * -1 for no quota + * OSCONTAINER_ERROR for not supported + */ +int OSContainer::cpu_quota() { + GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.cfs_quota_us", + "CPU Quota is: %d", quota); + return quota; +} + +int OSContainer::cpu_period() { + GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.cfs_period_us", + "CPU Period is: %d", period); + return period; +} + +/* cpu_shares + * + * Return the amount of cpu shares available to the process + * + * return: + * Share number (typically a number relative to 1024) + * (2048 typically expresses 2 CPUs worth of processing) + * -1 for no share setup + * OSCONTAINER_ERROR for not supported + */ +int OSContainer::cpu_shares() { + GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.shares", + "CPU Shares is: %d", shares); + // Convert 1024 to no shares setup + if (shares == 1024) return -1; + + return shares; +} + --- /dev/null 2016-12-30 14:52:30.584485998 -0500 +++ new/src/hotspot/os/linux/osContainer_linux.hpp 2017-10-24 13:05:20.013502072 -0400 @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef OS_LINUX_VM_OSCONTAINER_LINUX_HPP +#define OS_LINUX_VM_OSCONTAINER_LINUX_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" +#include "memory/allocation.hpp" + +#define OSCONTAINER_ERROR (-2) + +class OSContainer: AllStatic { + + private: + static bool _is_initialized; + static bool _is_containerized; + + public: + static void init(); + static inline bool is_containerized(); + static char * container_type(); + + static jlong memory_limit_in_bytes(); + static jlong memory_and_swap_limit_in_bytes(); + static jlong memory_soft_limit_in_bytes(); + static jlong memory_usage_in_bytes(); + static jlong memory_max_usage_in_bytes(); + + static int active_processor_count(); + + static char * cpu_cpuset_cpus(); + static char * cpu_cpuset_memory_nodes(); + + static int cpu_quota(); + static int cpu_period(); + + static int cpu_shares(); + +}; + +inline bool OSContainer::is_containerized() { + assert(_is_initialized, "OSContainer not initialized"); + return _is_containerized; +} + +#endif // OS_LINUX_VM_OSCONTAINER_LINUX_HPP