--- /dev/null	2019-11-08 09:42:42.340406263 +0100
+++ new/src/hotspot/os/linux/cgroupSubsystem_linux.cpp	2019-11-08 15:01:37.810151753 +0100
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include <string.h>
+#include <math.h>
+#include <errno.h>
+#include "cgroupSubsystem_linux.hpp"
+#include "cgroupV1Subsystem_linux.hpp"
+#include "cgroupV2Subsystem_linux.hpp"
+#include "logging/log.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/globals.hpp"
+#include "runtime/os.hpp"
+#include "utilities/globalDefinitions.hpp"
+// Detects cgroups v1 vs. v2 from /proc and builds the matching CgroupSubsystem; returns NULL when detection fails.
+CgroupSubsystem* CgroupSubsystemFactory::create() {
+  CgroupV1MemoryController* memory = NULL;
+  CgroupV1Controller* cpuset = NULL;
+  CgroupV1Controller* cpu = NULL;
+  CgroupV1Controller* cpuacct = NULL;
+  FILE *mntinfo = NULL;  // /proc/self/mountinfo
+  FILE *cgroups = NULL;  // /proc/cgroups
+  FILE *cgroup = NULL;   // /proc/self/cgroup
+  char buf[MAXPATHLEN+1];       // line buffer reused by every fgets() loop below
+  char tmproot[MAXPATHLEN+1];   // v1: 4th field of a mountinfo line
+  char tmpmount[MAXPATHLEN+1];  // v1: 5th field of a mountinfo line
+  char *p;
+  bool is_cgroupsV2;
+  // true iff all controllers, memory, cpu, cpuset, cpuacct are enabled
+  // at the kernel level.
+  bool all_controllers_enabled;
+  // NOTE(review): entries are read below even for a controller missing from /proc/cgroups; confirm CgroupInfo initializes its fields.
+  CgroupInfo cg_infos[CG_INFO_LENGTH];
+  int cpuset_idx  = 0;
+  int cpu_idx     = 1;
+  int cpuacct_idx = 2;
+  int memory_idx  = 3;
+
+  /*
+   * Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1.
+   *
+   * The system is treated as cgroups v2 only if the cpu, cpuacct, cpuset and
+   * memory controllers all have hierarchy ID 0; otherwise the v1 path is taken.
+   */
+  cgroups = fopen("/proc/cgroups", "r");
+  if (cgroups == NULL) {
+      log_debug(os, container)("Can't open /proc/cgroups, %s",
+                               os::strerror(errno));
+      return NULL;
+  }
+
+  while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) {
+    char name[MAXPATHLEN+1];
+    int  hierarchy_id;
+    int  enabled;
+
+    // Format of /proc/cgroups documented via man 7 cgroups; lines that do not match are skipped
+    if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) {
+      continue;
+    }
+    if (strcmp(name, "memory") == 0) {
+      cg_infos[memory_idx]._name = os::strdup(name);
+      cg_infos[memory_idx]._hierarchy_id = hierarchy_id;
+      cg_infos[memory_idx]._enabled = (enabled == 1);
+    } else if (strcmp(name, "cpuset") == 0) {
+      cg_infos[cpuset_idx]._name = os::strdup(name);
+      cg_infos[cpuset_idx]._hierarchy_id = hierarchy_id;
+      cg_infos[cpuset_idx]._enabled = (enabled == 1);
+    } else if (strcmp(name, "cpu") == 0) {
+      cg_infos[cpu_idx]._name = os::strdup(name);
+      cg_infos[cpu_idx]._hierarchy_id = hierarchy_id;
+      cg_infos[cpu_idx]._enabled = (enabled == 1);
+    } else if (strcmp(name, "cpuacct") == 0) {
+      cg_infos[cpuacct_idx]._name = os::strdup(name);
+      cg_infos[cpuacct_idx]._hierarchy_id = hierarchy_id;
+      cg_infos[cpuacct_idx]._enabled = (enabled == 1);
+    }
+  }
+  fclose(cgroups);
+
+  is_cgroupsV2 = true;
+  all_controllers_enabled = true;
+  for (int i = 0; i < CG_INFO_LENGTH; i++) {
+    is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
+    all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled;
+  }
+
+  if (!all_controllers_enabled) {
+    // one or more controllers disabled, disable container support
+    log_debug(os, container)("One or more required controllers disabled at kernel level.");
+    return NULL;
+  }
+
+  /*
+   * Read /proc/self/cgroup and determine:
+   *  - the cgroup path for cgroups v2 or
+   *  - on a cgroups v1 system, collect info for mapping
+   *    the host mount point to the local one via /proc/self/mountinfo below.
+   */
+  cgroup = fopen("/proc/self/cgroup", "r");
+  if (cgroup == NULL) {
+    log_debug(os, container)("Can't open /proc/self/cgroup, %s",
+                             os::strerror(errno));
+    return NULL;
+  }
+
+  while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
+    char *controllers;
+    char *token;
+    char *hierarchy_id_str;
+    int  hierarchy_id;
+    char *cgroup_path;
+    // Each line is split as hierarchy-ID:controller-list:cgroup-path
+    hierarchy_id_str = strsep(&p, ":");
+    hierarchy_id = atoi(hierarchy_id_str);
+    /* Get controllers and base */
+    controllers = strsep(&p, ":");
+    cgroup_path = strsep(&p, "\n");
+
+    if (controllers == NULL) {  // the line contained no ':' at all
+      continue;
+    }
+    // NOTE(review): cgroup_path is NULL when the line has only one ':'; confirm os::strdup tolerates NULL.
+    while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) {
+      if (strcmp(token, "memory") == 0) {
+        assert(hierarchy_id == cg_infos[memory_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+        cg_infos[memory_idx]._cgroup_path = os::strdup(cgroup_path);
+      } else if (strcmp(token, "cpuset") == 0) {
+        assert(hierarchy_id == cg_infos[cpuset_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+        cg_infos[cpuset_idx]._cgroup_path = os::strdup(cgroup_path);
+      } else if (strcmp(token, "cpu") == 0) {
+        assert(hierarchy_id == cg_infos[cpu_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+        cg_infos[cpu_idx]._cgroup_path = os::strdup(cgroup_path);
+      } else if (strcmp(token, "cpuacct") == 0) {
+        assert(hierarchy_id == cg_infos[cpuacct_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+        cg_infos[cpuacct_idx]._cgroup_path = os::strdup(cgroup_path);
+      }
+    }
+    if (is_cgroupsV2) {  // v2: every controller gets this line's path
+      for (int i = 0; i < CG_INFO_LENGTH; i++) {
+        cg_infos[i]._cgroup_path = os::strdup(cgroup_path);  // NOTE(review): a value stored for an earlier line is overwritten without being freed
+      }
+    }
+  }
+  fclose(cgroup);
+
+  if (is_cgroupsV2) {
+    // Find the cgroup2 mount point by reading /proc/self/mountinfo
+    mntinfo = fopen("/proc/self/mountinfo", "r");
+    if (mntinfo == NULL) {
+        log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
+                                 os::strerror(errno));
+        return NULL;
+    }
+
+    char cgroupv2_mount[MAXPATHLEN+1];
+    char fstype[MAXPATHLEN+1];
+    bool mount_point_found = false;
+    while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
+      char *tmp_mount_point = cgroupv2_mount;
+      char *tmp_fs_type = fstype;
+      // Capture the 5th field (mount point) and the first field after the "-" separator (filesystem type).
+      // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
+      if (sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s cgroup2 %*s", tmp_mount_point, tmp_fs_type) == 2) {
+        // sscanf already reports 2 once both %s conversions succeed, whatever follows; so be sure we have cgroup2 as fstype
+        if (strcmp("cgroup2", tmp_fs_type) == 0) {
+          mount_point_found = true;
+          break;
+        }
+      }
+    }
+    fclose(mntinfo);
+    if (!mount_point_found) {
+      log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo");
+      return NULL;
+    }
+    // Cgroups v2 case, we have all the info we need.
+    // Construct the subsystem, free resources and return
+    // Note: any index in cg_infos will do as the path is the same for
+    //       all controllers.
+    CgroupController* unified = new CgroupV2Controller(cgroupv2_mount, cg_infos[memory_idx]._cgroup_path);
+    for (int i = 0; i < CG_INFO_LENGTH; i++) {
+      os::free(cg_infos[i]._name);
+      os::free(cg_infos[i]._cgroup_path);  // NOTE(review): presumably CgroupV2Controller copied the path above; confirm
+    }
+    log_debug(os, container)("Detected cgroups v2 unified hierarchy");
+    return new CgroupV2Subsystem(unified);
+  }
+
+  // What follows is cgroups v1
+  log_debug(os, container)("Detected cgroups hybrid or legacy hierarchy, using cgroups v1 controllers");
+
+  /*
+   * Find the cgroup mount points for memory, cpuset, cpu and cpuacct
+   * by reading /proc/self/mountinfo
+   *
+   * Example for docker:
+   * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
+   *
+   * Example for host:
+   * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
+   */
+  mntinfo = fopen("/proc/self/mountinfo", "r");
+  if (mntinfo == NULL) {
+      log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
+                               os::strerror(errno));
+      return NULL;
+  }
+
+  while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
+    char tmpcgroups[MAXPATHLEN+1];
+    char *cptr = tmpcgroups;
+    char *token;
+    // Capture the 4th and 5th fields plus the trailing comma-separated option list, which is searched for controller names.
+    // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
+    if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- cgroup %*s %s", tmproot, tmpmount, tmpcgroups) != 3) {
+      continue;
+    }
+    while ((token = strsep(&cptr, ",")) != NULL) {
+      if (strcmp(token, "memory") == 0) {
+        memory = new CgroupV1MemoryController(tmproot, tmpmount);
+      } else if (strcmp(token, "cpuset") == 0) {
+        cpuset = new CgroupV1Controller(tmproot, tmpmount);
+      } else if (strcmp(token, "cpu") == 0) {
+        cpu = new CgroupV1Controller(tmproot, tmpmount);
+      } else if (strcmp(token, "cpuacct") == 0) {
+        cpuacct= new CgroupV1Controller(tmproot, tmpmount);
+      }
+    }
+  }
+  // NOTE(review): a controller named on several mountinfo lines is allocated again; the earlier object is not deleted.
+  fclose(mntinfo);
+  // All four controllers are required. NOTE(review): the early returns below do not release controllers already allocated.
+  if (memory == NULL) {
+    log_debug(os, container)("Required cgroup v1 memory subsystem not found");
+    return NULL;
+  }
+  if (cpuset == NULL) {
+    log_debug(os, container)("Required cgroup v1 cpuset subsystem not found");
+    return NULL;
+  }
+  if (cpu == NULL) {
+    log_debug(os, container)("Required cgroup v1 cpu subsystem not found");
+    return NULL;
+  }
+  if (cpuacct == NULL) {
+    log_debug(os, container)("Required cgroup v1 cpuacct subsystem not found");
+    return NULL;
+  }
+
+  /*
+   * Use info gathered previously from /proc/self/cgroup
+   * and map host mount point to
+   * local one via /proc/self/mountinfo content above
+   *
+   * Docker example:
+   * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
+   *
+   * Host example:
+   * 5:memory:/user.slice
+   *
+   * Construct a path to the process specific cgroup directory of
+   * each controller (memory is used as the example below).
+   *
+   * For a container running under Docker from memory example above
+   * the paths would be:
+   *
+   * /sys/fs/cgroup/memory
+   *
+   * For a Host from memory example above the path would be:
+   *
+   * /sys/fs/cgroup/memory/user.slice
+   * NOTE(review): unlike the v2 path, the strings held in cg_infos are not freed on this path; confirm ownership.
+   */
+  for (int i = 0; i < CG_INFO_LENGTH; i++) {
+    CgroupInfo info = cg_infos[i];
+    if (strcmp(info._name, "memory") == 0) {
+      memory->set_subsystem_path(info._cgroup_path);
+    } else if (strcmp(info._name, "cpuset") == 0) {
+      cpuset->set_subsystem_path(info._cgroup_path);
+    } else if (strcmp(info._name, "cpu") == 0) {
+      cpu->set_subsystem_path(info._cgroup_path);
+    } else if (strcmp(info._name, "cpuacct") == 0) {
+      cpuacct->set_subsystem_path(info._cgroup_path);
+    }
+  }
+  return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
+}
+
+/* active_processor_count
+ *
+ * Calculate an appropriate number of active processors for the
+ * VM to use based on these three inputs.
+ *
+ * cpu affinity
+ * cgroup cpu quota & cpu period
+ * cgroup cpu shares
+ *
+ * Algorithm:
+ *
+ * Determine the number of available CPUs from sched_getaffinity
+ *
+ * If user specified a quota (quota != -1), calculate the number of
+ * required CPUs by dividing quota by period.
+ *
+ * If shares are in effect (shares != -1), calculate the number
+ * of CPUs required for the shares by dividing the share value
+ * by PER_CPU_SHARES.
+ *
+ * All results of division are rounded up to the next whole number.
+ *
+ * If neither shares nor quotas have been specified, return the
+ * number of active processors in the system.
+ *
+ * If both shares and quotas have been specified, the result is
+ * based on the flag PreferContainerQuotaForCPUCount.  If true,
+ * return the quota value.  If false, return the smaller of the
+ * shares and quota values.
+ *
+ * If shares and/or quotas have been specified, the resulting number
+ * returned will never exceed the number of active processors.
+ *
+ * return:
+ *    number of CPUs
+ */
+int CgroupSubsystem::active_processor_count() {
+  // Computing the count means re-reading several container settings, so
+  // the previous result is served from a cache with a timeout for as
+  // long as the cache does not ask for a re-check. [See 8227006].
+  CachingCgroupController* cpu_ctrl = cpu_controller();
+  CachedMetric* cached_count = cpu_ctrl->metrics_cache();
+  if (!cached_count->should_check_metric()) {
+    int cached = (int)cached_count->value();
+    log_trace(os, container)("CgroupSubsystem::active_processor_count (cached): %d", cached);
+    return cached;
+  }
+
+  // Starting point and upper bound for the result.
+  const int host_cpus = os::Linux::active_processor_count();
+  const int quota     = cpu_quota();
+  const int period    = cpu_period();
+  const int share     = cpu_shares();
+
+  // A count of 0 below means the corresponding limit is not in effect.
+  int cpus_by_quota = 0;
+  if (quota > -1 && period > 0) {
+    cpus_by_quota = ceilf((float)quota / (float)period);
+    log_trace(os, container)("CPU Quota count based on quota/period: %d", cpus_by_quota);
+  }
+  int cpus_by_share = 0;
+  if (share > -1) {
+    cpus_by_share = ceilf((float)share / (float)PER_CPU_SHARES);
+    log_trace(os, container)("CPU Share count based on shares: %d", cpus_by_share);
+  }
+
+  // Choose the container-imposed limit; it stays at host_cpus when
+  // neither quota nor shares apply. With both present, the flag
+  // PreferContainerQuotaForCPUCount selects the quota, otherwise the
+  // smaller of the two counts is used.
+  const bool has_quota = (cpus_by_quota != 0);
+  const bool has_share = (cpus_by_share != 0);
+  int limit = host_cpus;
+  if (has_quota && has_share) {
+    limit = PreferContainerQuotaForCPUCount ? cpus_by_quota
+                                            : MIN2(cpus_by_quota, cpus_by_share);
+  } else if (has_quota) {
+    limit = cpus_by_quota;
+  } else if (has_share) {
+    limit = cpus_by_share;
+  }
+
+  // The result never exceeds the host's processor count.
+  const int result = MIN2(host_cpus, limit);
+  log_trace(os, container)("OSContainer::active_processor_count: %d", result);
+
+  // Remember the result so container settings are not re-read too often.
+  cached_count->set_value(result, OSCONTAINER_CACHE_TIMEOUT);
+  return result;
+}
+
+/* memory_limit_in_bytes
+ *
+ * Return the limit of available memory for this process.
+ *
+ * return:
+ *    memory limit in bytes or
+ *    -1 for unlimited
+ *    OSCONTAINER_ERROR for not supported
+ */
+jlong CgroupSubsystem::memory_limit_in_bytes() {
+  // Serve the limit from the memory controller's metric cache unless the
+  // cache asks for a re-check.
+  CachedMetric* limit_cache = memory_controller()->metrics_cache();
+  if (limit_cache->should_check_metric()) {
+    const jlong fresh_limit = read_memory_limit_in_bytes();
+    // Remember the value so container settings are not re-read too often
+    limit_cache->set_value(fresh_limit, OSCONTAINER_CACHE_TIMEOUT);
+    return fresh_limit;
+  }
+  return limit_cache->value();
+}