< prev index next >

src/hotspot/os/linux/cgroupSubsystem_linux.cpp

Print this page
@  rev 56576 : 8230305: Cgroups v2: Container awareness
|  Summary: Implement Cgroups v2 container awareness in hotspot
|  Reviewed-by: bobv
o  rev 56575 : 8230848: OSContainer: Refactor container detection code
|  Summary: Move cgroups v1 implementation details out of osContainer_linux.cpp
~  Reviewed-by: bobv

@@ -25,10 +25,11 @@
 #include <string.h>
 #include <math.h>
 #include <errno.h>
 #include "cgroupSubsystem_linux.hpp"
 #include "cgroupV1Subsystem_linux.hpp"
+#include "cgroupV2Subsystem_linux.hpp"
 #include "logging/log.hpp"
 #include "memory/allocation.hpp"
 #include "runtime/globals.hpp"
 #include "runtime/os.hpp"
 #include "utilities/globalDefinitions.hpp"

@@ -37,15 +38,180 @@
   CgroupV1MemoryController* memory = NULL;
   CgroupV1Controller* cpuset = NULL;
   CgroupV1Controller* cpu = NULL;
   CgroupV1Controller* cpuacct = NULL;
   FILE *mntinfo = NULL;
+  FILE *cgroups = NULL;
   FILE *cgroup = NULL;
   char buf[MAXPATHLEN+1];
   char tmproot[MAXPATHLEN+1];
   char tmpmount[MAXPATHLEN+1];
   char *p;
+  bool is_cgroupsV2;
+  // true iff all controllers, memory, cpu, cpuset, cpuacct are enabled
+  // at the kernel level.
+  bool all_controllers_enabled;
+
+  CgroupInfo cg_infos[CG_INFO_LENGTH];
+  int cpuset_idx  = 0;
+  int cpu_idx     = 1;
+  int cpuacct_idx = 2;
+  int memory_idx  = 3;
+
+  /*
+   * Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1.
+   *
+   * For a cgroups v1 (legacy or hybrid) hierarchy, the cpu, cpuacct, cpuset and
+   * memory controllers must have a non-zero hierarchy ID field.
+   */
+  cgroups = fopen("/proc/cgroups", "r");
+  if (cgroups == NULL) {
+      log_debug(os, container)("Can't open /proc/cgroups, %s",
+                               os::strerror(errno));
+      return NULL;
+  }
+
+  while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) {
+    char name[MAXPATHLEN+1];
+    int  hierarchy_id;
+    int  enabled;
+
+    // Format of /proc/cgroups documented via man 7 cgroups
+    if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) {
+      continue;
+    }
+    if (strcmp(name, "memory") == 0) {
+      cg_infos[memory_idx]._name = os::strdup(name);
+      cg_infos[memory_idx]._hierarchy_id = hierarchy_id;
+      cg_infos[memory_idx]._enabled = (enabled == 1);
+    } else if (strcmp(name, "cpuset") == 0) {
+      cg_infos[cpuset_idx]._name = os::strdup(name);
+      cg_infos[cpuset_idx]._hierarchy_id = hierarchy_id;
+      cg_infos[cpuset_idx]._enabled = (enabled == 1);
+    } else if (strcmp(name, "cpu") == 0) {
+      cg_infos[cpu_idx]._name = os::strdup(name);
+      cg_infos[cpu_idx]._hierarchy_id = hierarchy_id;
+      cg_infos[cpu_idx]._enabled = (enabled == 1);
+    } else if (strcmp(name, "cpuacct") == 0) {
+      cg_infos[cpuacct_idx]._name = os::strdup(name);
+      cg_infos[cpuacct_idx]._hierarchy_id = hierarchy_id;
+      cg_infos[cpuacct_idx]._enabled = (enabled == 1);
+    }
+  }
+  fclose(cgroups);
+
+  is_cgroupsV2 = true;
+  all_controllers_enabled = true;
+  for (int i = 0; i < CG_INFO_LENGTH; i++) {
+    is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0;
+    all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled;
+  }
+
+  if (!all_controllers_enabled) {
+    // one or more controllers disabled, disable container support
+    log_debug(os, container)("One or more required controllers not enabled at kernel level.");
+    return NULL;
+  }
+
+  /*
+   * Read /proc/self/cgroup and determine:
+   *  - the cgroup path for cgroups v2 or
+   *  - on a cgroups v1 system, collect info for mapping
+   *    the host mount point to the local one via /proc/self/mountinfo below.
+   */
+  cgroup = fopen("/proc/self/cgroup", "r");
+  if (cgroup == NULL) {
+    log_debug(os, container)("Can't open /proc/self/cgroup, %s",
+                             os::strerror(errno));
+    return NULL;
+  }
+
+  while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
+    char *controllers;
+    char *token;
+    char *hierarchy_id_str;
+    int  hierarchy_id;
+    char *cgroup_path;
+
+    hierarchy_id_str = strsep(&p, ":");
+    hierarchy_id = atoi(hierarchy_id_str);
+    /* Get controllers and base */
+    controllers = strsep(&p, ":");
+    cgroup_path = strsep(&p, "\n");
+
+    if (controllers == NULL) {
+      continue;
+    }
+
+    while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) {
+      if (strcmp(token, "memory") == 0) {
+        assert(hierarchy_id == cg_infos[memory_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+        cg_infos[memory_idx]._cgroup_path = os::strdup(cgroup_path);
+      } else if (strcmp(token, "cpuset") == 0) {
+        assert(hierarchy_id == cg_infos[cpuset_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+        cg_infos[cpuset_idx]._cgroup_path = os::strdup(cgroup_path);
+      } else if (strcmp(token, "cpu") == 0) {
+        assert(hierarchy_id == cg_infos[cpu_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+        cg_infos[cpu_idx]._cgroup_path = os::strdup(cgroup_path);
+      } else if (strcmp(token, "cpuacct") == 0) {
+        assert(hierarchy_id == cg_infos[cpuacct_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch");
+        cg_infos[cpuacct_idx]._cgroup_path = os::strdup(cgroup_path);
+      }
+    }
+    if (is_cgroupsV2) {
+      for (int i = 0; i < CG_INFO_LENGTH; i++) {
+        cg_infos[i]._cgroup_path = os::strdup(cgroup_path);
+      }
+    }
+  }
+  fclose(cgroup);
+
+  if (is_cgroupsV2) {
+    // Find the cgroup2 mount point by reading /proc/self/mountinfo
+    mntinfo = fopen("/proc/self/mountinfo", "r");
+    if (mntinfo == NULL) {
+        log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
+                                 os::strerror(errno));
+        return NULL;
+    }
+
+    char cgroupv2_mount[MAXPATHLEN+1];
+    char fstype[MAXPATHLEN+1];
+    bool mount_point_found = false;
+    while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
+      char *tmp_mount_point = cgroupv2_mount;
+      char *tmp_fs_type = fstype;
+
+      // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
+      if (sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s cgroup2 %*s", tmp_mount_point, tmp_fs_type) == 2) {
+        // sscanf returns 2 as soon as both fields are assigned, even if the rest of
+        // the format did not match, so be sure we really have cgroup2 as fstype
+        if (strcmp("cgroup2", tmp_fs_type) == 0) {
+          mount_point_found = true;
+          break;
+        }
+      }
+    }
+    fclose(mntinfo);
+    if (!mount_point_found) {
+      log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo");
+      return NULL;
+    }
+    // Cgroups v2 case, we have all the info we need.
+    // Construct the subsystem, free resources and return
+    // Note: any index in cg_infos will do as the path is the same for
+    //       all controllers.
+    CgroupController* unified = new CgroupV2Controller(cgroupv2_mount, cg_infos[memory_idx]._cgroup_path);
+    for (int i = 0; i < CG_INFO_LENGTH; i++) {
+      os::free(cg_infos[i]._name);
+      os::free(cg_infos[i]._cgroup_path);
+    }
+    log_debug(os, container)("Detected cgroups v2 unified hierarchy");
+    return new CgroupV2Subsystem(unified);
+  }
+
+  // What follows is cgroups v1
+  log_debug(os, container)("Detected cgroups hybrid or legacy hierarchy, using cgroups v1 controllers");
 
   /*
    * Find the cgroup mount point for memory and cpuset
    * by reading /proc/self/mountinfo
    *

@@ -85,28 +251,29 @@
   }
 
   fclose(mntinfo);
 
   if (memory == NULL) {
-    log_debug(os, container)("Required cgroup memory subsystem not found");
+    log_debug(os, container)("Required cgroup v1 memory subsystem not found");
     return NULL;
   }
   if (cpuset == NULL) {
-    log_debug(os, container)("Required cgroup cpuset subsystem not found");
+    log_debug(os, container)("Required cgroup v1 cpuset subsystem not found");
     return NULL;
   }
   if (cpu == NULL) {
-    log_debug(os, container)("Required cgroup cpu subsystem not found");
+    log_debug(os, container)("Required cgroup v1 cpu subsystem not found");
     return NULL;
   }
   if (cpuacct == NULL) {
-    log_debug(os, container)("Required cgroup cpuacct subsystem not found");
+    log_debug(os, container)("Required cgroup v1 cpuacct subsystem not found");
     return NULL;
   }
 
   /*
-   * Read /proc/self/cgroup and map host mount point to
+   * Use info gathered previously from /proc/self/cgroup
+   * and map host mount point to
    * local one via /proc/self/mountinfo content above
    *
    * Docker example:
    * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
    *

@@ -124,46 +291,22 @@
    * For a Host from memory example above the path would be:
    *
    * /sys/fs/cgroup/memory/user.slice
    *
    */
-  cgroup = fopen("/proc/self/cgroup", "r");
-  if (cgroup == NULL) {
-    log_debug(os, container)("Can't open /proc/self/cgroup, %s",
-                             os::strerror(errno));
-    return NULL;
-  }
-
-  while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
-    char *controllers;
-    char *token;
-    char *base;
-
-    /* Skip cgroup number */
-    strsep(&p, ":");
-    /* Get controllers and base */
-    controllers = strsep(&p, ":");
-    base = strsep(&p, "\n");
-
-    if (controllers == NULL) {
-      continue;
-    }
-
-    while ((token = strsep(&controllers, ",")) != NULL) {
-      if (strcmp(token, "memory") == 0) {
-        memory->set_subsystem_path(base);
-      } else if (strcmp(token, "cpuset") == 0) {
-        cpuset->set_subsystem_path(base);
-      } else if (strcmp(token, "cpu") == 0) {
-        cpu->set_subsystem_path(base);
-      } else if (strcmp(token, "cpuacct") == 0) {
-        cpuacct->set_subsystem_path(base);
-      }
+  for (int i = 0; i < CG_INFO_LENGTH; i++) {
+    CgroupInfo info = cg_infos[i];
+    if (strcmp(info._name, "memory") == 0) {
+      memory->set_subsystem_path(info._cgroup_path);
+    } else if (strcmp(info._name, "cpuset") == 0) {
+      cpuset->set_subsystem_path(info._cgroup_path);
+    } else if (strcmp(info._name, "cpu") == 0) {
+      cpu->set_subsystem_path(info._cgroup_path);
+    } else if (strcmp(info._name, "cpuacct") == 0) {
+      cpuacct->set_subsystem_path(info._cgroup_path);
     }
   }
-
-  fclose(cgroup);
   return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
 }
 
 /* active_processor_count
  *
< prev index next >