1 /*
   2  * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include <string.h>
  26 #include <math.h>
  27 #include <errno.h>
  28 #include "cgroupSubsystem_linux.hpp"
  29 #include "cgroupV1Subsystem_linux.hpp"
  30 #include "logging/log.hpp"
  31 #include "memory/allocation.hpp"
  32 #include "runtime/globals.hpp"
  33 #include "runtime/os.hpp"
  34 #include "utilities/globalDefinitions.hpp"
  35 
  36 CgroupSubsystem* CgroupSubsystemFactory::create() {
  37   CgroupV1MemoryController* memory = NULL;
  38   CgroupV1Controller* cpuset = NULL;
  39   CgroupV1Controller* cpu = NULL;
  40   CgroupV1Controller* cpuacct = NULL;
  41   FILE *mntinfo = NULL;
  42   FILE *cgroup = NULL;
  43   char buf[MAXPATHLEN+1];
  44   char tmproot[MAXPATHLEN+1];
  45   char tmpmount[MAXPATHLEN+1];
  46   char *p;
  47 
  48   /*
  49    * Find the cgroup mount point for memory and cpuset
  50    * by reading /proc/self/mountinfo
  51    *
  52    * Example for docker:
  53    * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
  54    *
  55    * Example for host:
  56    * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
  57    */
  58   mntinfo = fopen("/proc/self/mountinfo", "r");
  59   if (mntinfo == NULL) {
  60       log_debug(os, container)("Can't open /proc/self/mountinfo, %s",
  61                                os::strerror(errno));
  62       return NULL;
  63   }
  64 
  65   while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) {
  66     char tmpcgroups[MAXPATHLEN+1];
  67     char *cptr = tmpcgroups;
  68     char *token;
  69 
  70     // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt
  71     if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- cgroup %*s %s", tmproot, tmpmount, tmpcgroups) != 3) {
  72       continue;
  73     }
  74     while ((token = strsep(&cptr, ",")) != NULL) {
  75       if (strcmp(token, "memory") == 0) {
  76         memory = new CgroupV1MemoryController(tmproot, tmpmount);
  77       } else if (strcmp(token, "cpuset") == 0) {
  78         cpuset = new CgroupV1Controller(tmproot, tmpmount);
  79       } else if (strcmp(token, "cpu") == 0) {
  80         cpu = new CgroupV1Controller(tmproot, tmpmount);
  81       } else if (strcmp(token, "cpuacct") == 0) {
  82         cpuacct= new CgroupV1Controller(tmproot, tmpmount);
  83       }
  84     }
  85   }
  86 
  87   fclose(mntinfo);
  88 
  89   if (memory == NULL) {
  90     log_debug(os, container)("Required cgroup memory subsystem not found");
  91     return NULL;
  92   }
  93   if (cpuset == NULL) {
  94     log_debug(os, container)("Required cgroup cpuset subsystem not found");
  95     return NULL;
  96   }
  97   if (cpu == NULL) {
  98     log_debug(os, container)("Required cgroup cpu subsystem not found");
  99     return NULL;
 100   }
 101   if (cpuacct == NULL) {
 102     log_debug(os, container)("Required cgroup cpuacct subsystem not found");
 103     return NULL;
 104   }
 105 
 106   /*
 107    * Read /proc/self/cgroup and map host mount point to
 108    * local one via /proc/self/mountinfo content above
 109    *
 110    * Docker example:
 111    * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
 112    *
 113    * Host example:
 114    * 5:memory:/user.slice
 115    *
 116    * Construct a path to the process specific memory and cpuset
 117    * cgroup directory.
 118    *
 119    * For a container running under Docker from memory example above
 120    * the paths would be:
 121    *
 122    * /sys/fs/cgroup/memory
 123    *
 124    * For a Host from memory example above the path would be:
 125    *
 126    * /sys/fs/cgroup/memory/user.slice
 127    *
 128    */
 129   cgroup = fopen("/proc/self/cgroup", "r");
 130   if (cgroup == NULL) {
 131     log_debug(os, container)("Can't open /proc/self/cgroup, %s",
 132                              os::strerror(errno));
 133     return NULL;
 134   }
 135 
 136   while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) {
 137     char *controllers;
 138     char *token;
 139     char *base;
 140 
 141     /* Skip cgroup number */
 142     strsep(&p, ":");
 143     /* Get controllers and base */
 144     controllers = strsep(&p, ":");
 145     base = strsep(&p, "\n");
 146 
 147     if (controllers == NULL) {
 148       continue;
 149     }
 150 
 151     while ((token = strsep(&controllers, ",")) != NULL) {
 152       if (strcmp(token, "memory") == 0) {
 153         memory->set_subsystem_path(base);
 154       } else if (strcmp(token, "cpuset") == 0) {
 155         cpuset->set_subsystem_path(base);
 156       } else if (strcmp(token, "cpu") == 0) {
 157         cpu->set_subsystem_path(base);
 158       } else if (strcmp(token, "cpuacct") == 0) {
 159         cpuacct->set_subsystem_path(base);
 160       }
 161     }
 162   }
 163 
 164   fclose(cgroup);
 165   return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory);
 166 }
 167 
 168 /* active_processor_count
 169  *
 170  * Calculate an appropriate number of active processors for the
 171  * VM to use based on these three inputs.
 172  *
 173  * cpu affinity
 174  * cgroup cpu quota & cpu period
 175  * cgroup cpu shares
 176  *
 177  * Algorithm:
 178  *
 179  * Determine the number of available CPUs from sched_getaffinity
 180  *
 181  * If user specified a quota (quota != -1), calculate the number of
 182  * required CPUs by dividing quota by period.
 183  *
 184  * If shares are in effect (shares != -1), calculate the number
 185  * of CPUs required for the shares by dividing the share value
 186  * by PER_CPU_SHARES.
 187  *
 188  * All results of division are rounded up to the next whole number.
 189  *
 190  * If neither shares or quotas have been specified, return the
 191  * number of active processors in the system.
 192  *
 193  * If both shares and quotas have been specified, the results are
 194  * based on the flag PreferContainerQuotaForCPUCount.  If true,
 195  * return the quota value.  If false return the smallest value
 196  * between shares or quotas.
 197  *
 198  * If shares and/or quotas have been specified, the resulting number
 199  * returned will never exceed the number of active processors.
 200  *
 201  * return:
 202  *    number of CPUs
 203  */
 204 int CgroupSubsystem::active_processor_count(int physical_proc_count) {
 205   int quota_count = 0, share_count = 0;
 206   int cpu_count, limit_count;
 207   int result;
 208 
 209   cpu_count = limit_count = physical_proc_count;
 210   int quota  = cpu_quota();
 211   int period = cpu_period();
 212   int share  = cpu_shares();
 213 
 214   if (quota > -1 && period > 0) {
 215     quota_count = ceilf((float)quota / (float)period);
 216     log_trace(os, container)("CPU Quota count based on quota/period: %d", quota_count);
 217   }
 218   if (share > -1) {
 219     share_count = ceilf((float)share / (float)PER_CPU_SHARES);
 220     log_trace(os, container)("CPU Share count based on shares: %d", share_count);
 221   }
 222 
 223   // If both shares and quotas are setup results depend
 224   // on flag PreferContainerQuotaForCPUCount.
 225   // If true, limit CPU count to quota
 226   // If false, use minimum of shares and quotas
 227   if (quota_count !=0 && share_count != 0) {
 228     if (PreferContainerQuotaForCPUCount) {
 229       limit_count = quota_count;
 230     } else {
 231       limit_count = MIN2(quota_count, share_count);
 232     }
 233   } else if (quota_count != 0) {
 234     limit_count = quota_count;
 235   } else if (share_count != 0) {
 236     limit_count = share_count;
 237   }
 238 
 239   result = MIN2(cpu_count, limit_count);
 240   log_trace(os, container)("OSContainer::active_processor_count: %d", result);
 241   return result;
 242 }