1 /*
   2  * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include <string.h>
  26 #include <math.h>
  27 #include "memory/allocation.hpp"
  28 #include "runtime/osContainer.hpp"
  29 #include "logging/log.hpp"
  30 
  31 #define MAXBUF 256
  32 #define CONTAINER_ERROR -2
  33 
  34 class CgroupSubsystem: CHeapObj<mtInternal> {
  35  friend OSContainer;
  36  private:
  37     /* mountinfo contents */
  38     char *_root;
  39     char *_mount_point;
  40 
  41     /* Constructed subsystem directory */
  42     char *_path;
  43 
  44  protected:
  45     CgroupSubsystem(char *root, char *mountpoint) {
  46       _root = strdup(root);
  47       _mount_point = strdup(mountpoint);
  48       _path = NULL;
  49     }
  50 
  51     ~CgroupSubsystem() {
  52       if (_root != NULL) free(_root);
  53       if ( _mount_point != NULL) free(_mount_point);
  54       if (_path != NULL) free(_path);
  55     }
  56 
  57  public:
  58     /* 
  59      * Set directory to subsystem specific files based 
  60      * on the contents of the mountinfo and cgroup files.
  61      */
  62     void set_subsystem_path(char *cgroup_path) {
  63       char buf[MAXBUF];
  64       if (_root != NULL && cgroup_path != NULL) {
  65         if (strcmp(_root, "/") == 0) {
  66           strncpy(buf, _mount_point, MAXBUF);
  67           buf[MAXBUF-1] = '\0';
  68           strncat(buf, cgroup_path, MAXBUF-strlen(buf));
  69           buf[MAXBUF-1] = '\0';
  70           _path = strdup(buf);
  71         }
  72         else {
  73           if (strcmp(_root, cgroup_path) == 0) {
  74             strncpy(buf, _mount_point, MAXBUF);
  75             buf[MAXBUF-1] = '\0';
  76             _path = strdup(buf);
  77           }
  78           else {
  79             char *p = strstr(_root, cgroup_path);
  80             if (p != NULL && p == _root) {
  81               if (strlen(cgroup_path) > strlen(_root)) {
  82                 strncpy(buf, _mount_point, MAXBUF);
  83                 buf[MAXBUF-1] = '\0';
  84                 strncat(buf, cgroup_path + strlen(_root), MAXBUF-strlen(buf));
  85                 buf[MAXBUF-1] = '\0';
  86                 _path = strdup(buf);
  87               }
  88             }
  89           }
  90         }
  91       }
  92     }
  93 
  94     char *get_subsystem_path() { return _path; }
  95 };
  96 
  97 // CgroupSubsystem *cgroupv2;
  98 CgroupSubsystem* memory = NULL;
  99 CgroupSubsystem* cpuset = NULL;
 100 CgroupSubsystem* cpu = NULL;
 101 CgroupSubsystem* cpuacct = NULL;
 102 
 103 #define GEN_CONTAINER_GET_INFO(return_name, return_type, scan_fmt)      \
 104 int get_subsystem_file_contents_##return_name(CgroupSubsystem* c,       \
 105                                               char *filename,           \
 106                                               return_type *returnval) { \
 107   FILE *fp = NULL;                                                      \
 108   char *p;                                                              \
 109   char buf[MAXBUF];                                                     \
 110                                                                         \
 111   if (c != NULL && c->get_subsystem_path() != NULL) {                   \
 112     strncpy(buf, c->get_subsystem_path(), MAXBUF);                      \
 113     buf[MAXBUF-1] = '\0';                                               \
 114     strncat(buf, filename, MAXBUF-strlen(buf));                         \
 115     log_trace(os, container)("Path to %s is %s\n", filename, buf);      \
 116     fp = fopen(buf, "r");                                               \
 117     if (fp != NULL) {                                                   \
 118       p = fgets(buf, MAXBUF, fp);                                       \
 119       return_type value;                                                \
 120       int matched;                                                      \
 121       if (p != NULL) {                                                  \
 122         matched = sscanf(p, scan_fmt, &value);                          \
 123         if (matched == 1) {                                             \
 124           fclose(fp);                                                   \
 125           *returnval = value;                                           \
 126           return 0;                                                     \
 127         }                                                               \
 128         else log_error(os, container)("Type %s not found in file %s\n", scan_fmt , buf);  \
 129       }                                                                 \
 130       else log_error(os, container)("Empty file %s\n", buf);            \
 131     }                                                                   \
 132     else log_error(os, container)("file not found %s\n", buf);          \
 133   }                                                                     \
 134   if (fp != NULL)                                                       \
 135     fclose(fp);                                                         \
 136   return CONTAINER_ERROR;                                               \
 137 }
 138 
 139 #define GEN_CONTAINER_GET_INFO_STR(return_name, return_type)            \
 140 int get_subsystem_file_contents_##return_name(CgroupSubsystem* c,       \
 141                                               char *filename,           \
 142                                               return_type *returnval) { \
 143   FILE *fp = NULL;                                                      \
 144   char *p;                                                              \
 145   char buf[MAXBUF];                                                     \
 146                                                                         \
 147   if (c != NULL && c->get_subsystem_path() != NULL) {                   \
 148     strncpy(buf, c->get_subsystem_path(), MAXBUF);                      \
 149     buf[MAXBUF-1] = '\0';                                               \
 150     strncat(buf, filename, MAXBUF-strlen(buf));                         \
 151     log_trace(os, container)("Path to %s is %s\n", filename, buf);      \
 152     fp = fopen(buf, "r");                                               \
 153     if (fp != NULL) {                                                   \
 154       p = fgets(buf, MAXBUF, fp);                                       \
 155       if (p != NULL) {                                                  \
 156         *returnval = strdup(p);                                         \
 157         fclose(fp);                                                     \
 158         return 0;                                                       \
 159       }                                                                 \
 160       else log_error(os, container)("Empty file %s\n", buf);            \
 161     }                                                                   \
 162     else log_error(os, container)("File not found %s\n", buf);          \
 163   }                                                                     \
 164   if (fp != NULL)                                                       \
 165     fclose(fp);                                                         \
 166   return CONTAINER_ERROR;                                               \
 167 }
 168 
 169 GEN_CONTAINER_GET_INFO(int, int, "%d")
 170 GEN_CONTAINER_GET_INFO(jlong, jlong, "%ld")
 171 GEN_CONTAINER_GET_INFO_STR(cptr, char *)
 172  
 173 #define GET_CONTAINER_INFO(return_name, return_type, subsystem,         \
 174                               filename, logstring, variable)            \
 175   int err;                                                              \
 176   return_type variable;                                                 \
 177   err = get_subsystem_file_contents_##return_name(subsystem,            \
 178                                     filename,                           \
 179                                     &variable);                         \
 180   if (err != 0) {                                                       \
 181     log_error(os, container)("Error reading %s", filename);             \
 182     return (return_type)CONTAINER_ERROR;                                \
 183   }                                                                     \
 184   log_info(os, container)(logstring, variable);
 185 
 186 static int cpuset_cpus_to_count(char *cpus);
 187 
 188 /* pd_init
 189  *
 190  * Initialize the container support and return true if the
 191  * container support is enabled and we are running under cgroup control.
 192  */
 193 bool OSContainer::pd_init() {
 194   int mountid;
 195   int parentid;
 196   int major;
 197   int minor;
 198   FILE *mntinfo = NULL;
 199   FILE *cgroup = NULL;
 200   char buf[MAXBUF];
 201   char tmproot[MAXBUF];
 202   char tmpmount[MAXBUF];
 203   char tmpbase[MAXBUF];
 204   char *p;
 205   jlong mem_limit;
 206 
 207   log_trace(os, container)("OSContainer::pd_init: Initializing Container Support");
 208   if (!UseContainerSupport) {
 209     log_trace(os, container)("Container Support not enabled");
 210     return false;
 211   }
 212 
 213   /* 
 214    * Find the cgroup mount point for memory and cpuset 
 215    * by reading /proc/self/mountinfo
 216    *
 217    * Example for docker:
 218    * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory
 219    *
 220    * Example for host:
 221    * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory
 222    */
 223   mntinfo = fopen("/proc/self/mountinfo", "r");
 224   if (mntinfo == NULL) {
 225       log_error(os, container)("Can't locate /proc/self/mountinfo\n");
 226       return false;
 227   }
 228 
 229   while ( (p = fgets(buf, MAXBUF, mntinfo)) != NULL) {
 230     // Look for the filesystem type and see if it's cgroup
 231     char fstype[MAXBUF];
 232     fstype[0] = '\0';
 233     char *s =  strstr(p, " - ");
 234     if (s != NULL && 
 235         sscanf(s, " - %s", fstype) == 1 &&
 236         strcmp(fstype, "cgroup") == 0) {
 237 
 238       if (strstr(p, "memory") != NULL) {
 239         int matched = sscanf(p, "%d %d %d:%d %s %s", 
 240                              &mountid,
 241                              &parentid,
 242                              &major,
 243                              &minor,
 244                              tmproot,
 245                              tmpmount);
 246         if (matched == 6) {
 247           memory = new CgroupSubsystem(tmproot, tmpmount);
 248         }
 249         else log_error(os, container)("Incompatible str containing cgroup and memory: %s\n", p);
 250       }
 251       else if (strstr(p, "cpuset") != NULL) {
 252         int matched = sscanf(p, "%d %d %d:%d %s %s", 
 253                              &mountid,
 254                              &parentid,
 255                              &major,
 256                              &minor,
 257                              tmproot,
 258                              tmpmount);
 259         if (matched == 6) {
 260           cpuset = new CgroupSubsystem(tmproot, tmpmount);
 261         }
 262         else log_error(os, container)("Incompatible str containing cgroup and cpuset: %s\n", p);
 263       }
 264       else if (strstr(p, "cpu,cpuacct") != NULL) {
 265         int matched = sscanf(p, "%d %d %d:%d %s %s", 
 266                              &mountid,
 267                              &parentid,
 268                              &major,
 269                              &minor,
 270                              tmproot,
 271                              tmpmount);
 272         if (matched == 6) {
 273           cpu = new CgroupSubsystem(tmproot, tmpmount);
 274           cpuacct = new CgroupSubsystem(tmproot, tmpmount);
 275         }
 276         else log_error(os, container)("Incompatible str containing cgroup and cpu,cpuacct: %s\n", p);
 277       }
 278       else if (strstr(p, "cpuacct") != NULL) {
 279         int matched = sscanf(p, "%d %d %d:%d %s %s", 
 280                              &mountid,
 281                              &parentid,
 282                              &major,
 283                              &minor,
 284                              tmproot,
 285                              tmpmount);
 286         if (matched == 6) {
 287           cpuacct = new CgroupSubsystem(tmproot, tmpmount);
 288         }
 289         else log_error(os, container)("Incompatible str containing cgroup and cpuacct: %s\n", p);
 290       }
 291       else if (strstr(p, "cpu") != NULL) {
 292         int matched = sscanf(p, "%d %d %d:%d %s %s", 
 293                              &mountid,
 294                              &parentid,
 295                              &major,
 296                              &minor,
 297                              tmproot,
 298                              tmpmount);
 299         if (matched == 6) {
 300           cpu = new CgroupSubsystem(tmproot, tmpmount);
 301         }
 302         else log_error(os, container)("Incompatible str containing cgroup and cpu: %s\n", p);
 303       }
 304     }
 305   }
 306 
 307   if (mntinfo != NULL) fclose(mntinfo);
 308 
 309   /* 
 310    * Read /proc/self/cgroup and map host mount point to 
 311    * local one via /proc/self/mountinfo content above
 312    *
 313    * Docker example:
 314    * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044
 315    *
 316    * Host example:
 317    * 5:memory:/user.slice
 318    *
 319    * Construct a path to the process specific memory and cpuset 
 320    * cgroup directory.
 321    *
 322    * For a container running under Docker from memory example above 
 323    * the paths would be:
 324    *
 325    * /sys/fs/cgroup/memory
 326    *
 327    * For a Host from memory example above the path would be:
 328    *
 329    * /sys/fs/cgroup/memory/user.slice
 330    * 
 331    */
 332   cgroup = fopen("/proc/self/cgroup", "r");
 333   if (cgroup == NULL) {
 334     log_error(os, container)("Can't locate /proc/self/cgroup\n");
 335     return false;
 336   }
 337 
 338   while ( (p = fgets(buf, MAXBUF, cgroup)) != NULL) {
 339     int cgno;
 340     int matched;
 341     char *controller;
 342     char *base;
 343 
 344     /* Skip cgroup number */
 345     strsep(&p, ":");
 346     /* Get controller and base */
 347     controller = strsep(&p, ":");
 348     base = strsep(&p, "\n");
 349 
 350     if (controller != NULL) {
 351       if (strstr(controller, "memory") != NULL) {
 352         memory->set_subsystem_path(base);
 353       }
 354       else if (strstr(controller, "cpuset") != NULL) {
 355         cpuset->set_subsystem_path(base);
 356       }
 357       else if (strstr(controller, "cpu,cpuacct") != NULL) {
 358         cpu->set_subsystem_path(base);
 359         cpuacct->set_subsystem_path(base);
 360       }
 361       else if (strstr(controller, "cpuacct") != NULL) {
 362         cpuacct->set_subsystem_path(base);
 363       }
 364       else if (strstr(controller, "cpu") != NULL) {
 365         cpu->set_subsystem_path(base);
 366       }
 367     }
 368   }
 369 
 370   if (cgroup != NULL) fclose(cgroup);
 371 
 372   if (memory == NULL || cpuset == NULL || cpu == NULL) {
 373     log_warning(os, container)("Required cgroup subsystems not found");
 374     return false;
 375   }
 376 
 377   // We need to update the amount of physical memory now that
 378   // command line arguments have been processed.
 379   if ((mem_limit = OSContainer::memory_limit_in_bytes()) > 0) {
 380     os::Linux::set_physical_memory(mem_limit);
 381   }
 382 
 383 #if 0
 384   // Test Container functions
 385   OSContainer::memory_usage_in_bytes();
 386   OSContainer::memory_and_swap_limit_in_bytes();
 387   OSContainer::memory_soft_limit_in_bytes();
 388   OSContainer::cpu_cpuset_memory_nodes();
 389   OSContainer::cpu_cpuset_cpus();
 390   OSContainer::cpu_quota();
 391   OSContainer::cpu_period();
 392   OSContainer::cpu_shares();
 393   OSContainer::active_processor_count();
 394 #endif
 395 
 396   return true;
 397 }
 398 
 399 char * OSContainer::get_container_type() {
 400   return (char *)0;
 401 }
 402 
 403 
 404 /* pd_memory_limit_in_bytes
 405  *
 406  * Return the limit of available memory for this process.
 407  *
 408  * return:
 409  *    memory limit in bytes or 
 410  *    -1 for unlimited
 411  *    CONTAINER_ERROR for not supported
 412  */
 413 jlong OSContainer::pd_memory_limit_in_bytes() {
 414   GET_CONTAINER_INFO(jlong, jlong, memory,  (char *)"/memory.limit_in_bytes",
 415                      "Memory Limit is: %ld\n", memlimit);
 416 
 417   if (memlimit == 9223372036854771712) {
 418     log_info(os, container)("Memory Limit is: Unlimited\n");
 419     return (jlong)-1;
 420   }
 421   else
 422     return memlimit;
 423 }
 424 
 425 jlong OSContainer::pd_memory_and_swap_limit_in_bytes() {
 426   GET_CONTAINER_INFO(jlong, jlong, memory,  (char *)"/memory.memsw.limit_in_bytes",
 427                      "Memory and Swap Limit is: %ld\n", memswlimit);
 428   if (memswlimit == 9223372036854771712) {
 429     log_info(os, container)("Memory and Swap Limit is: Unlimited\n");
 430     return (jlong)-1;
 431   }
 432   else
 433     return memswlimit;
 434 }
 435 
 436 jlong OSContainer::pd_memory_soft_limit_in_bytes() {
 437   GET_CONTAINER_INFO(jlong, jlong, memory,  (char *)"/memory.soft_limit_in_bytes",
 438                      "Memory Soft Limit is: %ld\n", memsoftlimit);
 439   if (memsoftlimit == 9223372036854771712) {
 440     log_info(os, container)("Memory Soft Limit is: Unlimited\n");
 441     return (jlong)-1;
 442   }
 443   else
 444     return memsoftlimit;
 445 }
 446 
 447 /* pd_memory_usage_in_bytes
 448  *
 449  * Return the amount of used memory for this process.
 450  *
 451  * return:
 452  *    memory usage in bytes or 
 453  *    -1 for unlimited
 454  *    CONTAINER_ERROR for not supported
 455  */
 456 jlong OSContainer::pd_memory_usage_in_bytes() {
 457   GET_CONTAINER_INFO(jlong, jlong, memory,  (char *)"/memory.usage_in_bytes",
 458                      "Memory Usage is: %ld\n", memusage);
 459   return memusage;
 460 }
 461 
 462 /* pd_memory_max_usage_in_bytes
 463  *
 464  * Return the maximum amount of used memory for this process.
 465  *
 466  * return:
 467  *    max memory usage in bytes or 
 468  *    CONTAINER_ERROR for not supported
 469  */
 470 jlong OSContainer::pd_memory_max_usage_in_bytes() {
 471   GET_CONTAINER_INFO(jlong, jlong, memory,  (char *)"/memory.max_usage_in_bytes",
 472                      "Maximu, Memory Usage is: %ld\n", memmaxusage);
 473   return memmaxusage;
 474 }
 475 
 476 /* pd_active_processor_count
 477  *
 478  * Calculate an appropriate number of active processors for the
 479  * VM to use based on these three cgroup options.
 480  *
 481  * cpu sets
 482  * cpu quota & cpu period
 483  * cpu shares
 484  *
 485  * Algorythm:
 486  *
 487  * Determine the number of available CPUs from cpu_sets
 488  *
 489  * If user specified a quota (quota != -1), calculate the number of 
 490  * required CPUs by dividing quota by period.  
 491  *
 492  * If shares are in effect (shares != -1), calculate the number
 493  * of cpus required for the shares by dividing the share value by 1024.  
 494  *
 495  * All results of division are rounded up to the next whole number.
 496  *
 497  * Return the smaller number from the three different settings.
 498  *
 499  * return:
 500  *    number of cpus
 501  *    CONTAINER_ERROR if failure occured during extract of cpuset info
 502  */
 503 int OSContainer::pd_active_processor_count() {
 504   int cpu_count, share_count, quota_count;
 505   int share, quota, period;
 506   int result;
 507   char *cpus;
 508 
 509   cpus = OSContainer::cpu_cpuset_cpus();
 510   if (cpus != (char *)CONTAINER_ERROR) {
 511     cpu_count = cpuset_cpus_to_count(cpus);
 512     log_info(os,container)("cpuset_cpu count is %d\n", cpu_count);
 513     free(cpus);
 514   }
 515   else {
 516     log_error(os,container)("Error getting cpuset_cpucount");
 517     return CONTAINER_ERROR;
 518   }
 519 
 520   share = OSContainer::cpu_shares();
 521   if (share > -1) {
 522     share_count = ceilf((float)share / 1024.0f);
 523     log_trace(os, container)("cpu_share count: %d", share_count);
 524   }
 525   else share_count = cpu_count;
 526   
 527   
 528   quota = OSContainer::cpu_quota();
 529   period = OSContainer::cpu_period();
 530   if (quota > -1 && period > 0) {
 531     quota_count = ceilf((float)quota / (float)period);
 532     log_trace(os, container)("quota_count: %d", quota_count);
 533   }
 534   else quota_count = cpu_count;
 535 
 536   result = MIN2(cpu_count, MIN2(share_count, quota_count)); 
 537   log_trace(os, container)("OSContainer::active_processor_count: %d", result);
 538   return result;
 539 }
 540 
 541 char * OSContainer::pd_cpu_cpuset_cpus() {
 542   GET_CONTAINER_INFO(cptr, char *, cpuset, (char *)"/cpuset.cpus", 
 543                      "cpuset.cpus is: %s\n", cpus);
 544   return cpus;
 545 }
 546 
 547 char * OSContainer::pd_cpu_cpuset_memory_nodes() {
 548   GET_CONTAINER_INFO(cptr, char *, cpuset, (char *)"/cpuset.mems", 
 549                      "cpuset.mems is: %s\n", mems);
 550   return mems;
 551 }
 552 
 553 /* pd_cpu_quota
 554  *
 555  * Return the number of miliseconds per period
 556  * process is guaranteed to run.
 557  *
 558  * return:
 559  *    quota time in milliseconds
 560  *    -1 for no quota
 561  *    CONTAINER_ERROR for not supported
 562  */
 563 int OSContainer::pd_cpu_quota() {
 564   GET_CONTAINER_INFO(int, int, cpu, (char *)"/cpu.cfs_quota_us",
 565                      "CPU Quota is: %d\n", quota);
 566   return quota;
 567 }
 568 
 569 int OSContainer::pd_cpu_period() {
 570   GET_CONTAINER_INFO(int, int, cpu, (char *)"/cpu.cfs_period_us",
 571                      "CPU Period is: %d\n", period);
 572   return period;
 573 }
 574 
 575 /* pd_cpu_shares
 576  *
 577  * Return the amount of cpu shares available to the process
 578  *
 579  * return:
 580  *    Share number (typically a number relative to 1024)
 581  *                 (2048 typically expresses 2 CPUs worth of processing)
 582  *    -1 for no share setup
 583  *    CONTAINER_ERROR for not supported
 584  */
 585 int OSContainer::pd_cpu_shares() {
 586   GET_CONTAINER_INFO(int, int, cpu, (char *)"/cpu.shares", 
 587                      "CPU Shares is: %d\n", shares);
 588   // Convert 1024 to no shares setup
 589   if (shares == 1024) return -1;
 590 
 591   return shares;
 592 }
 593 
 594 /*
 595  * Convert cpuset.cpus comma separated string to a 
 596  * count of cpus
 597  */
 598 static int cpuset_cpus_to_count(char *cpus)
 599 {
 600   int cpu_count = 0;
 601   char *token, *string, *tofree;
 602 
 603   tofree = string = strdup(cpus);
 604   while ((token = strsep(&string, ",")) != NULL) {
 605     /* Range x-z format */
 606     if (index(token, '-') != 0) {
 607       int low, hi;
 608       char *rtoken;
 609       char *range, *tofree2;
 610       tofree2 = range = strdup(token);
 611       rtoken = strsep(&range, "-");
 612       low = atoi(rtoken);
 613       hi = atoi(range);
 614       free(tofree2);
 615       cpu_count += ((++hi) - (low));
 616     }
 617     /* single number */
 618     else {
 619       int cpu = atoi(token);
 620       cpu_count++;
 621     }
 622   }
 623   free(tofree);
 624   return cpu_count;
 625 }