1 /* 2 * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include <string.h> 26 #include <math.h> 27 #include <errno.h> 28 #include "utilities/globalDefinitions.hpp" 29 #include "memory/allocation.hpp" 30 #include "runtime/os.hpp" 31 #include "osContainer_linux.hpp" 32 33 #define PER_CPU_SHARES 1024 34 35 bool OSContainer::_is_initialized = false; 36 bool OSContainer::_is_containerized = false; 37 julong _unlimited_memory; 38 39 class CgroupSubsystem: CHeapObj<mtInternal> { 40 friend class OSContainer; 41 42 private: 43 /* mountinfo contents */ 44 char *_root; 45 char *_mount_point; 46 47 /* Constructed subsystem directory */ 48 char *_path; 49 50 public: 51 CgroupSubsystem(char *root, char *mountpoint) { 52 _root = os::strdup(root); 53 _mount_point = os::strdup(mountpoint); 54 _path = NULL; 55 } 56 57 /* 58 * Set directory to subsystem specific files based 59 * on the contents of the mountinfo and cgroup files. 60 */ 61 void set_subsystem_path(char *cgroup_path) { 62 char buf[MAXPATHLEN+1]; 63 if (_root != NULL && cgroup_path != NULL) { 64 if (strcmp(_root, "/") == 0) { 65 int buflen; 66 strncpy(buf, _mount_point, MAXPATHLEN); 67 buf[MAXPATHLEN-1] = '\0'; 68 if (strcmp(cgroup_path,"/") != 0) { 69 buflen = strlen(buf); 70 if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { 71 return; 72 } 73 strncat(buf, cgroup_path, MAXPATHLEN-buflen); 74 buf[MAXPATHLEN-1] = '\0'; 75 } 76 _path = os::strdup(buf); 77 } else { 78 if (strcmp(_root, cgroup_path) == 0) { 79 strncpy(buf, _mount_point, MAXPATHLEN); 80 buf[MAXPATHLEN-1] = '\0'; 81 _path = os::strdup(buf); 82 } else { 83 char *p = strstr(_root, cgroup_path); 84 if (p != NULL && p == _root) { 85 if (strlen(cgroup_path) > strlen(_root)) { 86 int buflen; 87 strncpy(buf, _mount_point, MAXPATHLEN); 88 buf[MAXPATHLEN-1] = '\0'; 89 buflen = strlen(buf); 90 if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { 91 return; 92 } 93 strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen); 94 buf[MAXPATHLEN-1] = '\0'; 95 _path = os::strdup(buf); 96 } 97 } 98 } 99 } 100 } 101 } 102 103 char *subsystem_path() { return _path; } 104 }; 105 106 CgroupSubsystem* memory = NULL; 107 CgroupSubsystem* cpuset = NULL; 108 CgroupSubsystem* cpu = NULL; 109 CgroupSubsystem* cpuacct = NULL; 110 111 typedef char * cptr; 112 113 PRAGMA_DIAG_PUSH 114 PRAGMA_FORMAT_NONLITERAL_IGNORED 115 template <typename T> int subsystem_file_contents(CgroupSubsystem* c, 116 const char *filename, 117 const char *scan_fmt, 118 T returnval) { 119 FILE *fp = NULL; 120 char *p; 121 char file[MAXPATHLEN+1]; 122 char buf[MAXPATHLEN+1]; 123 124 if (c == NULL) { 125 if (PrintContainerInfo) { 126 tty->print_cr("subsystem_file_contents: CgroupSubsytem* is NULL"); 127 } 128 return OSCONTAINER_ERROR; 129 } 130 if (c->subsystem_path() == NULL) { 131 if (PrintContainerInfo) { 132 tty->print_cr("subsystem_file_contents: subsystem path is NULL"); 133 } 134 return OSCONTAINER_ERROR; 135 } 136 137 strncpy(file, c->subsystem_path(), MAXPATHLEN); 138 file[MAXPATHLEN-1] = '\0'; 139 int filelen = strlen(file); 140 if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) { 141 if (PrintContainerInfo) { 142 tty->print_cr("File path too long %s, %s", file, filename); 143 } 144 return OSCONTAINER_ERROR; 145 } 146 strncat(file, filename, MAXPATHLEN-filelen); 147 if (PrintContainerInfo) { 148 tty->print_cr("Path to %s is %s", filename, file); 149 } 150 fp = fopen(file, "r"); 151 if (fp != NULL) { 152 p = fgets(buf, MAXPATHLEN, fp); 153 if (p != NULL) { 154 int matched = sscanf(p, scan_fmt, returnval); 155 if (matched == 1) { 156 fclose(fp); 157 return 0; 158 } else { 159 if (PrintContainerInfo) { 160 tty->print_cr("Type %s not found in file %s", scan_fmt, file); 161 } 162 } 163 } else { 164 if (PrintContainerInfo) { 165 tty->print_cr("Empty file %s", file); 166 } 167 } 168 } else { 169 if (PrintContainerInfo) { 170 tty->print_cr("Open of file %s failed, %s", file, strerror(errno)); 171 } 172 } 173 if (fp != NULL) 174 fclose(fp); 175 return OSCONTAINER_ERROR; 176 } 177 PRAGMA_DIAG_POP 178 179 #define GET_CONTAINER_INFO(return_type, subsystem, filename, \ 180 logstring, scan_fmt, variable) \ 181 return_type variable; \ 182 { \ 183 int err; \ 184 err = subsystem_file_contents(subsystem, \ 185 filename, \ 186 scan_fmt, \ 187 &variable); \ 188 if (err != 0) \ 189 return (return_type) OSCONTAINER_ERROR; \ 190 \ 191 if (PrintContainerInfo) \ 192 tty->print_cr(logstring, variable); \ 193 } 194 195 #define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename, \ 196 logstring, scan_fmt, variable, bufsize) \ 197 char variable[bufsize]; \ 198 { \ 199 int err; \ 200 err = subsystem_file_contents(subsystem, \ 201 filename, \ 202 scan_fmt, \ 203 variable); \ 204 if (err != 0) \ 205 return (return_type) NULL; \ 206 \ 207 if (PrintContainerInfo) \ 208 tty->print_cr(logstring, variable); \ 209 } 210 211 /* init 212 * 213 * Initialize the container support and determine if 214 * we are running under cgroup control. 215 */ 216 void OSContainer::init() { 217 int mountid; 218 int parentid; 219 int major; 220 int minor; 221 FILE *mntinfo = NULL; 222 FILE *cgroup = NULL; 223 char buf[MAXPATHLEN+1]; 224 char tmproot[MAXPATHLEN+1]; 225 char tmpmount[MAXPATHLEN+1]; 226 char tmpbase[MAXPATHLEN+1]; 227 char *p; 228 jlong mem_limit; 229 230 assert(!_is_initialized, "Initializing OSContainer more than once"); 231 232 _is_initialized = true; 233 _is_containerized = false; 234 235 _unlimited_memory = (LONG_MAX / os::vm_page_size()) * os::vm_page_size(); 236 237 if (PrintContainerInfo) { 238 tty->print_cr("OSContainer::init: Initializing Container Support"); 239 } 240 if (!UseContainerSupport) { 241 if (PrintContainerInfo) { 242 tty->print_cr("Container Support not enabled"); 243 } 244 return; 245 } 246 247 /* 248 * Find the cgroup mount point for memory and cpuset 249 * by reading /proc/self/mountinfo 250 * 251 * Example for docker: 252 * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory 253 * 254 * Example for host: 255 * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory 256 */ 257 mntinfo = fopen("/proc/self/mountinfo", "r"); 258 if (mntinfo == NULL) { 259 if (PrintContainerInfo) { 260 tty->print_cr("Can't open /proc/self/mountinfo, %s", 261 strerror(errno)); 262 } 263 return; 264 } 265 266 while ( (p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { 267 // Look for the filesystem type and see if it's cgroup 268 char fstype[MAXPATHLEN+1]; 269 fstype[0] = '\0'; 270 char *s = strstr(p, " - "); 271 if (s != NULL && 272 sscanf(s, " - %s", fstype) == 1 && 273 strcmp(fstype, "cgroup") == 0) { 274 275 if (strstr(p, "memory") != NULL) { 276 int matched = sscanf(p, "%d %d %d:%d %s %s", 277 &mountid, 278 &parentid, 279 &major, 280 &minor, 281 tmproot, 282 tmpmount); 283 if (matched == 6) { 284 memory = new CgroupSubsystem(tmproot, tmpmount); 285 } 286 else 287 if (PrintContainerInfo) { 288 tty->print_cr("Incompatible str containing cgroup and memory: %s", p); 289 } 290 } else if (strstr(p, "cpuset") != NULL) { 291 int matched = sscanf(p, "%d %d %d:%d %s %s", 292 &mountid, 293 &parentid, 294 &major, 295 &minor, 296 tmproot, 297 tmpmount); 298 if (matched == 6) { 299 cpuset = new CgroupSubsystem(tmproot, tmpmount); 300 } 301 else { 302 if (PrintContainerInfo) { 303 tty->print_cr("Incompatible str containing cgroup and cpuset: %s", p); 304 } 305 } 306 } else if (strstr(p, "cpu,cpuacct") != NULL || strstr(p, "cpuacct,cpu") != NULL) { 307 int matched = sscanf(p, "%d %d %d:%d %s %s", 308 &mountid, 309 &parentid, 310 &major, 311 &minor, 312 tmproot, 313 tmpmount); 314 if (matched == 6) { 315 cpu = new CgroupSubsystem(tmproot, tmpmount); 316 cpuacct = new CgroupSubsystem(tmproot, tmpmount); 317 } 318 else { 319 if (PrintContainerInfo) { 320 tty->print_cr("Incompatible str containing cgroup and cpu,cpuacct: %s", p); 321 } 322 } 323 } else if (strstr(p, "cpuacct") != NULL) { 324 int matched = sscanf(p, "%d %d %d:%d %s %s", 325 &mountid, 326 &parentid, 327 &major, 328 &minor, 329 tmproot, 330 tmpmount); 331 if (matched == 6) { 332 cpuacct = new CgroupSubsystem(tmproot, tmpmount); 333 } 334 else { 335 if (PrintContainerInfo) { 336 tty->print_cr("Incompatible str containing cgroup and cpuacct: %s", p); 337 } 338 } 339 } else if (strstr(p, "cpu") != NULL) { 340 int matched = sscanf(p, "%d %d %d:%d %s %s", 341 &mountid, 342 &parentid, 343 &major, 344 &minor, 345 tmproot, 346 tmpmount); 347 if (matched == 6) { 348 cpu = new CgroupSubsystem(tmproot, tmpmount); 349 } 350 else { 351 if (PrintContainerInfo) { 352 tty->print_cr("Incompatible str containing cgroup and cpu: %s", p); 353 } 354 } 355 } 356 } 357 } 358 359 fclose(mntinfo); 360 361 if (memory == NULL) { 362 if (PrintContainerInfo) { 363 tty->print_cr("Required cgroup memory subsystem not found"); 364 } 365 return; 366 } 367 if (cpuset == NULL) { 368 if (PrintContainerInfo) { 369 tty->print_cr("Required cgroup cpuset subsystem not found"); 370 } 371 return; 372 } 373 if (cpu == NULL) { 374 if (PrintContainerInfo) { 375 tty->print_cr("Required cgroup cpu subsystem not found"); 376 } 377 return; 378 } 379 if (cpuacct == NULL) { 380 if (PrintContainerInfo) { 381 tty->print_cr("Required cgroup cpuacct subsystem not found"); 382 } 383 return; 384 } 385 386 /* 387 * Read /proc/self/cgroup and map host mount point to 388 * local one via /proc/self/mountinfo content above 389 * 390 * Docker example: 391 * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 392 * 393 * Host example: 394 * 5:memory:/user.slice 395 * 396 * Construct a path to the process specific memory and cpuset 397 * cgroup directory. 398 * 399 * For a container running under Docker from memory example above 400 * the paths would be: 401 * 402 * /sys/fs/cgroup/memory 403 * 404 * For a Host from memory example above the path would be: 405 * 406 * /sys/fs/cgroup/memory/user.slice 407 * 408 */ 409 cgroup = fopen("/proc/self/cgroup", "r"); 410 if (cgroup == NULL) { 411 if (PrintContainerInfo) { 412 tty->print_cr("Can't open /proc/self/cgroup, %s", 413 strerror(errno)); 414 } 415 return; 416 } 417 418 while ( (p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { 419 int cgno; 420 int matched; 421 char *controller; 422 char *base; 423 424 /* Skip cgroup number */ 425 strsep(&p, ":"); 426 /* Get controller and base */ 427 controller = strsep(&p, ":"); 428 base = strsep(&p, "\n"); 429 430 if (controller != NULL) { 431 if (strstr(controller, "memory") != NULL) { 432 memory->set_subsystem_path(base); 433 } else if (strstr(controller, "cpuset") != NULL) { 434 cpuset->set_subsystem_path(base); 435 } else if (strstr(controller, "cpu,cpuacct") != NULL || strstr(controller, "cpuacct,cpu") != NULL) { 436 cpu->set_subsystem_path(base); 437 cpuacct->set_subsystem_path(base); 438 } else if (strstr(controller, "cpuacct") != NULL) { 439 cpuacct->set_subsystem_path(base); 440 } else if (strstr(controller, "cpu") != NULL) { 441 cpu->set_subsystem_path(base); 442 } 443 } 444 } 445 446 fclose(cgroup); 447 448 // We need to update the amount of physical memory now that 449 // command line arguments have been processed. 450 if ((mem_limit = memory_limit_in_bytes()) > 0) { 451 os::Linux::set_physical_memory(mem_limit); 452 } 453 454 _is_containerized = true; 455 456 } 457 458 const char * OSContainer::container_type() { 459 if (is_containerized()) { 460 return "cgroupv1"; 461 } else { 462 return NULL; 463 } 464 } 465 466 467 /* memory_limit_in_bytes 468 * 469 * Return the limit of available memory for this process. 470 * 471 * return: 472 * memory limit in bytes or 473 * -1 for unlimited 474 * OSCONTAINER_ERROR for not supported 475 */ 476 jlong OSContainer::memory_limit_in_bytes() { 477 GET_CONTAINER_INFO(julong, memory, "/memory.limit_in_bytes", 478 "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit); 479 480 if (memlimit >= _unlimited_memory) { 481 if (PrintContainerInfo) { 482 tty->print_cr("Memory Limit is: Unlimited"); 483 } 484 return (jlong)-1; 485 } 486 else { 487 return (jlong)memlimit; 488 } 489 } 490 491 jlong OSContainer::memory_and_swap_limit_in_bytes() { 492 GET_CONTAINER_INFO(julong, memory, "/memory.memsw.limit_in_bytes", 493 "Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit); 494 if (memswlimit >= _unlimited_memory) { 495 if (PrintContainerInfo) { 496 tty->print_cr("Memory and Swap Limit is: Unlimited"); 497 } 498 return (jlong)-1; 499 } else { 500 return (jlong)memswlimit; 501 } 502 } 503 504 jlong OSContainer::memory_soft_limit_in_bytes() { 505 GET_CONTAINER_INFO(julong, memory, "/memory.soft_limit_in_bytes", 506 "Memory Soft Limit is: " JULONG_FORMAT, JULONG_FORMAT, memsoftlimit); 507 if (memsoftlimit >= _unlimited_memory) { 508 if (PrintContainerInfo) { 509 tty->print_cr("Memory Soft Limit is: Unlimited"); 510 } 511 return (jlong)-1; 512 } else { 513 return (jlong)memsoftlimit; 514 } 515 } 516 517 /* memory_usage_in_bytes 518 * 519 * Return the amount of used memory for this process. 520 * 521 * return: 522 * memory usage in bytes or 523 * -1 for unlimited 524 * OSCONTAINER_ERROR for not supported 525 */ 526 jlong OSContainer::memory_usage_in_bytes() { 527 GET_CONTAINER_INFO(jlong, memory, "/memory.usage_in_bytes", 528 "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage); 529 return memusage; 530 } 531 532 /* memory_max_usage_in_bytes 533 * 534 * Return the maximum amount of used memory for this process. 535 * 536 * return: 537 * max memory usage in bytes or 538 * OSCONTAINER_ERROR for not supported 539 */ 540 jlong OSContainer::memory_max_usage_in_bytes() { 541 GET_CONTAINER_INFO(jlong, memory, "/memory.max_usage_in_bytes", 542 "Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage); 543 return memmaxusage; 544 } 545 546 /* active_processor_count 547 * 548 * Calculate an appropriate number of active processors for the 549 * VM to use based on these three inputs. 550 * 551 * cpu affinity 552 * cgroup cpu quota & cpu period 553 * cgroup cpu shares 554 * 555 * Algorithm: 556 * 557 * Determine the number of available CPUs from sched_getaffinity 558 * 559 * If user specified a quota (quota != -1), calculate the number of 560 * required CPUs by dividing quota by period. 561 * 562 * If shares are in effect (shares != -1), calculate the number 563 * of CPUs required for the shares by dividing the share value 564 * by PER_CPU_SHARES. 565 * 566 * All results of division are rounded up to the next whole number. 567 * 568 * If neither shares or quotas have been specified, return the 569 * number of active processors in the system. 570 * 571 * If both shares and quotas have been specified, the results are 572 * based on the flag PreferContainerQuotaForCPUCount. If true, 573 * return the quota value. If false return the smallest value 574 * between shares or quotas. 575 * 576 * If shares and/or quotas have been specified, the resulting number 577 * returned will never exceed the number of active processors. 578 * 579 * return: 580 * number of CPUs 581 */ 582 int OSContainer::active_processor_count() { 583 int quota_count = 0, share_count = 0; 584 int cpu_count, limit_count; 585 int result; 586 587 cpu_count = limit_count = os::Linux::active_processor_count(); 588 int quota = cpu_quota(); 589 int period = cpu_period(); 590 int share = cpu_shares(); 591 592 if (quota > -1 && period > 0) { 593 quota_count = ceilf((float)quota / (float)period); 594 if (PrintContainerInfo) { 595 tty->print_cr("CPU Quota count based on quota/period: %d", quota_count); 596 } 597 } 598 if (share > -1) { 599 share_count = ceilf((float)share / (float)PER_CPU_SHARES); 600 if (PrintContainerInfo) { 601 tty->print_cr("CPU Share count based on shares: %d", share_count); 602 } 603 } 604 605 // If both shares and quotas are setup results depend 606 // on flag PreferContainerQuotaForCPUCount. 607 // If true, limit CPU count to quota 608 // If false, use minimum of shares and quotas 609 if (quota_count !=0 && share_count != 0) { 610 if (PreferContainerQuotaForCPUCount) { 611 limit_count = quota_count; 612 } else { 613 limit_count = MIN2(quota_count, share_count); 614 } 615 } else if (quota_count != 0) { 616 limit_count = quota_count; 617 } else if (share_count != 0) { 618 limit_count = share_count; 619 } 620 621 result = MIN2(cpu_count, limit_count); 622 if (PrintContainerInfo) { 623 tty->print_cr("OSContainer::active_processor_count: %d", result); 624 } 625 return result; 626 } 627 628 char * OSContainer::cpu_cpuset_cpus() { 629 GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.cpus", 630 "cpuset.cpus is: %s", "%1023s", cpus, 1024); 631 return os::strdup(cpus); 632 } 633 634 char * OSContainer::cpu_cpuset_memory_nodes() { 635 GET_CONTAINER_INFO_CPTR(cptr, cpuset, "/cpuset.mems", 636 "cpuset.mems is: %s", "%1023s", mems, 1024); 637 return os::strdup(mems); 638 } 639 640 /* cpu_quota 641 * 642 * Return the number of milliseconds per period 643 * process is guaranteed to run. 644 * 645 * return: 646 * quota time in milliseconds 647 * -1 for no quota 648 * OSCONTAINER_ERROR for not supported 649 */ 650 int OSContainer::cpu_quota() { 651 GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_quota_us", 652 "CPU Quota is: %d", "%d", quota); 653 return quota; 654 } 655 656 int OSContainer::cpu_period() { 657 GET_CONTAINER_INFO(int, cpu, "/cpu.cfs_period_us", 658 "CPU Period is: %d", "%d", period); 659 return period; 660 } 661 662 /* cpu_shares 663 * 664 * Return the amount of cpu shares available to the process 665 * 666 * return: 667 * Share number (typically a number relative to 1024) 668 * (2048 typically expresses 2 CPUs worth of processing) 669 * -1 for no share setup 670 * OSCONTAINER_ERROR for not supported 671 */ 672 int OSContainer::cpu_shares() { 673 GET_CONTAINER_INFO(int, cpu, "/cpu.shares", 674 "CPU Shares is: %d", "%d", shares); 675 // Convert 1024 to no shares setup 676 if (shares == 1024) return -1; 677 678 return shares; 679 } 680