1 /* 2 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include <string.h> 26 #include <math.h> 27 #include <errno.h> 28 #include "cgroupSubsystem_linux.hpp" 29 #include "logging/log.hpp" 30 #include "memory/allocation.hpp" 31 #include "runtime/globals.hpp" 32 #include "runtime/os.hpp" 33 #include "utilities/globalDefinitions.hpp" 34 35 typedef char * cptr; 36 37 PRAGMA_DIAG_PUSH 38 PRAGMA_FORMAT_NONLITERAL_IGNORED 39 template <typename T> int subsystem_file_line_contents(CgroupController* c, 40 const char *filename, 41 const char *matchline, 42 const char *scan_fmt, 43 T returnval) { 44 FILE *fp = NULL; 45 char *p; 46 char file[MAXPATHLEN+1]; 47 char buf[MAXPATHLEN+1]; 48 char discard[MAXPATHLEN+1]; 49 bool found_match = false; 50 51 if (c == NULL) { 52 log_debug(os, container)("subsystem_file_line_contents: CgroupV1Controller* is NULL"); 53 return OSCONTAINER_ERROR; 54 } 55 if (c->subsystem_path() == NULL) { 56 log_debug(os, container)("subsystem_file_line_contents: subsystem path is NULL"); 57 return OSCONTAINER_ERROR; 58 } 59 60 strncpy(file, c->subsystem_path(), MAXPATHLEN); 61 file[MAXPATHLEN-1] = '\0'; 62 int filelen = strlen(file); 63 if ((filelen + strlen(filename)) > (MAXPATHLEN-1)) { 64 log_debug(os, container)("File path too long %s, %s", file, filename); 65 return OSCONTAINER_ERROR; 66 } 67 strncat(file, filename, MAXPATHLEN-filelen); 68 log_trace(os, container)("Path to %s is %s", filename, file); 69 fp = fopen(file, "r"); 70 if (fp != NULL) { 71 int err = 0; 72 while ((p = fgets(buf, MAXPATHLEN, fp)) != NULL) { 73 found_match = false; 74 if (matchline == NULL) { 75 // single-line file case 76 int matched = sscanf(p, scan_fmt, returnval); 77 found_match = (matched == 1); 78 } else { 79 // multi-line file case 80 if (strstr(p, matchline) != NULL) { 81 // discard matchline string prefix 82 int matched = sscanf(p, scan_fmt, discard, returnval); 83 found_match = (matched == 2); 84 } else { 85 continue; // substring not found 86 } 87 } 88 if (found_match) { 89 fclose(fp); 90 return 0; 91 } else { 92 err = 1; 93 log_debug(os, container)("Type %s not found in file %s", scan_fmt, file); 94 } 95 } 96 if (err == 0) { 97 log_debug(os, container)("Empty file %s", file); 98 } 99 } else { 100 log_debug(os, container)("Open of file %s failed, %s", file, os::strerror(errno)); 101 } 102 if (fp != NULL) 103 fclose(fp); 104 return OSCONTAINER_ERROR; 105 } 106 PRAGMA_DIAG_POP 107 108 #define GET_CONTAINER_INFO(return_type, subsystem, filename, \ 109 logstring, scan_fmt, variable) \ 110 return_type variable; \ 111 { \ 112 int err; \ 113 err = subsystem_file_line_contents(subsystem, \ 114 filename, \ 115 NULL, \ 116 scan_fmt, \ 117 &variable); \ 118 if (err != 0) \ 119 return (return_type) OSCONTAINER_ERROR; \ 120 \ 121 log_trace(os, container)(logstring, variable); \ 122 } 123 124 #define GET_CONTAINER_INFO_CPTR(return_type, subsystem, filename, \ 125 logstring, scan_fmt, variable, bufsize) \ 126 char variable[bufsize]; \ 127 { \ 128 int err; \ 129 err = subsystem_file_line_contents(subsystem, \ 130 filename, \ 131 NULL, \ 132 scan_fmt, \ 133 variable); \ 134 if (err != 0) \ 135 return (return_type) NULL; \ 136 \ 137 log_trace(os, container)(logstring, variable); \ 138 } 139 140 #define GET_CONTAINER_INFO_LINE(return_type, controller, filename, \ 141 matchline, logstring, scan_fmt, variable) \ 142 return_type variable; \ 143 { \ 144 int err; \ 145 err = subsystem_file_line_contents(controller, \ 146 filename, \ 147 matchline, \ 148 scan_fmt, \ 149 &variable); \ 150 if (err != 0) \ 151 return (return_type) OSCONTAINER_ERROR; \ 152 \ 153 log_trace(os, container)(logstring, variable); \ 154 } 155 156 /* 157 * Set directory to subsystem specific files based 158 * on the contents of the mountinfo and cgroup files. 159 */ 160 void CgroupV1Controller::set_subsystem_path(char *cgroup_path) { 161 char buf[MAXPATHLEN+1]; 162 if (_root != NULL && cgroup_path != NULL) { 163 if (strcmp(_root, "/") == 0) { 164 int buflen; 165 strncpy(buf, _mount_point, MAXPATHLEN); 166 buf[MAXPATHLEN-1] = '\0'; 167 if (strcmp(cgroup_path,"/") != 0) { 168 buflen = strlen(buf); 169 if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { 170 return; 171 } 172 strncat(buf, cgroup_path, MAXPATHLEN-buflen); 173 buf[MAXPATHLEN-1] = '\0'; 174 } 175 _path = os::strdup(buf); 176 } else { 177 if (strcmp(_root, cgroup_path) == 0) { 178 strncpy(buf, _mount_point, MAXPATHLEN); 179 buf[MAXPATHLEN-1] = '\0'; 180 _path = os::strdup(buf); 181 } else { 182 char *p = strstr(cgroup_path, _root); 183 if (p != NULL && p == _root) { 184 if (strlen(cgroup_path) > strlen(_root)) { 185 int buflen; 186 strncpy(buf, _mount_point, MAXPATHLEN); 187 buf[MAXPATHLEN-1] = '\0'; 188 buflen = strlen(buf); 189 if ((buflen + strlen(cgroup_path) - strlen(_root)) > (MAXPATHLEN-1)) { 190 return; 191 } 192 strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen); 193 buf[MAXPATHLEN-1] = '\0'; 194 _path = os::strdup(buf); 195 } 196 } 197 } 198 } 199 } 200 } 201 202 /* uses_mem_hierarchy 203 * 204 * Return whether or not hierarchical cgroup accounting is being 205 * done. 206 * 207 * return: 208 * A number > 0 if true, or 209 * OSCONTAINER_ERROR for not supported 210 */ 211 jlong CgroupV1MemoryController::uses_mem_hierarchy() { 212 GET_CONTAINER_INFO(jlong, this, "/memory.use_hierarchy", 213 "Use Hierarchy is: " JLONG_FORMAT, JLONG_FORMAT, use_hierarchy); 214 return use_hierarchy; 215 } 216 217 void CgroupV1MemoryController::set_subsystem_path(char *cgroup_path) { 218 CgroupV1Controller::set_subsystem_path(cgroup_path); 219 jlong hierarchy = uses_mem_hierarchy(); 220 if (hierarchy > 0) { 221 set_hierarchical(true); 222 } 223 } 224 225 CgroupSubsystem* CgroupSubsystemFactory::create() { 226 CgroupV1MemoryController* memory = NULL; 227 CgroupV1Controller* cpuset = NULL; 228 CgroupV1Controller* cpu = NULL; 229 CgroupV1Controller* cpuacct = NULL; 230 FILE *mntinfo = NULL; 231 FILE *cgroups = NULL; 232 FILE *cgroup = NULL; 233 char buf[MAXPATHLEN+1]; 234 char tmproot[MAXPATHLEN+1]; 235 char tmpmount[MAXPATHLEN+1]; 236 char *p; 237 bool is_cgroupsV2; 238 // true iff all controllers, memory, cpu, cpuset, cpuacct are enabled 239 // at the kernel level. 240 bool all_controllers_enabled; 241 242 CgroupInfo cg_infos[CG_INFO_LENGTH]; 243 int cpuset_idx = 0; 244 int cpu_idx = 1; 245 int cpuacct_idx = 2; 246 int memory_idx = 3; 247 248 /* 249 * Read /proc/cgroups so as to be able to distinguish cgroups v2 vs cgroups v1. 250 * 251 * For cgroups v1 unified hierarchy, cpu, cpuacct, cpuset, memory controllers 252 * must have non-zero for the hierarchy ID field. 253 */ 254 cgroups = fopen("/proc/cgroups", "r"); 255 if (cgroups == NULL) { 256 log_debug(os, container)("Can't open /proc/cgroups, %s", 257 os::strerror(errno)); 258 return NULL; 259 } 260 261 while ((p = fgets(buf, MAXPATHLEN, cgroups)) != NULL) { 262 char name[MAXPATHLEN+1]; 263 int hierarchy_id; 264 int enabled; 265 266 // Format of /proc/cgroups documented via man 7 cgroups 267 if (sscanf(p, "%s %d %*d %d", name, &hierarchy_id, &enabled) != 3) { 268 continue; 269 } 270 if (strcmp(name, "memory") == 0) { 271 cg_infos[memory_idx]._name = os::strdup(name); 272 cg_infos[memory_idx]._hierarchy_id = hierarchy_id; 273 cg_infos[memory_idx]._enabled = (enabled == 1); 274 } else if (strcmp(name, "cpuset") == 0) { 275 cg_infos[cpuset_idx]._name = os::strdup(name); 276 cg_infos[cpuset_idx]._hierarchy_id = hierarchy_id; 277 cg_infos[cpuset_idx]._enabled = (enabled == 1); 278 } else if (strcmp(name, "cpu") == 0) { 279 cg_infos[cpu_idx]._name = os::strdup(name); 280 cg_infos[cpu_idx]._hierarchy_id = hierarchy_id; 281 cg_infos[cpu_idx]._enabled = (enabled == 1); 282 } else if (strcmp(name, "cpuacct") == 0) { 283 cg_infos[cpuacct_idx]._name = os::strdup(name); 284 cg_infos[cpuacct_idx]._hierarchy_id = hierarchy_id; 285 cg_infos[cpuacct_idx]._enabled = (enabled == 1); 286 } 287 } 288 fclose(cgroups); 289 290 is_cgroupsV2 = true; 291 all_controllers_enabled = true; 292 for (int i = 0; i < CG_INFO_LENGTH; i++) { 293 is_cgroupsV2 = is_cgroupsV2 && cg_infos[i]._hierarchy_id == 0; 294 all_controllers_enabled = all_controllers_enabled && cg_infos[i]._enabled; 295 } 296 297 if (!all_controllers_enabled) { 298 // one or more controllers enabled, disable container support 299 log_debug(os, container)("One or more required controllers not enabled at kernel level."); 300 return NULL; 301 } 302 303 /* 304 * Read /proc/self/cgroup and determine: 305 * - the cgroup path for cgroups v2 or 306 * - on a cgroups v1 system, collect info for mapping 307 * the host mount point to the local one via /proc/self/mountinfo below. 308 */ 309 cgroup = fopen("/proc/self/cgroup", "r"); 310 if (cgroup == NULL) { 311 log_debug(os, container)("Can't open /proc/self/cgroup, %s", 312 os::strerror(errno)); 313 return NULL; 314 } 315 316 while ((p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { 317 char *controllers; 318 char *token; 319 char *hierarchy_id_str; 320 int hierarchy_id; 321 char *cgroup_path; 322 323 hierarchy_id_str = strsep(&p, ":"); 324 hierarchy_id = atoi(hierarchy_id_str); 325 /* Get controllers and base */ 326 controllers = strsep(&p, ":"); 327 cgroup_path = strsep(&p, "\n"); 328 329 if (controllers == NULL) { 330 continue; 331 } 332 333 while (!is_cgroupsV2 && (token = strsep(&controllers, ",")) != NULL) { 334 if (strcmp(token, "memory") == 0) { 335 assert(hierarchy_id == cg_infos[memory_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); 336 cg_infos[memory_idx]._cgroup_path = os::strdup(cgroup_path); 337 } else if (strcmp(token, "cpuset") == 0) { 338 assert(hierarchy_id == cg_infos[cpuset_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); 339 cg_infos[cpuset_idx]._cgroup_path = os::strdup(cgroup_path); 340 } else if (strcmp(token, "cpu") == 0) { 341 assert(hierarchy_id == cg_infos[cpu_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); 342 cg_infos[cpu_idx]._cgroup_path = os::strdup(cgroup_path); 343 } else if (strcmp(token, "cpuacct") == 0) { 344 assert(hierarchy_id == cg_infos[cpuacct_idx]._hierarchy_id, "/proc/cgroups and /proc/self/cgroup hierarchy mismatch"); 345 cg_infos[cpuacct_idx]._cgroup_path = os::strdup(cgroup_path); 346 } 347 } 348 if (is_cgroupsV2) { 349 for (int i = 0; i < CG_INFO_LENGTH; i++) { 350 cg_infos[i]._cgroup_path = os::strdup(cgroup_path); 351 } 352 } 353 } 354 fclose(cgroup); 355 356 if (is_cgroupsV2) { 357 // Find the cgroup2 mount point by reading /proc/self/mountinfo 358 mntinfo = fopen("/proc/self/mountinfo", "r"); 359 if (mntinfo == NULL) { 360 log_debug(os, container)("Can't open /proc/self/mountinfo, %s", 361 os::strerror(errno)); 362 return NULL; 363 } 364 365 char cgroupv2_mount[MAXPATHLEN+1]; 366 char fstype[MAXPATHLEN+1]; 367 bool mount_point_found = false; 368 while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { 369 char *tmp_mount_point = cgroupv2_mount; 370 char *tmp_fs_type = fstype; 371 372 // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt 373 if (sscanf(p, "%*d %*d %*d:%*d %*s %s %*[^-]- %s cgroup2 %*s", tmp_mount_point, tmp_fs_type) == 2) { 374 // we likely have an early match return, be sure we have cgroup2 as fstype 375 if (strcmp("cgroup2", tmp_fs_type) == 0) { 376 mount_point_found = true; 377 break; 378 } 379 } 380 } 381 fclose(mntinfo); 382 if (!mount_point_found) { 383 log_trace(os, container)("Mount point for cgroupv2 not found in /proc/self/mountinfo"); 384 return NULL; 385 } 386 // Cgroups v2 case, we have all the info we need. 387 // Construct the subsystem, free resources and return 388 // Note: any index in cg_infos will do as the path is the same for 389 // all controllers. 390 CgroupController* unified = new CgroupV2Controller(cgroupv2_mount, cg_infos[memory_idx]._cgroup_path); 391 for (int i = 0; i < CG_INFO_LENGTH; i++) { 392 os::free(cg_infos[i]._name); 393 os::free(cg_infos[i]._cgroup_path); 394 } 395 log_debug(os, container)("Detected cgroups v2 unified hierarchy"); 396 return new CgroupV2Subsystem(unified); 397 } 398 399 // What follows is cgroups v1 400 log_debug(os, container)("Detected cgroups hybrid or legacy hierarchy, using cgroups v1 controllers"); 401 402 /* 403 * Find the cgroup mount point for memory and cpuset 404 * by reading /proc/self/mountinfo 405 * 406 * Example for docker: 407 * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory 408 * 409 * Example for host: 410 * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory 411 */ 412 mntinfo = fopen("/proc/self/mountinfo", "r"); 413 if (mntinfo == NULL) { 414 log_debug(os, container)("Can't open /proc/self/mountinfo, %s", 415 os::strerror(errno)); 416 return NULL; 417 } 418 419 while ((p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { 420 char tmpcgroups[MAXPATHLEN+1]; 421 char *cptr = tmpcgroups; 422 char *token; 423 424 // mountinfo format is documented at https://www.kernel.org/doc/Documentation/filesystems/proc.txt 425 if (sscanf(p, "%*d %*d %*d:%*d %s %s %*[^-]- cgroup %*s %s", tmproot, tmpmount, tmpcgroups) != 3) { 426 continue; 427 } 428 while ((token = strsep(&cptr, ",")) != NULL) { 429 if (strcmp(token, "memory") == 0) { 430 memory = new CgroupV1MemoryController(tmproot, tmpmount); 431 } else if (strcmp(token, "cpuset") == 0) { 432 cpuset = new CgroupV1Controller(tmproot, tmpmount); 433 } else if (strcmp(token, "cpu") == 0) { 434 cpu = new CgroupV1Controller(tmproot, tmpmount); 435 } else if (strcmp(token, "cpuacct") == 0) { 436 cpuacct= new CgroupV1Controller(tmproot, tmpmount); 437 } 438 } 439 } 440 441 fclose(mntinfo); 442 443 if (memory == NULL) { 444 log_debug(os, container)("Required cgroup v1 memory subsystem not found"); 445 return NULL; 446 } 447 if (cpuset == NULL) { 448 log_debug(os, container)("Required cgroup v1 cpuset subsystem not found"); 449 return NULL; 450 } 451 if (cpu == NULL) { 452 log_debug(os, container)("Required cgroup v1 cpu subsystem not found"); 453 return NULL; 454 } 455 if (cpuacct == NULL) { 456 log_debug(os, container)("Required cgroup v1 cpuacct subsystem not found"); 457 return NULL; 458 } 459 460 /* 461 * Use info gathered previously from /proc/self/cgroup 462 * and map host mount point to 463 * local one via /proc/self/mountinfo content above 464 * 465 * Docker example: 466 * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 467 * 468 * Host example: 469 * 5:memory:/user.slice 470 * 471 * Construct a path to the process specific memory and cpuset 472 * cgroup directory. 473 * 474 * For a container running under Docker from memory example above 475 * the paths would be: 476 * 477 * /sys/fs/cgroup/memory 478 * 479 * For a Host from memory example above the path would be: 480 * 481 * /sys/fs/cgroup/memory/user.slice 482 * 483 */ 484 for (int i = 0; i < CG_INFO_LENGTH; i++) { 485 CgroupInfo info = cg_infos[i]; 486 if (strcmp(info._name, "memory") == 0) { 487 memory->set_subsystem_path(info._cgroup_path); 488 } else if (strcmp(info._name, "cpuset") == 0) { 489 cpuset->set_subsystem_path(info._cgroup_path); 490 } else if (strcmp(info._name, "cpu") == 0) { 491 cpu->set_subsystem_path(info._cgroup_path); 492 } else if (strcmp(info._name, "cpuacct") == 0) { 493 cpuacct->set_subsystem_path(info._cgroup_path); 494 } 495 } 496 return new CgroupV1Subsystem(cpuset, cpu, cpuacct, memory); 497 } 498 499 /* available_memory 500 * 501 * Return the available memory for this process. 502 * 503 * return: 504 * available memory in bytes or 505 * -1 for unlimited 506 * OSCONTAINER_ERROR for not supported 507 */ 508 jlong CgroupSubsystem::available_memory() { 509 jlong mem_limit, mem_usage, avail_mem; 510 if ((mem_limit = memory_limit_in_bytes()) < 1) { 511 log_debug(os, container)("container memory limit %s: " JLONG_FORMAT ", using host value", 512 mem_limit == OSCONTAINER_ERROR ? "failed" : "unlimited", mem_limit); 513 return mem_limit; // error case 514 } 515 if ((mem_usage = memory_usage_in_bytes()) < 1) { 516 log_debug(os, container)("container memory usage failed: " JLONG_FORMAT ", using host value", mem_usage); 517 return mem_usage; // error case 518 } 519 avail_mem = mem_limit > mem_usage ? mem_limit - mem_usage : 0; 520 log_trace(os)("available container memory: " JLONG_FORMAT, avail_mem); 521 return avail_mem; 522 } 523 524 /* memory_limit_in_bytes 525 * 526 * Return the limit of available memory for this process. 527 * 528 * return: 529 * memory limit in bytes or 530 * -1 for unlimited 531 * OSCONTAINER_ERROR for not supported 532 */ 533 jlong CgroupV1Subsystem::memory_limit_in_bytes() { 534 GET_CONTAINER_INFO(julong, _memory, "/memory.limit_in_bytes", 535 "Memory Limit is: " JULONG_FORMAT, JULONG_FORMAT, memlimit); 536 537 if (memlimit >= _unlimited_memory) { 538 log_trace(os, container)("Non-Hierarchical Memory Limit is: Unlimited"); 539 if (_memory->is_hierarchical()) { 540 const char* matchline = "hierarchical_memory_limit"; 541 const char* format = "%s " JULONG_FORMAT; 542 GET_CONTAINER_INFO_LINE(julong, _memory, "/memory.stat", matchline, 543 "Hierarchical Memory Limit is: " JULONG_FORMAT, format, hier_memlimit) 544 if (hier_memlimit >= _unlimited_memory) { 545 log_trace(os, container)("Hierarchical Memory Limit is: Unlimited"); 546 } else { 547 return (jlong)hier_memlimit; 548 } 549 } 550 return (jlong)-1; 551 } 552 else { 553 return (jlong)memlimit; 554 } 555 } 556 557 jlong CgroupV1Subsystem::memory_and_swap_limit_in_bytes() { 558 GET_CONTAINER_INFO(julong, _memory, "/memory.memsw.limit_in_bytes", 559 "Memory and Swap Limit is: " JULONG_FORMAT, JULONG_FORMAT, memswlimit); 560 if (memswlimit >= _unlimited_memory) { 561 log_trace(os, container)("Non-Hierarchical Memory and Swap Limit is: Unlimited"); 562 if (_memory->is_hierarchical()) { 563 const char* matchline = "hierarchical_memsw_limit"; 564 const char* format = "%s " JULONG_FORMAT; 565 GET_CONTAINER_INFO_LINE(julong, _memory, "/memory.stat", matchline, 566 "Hierarchical Memory and Swap Limit is : " JULONG_FORMAT, format, hier_memlimit) 567 if (hier_memlimit >= _unlimited_memory) { 568 log_trace(os, container)("Hierarchical Memory and Swap Limit is: Unlimited"); 569 } else { 570 return (jlong)hier_memlimit; 571 } 572 } 573 return (jlong)-1; 574 } else { 575 return (jlong)memswlimit; 576 } 577 } 578 579 jlong CgroupV1Subsystem::memory_soft_limit_in_bytes() { 580 GET_CONTAINER_INFO(julong, _memory, "/memory.soft_limit_in_bytes", 581 "Memory Soft Limit is: " JULONG_FORMAT, JULONG_FORMAT, memsoftlimit); 582 if (memsoftlimit >= _unlimited_memory) { 583 log_trace(os, container)("Memory Soft Limit is: Unlimited"); 584 return (jlong)-1; 585 } else { 586 return (jlong)memsoftlimit; 587 } 588 } 589 590 /* memory_usage_in_bytes 591 * 592 * Return the amount of used memory for this process. 593 * 594 * return: 595 * memory usage in bytes or 596 * -1 for unlimited 597 * OSCONTAINER_ERROR for not supported 598 */ 599 jlong CgroupV1Subsystem::memory_usage_in_bytes() { 600 GET_CONTAINER_INFO(jlong, _memory, "/memory.usage_in_bytes", 601 "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage); 602 return memusage; 603 } 604 605 /* memory_max_usage_in_bytes 606 * 607 * Return the maximum amount of used memory for this process. 608 * 609 * return: 610 * max memory usage in bytes or 611 * OSCONTAINER_ERROR for not supported 612 */ 613 jlong CgroupV1Subsystem::memory_max_usage_in_bytes() { 614 GET_CONTAINER_INFO(jlong, _memory, "/memory.max_usage_in_bytes", 615 "Maximum Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memmaxusage); 616 return memmaxusage; 617 } 618 619 char * CgroupV1Subsystem::cpu_cpuset_cpus() { 620 GET_CONTAINER_INFO_CPTR(cptr, _cpuset, "/cpuset.cpus", 621 "cpuset.cpus is: %s", "%1023s", cpus, 1024); 622 return os::strdup(cpus); 623 } 624 625 char * CgroupV1Subsystem::cpu_cpuset_memory_nodes() { 626 GET_CONTAINER_INFO_CPTR(cptr, _cpuset, "/cpuset.mems", 627 "cpuset.mems is: %s", "%1023s", mems, 1024); 628 return os::strdup(mems); 629 } 630 631 /* cpu_quota 632 * 633 * Return the number of milliseconds per period 634 * process is guaranteed to run. 635 * 636 * return: 637 * quota time in milliseconds 638 * -1 for no quota 639 * OSCONTAINER_ERROR for not supported 640 */ 641 int CgroupV1Subsystem::cpu_quota() { 642 GET_CONTAINER_INFO(int, _cpu, "/cpu.cfs_quota_us", 643 "CPU Quota is: %d", "%d", quota); 644 return quota; 645 } 646 647 int CgroupV1Subsystem::cpu_period() { 648 GET_CONTAINER_INFO(int, _cpu, "/cpu.cfs_period_us", 649 "CPU Period is: %d", "%d", period); 650 return period; 651 } 652 653 /* cpu_shares 654 * 655 * Return the amount of cpu shares available to the process 656 * 657 * return: 658 * Share number (typically a number relative to 1024) 659 * (2048 typically expresses 2 CPUs worth of processing) 660 * -1 for no share setup 661 * OSCONTAINER_ERROR for not supported 662 */ 663 int CgroupV1Subsystem::cpu_shares() { 664 GET_CONTAINER_INFO(int, _cpu, "/cpu.shares", 665 "CPU Shares is: %d", "%d", shares); 666 // Convert 1024 to no shares setup 667 if (shares == 1024) return -1; 668 669 return shares; 670 } 671 672 /* active_processor_count 673 * 674 * Calculate an appropriate number of active processors for the 675 * VM to use based on these three inputs. 676 * 677 * cpu affinity 678 * cgroup cpu quota & cpu period 679 * cgroup cpu shares 680 * 681 * Algorithm: 682 * 683 * Determine the number of available CPUs from sched_getaffinity 684 * 685 * If user specified a quota (quota != -1), calculate the number of 686 * required CPUs by dividing quota by period. 687 * 688 * If shares are in effect (shares != -1), calculate the number 689 * of CPUs required for the shares by dividing the share value 690 * by PER_CPU_SHARES. 691 * 692 * All results of division are rounded up to the next whole number. 693 * 694 * If neither shares or quotas have been specified, return the 695 * number of active processors in the system. 696 * 697 * If both shares and quotas have been specified, the results are 698 * based on the flag PreferContainerQuotaForCPUCount. If true, 699 * return the quota value. If false return the smallest value 700 * between shares or quotas. 701 * 702 * If shares and/or quotas have been specified, the resulting number 703 * returned will never exceed the number of active processors. 704 * 705 * return: 706 * number of CPUs 707 */ 708 int CgroupSubsystem::active_processor_count(int physical_proc_count) { 709 int quota_count = 0, share_count = 0; 710 int cpu_count, limit_count; 711 int result; 712 713 cpu_count = limit_count = physical_proc_count; 714 int quota = cpu_quota(); 715 int period = cpu_period(); 716 int share = cpu_shares(); 717 718 if (quota > -1 && period > 0) { 719 quota_count = ceilf((float)quota / (float)period); 720 log_trace(os, container)("CPU Quota count based on quota/period: %d", quota_count); 721 } 722 if (share > -1) { 723 share_count = ceilf((float)share / (float)PER_CPU_SHARES); 724 log_trace(os, container)("CPU Share count based on shares: %d", share_count); 725 } 726 727 // If both shares and quotas are setup results depend 728 // on flag PreferContainerQuotaForCPUCount. 729 // If true, limit CPU count to quota 730 // If false, use minimum of shares and quotas 731 if (quota_count !=0 && share_count != 0) { 732 if (PreferContainerQuotaForCPUCount) { 733 limit_count = quota_count; 734 } else { 735 limit_count = MIN2(quota_count, share_count); 736 } 737 } else if (quota_count != 0) { 738 limit_count = quota_count; 739 } else if (share_count != 0) { 740 limit_count = share_count; 741 } 742 743 result = MIN2(cpu_count, limit_count); 744 log_trace(os, container)("OSContainer::active_processor_count: %d", result); 745 return result; 746 } 747 748 void CgroupSubsystem::print_container_info(outputStream* st, int physical_proc_count) { 749 st->print("container (cgroup) information:\n"); 750 751 const char *p_ct = container_type(); 752 st->print("container_type: %s\n", p_ct != NULL ? p_ct : "not supported"); 753 754 char *p = cpu_cpuset_cpus(); 755 st->print("cpu_cpuset_cpus: %s\n", p != NULL ? p : "not supported"); 756 os::free(p); 757 758 p = cpu_cpuset_memory_nodes(); 759 st->print("cpu_memory_nodes: %s\n", p != NULL ? p : "not supported"); 760 os::free(p); 761 762 int i = active_processor_count(physical_proc_count); 763 st->print("active_processor_count: "); 764 if (i > 0) { 765 st->print("%d\n", i); 766 } else { 767 st->print("not supported\n"); 768 } 769 770 i = cpu_quota(); 771 st->print("cpu_quota: "); 772 if (i > 0) { 773 st->print("%d\n", i); 774 } else { 775 st->print("%s\n", i == OSCONTAINER_ERROR ? "not supported" : "no quota"); 776 } 777 778 i = cpu_period(); 779 st->print("cpu_period: "); 780 if (i > 0) { 781 st->print("%d\n", i); 782 } else { 783 st->print("%s\n", i == OSCONTAINER_ERROR ? "not supported" : "no period"); 784 } 785 786 i = cpu_shares(); 787 st->print("cpu_shares: "); 788 if (i > 0) { 789 st->print("%d\n", i); 790 } else { 791 st->print("%s\n", i == OSCONTAINER_ERROR ? "not supported" : "no shares"); 792 } 793 794 jlong j = memory_limit_in_bytes(); 795 st->print("memory_limit_in_bytes: "); 796 if (j > 0) { 797 st->print(JLONG_FORMAT "\n", j); 798 } else { 799 st->print("%s\n", j == OSCONTAINER_ERROR ? "not supported" : "unlimited"); 800 } 801 802 j = memory_and_swap_limit_in_bytes(); 803 st->print("memory_and_swap_limit_in_bytes: "); 804 if (j > 0) { 805 st->print(JLONG_FORMAT "\n", j); 806 } else { 807 st->print("%s\n", j == OSCONTAINER_ERROR ? "not supported" : "unlimited"); 808 } 809 810 j = memory_soft_limit_in_bytes(); 811 st->print("memory_soft_limit_in_bytes: "); 812 if (j > 0) { 813 st->print(JLONG_FORMAT "\n", j); 814 } else { 815 st->print("%s\n", j == OSCONTAINER_ERROR ? "not supported" : "unlimited"); 816 } 817 818 j = memory_usage_in_bytes(); 819 st->print("memory_usage_in_bytes: "); 820 if (j > 0) { 821 st->print(JLONG_FORMAT "\n", j); 822 } else { 823 st->print("%s\n", j == OSCONTAINER_ERROR ? "not supported" : "unlimited"); 824 } 825 826 j = memory_max_usage_in_bytes(); 827 st->print("memory_max_usage_in_bytes: "); 828 if (j > 0) { 829 st->print(JLONG_FORMAT "\n", j); 830 } else { 831 st->print("%s\n", j == OSCONTAINER_ERROR ? "not supported" : "unlimited"); 832 } 833 st->cr(); 834 } 835 836 /* cpu_shares 837 * 838 * Return the amount of cpu shares available to the process 839 * 840 * return: 841 * Share number (typically a number relative to 1024) 842 * (2048 typically expresses 2 CPUs worth of processing) 843 * -1 for no share setup 844 * OSCONTAINER_ERROR for not supported 845 */ 846 int CgroupV2Subsystem::cpu_shares() { 847 GET_CONTAINER_INFO(int, _unified, "/cpu.weight", 848 "CPU Shares is: %d", "%d", shares); 849 // Convert default value of 100 to no shares setup 850 if (shares == 100) return -1; 851 852 // CPU shares (OCI) value needs to get translated into 853 // a proper Cgroups v2 value. See: 854 // https://github.com/containers/crun/blob/master/crun.1.md#cpu-controller 855 // 856 // Use the inverse of (x == OCI value, y == cgroupsv2 value): 857 // ((262142 * y - 1)/9999) + 2 = x 858 // 859 int x = 262142 * shares - 1; 860 double frac = x/9999.0; 861 x = ((int)frac) + 2; 862 log_trace(os, container)("Scaled CPU Shares value is: %d", x); 863 // Since the scaled value is not precise, return the closest 864 // multiple of PER_CPU_SHARES for a more conservative mapping 865 if ( x <= PER_CPU_SHARES ) { 866 // will always map to 1 CPU 867 return x; 868 } 869 int f = x/PER_CPU_SHARES; 870 int lower_multiple = f * PER_CPU_SHARES; 871 int upper_multiple = (f + 1) * PER_CPU_SHARES; 872 int distance_lower = MAX2(lower_multiple, x) - MIN2(lower_multiple, x); 873 int distance_upper = MAX2(upper_multiple, x) - MIN2(upper_multiple, x); 874 x = distance_lower <= distance_upper ? lower_multiple : upper_multiple; 875 log_trace(os, container)("Closest multiple of %d of the CPU Shares value is: %d", PER_CPU_SHARES, x); 876 return x; 877 } 878 879 /* cpu_quota 880 * 881 * Return the number of milliseconds per period 882 * process is guaranteed to run. 883 * 884 * return: 885 * quota time in milliseconds 886 * -1 for no quota 887 * OSCONTAINER_ERROR for not supported 888 */ 889 int CgroupV2Subsystem::cpu_quota() { 890 char * cpu_quota_str = cpu_quota_val(); 891 return (int)limit_from_str(cpu_quota_str); 892 } 893 894 char * CgroupV2Subsystem::cpu_cpuset_cpus() { 895 GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpuset.cpus", 896 "cpuset.cpus is: %s", "%1023s", cpus, 1024); 897 if (cpus == NULL) { 898 return NULL; 899 } 900 return os::strdup(cpus); 901 } 902 903 char* CgroupV2Subsystem::cpu_quota_val() { 904 GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpu.max", 905 "CPU Quota is: %s", "%s %*d", quota, 1024); 906 if (quota == NULL) { 907 return NULL; 908 } 909 return os::strdup(quota); 910 } 911 912 char * CgroupV2Subsystem::cpu_cpuset_memory_nodes() { 913 GET_CONTAINER_INFO_CPTR(cptr, _unified, "/cpuset.mems", 914 "cpuset.mems is: %s", "%1023s", mems, 1024); 915 if (mems == NULL) { 916 return NULL; 917 } 918 return os::strdup(mems); 919 } 920 921 int CgroupV2Subsystem::cpu_period() { 922 GET_CONTAINER_INFO(int, _unified, "/cpu.max", 923 "CPU Period is: %d", "%*s %d", period); 924 return period; 925 } 926 927 /* memory_usage_in_bytes 928 * 929 * Return the amount of used memory used by this cgroup and decendents 930 * 931 * return: 932 * memory usage in bytes or 933 * -1 for unlimited 934 * OSCONTAINER_ERROR for not supported 935 */ 936 jlong CgroupV2Subsystem::memory_usage_in_bytes() { 937 GET_CONTAINER_INFO(jlong, _unified, "/memory.current", 938 "Memory Usage is: " JLONG_FORMAT, JLONG_FORMAT, memusage); 939 return memusage; 940 } 941 942 jlong CgroupV2Subsystem::memory_soft_limit_in_bytes() { 943 char* mem_soft_limit_str = mem_soft_limit_val(); 944 return limit_from_str(mem_soft_limit_str); 945 } 946 947 jlong CgroupV2Subsystem::memory_max_usage_in_bytes() { 948 return OSCONTAINER_ERROR; // Not supported for Cgroups V2. 949 } 950 951 char* CgroupV2Subsystem::mem_soft_limit_val() { 952 GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.high", 953 "Memory Soft Limit is: %s", "%s", mem_soft_limit_str, 1024); 954 if (mem_soft_limit_str == NULL) { 955 return NULL; 956 } 957 return os::strdup(mem_soft_limit_str); 958 } 959 960 jlong CgroupV2Subsystem::memory_and_swap_limit_in_bytes() { 961 char* mem_swp_limit_str = mem_swp_limit_val(); 962 return limit_from_str(mem_swp_limit_str); 963 } 964 965 char* CgroupV2Subsystem::mem_swp_limit_val() { 966 GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.swap.max", 967 "Memory and Swap Limit is: %s", "%s", mem_swp_limit_str, 1024); 968 if (mem_swp_limit_str == NULL) { 969 return NULL; 970 } 971 return os::strdup(mem_swp_limit_str); 972 } 973 974 /* memory_limit_in_bytes 975 * 976 * Return the limit of available memory for this process. 977 * 978 * return: 979 * memory limit in bytes or 980 * -1 for unlimited, OSCONTAINER_ERROR for an error 981 */ 982 jlong CgroupV2Subsystem::memory_limit_in_bytes() { 983 char * mem_limit_str = mem_limit_val(); 984 return limit_from_str(mem_limit_str); 985 } 986 987 jlong CgroupV2Subsystem::limit_from_str(char* limit_str) { 988 if (limit_str == NULL) { 989 return OSCONTAINER_ERROR; 990 } 991 // Unlimited memory in Cgroups V2 is the literal string 'max' 992 if (strcmp("max", limit_str) == 0) { 993 os::free(limit_str); 994 return (jlong)-1; 995 } 996 julong limit; 997 if (sscanf(limit_str, JULONG_FORMAT, &limit) != 1) { 998 os::free(limit_str); 999 return OSCONTAINER_ERROR; 1000 } 1001 os::free(limit_str); 1002 return (jlong)limit; 1003 } 1004 1005 char* CgroupV2Subsystem::mem_limit_val() { 1006 GET_CONTAINER_INFO_CPTR(cptr, _unified, "/memory.max", 1007 "Memory Limit is: %s", "%s", mem_limit_str, 1024); 1008 if (mem_limit_str == NULL) { 1009 return NULL; 1010 } 1011 return os::strdup(mem_limit_str); 1012 } 1013 1014 char* CgroupV2Controller::construct_path(char* mount_path, char *cgroup_path) { 1015 char buf[MAXPATHLEN+1]; 1016 int buflen; 1017 strncpy(buf, mount_path, MAXPATHLEN); 1018 buf[MAXPATHLEN] = '\0'; 1019 buflen = strlen(buf); 1020 if ((buflen + strlen(cgroup_path)) > MAXPATHLEN) { 1021 return NULL; 1022 } 1023 strncat(buf, cgroup_path, MAXPATHLEN-buflen); 1024 buf[MAXPATHLEN] = '\0'; 1025 return os::strdup(buf); 1026 } 1027