1 /* 2 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include <string.h> 26 #include <math.h> 27 #include "utilities/globalDefinitions.hpp" 28 #include "memory/allocation.hpp" 29 #include "runtime/os.hpp" 30 #include "logging/log.hpp" 31 #include "osContainer_linux.hpp" 32 33 /* 34 * Warning: Some linux distros use 0x7FFFFFFFFFFFF000 35 * and others use 0x7FFFFFFFFFFFFFFF for unlimited. 36 */ 37 #define UNLIMITED_MEM CONST64(0x7FFFFFFFFFFFF000) 38 39 #define PER_CPU_SHARES 1024 40 41 bool OSContainer::_is_initialized = false; 42 bool OSContainer::_is_containerized = false; 43 44 class CgroupSubsystem: CHeapObj<mtInternal> { 45 friend class OSContainer; 46 47 private: 48 /* mountinfo contents */ 49 char *_root; 50 char *_mount_point; 51 52 /* Constructed subsystem directory */ 53 char *_path; 54 55 public: 56 CgroupSubsystem(char *root, char *mountpoint) { 57 _root = os::strdup(root); 58 _mount_point = os::strdup(mountpoint); 59 _path = NULL; 60 } 61 62 /* 63 * Set directory to subsystem specific files based 64 * on the contents of the mountinfo and cgroup files. 65 */ 66 void set_subsystem_path(char *cgroup_path) { 67 char buf[MAXPATHLEN+1]; 68 if (_root != NULL && cgroup_path != NULL) { 69 if (strcmp(_root, "/") == 0) { 70 strncpy(buf, _mount_point, MAXPATHLEN); 71 buf[MAXPATHLEN-1] = '\0'; 72 if (strcmp(cgroup_path,"/") != 0) { 73 int buflen = strlen(buf); 74 if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { 75 return; 76 } 77 strncat(buf, cgroup_path, MAXPATHLEN-buflen); 78 buf[MAXPATHLEN-1] = '\0'; 79 } 80 _path = os::strdup(buf); 81 } else { 82 if (strcmp(_root, cgroup_path) == 0) { 83 strncpy(buf, _mount_point, MAXPATHLEN); 84 buf[MAXPATHLEN-1] = '\0'; 85 _path = os::strdup(buf); 86 } else { 87 char *p = strstr(_root, cgroup_path); 88 if (p != NULL && p == _root) { 89 if (strlen(cgroup_path) > strlen(_root)) { 90 strncpy(buf, _mount_point, MAXPATHLEN); 91 buf[MAXPATHLEN-1] = '\0'; 92 int buflen = strlen(buf); 93 if ((buflen + strlen(cgroup_path)) > (MAXPATHLEN-1)) { 94 return; 95 } 96 strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-buflen); 97 buf[MAXPATHLEN-1] = '\0'; 98 _path = os::strdup(buf); 99 } 100 } 101 } 102 } 103 } 104 } 105 106 char *subsystem_path() { return _path; } 107 }; 108 109 // CgroupSubsystem *cgroupv2; 110 CgroupSubsystem* memory = NULL; 111 CgroupSubsystem* cpuset = NULL; 112 CgroupSubsystem* cpu = NULL; 113 CgroupSubsystem* cpuacct = NULL; 114 115 typedef char * cptr; 116 117 #define GEN_CONTAINER_GET_INFO(return_type, scan_fmt, isstr) \ 118 int subsystem_file_contents_##return_type(CgroupSubsystem* c, \ 119 const char *filename, \ 120 return_type *returnval) { \ 121 FILE *fp = NULL; \ 122 char *p; \ 123 char buf[MAXPATHLEN+1]; \ 124 \ 125 if (c != NULL && c->subsystem_path() != NULL) { \ 126 strncpy(buf, c->subsystem_path(), MAXPATHLEN); \ 127 buf[MAXPATHLEN-1] = '\0'; \ 128 int buflen = strlen(buf); \ 129 if ((buflen + strlen(filename)) > (MAXPATHLEN-1)) { \ 130 return OSCONTAINER_ERROR; \ 131 } \ 132 strncat(buf, filename, MAXPATHLEN-buflen); \ 133 log_trace(os, container)("Path to %s is %s", filename, buf); \ 134 fp = fopen(buf, "r"); \ 135 if (fp != NULL) { \ 136 p = fgets(buf, MAXPATHLEN, fp); \ 137 if (p != NULL) { \ 138 if (isstr) { \ 139 *(char **)returnval = os::strdup(p); \ 140 fclose(fp); \ 141 return 0; \ 142 } else { \ 143 return_type value; \ 144 int matched = sscanf(p, scan_fmt, &value); \ 145 if (matched == 1) { \ 146 *returnval = value; \ 147 fclose(fp); \ 148 return 0; \ 149 } else { \ 150 log_debug(os, container)("Type %s not found in file %s", \ 151 scan_fmt , buf); \ 152 } \ 153 } \ 154 } else { \ 155 log_debug(os, container)("Empty file %s", buf); \ 156 } \ 157 } else { \ 158 log_debug(os, container)("file not found %s", buf); \ 159 } \ 160 } \ 161 if (fp != NULL) \ 162 fclose(fp); \ 163 return OSCONTAINER_ERROR; \ 164 } 165 166 167 GEN_CONTAINER_GET_INFO(int, "%d", false) 168 GEN_CONTAINER_GET_INFO(jlong, JLONG_FORMAT, false) 169 GEN_CONTAINER_GET_INFO(cptr, "%p", true) 170 171 #define GET_CONTAINER_INFO(return_type, isstring, subsystem, \ 172 filename, logstring, variable) \ 173 return_type variable; \ 174 { \ 175 int err; \ 176 err = subsystem_file_contents_##return_type(subsystem, \ 177 filename, \ 178 &variable); \ 179 if (err != 0) { \ 180 log_debug(os, container)("Error reading %s", filename); \ 181 return isstring ? (return_type) NULL : \ 182 (return_type) OSCONTAINER_ERROR; \ 183 } \ 184 log_trace(os, container)(logstring, variable); \ 185 } 186 187 /* init 188 * 189 * Initialize the container support and determine if 190 * we are running under cgroup control. 191 */ 192 void OSContainer::init() { 193 int mountid; 194 int parentid; 195 int major; 196 int minor; 197 FILE *mntinfo = NULL; 198 FILE *cgroup = NULL; 199 char buf[MAXPATHLEN+1]; 200 char tmproot[MAXPATHLEN+1]; 201 char tmpmount[MAXPATHLEN+1]; 202 char tmpbase[MAXPATHLEN+1]; 203 char *p; 204 jlong mem_limit; 205 206 assert(!_is_initialized, "Initializing OSContainer more than once"); 207 208 _is_initialized = true; 209 _is_containerized = false; 210 211 log_trace(os, container)("OSContainer::init: Initializing Container Support"); 212 if (!UseContainerSupport) { 213 log_trace(os, container)("Container Support not enabled"); 214 return; 215 } 216 217 /* 218 * Find the cgroup mount point for memory and cpuset 219 * by reading /proc/self/mountinfo 220 * 221 * Example for docker: 222 * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory 223 * 224 * Example for host: 225 * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory 226 */ 227 mntinfo = fopen("/proc/self/mountinfo", "r"); 228 if (mntinfo == NULL) { 229 log_debug(os, container)("Can't locate /proc/self/mountinfo"); 230 return; 231 } 232 233 while ( (p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { 234 // Look for the filesystem type and see if it's cgroup 235 char fstype[MAXPATHLEN+1]; 236 fstype[0] = '\0'; 237 char *s = strstr(p, " - "); 238 if (s != NULL && 239 sscanf(s, " - %s", fstype) == 1 && 240 strcmp(fstype, "cgroup") == 0) { 241 242 if (strstr(p, "memory") != NULL) { 243 int matched = sscanf(p, "%d %d %d:%d %s %s", 244 &mountid, 245 &parentid, 246 &major, 247 &minor, 248 tmproot, 249 tmpmount); 250 if (matched == 6) { 251 memory = new CgroupSubsystem(tmproot, tmpmount); 252 } 253 else 254 log_debug(os, container)("Incompatible str containing cgroup and memory: %s", p); 255 } else if (strstr(p, "cpuset") != NULL) { 256 int matched = sscanf(p, "%d %d %d:%d %s %s", 257 &mountid, 258 &parentid, 259 &major, 260 &minor, 261 tmproot, 262 tmpmount); 263 if (matched == 6) { 264 cpuset = new CgroupSubsystem(tmproot, tmpmount); 265 } 266 else { 267 log_debug(os, container)("Incompatible str containing cgroup and cpuset: %s", p); 268 } 269 } else if (strstr(p, "cpu,cpuacct") != NULL) { 270 int matched = sscanf(p, "%d %d %d:%d %s %s", 271 &mountid, 272 &parentid, 273 &major, 274 &minor, 275 tmproot, 276 tmpmount); 277 if (matched == 6) { 278 cpu = new CgroupSubsystem(tmproot, tmpmount); 279 cpuacct = new CgroupSubsystem(tmproot, tmpmount); 280 } 281 else { 282 log_debug(os, container)("Incompatible str containing cgroup and cpu,cpuacct: %s", p); 283 } 284 } else if (strstr(p, "cpuacct") != NULL) { 285 int matched = sscanf(p, "%d %d %d:%d %s %s", 286 &mountid, 287 &parentid, 288 &major, 289 &minor, 290 tmproot, 291 tmpmount); 292 if (matched == 6) { 293 cpuacct = new CgroupSubsystem(tmproot, tmpmount); 294 } 295 else { 296 log_debug(os, container)("Incompatible str containing cgroup and cpuacct: %s", p); 297 } 298 } else if (strstr(p, "cpu") != NULL) { 299 int matched = sscanf(p, "%d %d %d:%d %s %s", 300 &mountid, 301 &parentid, 302 &major, 303 &minor, 304 tmproot, 305 tmpmount); 306 if (matched == 6) { 307 cpu = new CgroupSubsystem(tmproot, tmpmount); 308 } 309 else { 310 log_debug(os, container)("Incompatible str containing cgroup and cpu: %s", p); 311 } 312 } 313 } 314 } 315 316 if (mntinfo != NULL) fclose(mntinfo); 317 318 /* 319 * Read /proc/self/cgroup and map host mount point to 320 * local one via /proc/self/mountinfo content above 321 * 322 * Docker example: 323 * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 324 * 325 * Host example: 326 * 5:memory:/user.slice 327 * 328 * Construct a path to the process specific memory and cpuset 329 * cgroup directory. 330 * 331 * For a container running under Docker from memory example above 332 * the paths would be: 333 * 334 * /sys/fs/cgroup/memory 335 * 336 * For a Host from memory example above the path would be: 337 * 338 * /sys/fs/cgroup/memory/user.slice 339 * 340 */ 341 cgroup = fopen("/proc/self/cgroup", "r"); 342 if (cgroup == NULL) { 343 log_debug(os, container)("Can't locate /proc/self/cgroup"); 344 return; 345 } 346 347 while ( (p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { 348 int cgno; 349 int matched; 350 char *controller; 351 char *base; 352 353 /* Skip cgroup number */ 354 strsep(&p, ":"); 355 /* Get controller and base */ 356 controller = strsep(&p, ":"); 357 base = strsep(&p, "\n"); 358 359 if (controller != NULL) { 360 if (strstr(controller, "memory") != NULL) { 361 memory->set_subsystem_path(base); 362 } else if (strstr(controller, "cpuset") != NULL) { 363 cpuset->set_subsystem_path(base); 364 } else if (strstr(controller, "cpu,cpuacct") != NULL) { 365 cpu->set_subsystem_path(base); 366 cpuacct->set_subsystem_path(base); 367 } else if (strstr(controller, "cpuacct") != NULL) { 368 cpuacct->set_subsystem_path(base); 369 } else if (strstr(controller, "cpu") != NULL) { 370 cpu->set_subsystem_path(base); 371 } 372 } 373 } 374 375 if (cgroup != NULL) fclose(cgroup); 376 377 if (memory == NULL || cpuset == NULL || cpu == NULL) { 378 log_debug(os, container)("Required cgroup subsystems not found"); 379 return; 380 } 381 382 // We need to update the amount of physical memory now that 383 // command line arguments have been processed. 384 if ((mem_limit = memory_limit_in_bytes()) > 0) { 385 os::Linux::set_physical_memory(mem_limit); 386 } 387 388 _is_containerized = true; 389 } 390 391 char * OSContainer::container_type() { 392 if (is_containerized()) { 393 return (char *)"cgroupv1"; 394 } else { 395 return NULL; 396 } 397 } 398 399 400 /* memory_limit_in_bytes 401 * 402 * Return the limit of available memory for this process. 403 * 404 * return: 405 * memory limit in bytes or 406 * -1 for unlimited 407 * OSCONTAINER_ERROR for not supported 408 */ 409 jlong OSContainer::memory_limit_in_bytes() { 410 GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.limit_in_bytes", 411 "Memory Limit is: " JLONG_FORMAT, memlimit); 412 413 if (memlimit >= UNLIMITED_MEM) { 414 log_trace(os, container)("Memory Limit is: Unlimited"); 415 return (jlong)-1; 416 } 417 else { 418 return memlimit; 419 } 420 } 421 422 jlong OSContainer::memory_and_swap_limit_in_bytes() { 423 GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.memsw.limit_in_bytes", 424 "Memory and Swap Limit is: " JLONG_FORMAT, memswlimit); 425 if (memswlimit >= UNLIMITED_MEM) { 426 log_trace(os, container)("Memory and Swap Limit is: Unlimited"); 427 return (jlong)-1; 428 } else { 429 return memswlimit; 430 } 431 } 432 433 jlong OSContainer::memory_soft_limit_in_bytes() { 434 GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.soft_limit_in_bytes", 435 "Memory Soft Limit is: " JLONG_FORMAT, memsoftlimit); 436 if (memsoftlimit >= UNLIMITED_MEM) { 437 log_trace(os, container)("Memory Soft Limit is: Unlimited"); 438 return (jlong)-1; 439 } else { 440 return memsoftlimit; 441 } 442 } 443 444 /* memory_usage_in_bytes 445 * 446 * Return the amount of used memory for this process. 447 * 448 * return: 449 * memory usage in bytes or 450 * -1 for unlimited 451 * OSCONTAINER_ERROR for not supported 452 */ 453 jlong OSContainer::memory_usage_in_bytes() { 454 GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.usage_in_bytes", 455 "Memory Usage is: " JLONG_FORMAT, memusage); 456 return memusage; 457 } 458 459 /* memory_max_usage_in_bytes 460 * 461 * Return the maximum amount of used memory for this process. 462 * 463 * return: 464 * max memory usage in bytes or 465 * OSCONTAINER_ERROR for not supported 466 */ 467 jlong OSContainer::memory_max_usage_in_bytes() { 468 GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.max_usage_in_bytes", 469 "Maximu, Memory Usage is: " JLONG_FORMAT, memmaxusage); 470 return memmaxusage; 471 } 472 473 /* active_processor_count 474 * 475 * Calculate an appropriate number of active processors for the 476 * VM to use based on these three cgroup options. 477 * 478 * cpu affinity 479 * cpu quota & cpu period 480 * cpu shares 481 * 482 * Algorithm: 483 * 484 * Determine the number of available CPUs from sched_getaffinity 485 * 486 * If user specified a quota (quota != -1), calculate the number of 487 * required CPUs by dividing quota by period. 488 * 489 * If shares are in effect (shares != -1), calculate the number 490 * of cpus required for the shares by dividing the share value 491 * by PER_CPU_SHARES. 492 * 493 * All results of division are rounded up to the next whole number. 494 * 495 * Return the smaller number from the three different settings. 496 * 497 * return: 498 * number of cpus 499 * OSCONTAINER_ERROR if failure occured during extract of cpuset info 500 */ 501 int OSContainer::active_processor_count() { 502 int cpu_count, share_count, quota_count; 503 int share, quota, period; 504 int result; 505 506 cpu_count = os::Linux::active_processor_count(); 507 508 share = cpu_shares(); 509 if (share > -1) { 510 share_count = ceilf((float)share / (float)PER_CPU_SHARES); 511 log_trace(os, container)("cpu_share count: %d", share_count); 512 } else { 513 share_count = cpu_count; 514 } 515 516 quota = cpu_quota(); 517 period = cpu_period(); 518 if (quota > -1 && period > 0) { 519 quota_count = ceilf((float)quota / (float)period); 520 log_trace(os, container)("quota_count: %d", quota_count); 521 } else { 522 quota_count = cpu_count; 523 } 524 525 result = MIN2(cpu_count, MIN2(share_count, quota_count)); 526 log_trace(os, container)("OSContainer::active_processor_count: %d", result); 527 return result; 528 } 529 530 char * OSContainer::cpu_cpuset_cpus() { 531 GET_CONTAINER_INFO(cptr, true, cpuset, (char *)"/cpuset.cpus", 532 "cpuset.cpus is: %s", cpus); 533 return cpus; 534 } 535 536 char * OSContainer::cpu_cpuset_memory_nodes() { 537 GET_CONTAINER_INFO(cptr, true, cpuset, (char *)"/cpuset.mems", 538 "cpuset.mems is: %s", mems); 539 return mems; 540 } 541 542 /* cpu_quota 543 * 544 * Return the number of milliseconds per period 545 * process is guaranteed to run. 546 * 547 * return: 548 * quota time in milliseconds 549 * -1 for no quota 550 * OSCONTAINER_ERROR for not supported 551 */ 552 int OSContainer::cpu_quota() { 553 GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.cfs_quota_us", 554 "CPU Quota is: %d", quota); 555 return quota; 556 } 557 558 int OSContainer::cpu_period() { 559 GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.cfs_period_us", 560 "CPU Period is: %d", period); 561 return period; 562 } 563 564 /* cpu_shares 565 * 566 * Return the amount of cpu shares available to the process 567 * 568 * return: 569 * Share number (typically a number relative to 1024) 570 * (2048 typically expresses 2 CPUs worth of processing) 571 * -1 for no share setup 572 * OSCONTAINER_ERROR for not supported 573 */ 574 int OSContainer::cpu_shares() { 575 GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.shares", 576 "CPU Shares is: %d", shares); 577 // Convert 1024 to no shares setup 578 if (shares == 1024) return -1; 579 580 return shares; 581 } 582