1 /* 2 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include <string.h> 26 #include <math.h> 27 #include "utilities/globalDefinitions.hpp" 28 #include "memory/allocation.hpp" 29 #include "runtime/os.hpp" 30 #include "logging/log.hpp" 31 #include "osContainer_linux.hpp" 32 33 /* 34 * Warning: Some linux distros use 0x7FFFFFFFFFFFF000 35 * and others use 0x7FFFFFFFFFFFFFFF for unlimited. 36 */ 37 #define UNLIMITED_MEM CONST64(0x7FFFFFFFFFFFF000) 38 39 #define PER_CPU_SHARES 1024 40 41 bool OSContainer::_is_initialized = false; 42 bool OSContainer::_is_containerized = false; 43 44 class CgroupSubsystem: CHeapObj<mtInternal> { 45 friend class OSContainer; 46 47 private: 48 /* mountinfo contents */ 49 char *_root; 50 char *_mount_point; 51 52 /* Constructed subsystem directory */ 53 char *_path; 54 55 public: 56 CgroupSubsystem(char *root, char *mountpoint) { 57 _root = os::strdup(root); 58 _mount_point = os::strdup(mountpoint); 59 _path = NULL; 60 } 61 62 /* 63 * Set directory to subsystem specific files based 64 * on the contents of the mountinfo and cgroup files. 65 */ 66 void set_subsystem_path(char *cgroup_path) { 67 char buf[MAXPATHLEN+1]; 68 if (_root != NULL && cgroup_path != NULL) { 69 if (strcmp(_root, "/") == 0) { 70 strncpy(buf, _mount_point, MAXPATHLEN); 71 buf[MAXPATHLEN-1] = '\0'; 72 strncat(buf, cgroup_path, MAXPATHLEN-strlen(buf)); 73 buf[MAXPATHLEN-1] = '\0'; 74 _path = os::strdup(buf); 75 } else { 76 if (strcmp(_root, cgroup_path) == 0) { 77 strncpy(buf, _mount_point, MAXPATHLEN); 78 buf[MAXPATHLEN-1] = '\0'; 79 _path = os::strdup(buf); 80 } else { 81 char *p = strstr(_root, cgroup_path); 82 if (p != NULL && p == _root) { 83 if (strlen(cgroup_path) > strlen(_root)) { 84 strncpy(buf, _mount_point, MAXPATHLEN); 85 buf[MAXPATHLEN-1] = '\0'; 86 strncat(buf, cgroup_path + strlen(_root), MAXPATHLEN-strlen(buf)); 87 buf[MAXPATHLEN-1] = '\0'; 88 _path = os::strdup(buf); 89 } 90 } 91 } 92 } 93 } 94 } 95 96 char *subsystem_path() { return _path; } 97 }; 98 99 // CgroupSubsystem *cgroupv2; 100 CgroupSubsystem* memory = NULL; 101 CgroupSubsystem* cpuset = NULL; 102 CgroupSubsystem* cpu = NULL; 103 CgroupSubsystem* cpuacct = NULL; 104 105 typedef char * cptr; 106 107 #define GEN_CONTAINER_GET_INFO(return_type, scan_fmt, isstr) \ 108 int subsystem_file_contents_##return_type(CgroupSubsystem* c, \ 109 char *filename, \ 110 return_type *returnval) { \ 111 FILE *fp = NULL; \ 112 char *p; \ 113 char buf[MAXPATHLEN+1]; \ 114 \ 115 if (c != NULL && c->subsystem_path() != NULL) { \ 116 strncpy(buf, c->subsystem_path(), MAXPATHLEN); \ 117 buf[MAXPATHLEN-1] = '\0'; \ 118 strncat(buf, filename, MAXPATHLEN-strlen(buf)); \ 119 log_trace(os, container)("Path to %s is %s\n", filename, buf); \ 120 fp = fopen(buf, "r"); \ 121 if (fp != NULL) { \ 122 p = fgets(buf, MAXPATHLEN, fp); \ 123 if (p != NULL) { \ 124 if (isstr) { \ 125 *(char **)returnval = os::strdup(p); \ 126 fclose(fp); \ 127 return 0; \ 128 } else { \ 129 return_type value; \ 130 int matched = sscanf(p, scan_fmt, &value); \ 131 if (matched == 1) { \ 132 *returnval = value; \ 133 fclose(fp); \ 134 return 0; \ 135 } else { \ 136 log_debug(os, container)("Type %s not found in file %s\n", \ 137 scan_fmt , buf); \ 138 } \ 139 } \ 140 } else { \ 141 log_debug(os, container)("Empty file %s\n", buf); \ 142 } \ 143 } else { \ 144 log_debug(os, container)("file not found %s\n", buf); \ 145 } \ 146 } \ 147 if (fp != NULL) \ 148 fclose(fp); \ 149 return OSCONTAINER_ERROR; \ 150 } 151 152 153 GEN_CONTAINER_GET_INFO(int, "%d", false) 154 GEN_CONTAINER_GET_INFO(jlong, JLONG_FORMAT, false) 155 GEN_CONTAINER_GET_INFO(cptr, "%p", true) 156 157 #define GET_CONTAINER_INFO(return_type, isstring, subsystem, \ 158 filename, logstring, variable) \ 159 return_type variable; \ 160 { \ 161 int err; \ 162 err = subsystem_file_contents_##return_type(subsystem, \ 163 filename, \ 164 &variable); \ 165 if (err != 0) { \ 166 log_debug(os, container)("Error reading %s", filename); \ 167 return isstring ? (return_type) NULL : \ 168 (return_type) OSCONTAINER_ERROR; \ 169 } \ 170 log_trace(os, container)(logstring, variable); \ 171 } 172 173 /* init 174 * 175 * Initialize the container support and determine if 176 * we are running under cgroup control. 177 */ 178 void OSContainer::init() { 179 int mountid; 180 int parentid; 181 int major; 182 int minor; 183 FILE *mntinfo = NULL; 184 FILE *cgroup = NULL; 185 char buf[MAXPATHLEN+1]; 186 char tmproot[MAXPATHLEN+1]; 187 char tmpmount[MAXPATHLEN+1]; 188 char tmpbase[MAXPATHLEN+1]; 189 char *p; 190 jlong mem_limit; 191 192 assert(!_is_initialized, "Initializing OSContainer more than once"); 193 194 _is_initialized = true; 195 _is_containerized = false; 196 197 log_trace(os, container)("OSContainer::init: Initializing Container Support"); 198 if (!UseContainerSupport) { 199 log_trace(os, container)("Container Support not enabled"); 200 return; 201 } 202 203 /* 204 * Find the cgroup mount point for memory and cpuset 205 * by reading /proc/self/mountinfo 206 * 207 * Example for docker: 208 * 219 214 0:29 /docker/7208cebd00fa5f2e342b1094f7bed87fa25661471a4637118e65f1c995be8a34 /sys/fs/cgroup/memory ro,nosuid,nodev,noexec,relatime - cgroup cgroup rw,memory 209 * 210 * Example for host: 211 * 34 28 0:29 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,memory 212 */ 213 mntinfo = fopen("/proc/self/mountinfo", "r"); 214 if (mntinfo == NULL) { 215 log_debug(os, container)("Can't locate /proc/self/mountinfo\n"); 216 return; 217 } 218 219 while ( (p = fgets(buf, MAXPATHLEN, mntinfo)) != NULL) { 220 // Look for the filesystem type and see if it's cgroup 221 char fstype[MAXPATHLEN+1]; 222 fstype[0] = '\0'; 223 char *s = strstr(p, " - "); 224 if (s != NULL && 225 sscanf(s, " - %s", fstype) == 1 && 226 strcmp(fstype, "cgroup") == 0) { 227 228 if (strstr(p, "memory") != NULL) { 229 int matched = sscanf(p, "%d %d %d:%d %s %s", 230 &mountid, 231 &parentid, 232 &major, 233 &minor, 234 tmproot, 235 tmpmount); 236 if (matched == 6) { 237 memory = new CgroupSubsystem(tmproot, tmpmount); 238 } 239 else 240 log_debug(os, container)("Incompatible str containing cgroup and memory: %s\n", p); 241 } else if (strstr(p, "cpuset") != NULL) { 242 int matched = sscanf(p, "%d %d %d:%d %s %s", 243 &mountid, 244 &parentid, 245 &major, 246 &minor, 247 tmproot, 248 tmpmount); 249 if (matched == 6) { 250 cpuset = new CgroupSubsystem(tmproot, tmpmount); 251 } 252 else { 253 log_debug(os, container)("Incompatible str containing cgroup and cpuset: %s\n", p); 254 } 255 } else if (strstr(p, "cpu,cpuacct") != NULL) { 256 int matched = sscanf(p, "%d %d %d:%d %s %s", 257 &mountid, 258 &parentid, 259 &major, 260 &minor, 261 tmproot, 262 tmpmount); 263 if (matched == 6) { 264 cpu = new CgroupSubsystem(tmproot, tmpmount); 265 cpuacct = new CgroupSubsystem(tmproot, tmpmount); 266 } 267 else { 268 log_debug(os, container)("Incompatible str containing cgroup and cpu,cpuacct: %s\n", p); 269 } 270 } else if (strstr(p, "cpuacct") != NULL) { 271 int matched = sscanf(p, "%d %d %d:%d %s %s", 272 &mountid, 273 &parentid, 274 &major, 275 &minor, 276 tmproot, 277 tmpmount); 278 if (matched == 6) { 279 cpuacct = new CgroupSubsystem(tmproot, tmpmount); 280 } 281 else { 282 log_debug(os, container)("Incompatible str containing cgroup and cpuacct: %s\n", p); 283 } 284 } else if (strstr(p, "cpu") != NULL) { 285 int matched = sscanf(p, "%d %d %d:%d %s %s", 286 &mountid, 287 &parentid, 288 &major, 289 &minor, 290 tmproot, 291 tmpmount); 292 if (matched == 6) { 293 cpu = new CgroupSubsystem(tmproot, tmpmount); 294 } 295 else { 296 log_debug(os, container)("Incompatible str containing cgroup and cpu: %s\n", p); 297 } 298 } 299 } 300 } 301 302 if (mntinfo != NULL) fclose(mntinfo); 303 304 /* 305 * Read /proc/self/cgroup and map host mount point to 306 * local one via /proc/self/mountinfo content above 307 * 308 * Docker example: 309 * 5:memory:/docker/6558aed8fc662b194323ceab5b964f69cf36b3e8af877a14b80256e93aecb044 310 * 311 * Host example: 312 * 5:memory:/user.slice 313 * 314 * Construct a path to the process specific memory and cpuset 315 * cgroup directory. 316 * 317 * For a container running under Docker from memory example above 318 * the paths would be: 319 * 320 * /sys/fs/cgroup/memory 321 * 322 * For a Host from memory example above the path would be: 323 * 324 * /sys/fs/cgroup/memory/user.slice 325 * 326 */ 327 cgroup = fopen("/proc/self/cgroup", "r"); 328 if (cgroup == NULL) { 329 log_debug(os, container)("Can't locate /proc/self/cgroup\n"); 330 return; 331 } 332 333 while ( (p = fgets(buf, MAXPATHLEN, cgroup)) != NULL) { 334 int cgno; 335 int matched; 336 char *controller; 337 char *base; 338 339 /* Skip cgroup number */ 340 strsep(&p, ":"); 341 /* Get controller and base */ 342 controller = strsep(&p, ":"); 343 base = strsep(&p, "\n"); 344 345 if (controller != NULL) { 346 if (strstr(controller, "memory") != NULL) { 347 memory->set_subsystem_path(base); 348 } else if (strstr(controller, "cpuset") != NULL) { 349 cpuset->set_subsystem_path(base); 350 } else if (strstr(controller, "cpu,cpuacct") != NULL) { 351 cpu->set_subsystem_path(base); 352 cpuacct->set_subsystem_path(base); 353 } else if (strstr(controller, "cpuacct") != NULL) { 354 cpuacct->set_subsystem_path(base); 355 } else if (strstr(controller, "cpu") != NULL) { 356 cpu->set_subsystem_path(base); 357 } 358 } 359 } 360 361 if (cgroup != NULL) fclose(cgroup); 362 363 if (memory == NULL || cpuset == NULL || cpu == NULL) { 364 log_debug(os, container)("Required cgroup subsystems not found"); 365 return; 366 } 367 368 // We need to update the amount of physical memory now that 369 // command line arguments have been processed. 370 if ((mem_limit = memory_limit_in_bytes()) > 0) { 371 os::Linux::set_physical_memory(mem_limit); 372 } 373 374 _is_containerized = true; 375 } 376 377 char * OSContainer::container_type() { 378 if (is_containerized()) { 379 return (char *)"cgroupv1"; 380 } else { 381 return NULL; 382 } 383 } 384 385 386 /* memory_limit_in_bytes 387 * 388 * Return the limit of available memory for this process. 389 * 390 * return: 391 * memory limit in bytes or 392 * -1 for unlimited 393 * OSCONTAINER_ERROR for not supported 394 */ 395 jlong OSContainer::memory_limit_in_bytes() { 396 GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.limit_in_bytes", 397 "Memory Limit is: " JLONG_FORMAT "\n", memlimit); 398 399 if (memlimit >= UNLIMITED_MEM) { 400 log_trace(os, container)("Memory Limit is: Unlimited\n"); 401 return (jlong)-1; 402 } 403 else { 404 return memlimit; 405 } 406 } 407 408 jlong OSContainer::memory_and_swap_limit_in_bytes() { 409 GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.memsw.limit_in_bytes", 410 "Memory and Swap Limit is: " JLONG_FORMAT "\n", memswlimit); 411 if (memswlimit >= UNLIMITED_MEM) { 412 log_trace(os, container)("Memory and Swap Limit is: Unlimited\n"); 413 return (jlong)-1; 414 } else { 415 return memswlimit; 416 } 417 } 418 419 jlong OSContainer::memory_soft_limit_in_bytes() { 420 GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.soft_limit_in_bytes", 421 "Memory Soft Limit is: " JLONG_FORMAT "\n", memsoftlimit); 422 if (memsoftlimit >= UNLIMITED_MEM) { 423 log_trace(os, container)("Memory Soft Limit is: Unlimited\n"); 424 return (jlong)-1; 425 } else { 426 return memsoftlimit; 427 } 428 } 429 430 /* memory_usage_in_bytes 431 * 432 * Return the amount of used memory for this process. 433 * 434 * return: 435 * memory usage in bytes or 436 * -1 for unlimited 437 * OSCONTAINER_ERROR for not supported 438 */ 439 jlong OSContainer::memory_usage_in_bytes() { 440 GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.usage_in_bytes", 441 "Memory Usage is: " JLONG_FORMAT "\n", memusage); 442 return memusage; 443 } 444 445 /* memory_max_usage_in_bytes 446 * 447 * Return the maximum amount of used memory for this process. 448 * 449 * return: 450 * max memory usage in bytes or 451 * OSCONTAINER_ERROR for not supported 452 */ 453 jlong OSContainer::memory_max_usage_in_bytes() { 454 GET_CONTAINER_INFO(jlong, false, memory, (char *)"/memory.max_usage_in_bytes", 455 "Maximu, Memory Usage is: " JLONG_FORMAT "\n", memmaxusage); 456 return memmaxusage; 457 } 458 459 /* active_processor_count 460 * 461 * Calculate an appropriate number of active processors for the 462 * VM to use based on these three cgroup options. 463 * 464 * cpu affinity 465 * cpu quota & cpu period 466 * cpu shares 467 * 468 * Algorithm: 469 * 470 * Determine the number of available CPUs from sched_getaffinity 471 * 472 * If user specified a quota (quota != -1), calculate the number of 473 * required CPUs by dividing quota by period. 474 * 475 * If shares are in effect (shares != -1), calculate the number 476 * of cpus required for the shares by dividing the share value 477 * by PER_CPU_SHARES. 478 * 479 * All results of division are rounded up to the next whole number. 480 * 481 * Return the smaller number from the three different settings. 482 * 483 * return: 484 * number of cpus 485 * OSCONTAINER_ERROR if failure occured during extract of cpuset info 486 */ 487 int OSContainer::active_processor_count() { 488 int cpu_count, share_count, quota_count; 489 int share, quota, period; 490 int result; 491 492 cpu_count = os::Linux::active_processor_count(); 493 494 share = cpu_shares(); 495 if (share > -1) { 496 share_count = ceilf((float)share / (float)PER_CPU_SHARES); 497 log_trace(os, container)("cpu_share count: %d", share_count); 498 } else { 499 share_count = cpu_count; 500 } 501 502 quota = cpu_quota(); 503 period = cpu_period(); 504 if (quota > -1 && period > 0) { 505 quota_count = ceilf((float)quota / (float)period); 506 log_trace(os, container)("quota_count: %d", quota_count); 507 } else { 508 quota_count = cpu_count; 509 } 510 511 result = MIN2(cpu_count, MIN2(share_count, quota_count)); 512 log_trace(os, container)("OSContainer::active_processor_count: %d", result); 513 return result; 514 } 515 516 char * OSContainer::cpu_cpuset_cpus() { 517 GET_CONTAINER_INFO(cptr, true, cpuset, (char *)"/cpuset.cpus", 518 "cpuset.cpus is: %s\n", cpus); 519 return cpus; 520 } 521 522 char * OSContainer::cpu_cpuset_memory_nodes() { 523 GET_CONTAINER_INFO(cptr, true, cpuset, (char *)"/cpuset.mems", 524 "cpuset.mems is: %s\n", mems); 525 return mems; 526 } 527 528 /* cpu_quota 529 * 530 * Return the number of milliseconds per period 531 * process is guaranteed to run. 532 * 533 * return: 534 * quota time in milliseconds 535 * -1 for no quota 536 * OSCONTAINER_ERROR for not supported 537 */ 538 int OSContainer::cpu_quota() { 539 GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.cfs_quota_us", 540 "CPU Quota is: %d\n", quota); 541 return quota; 542 } 543 544 int OSContainer::cpu_period() { 545 GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.cfs_period_us", 546 "CPU Period is: %d\n", period); 547 return period; 548 } 549 550 /* cpu_shares 551 * 552 * Return the amount of cpu shares available to the process 553 * 554 * return: 555 * Share number (typically a number relative to 1024) 556 * (2048 typically expresses 2 CPUs worth of processing) 557 * -1 for no share setup 558 * OSCONTAINER_ERROR for not supported 559 */ 560 int OSContainer::cpu_shares() { 561 GET_CONTAINER_INFO(int, false, cpu, (char *)"/cpu.shares", 562 "CPU Shares is: %d\n", shares); 563 // Convert 1024 to no shares setup 564 if (shares == 1024) return -1; 565 566 return shares; 567 } 568