1 /* 2 * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 // no precompiled headers 26 #include "classfile/classLoader.hpp" 27 #include "classfile/systemDictionary.hpp" 28 #include "classfile/vmSymbols.hpp" 29 #include "code/icBuffer.hpp" 30 #include "code/vtableStubs.hpp" 31 #include "compiler/compileBroker.hpp" 32 #include "compiler/disassembler.hpp" 33 #include "interpreter/interpreter.hpp" 34 #include "jvm_linux.h" 35 #include "logging/log.hpp" 36 #include "memory/allocation.inline.hpp" 37 #include "memory/filemap.hpp" 38 #include "mutex_linux.inline.hpp" 39 #include "oops/oop.inline.hpp" 40 #include "os_linux.inline.hpp" 41 #include "os_share_linux.hpp" 42 #include "prims/jniFastGetField.hpp" 43 #include "prims/jvm.h" 44 #include "prims/jvm_misc.hpp" 45 #include "runtime/arguments.hpp" 46 #include "runtime/atomic.inline.hpp" 47 #include "runtime/extendedPC.hpp" 48 #include "runtime/globals.hpp" 49 #include "runtime/interfaceSupport.hpp" 50 #include "runtime/init.hpp" 51 #include "runtime/java.hpp" 52 #include "runtime/javaCalls.hpp" 53 #include "runtime/mutexLocker.hpp" 54 #include "runtime/objectMonitor.hpp" 55 #include "runtime/orderAccess.inline.hpp" 56 #include "runtime/osThread.hpp" 57 #include "runtime/perfMemory.hpp" 58 #include "runtime/sharedRuntime.hpp" 59 #include "runtime/statSampler.hpp" 60 #include "runtime/stubRoutines.hpp" 61 #include "runtime/thread.inline.hpp" 62 #include "runtime/threadCritical.hpp" 63 #include "runtime/timer.hpp" 64 #include "semaphore_posix.hpp" 65 #include "services/attachListener.hpp" 66 #include "services/memTracker.hpp" 67 #include "services/runtimeService.hpp" 68 #include "utilities/decoder.hpp" 69 #include "utilities/defaultStream.hpp" 70 #include "utilities/events.hpp" 71 #include "utilities/elfFile.hpp" 72 #include "utilities/growableArray.hpp" 73 #include "utilities/macros.hpp" 74 #include "utilities/vmError.hpp" 75 76 // put OS-includes here 77 # include <sys/types.h> 78 # include <sys/mman.h> 79 # include <sys/stat.h> 80 # include <sys/select.h> 81 # include <pthread.h> 82 # include <signal.h> 83 # include <errno.h> 84 # include <dlfcn.h> 85 # include <stdio.h> 86 # include <unistd.h> 87 # include <sys/resource.h> 88 # include <pthread.h> 89 # include <sys/stat.h> 90 # include <sys/time.h> 91 # include <sys/times.h> 92 # include <sys/utsname.h> 93 # include <sys/socket.h> 94 # include <sys/wait.h> 95 # include <pwd.h> 96 # include <poll.h> 97 # include <semaphore.h> 98 # include <fcntl.h> 99 # include <string.h> 
100 # include <syscall.h> 101 # include <sys/sysinfo.h> 102 # include <gnu/libc-version.h> 103 # include <sys/ipc.h> 104 # include <sys/shm.h> 105 # include <link.h> 106 # include <stdint.h> 107 # include <inttypes.h> 108 # include <sys/ioctl.h> 109 110 #ifndef _GNU_SOURCE 111 #define _GNU_SOURCE 112 #include <sched.h> 113 #undef _GNU_SOURCE 114 #else 115 #include <sched.h> 116 #endif 117 118 // if RUSAGE_THREAD for getrusage() has not been defined, do it here. The code calling 119 // getrusage() is prepared to handle the associated failure. 120 #ifndef RUSAGE_THREAD 121 #define RUSAGE_THREAD (1) /* only the calling thread */ 122 #endif 123 124 #define MAX_PATH (2 * K) 125 126 #define MAX_SECS 100000000 127 128 // for timer info max values which include all bits 129 #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF) 130 131 #define LARGEPAGES_BIT (1 << 6) 132 //////////////////////////////////////////////////////////////////////////////// 133 // global variables 134 julong os::Linux::_physical_memory = 0; 135 136 address os::Linux::_initial_thread_stack_bottom = NULL; 137 uintptr_t os::Linux::_initial_thread_stack_size = 0; 138 139 int (*os::Linux::_clock_gettime)(clockid_t, struct timespec *) = NULL; 140 int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL; 141 int (*os::Linux::_pthread_setname_np)(pthread_t, const char*) = NULL; 142 Mutex* os::Linux::_createThread_lock = NULL; 143 pthread_t os::Linux::_main_thread; 144 int os::Linux::_page_size = -1; 145 const int os::Linux::_vm_default_page_size = (8 * K); 146 bool os::Linux::_supports_fast_thread_cpu_time = false; 147 const char * os::Linux::_glibc_version = NULL; 148 const char * os::Linux::_libpthread_version = NULL; 149 pthread_condattr_t os::Linux::_condattr[1]; 150 151 static jlong initial_time_count=0; 152 153 static int clock_tics_per_sec = 100; 154 155 // For diagnostics to print a message once. see run_periodic_checks 156 static sigset_t check_signal_done; 157 static bool check_signals = true; 158 159 // Signal number used to suspend/resume a thread 160 161 // do not use any signal number less than SIGSEGV, see 4355769 162 static int SR_signum = SIGUSR2; 163 sigset_t SR_sigset; 164 165 // Declarations 166 static void unpackTime(timespec* absTime, bool isAbsolute, jlong time); 167 168 // utility functions 169 170 static int SR_initialize(); 171 172 julong os::available_memory() { 173 return Linux::available_memory(); 174 } 175 176 julong os::Linux::available_memory() { 177 // values in struct sysinfo are "unsigned long" 178 struct sysinfo si; 179 sysinfo(&si); 180 181 return (julong)si.freeram * si.mem_unit; 182 } 183 184 julong os::physical_memory() { 185 return Linux::physical_memory(); 186 } 187 188 // Return true if user is running as root. 
189 190 bool os::have_special_privileges() { 191 static bool init = false; 192 static bool privileges = false; 193 if (!init) { 194 privileges = (getuid() != geteuid()) || (getgid() != getegid()); 195 init = true; 196 } 197 return privileges; 198 } 199 200 201 #ifndef SYS_gettid 202 // i386: 224, ia64: 1105, amd64: 186, sparc 143 203 #ifdef __ia64__ 204 #define SYS_gettid 1105 205 #else 206 #ifdef __i386__ 207 #define SYS_gettid 224 208 #else 209 #ifdef __amd64__ 210 #define SYS_gettid 186 211 #else 212 #ifdef __sparc__ 213 #define SYS_gettid 143 214 #else 215 #error define gettid for the arch 216 #endif 217 #endif 218 #endif 219 #endif 220 #endif 221 222 // Cpu architecture string 223 static char cpu_arch[] = HOTSPOT_LIB_ARCH; 224 225 226 // pid_t gettid() 227 // 228 // Returns the kernel thread id of the currently running thread. Kernel 229 // thread id is used to access /proc. 230 pid_t os::Linux::gettid() { 231 int rslt = syscall(SYS_gettid); 232 assert(rslt != -1, "must be."); // old linuxthreads implementation? 233 return (pid_t)rslt; 234 } 235 236 // Most versions of linux have a bug where the number of processors are 237 // determined by looking at the /proc file system. In a chroot environment, 238 // the system call returns 1. This causes the VM to act as if it is 239 // a single processor and elide locking (see is_MP() call). 240 static bool unsafe_chroot_detected = false; 241 static const char *unstable_chroot_error = "/proc file system not found.\n" 242 "Java may be unstable running multithreaded in a chroot " 243 "environment on Linux when /proc filesystem is not mounted."; 244 245 void os::Linux::initialize_system_info() { 246 set_processor_count(sysconf(_SC_NPROCESSORS_CONF)); 247 if (processor_count() == 1) { 248 pid_t pid = os::Linux::gettid(); 249 char fname[32]; 250 jio_snprintf(fname, sizeof(fname), "/proc/%d", pid); 251 FILE *fp = fopen(fname, "r"); 252 if (fp == NULL) { 253 unsafe_chroot_detected = true; 254 } else { 255 fclose(fp); 256 } 257 } 258 _physical_memory = (julong)sysconf(_SC_PHYS_PAGES) * (julong)sysconf(_SC_PAGESIZE); 259 assert(processor_count() > 0, "linux error"); 260 } 261 262 void os::init_system_properties_values() { 263 // The next steps are taken in the product version: 264 // 265 // Obtain the JAVA_HOME value from the location of libjvm.so. 266 // This library should be located at: 267 // <JAVA_HOME>/jre/lib/<arch>/{client|server}/libjvm.so. 268 // 269 // If "/jre/lib/" appears at the right place in the path, then we 270 // assume libjvm.so is installed in a JDK and we use this path. 271 // 272 // Otherwise exit with message: "Could not create the Java virtual machine." 273 // 274 // The following extra steps are taken in the debugging version: 275 // 276 // If "/jre/lib/" does NOT appear at the right place in the path 277 // instead of exit check for $JAVA_HOME environment variable. 278 // 279 // If it is defined and we are able to locate $JAVA_HOME/jre/lib/<arch>, 280 // then we append a fake suffix "hotspot/libjvm.so" to this path so 281 // it looks like libjvm.so is installed there 282 // <JAVA_HOME>/jre/lib/<arch>/hotspot/libjvm.so. 283 // 284 // Otherwise exit. 285 // 286 // Important note: if the location of libjvm.so changes this 287 // code needs to be changed accordingly. 288 289 // See ld(1): 290 // The linker uses the following search paths to locate required 291 // shared libraries: 292 // 1: ... 293 // ... 294 // 7: The default directories, normally /lib and /usr/lib. 
295 #if defined(AMD64) || defined(_LP64) && (defined(SPARC) || defined(PPC) || defined(S390)) 296 #define DEFAULT_LIBPATH "/usr/lib64:/lib64:/lib:/usr/lib" 297 #else 298 #define DEFAULT_LIBPATH "/lib:/usr/lib" 299 #endif 300 301 // Base path of extensions installed on the system. 302 #define SYS_EXT_DIR "/usr/java/packages" 303 #define EXTENSIONS_DIR "/lib/ext" 304 305 // Buffer that fits several sprintfs. 306 // Note that the space for the colon and the trailing null are provided 307 // by the nulls included by the sizeof operator. 308 const size_t bufsize = 309 MAX2((size_t)MAXPATHLEN, // For dll_dir & friends. 310 (size_t)MAXPATHLEN + sizeof(EXTENSIONS_DIR) + sizeof(SYS_EXT_DIR) + sizeof(EXTENSIONS_DIR)); // extensions dir 311 char *buf = (char *)NEW_C_HEAP_ARRAY(char, bufsize, mtInternal); 312 313 // sysclasspath, java_home, dll_dir 314 { 315 char *pslash; 316 os::jvm_path(buf, bufsize); 317 318 // Found the full path to libjvm.so. 319 // Now cut the path to <java_home>/jre if we can. 320 pslash = strrchr(buf, '/'); 321 if (pslash != NULL) { 322 *pslash = '\0'; // Get rid of /libjvm.so. 323 } 324 pslash = strrchr(buf, '/'); 325 if (pslash != NULL) { 326 *pslash = '\0'; // Get rid of /{client|server|hotspot}. 327 } 328 Arguments::set_dll_dir(buf); 329 330 if (pslash != NULL) { 331 pslash = strrchr(buf, '/'); 332 if (pslash != NULL) { 333 *pslash = '\0'; // Get rid of /<arch>. 334 pslash = strrchr(buf, '/'); 335 if (pslash != NULL) { 336 *pslash = '\0'; // Get rid of /lib. 337 } 338 } 339 } 340 Arguments::set_java_home(buf); 341 set_boot_path('/', ':'); 342 } 343 344 // Where to look for native libraries. 345 // 346 // Note: Due to a legacy implementation, most of the library path 347 // is set in the launcher. This was to accomodate linking restrictions 348 // on legacy Linux implementations (which are no longer supported). 349 // Eventually, all the library path setting will be done here. 350 // 351 // However, to prevent the proliferation of improperly built native 352 // libraries, the new path component /usr/java/packages is added here. 353 // Eventually, all the library path setting will be done here. 354 { 355 // Get the user setting of LD_LIBRARY_PATH, and prepended it. It 356 // should always exist (until the legacy problem cited above is 357 // addressed). 358 const char *v = ::getenv("LD_LIBRARY_PATH"); 359 const char *v_colon = ":"; 360 if (v == NULL) { v = ""; v_colon = ""; } 361 // That's +1 for the colon and +1 for the trailing '\0'. 362 char *ld_library_path = (char *)NEW_C_HEAP_ARRAY(char, 363 strlen(v) + 1 + 364 sizeof(SYS_EXT_DIR) + sizeof("/lib/") + strlen(cpu_arch) + sizeof(DEFAULT_LIBPATH) + 1, 365 mtInternal); 366 sprintf(ld_library_path, "%s%s" SYS_EXT_DIR "/lib/%s:" DEFAULT_LIBPATH, v, v_colon, cpu_arch); 367 Arguments::set_library_path(ld_library_path); 368 FREE_C_HEAP_ARRAY(char, ld_library_path); 369 } 370 371 // Extensions directories. 
372 sprintf(buf, "%s" EXTENSIONS_DIR ":" SYS_EXT_DIR EXTENSIONS_DIR, Arguments::get_java_home()); 373 Arguments::set_ext_dirs(buf); 374 375 FREE_C_HEAP_ARRAY(char, buf); 376 377 #undef DEFAULT_LIBPATH 378 #undef SYS_EXT_DIR 379 #undef EXTENSIONS_DIR 380 } 381 382 //////////////////////////////////////////////////////////////////////////////// 383 // breakpoint support 384 385 void os::breakpoint() { 386 BREAKPOINT; 387 } 388 389 extern "C" void breakpoint() { 390 // use debugger to set breakpoint here 391 } 392 393 //////////////////////////////////////////////////////////////////////////////// 394 // signal support 395 396 debug_only(static bool signal_sets_initialized = false); 397 static sigset_t unblocked_sigs, vm_sigs, allowdebug_blocked_sigs; 398 399 bool os::Linux::is_sig_ignored(int sig) { 400 struct sigaction oact; 401 sigaction(sig, (struct sigaction*)NULL, &oact); 402 void* ohlr = oact.sa_sigaction ? CAST_FROM_FN_PTR(void*, oact.sa_sigaction) 403 : CAST_FROM_FN_PTR(void*, oact.sa_handler); 404 if (ohlr == CAST_FROM_FN_PTR(void*, SIG_IGN)) { 405 return true; 406 } else { 407 return false; 408 } 409 } 410 411 void os::Linux::signal_sets_init() { 412 // Should also have an assertion stating we are still single-threaded. 413 assert(!signal_sets_initialized, "Already initialized"); 414 // Fill in signals that are necessarily unblocked for all threads in 415 // the VM. Currently, we unblock the following signals: 416 // SHUTDOWN{1,2,3}_SIGNAL: for shutdown hooks support (unless over-ridden 417 // by -Xrs (=ReduceSignalUsage)); 418 // BREAK_SIGNAL which is unblocked only by the VM thread and blocked by all 419 // other threads. The "ReduceSignalUsage" boolean tells us not to alter 420 // the dispositions or masks wrt these signals. 421 // Programs embedding the VM that want to use the above signals for their 422 // own purposes must, at this time, use the "-Xrs" option to prevent 423 // interference with shutdown hooks and BREAK_SIGNAL thread dumping. 424 // (See bug 4345157, and other related bugs). 425 // In reality, though, unblocking these signals is really a nop, since 426 // these signals are not blocked by default. 427 sigemptyset(&unblocked_sigs); 428 sigemptyset(&allowdebug_blocked_sigs); 429 sigaddset(&unblocked_sigs, SIGILL); 430 sigaddset(&unblocked_sigs, SIGSEGV); 431 sigaddset(&unblocked_sigs, SIGBUS); 432 sigaddset(&unblocked_sigs, SIGFPE); 433 #if defined(PPC64) 434 sigaddset(&unblocked_sigs, SIGTRAP); 435 #endif 436 sigaddset(&unblocked_sigs, SR_signum); 437 438 if (!ReduceSignalUsage) { 439 if (!os::Linux::is_sig_ignored(SHUTDOWN1_SIGNAL)) { 440 sigaddset(&unblocked_sigs, SHUTDOWN1_SIGNAL); 441 sigaddset(&allowdebug_blocked_sigs, SHUTDOWN1_SIGNAL); 442 } 443 if (!os::Linux::is_sig_ignored(SHUTDOWN2_SIGNAL)) { 444 sigaddset(&unblocked_sigs, SHUTDOWN2_SIGNAL); 445 sigaddset(&allowdebug_blocked_sigs, SHUTDOWN2_SIGNAL); 446 } 447 if (!os::Linux::is_sig_ignored(SHUTDOWN3_SIGNAL)) { 448 sigaddset(&unblocked_sigs, SHUTDOWN3_SIGNAL); 449 sigaddset(&allowdebug_blocked_sigs, SHUTDOWN3_SIGNAL); 450 } 451 } 452 // Fill in signals that are blocked by all but the VM thread. 453 sigemptyset(&vm_sigs); 454 if (!ReduceSignalUsage) { 455 sigaddset(&vm_sigs, BREAK_SIGNAL); 456 } 457 debug_only(signal_sets_initialized = true); 458 459 } 460 461 // These are signals that are unblocked while a thread is running Java. 462 // (For some reason, they get blocked by default.) 
463 sigset_t* os::Linux::unblocked_signals() { 464 assert(signal_sets_initialized, "Not initialized"); 465 return &unblocked_sigs; 466 } 467 468 // These are the signals that are blocked while a (non-VM) thread is 469 // running Java. Only the VM thread handles these signals. 470 sigset_t* os::Linux::vm_signals() { 471 assert(signal_sets_initialized, "Not initialized"); 472 return &vm_sigs; 473 } 474 475 // These are signals that are blocked during cond_wait to allow debugger in 476 sigset_t* os::Linux::allowdebug_blocked_signals() { 477 assert(signal_sets_initialized, "Not initialized"); 478 return &allowdebug_blocked_sigs; 479 } 480 481 void os::Linux::hotspot_sigmask(Thread* thread) { 482 483 //Save caller's signal mask before setting VM signal mask 484 sigset_t caller_sigmask; 485 pthread_sigmask(SIG_BLOCK, NULL, &caller_sigmask); 486 487 OSThread* osthread = thread->osthread(); 488 osthread->set_caller_sigmask(caller_sigmask); 489 490 pthread_sigmask(SIG_UNBLOCK, os::Linux::unblocked_signals(), NULL); 491 492 if (!ReduceSignalUsage) { 493 if (thread->is_VM_thread()) { 494 // Only the VM thread handles BREAK_SIGNAL ... 495 pthread_sigmask(SIG_UNBLOCK, vm_signals(), NULL); 496 } else { 497 // ... all other threads block BREAK_SIGNAL 498 pthread_sigmask(SIG_BLOCK, vm_signals(), NULL); 499 } 500 } 501 } 502 503 ////////////////////////////////////////////////////////////////////////////// 504 // detecting pthread library 505 506 void os::Linux::libpthread_init() { 507 // Save glibc and pthread version strings. 508 #if !defined(_CS_GNU_LIBC_VERSION) || \ 509 !defined(_CS_GNU_LIBPTHREAD_VERSION) 510 #error "glibc too old (< 2.3.2)" 511 #endif 512 513 size_t n = confstr(_CS_GNU_LIBC_VERSION, NULL, 0); 514 assert(n > 0, "cannot retrieve glibc version"); 515 char *str = (char *)malloc(n, mtInternal); 516 confstr(_CS_GNU_LIBC_VERSION, str, n); 517 os::Linux::set_glibc_version(str); 518 519 n = confstr(_CS_GNU_LIBPTHREAD_VERSION, NULL, 0); 520 assert(n > 0, "cannot retrieve pthread version"); 521 str = (char *)malloc(n, mtInternal); 522 confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n); 523 os::Linux::set_libpthread_version(str); 524 } 525 526 ///////////////////////////////////////////////////////////////////////////// 527 // thread stack expansion 528 529 // os::Linux::manually_expand_stack() takes care of expanding the thread 530 // stack. Note that this is normally not needed: pthread stacks allocate 531 // thread stack using mmap() without MAP_NORESERVE, so the stack is already 532 // committed. Therefore it is not necessary to expand the stack manually. 533 // 534 // Manually expanding the stack was historically needed on LinuxThreads 535 // thread stacks, which were allocated with mmap(MAP_GROWSDOWN). Nowadays 536 // it is kept to deal with very rare corner cases: 537 // 538 // For one, user may run the VM on an own implementation of threads 539 // whose stacks are - like the old LinuxThreads - implemented using 540 // mmap(MAP_GROWSDOWN). 541 // 542 // Also, this coding may be needed if the VM is running on the primordial 543 // thread. Normally we avoid running on the primordial thread; however, 544 // user may still invoke the VM on the primordial thread. 545 // 546 // The following historical comment describes the details about running 547 // on a thread stack allocated with mmap(MAP_GROWSDOWN): 548 549 550 // Force Linux kernel to expand current thread stack. If "bottom" is close 551 // to the stack guard, caller should block all signals. 
552 // 553 // MAP_GROWSDOWN: 554 // A special mmap() flag that is used to implement thread stacks. It tells 555 // kernel that the memory region should extend downwards when needed. This 556 // allows early versions of LinuxThreads to only mmap the first few pages 557 // when creating a new thread. Linux kernel will automatically expand thread 558 // stack as needed (on page faults). 559 // 560 // However, because the memory region of a MAP_GROWSDOWN stack can grow on 561 // demand, if a page fault happens outside an already mapped MAP_GROWSDOWN 562 // region, it's hard to tell if the fault is due to a legitimate stack 563 // access or because of reading/writing non-exist memory (e.g. buffer 564 // overrun). As a rule, if the fault happens below current stack pointer, 565 // Linux kernel does not expand stack, instead a SIGSEGV is sent to the 566 // application (see Linux kernel fault.c). 567 // 568 // This Linux feature can cause SIGSEGV when VM bangs thread stack for 569 // stack overflow detection. 570 // 571 // Newer version of LinuxThreads (since glibc-2.2, or, RH-7.x) and NPTL do 572 // not use MAP_GROWSDOWN. 573 // 574 // To get around the problem and allow stack banging on Linux, we need to 575 // manually expand thread stack after receiving the SIGSEGV. 576 // 577 // There are two ways to expand thread stack to address "bottom", we used 578 // both of them in JVM before 1.5: 579 // 1. adjust stack pointer first so that it is below "bottom", and then 580 // touch "bottom" 581 // 2. mmap() the page in question 582 // 583 // Now alternate signal stack is gone, it's harder to use 2. For instance, 584 // if current sp is already near the lower end of page 101, and we need to 585 // call mmap() to map page 100, it is possible that part of the mmap() frame 586 // will be placed in page 100. When page 100 is mapped, it is zero-filled. 587 // That will destroy the mmap() frame and cause VM to crash. 588 // 589 // The following code works by adjusting sp first, then accessing the "bottom" 590 // page to force a page fault. Linux kernel will then automatically expand the 591 // stack mapping. 592 // 593 // _expand_stack_to() assumes its frame size is less than page size, which 594 // should always be true if the function is not inlined. 595 596 static void NOINLINE _expand_stack_to(address bottom) { 597 address sp; 598 size_t size; 599 volatile char *p; 600 601 // Adjust bottom to point to the largest address within the same page, it 602 // gives us a one-page buffer if alloca() allocates slightly more memory. 603 bottom = (address)align_size_down((uintptr_t)bottom, os::Linux::page_size()); 604 bottom += os::Linux::page_size() - 1; 605 606 // sp might be slightly above current stack pointer; if that's the case, we 607 // will alloca() a little more space than necessary, which is OK. Don't use 608 // os::current_stack_pointer(), as its result can be slightly below current 609 // stack pointer, causing us to not alloca enough to reach "bottom". 
610 sp = (address)&sp; 611 612 if (sp > bottom) { 613 size = sp - bottom; 614 p = (volatile char *)alloca(size); 615 assert(p != NULL && p <= (volatile char *)bottom, "alloca problem?"); 616 p[0] = '\0'; 617 } 618 } 619 620 bool os::Linux::manually_expand_stack(JavaThread * t, address addr) { 621 assert(t!=NULL, "just checking"); 622 assert(t->osthread()->expanding_stack(), "expand should be set"); 623 assert(t->stack_base() != NULL, "stack_base was not initialized"); 624 625 if (addr < t->stack_base() && addr >= t->stack_reserved_zone_base()) { 626 sigset_t mask_all, old_sigset; 627 sigfillset(&mask_all); 628 pthread_sigmask(SIG_SETMASK, &mask_all, &old_sigset); 629 _expand_stack_to(addr); 630 pthread_sigmask(SIG_SETMASK, &old_sigset, NULL); 631 return true; 632 } 633 return false; 634 } 635 636 ////////////////////////////////////////////////////////////////////////////// 637 // create new thread 638 639 // Thread start routine for all newly created threads 640 static void *java_start(Thread *thread) { 641 // Try to randomize the cache line index of hot stack frames. 642 // This helps when threads of the same stack traces evict each other's 643 // cache lines. The threads can be either from the same JVM instance, or 644 // from different JVM instances. The benefit is especially true for 645 // processors with hyperthreading technology. 646 static int counter = 0; 647 int pid = os::current_process_id(); 648 alloca(((pid ^ counter++) & 7) * 128); 649 650 thread->initialize_thread_current(); 651 652 OSThread* osthread = thread->osthread(); 653 Monitor* sync = osthread->startThread_lock(); 654 655 osthread->set_thread_id(os::current_thread_id()); 656 657 log_info(os, thread)("Thread is alive (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").", 658 os::current_thread_id(), (uintx) pthread_self()); 659 660 if (UseNUMA) { 661 int lgrp_id = os::numa_get_group_id(); 662 if (lgrp_id != -1) { 663 thread->set_lgrp_id(lgrp_id); 664 } 665 } 666 // initialize signal mask for this thread 667 os::Linux::hotspot_sigmask(thread); 668 669 // initialize floating point control register 670 os::Linux::init_thread_fpu_state(); 671 672 // handshaking with parent thread 673 { 674 MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag); 675 676 // notify parent thread 677 osthread->set_state(INITIALIZED); 678 sync->notify_all(); 679 680 // wait until os::start_thread() 681 while (osthread->get_state() == INITIALIZED) { 682 sync->wait(Mutex::_no_safepoint_check_flag); 683 } 684 } 685 686 // call one more level start routine 687 thread->run(); 688 689 log_info(os, thread)("Thread finished (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").", 690 os::current_thread_id(), (uintx) pthread_self()); 691 692 return 0; 693 } 694 695 bool os::create_thread(Thread* thread, ThreadType thr_type, 696 size_t stack_size) { 697 assert(thread->osthread() == NULL, "caller responsible"); 698 699 // Allocate the OSThread object 700 OSThread* osthread = new OSThread(NULL, NULL); 701 if (osthread == NULL) { 702 return false; 703 } 704 705 // set the correct thread state 706 osthread->set_thread_type(thr_type); 707 708 // Initial state is ALLOCATED but not INITIALIZED 709 osthread->set_state(ALLOCATED); 710 711 thread->set_osthread(osthread); 712 713 // init thread attributes 714 pthread_attr_t attr; 715 pthread_attr_init(&attr); 716 pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); 717 718 // stack size 719 // calculate stack size if it's not specified by caller 720 if (stack_size == 0) { 721 stack_size = 
os::Linux::default_stack_size(thr_type); 722 723 switch (thr_type) { 724 case os::java_thread: 725 // Java threads use ThreadStackSize which default value can be 726 // changed with the flag -Xss 727 assert(JavaThread::stack_size_at_create() > 0, "this should be set"); 728 stack_size = JavaThread::stack_size_at_create(); 729 break; 730 case os::compiler_thread: 731 if (CompilerThreadStackSize > 0) { 732 stack_size = (size_t)(CompilerThreadStackSize * K); 733 break; 734 } // else fall through: 735 // use VMThreadStackSize if CompilerThreadStackSize is not defined 736 case os::vm_thread: 737 case os::pgc_thread: 738 case os::cgc_thread: 739 case os::watcher_thread: 740 if (VMThreadStackSize > 0) stack_size = (size_t)(VMThreadStackSize * K); 741 break; 742 } 743 } 744 745 stack_size = MAX2(stack_size, os::Linux::min_stack_allowed); 746 pthread_attr_setstacksize(&attr, stack_size); 747 748 // glibc guard page 749 pthread_attr_setguardsize(&attr, os::Linux::default_guard_size(thr_type)); 750 751 ThreadState state; 752 753 { 754 pthread_t tid; 755 int ret = pthread_create(&tid, &attr, (void* (*)(void*)) java_start, thread); 756 757 char buf[64]; 758 if (ret == 0) { 759 log_info(os, thread)("Thread started (pthread id: " UINTX_FORMAT ", attributes: %s). ", 760 (uintx) tid, os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); 761 } else { 762 log_warning(os, thread)("Failed to start thread - pthread_create failed (%s) for attributes: %s.", 763 strerror(ret), os::Posix::describe_pthread_attr(buf, sizeof(buf), &attr)); 764 } 765 766 pthread_attr_destroy(&attr); 767 768 if (ret != 0) { 769 // Need to clean up stuff we've allocated so far 770 thread->set_osthread(NULL); 771 delete osthread; 772 return false; 773 } 774 775 // Store pthread info into the OSThread 776 osthread->set_pthread_id(tid); 777 778 // Wait until child thread is either initialized or aborted 779 { 780 Monitor* sync_with_child = osthread->startThread_lock(); 781 MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag); 782 while ((state = osthread->get_state()) == ALLOCATED) { 783 sync_with_child->wait(Mutex::_no_safepoint_check_flag); 784 } 785 } 786 } 787 788 // Aborted due to thread limit being reached 789 if (state == ZOMBIE) { 790 thread->set_osthread(NULL); 791 delete osthread; 792 return false; 793 } 794 795 // The thread is returned suspended (in state INITIALIZED), 796 // and is started higher up in the call chain 797 assert(state == INITIALIZED, "race condition"); 798 return true; 799 } 800 801 ///////////////////////////////////////////////////////////////////////////// 802 // attach existing thread 803 804 // bootstrap the main thread 805 bool os::create_main_thread(JavaThread* thread) { 806 assert(os::Linux::_main_thread == pthread_self(), "should be called inside main thread"); 807 return create_attached_thread(thread); 808 } 809 810 bool os::create_attached_thread(JavaThread* thread) { 811 #ifdef ASSERT 812 thread->verify_not_published(); 813 #endif 814 815 // Allocate the OSThread object 816 OSThread* osthread = new OSThread(NULL, NULL); 817 818 if (osthread == NULL) { 819 return false; 820 } 821 822 // Store pthread info into the OSThread 823 osthread->set_thread_id(os::Linux::gettid()); 824 osthread->set_pthread_id(::pthread_self()); 825 826 // initialize floating point control register 827 os::Linux::init_thread_fpu_state(); 828 829 // Initial thread state is RUNNABLE 830 osthread->set_state(RUNNABLE); 831 832 thread->set_osthread(osthread); 833 834 if (UseNUMA) { 835 int lgrp_id = 
os::numa_get_group_id(); 836 if (lgrp_id != -1) { 837 thread->set_lgrp_id(lgrp_id); 838 } 839 } 840 841 if (os::Linux::is_initial_thread()) { 842 // If current thread is initial thread, its stack is mapped on demand, 843 // see notes about MAP_GROWSDOWN. Here we try to force kernel to map 844 // the entire stack region to avoid SEGV in stack banging. 845 // It is also useful to get around the heap-stack-gap problem on SuSE 846 // kernel (see 4821821 for details). We first expand stack to the top 847 // of yellow zone, then enable stack yellow zone (order is significant, 848 // enabling yellow zone first will crash JVM on SuSE Linux), so there 849 // is no gap between the last two virtual memory regions. 850 851 JavaThread *jt = (JavaThread *)thread; 852 address addr = jt->stack_reserved_zone_base(); 853 assert(addr != NULL, "initialization problem?"); 854 assert(jt->stack_available(addr) > 0, "stack guard should not be enabled"); 855 856 osthread->set_expanding_stack(); 857 os::Linux::manually_expand_stack(jt, addr); 858 osthread->clear_expanding_stack(); 859 } 860 861 // initialize signal mask for this thread 862 // and save the caller's signal mask 863 os::Linux::hotspot_sigmask(thread); 864 865 log_info(os, thread)("Thread attached (tid: " UINTX_FORMAT ", pthread id: " UINTX_FORMAT ").", 866 os::current_thread_id(), (uintx) pthread_self()); 867 868 return true; 869 } 870 871 void os::pd_start_thread(Thread* thread) { 872 OSThread * osthread = thread->osthread(); 873 assert(osthread->get_state() != INITIALIZED, "just checking"); 874 Monitor* sync_with_child = osthread->startThread_lock(); 875 MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag); 876 sync_with_child->notify(); 877 } 878 879 // Free Linux resources related to the OSThread 880 void os::free_thread(OSThread* osthread) { 881 assert(osthread != NULL, "osthread not set"); 882 883 if (Thread::current()->osthread() == osthread) { 884 // Restore caller's signal mask 885 sigset_t sigmask = osthread->caller_sigmask(); 886 pthread_sigmask(SIG_SETMASK, &sigmask, NULL); 887 } 888 889 delete osthread; 890 } 891 892 ////////////////////////////////////////////////////////////////////////////// 893 // initial thread 894 895 // Check if current thread is the initial thread, similar to Solaris thr_main. 896 bool os::Linux::is_initial_thread(void) { 897 char dummy; 898 // If called before init complete, thread stack bottom will be null. 899 // Can be called if fatal error occurs before initialization. 900 if (initial_thread_stack_bottom() == NULL) return false; 901 assert(initial_thread_stack_bottom() != NULL && 902 initial_thread_stack_size() != 0, 903 "os::init did not locate initial thread's stack region"); 904 if ((address)&dummy >= initial_thread_stack_bottom() && 905 (address)&dummy < initial_thread_stack_bottom() + initial_thread_stack_size()) { 906 return true; 907 } else { 908 return false; 909 } 910 } 911 912 // Find the virtual memory area that contains addr 913 static bool find_vma(address addr, address* vma_low, address* vma_high) { 914 FILE *fp = fopen("/proc/self/maps", "r"); 915 if (fp) { 916 address low, high; 917 while (!feof(fp)) { 918 if (fscanf(fp, "%p-%p", &low, &high) == 2) { 919 if (low <= addr && addr < high) { 920 if (vma_low) *vma_low = low; 921 if (vma_high) *vma_high = high; 922 fclose(fp); 923 return true; 924 } 925 } 926 for (;;) { 927 int ch = fgetc(fp); 928 if (ch == EOF || ch == (int)'\n') break; 929 } 930 } 931 fclose(fp); 932 } 933 return false; 934 } 935 936 // Locate initial thread stack. 
This special handling of initial thread stack 937 // is needed because pthread_getattr_np() on most (all?) Linux distros returns 938 // bogus value for initial thread. 939 void os::Linux::capture_initial_stack(size_t max_size) { 940 // stack size is the easy part, get it from RLIMIT_STACK 941 size_t stack_size; 942 struct rlimit rlim; 943 getrlimit(RLIMIT_STACK, &rlim); 944 stack_size = rlim.rlim_cur; 945 946 // 6308388: a bug in ld.so will relocate its own .data section to the 947 // lower end of primordial stack; reduce ulimit -s value a little bit 948 // so we won't install guard page on ld.so's data section. 949 stack_size -= 2 * page_size(); 950 951 // 4441425: avoid crash with "unlimited" stack size on SuSE 7.1 or Redhat 952 // 7.1, in both cases we will get 2G in return value. 953 // 4466587: glibc 2.2.x compiled w/o "--enable-kernel=2.4.0" (RH 7.0, 954 // SuSE 7.2, Debian) can not handle alternate signal stack correctly 955 // for initial thread if its stack size exceeds 6M. Cap it at 2M, 956 // in case other parts in glibc still assumes 2M max stack size. 957 // FIXME: alt signal stack is gone, maybe we can relax this constraint? 958 // Problem still exists RH7.2 (IA64 anyway) but 2MB is a little small 959 if (stack_size > 2 * K * K IA64_ONLY(*2)) { 960 stack_size = 2 * K * K IA64_ONLY(*2); 961 } 962 // Try to figure out where the stack base (top) is. This is harder. 963 // 964 // When an application is started, glibc saves the initial stack pointer in 965 // a global variable "__libc_stack_end", which is then used by system 966 // libraries. __libc_stack_end should be pretty close to stack top. The 967 // variable is available since the very early days. However, because it is 968 // a private interface, it could disappear in the future. 969 // 970 // Linux kernel saves start_stack information in /proc/<pid>/stat. Similar 971 // to __libc_stack_end, it is very close to stack top, but isn't the real 972 // stack top. Note that /proc may not exist if VM is running as a chroot 973 // program, so reading /proc/<pid>/stat could fail. Also the contents of 974 // /proc/<pid>/stat could change in the future (though unlikely). 975 // 976 // We try __libc_stack_end first. If that doesn't work, look for 977 // /proc/<pid>/stat. If neither of them works, we use current stack pointer 978 // as a hint, which should work well in most cases. 979 980 uintptr_t stack_start; 981 982 // try __libc_stack_end first 983 uintptr_t *p = (uintptr_t *)dlsym(RTLD_DEFAULT, "__libc_stack_end"); 984 if (p && *p) { 985 stack_start = *p; 986 } else { 987 // see if we can get the start_stack field from /proc/self/stat 988 FILE *fp; 989 int pid; 990 char state; 991 int ppid; 992 int pgrp; 993 int session; 994 int nr; 995 int tpgrp; 996 unsigned long flags; 997 unsigned long minflt; 998 unsigned long cminflt; 999 unsigned long majflt; 1000 unsigned long cmajflt; 1001 unsigned long utime; 1002 unsigned long stime; 1003 long cutime; 1004 long cstime; 1005 long prio; 1006 long nice; 1007 long junk; 1008 long it_real; 1009 uintptr_t start; 1010 uintptr_t vsize; 1011 intptr_t rss; 1012 uintptr_t rsslim; 1013 uintptr_t scodes; 1014 uintptr_t ecode; 1015 int i; 1016 1017 // Figure what the primordial thread stack base is. Code is inspired 1018 // by email from Hans Boehm. /proc/self/stat begins with current pid, 1019 // followed by command name surrounded by parentheses, state, etc. 
1020 char stat[2048]; 1021 int statlen; 1022 1023 fp = fopen("/proc/self/stat", "r"); 1024 if (fp) { 1025 statlen = fread(stat, 1, 2047, fp); 1026 stat[statlen] = '\0'; 1027 fclose(fp); 1028 1029 // Skip pid and the command string. Note that we could be dealing with 1030 // weird command names, e.g. user could decide to rename java launcher 1031 // to "java 1.4.2 :)", then the stat file would look like 1032 // 1234 (java 1.4.2 :)) R ... ... 1033 // We don't really need to know the command string, just find the last 1034 // occurrence of ")" and then start parsing from there. See bug 4726580. 1035 char * s = strrchr(stat, ')'); 1036 1037 i = 0; 1038 if (s) { 1039 // Skip blank chars 1040 do { s++; } while (s && isspace(*s)); 1041 1042 #define _UFM UINTX_FORMAT 1043 #define _DFM INTX_FORMAT 1044 1045 // 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 1046 // 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 1047 i = sscanf(s, "%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld " _UFM _UFM _DFM _UFM _UFM _UFM _UFM, 1048 &state, // 3 %c 1049 &ppid, // 4 %d 1050 &pgrp, // 5 %d 1051 &session, // 6 %d 1052 &nr, // 7 %d 1053 &tpgrp, // 8 %d 1054 &flags, // 9 %lu 1055 &minflt, // 10 %lu 1056 &cminflt, // 11 %lu 1057 &majflt, // 12 %lu 1058 &cmajflt, // 13 %lu 1059 &utime, // 14 %lu 1060 &stime, // 15 %lu 1061 &cutime, // 16 %ld 1062 &cstime, // 17 %ld 1063 &prio, // 18 %ld 1064 &nice, // 19 %ld 1065 &junk, // 20 %ld 1066 &it_real, // 21 %ld 1067 &start, // 22 UINTX_FORMAT 1068 &vsize, // 23 UINTX_FORMAT 1069 &rss, // 24 INTX_FORMAT 1070 &rsslim, // 25 UINTX_FORMAT 1071 &scodes, // 26 UINTX_FORMAT 1072 &ecode, // 27 UINTX_FORMAT 1073 &stack_start); // 28 UINTX_FORMAT 1074 } 1075 1076 #undef _UFM 1077 #undef _DFM 1078 1079 if (i != 28 - 2) { 1080 assert(false, "Bad conversion from /proc/self/stat"); 1081 // product mode - assume we are the initial thread, good luck in the 1082 // embedded case. 1083 warning("Can't detect initial thread stack location - bad conversion"); 1084 stack_start = (uintptr_t) &rlim; 1085 } 1086 } else { 1087 // For some reason we can't open /proc/self/stat (for example, running on 1088 // FreeBSD with a Linux emulator, or inside chroot), this should work for 1089 // most cases, so don't abort: 1090 warning("Can't detect initial thread stack location - no /proc/self/stat"); 1091 stack_start = (uintptr_t) &rlim; 1092 } 1093 } 1094 1095 // Now we have a pointer (stack_start) very close to the stack top, the 1096 // next thing to do is to figure out the exact location of stack top. We 1097 // can find out the virtual memory area that contains stack_start by 1098 // reading /proc/self/maps, it should be the last vma in /proc/self/maps, 1099 // and its upper limit is the real stack top. (again, this would fail if 1100 // running inside chroot, because /proc may not exist.) 1101 1102 uintptr_t stack_top; 1103 address low, high; 1104 if (find_vma((address)stack_start, &low, &high)) { 1105 // success, "high" is the true stack top. (ignore "low", because initial 1106 // thread stack grows on demand, its real bottom is high - RLIMIT_STACK.) 1107 stack_top = (uintptr_t)high; 1108 } else { 1109 // failed, likely because /proc/self/maps does not exist 1110 warning("Can't detect initial thread stack location - find_vma failed"); 1111 // best effort: stack_start is normally within a few pages below the real 1112 // stack top, use it as stack top, and reduce stack size so we won't put 1113 // guard page outside stack. 
1114 stack_top = stack_start; 1115 stack_size -= 16 * page_size(); 1116 } 1117 1118 // stack_top could be partially down the page so align it 1119 stack_top = align_size_up(stack_top, page_size()); 1120 1121 if (max_size && stack_size > max_size) { 1122 _initial_thread_stack_size = max_size; 1123 } else { 1124 _initial_thread_stack_size = stack_size; 1125 } 1126 1127 _initial_thread_stack_size = align_size_down(_initial_thread_stack_size, page_size()); 1128 _initial_thread_stack_bottom = (address)stack_top - _initial_thread_stack_size; 1129 } 1130 1131 //////////////////////////////////////////////////////////////////////////////// 1132 // time support 1133 1134 // Time since start-up in seconds to a fine granularity. 1135 // Used by VMSelfDestructTimer and the MemProfiler. 1136 double os::elapsedTime() { 1137 1138 return ((double)os::elapsed_counter()) / os::elapsed_frequency(); // nanosecond resolution 1139 } 1140 1141 jlong os::elapsed_counter() { 1142 return javaTimeNanos() - initial_time_count; 1143 } 1144 1145 jlong os::elapsed_frequency() { 1146 return NANOSECS_PER_SEC; // nanosecond resolution 1147 } 1148 1149 bool os::supports_vtime() { return true; } 1150 bool os::enable_vtime() { return false; } 1151 bool os::vtime_enabled() { return false; } 1152 1153 double os::elapsedVTime() { 1154 struct rusage usage; 1155 int retval = getrusage(RUSAGE_THREAD, &usage); 1156 if (retval == 0) { 1157 return (double) (usage.ru_utime.tv_sec + usage.ru_stime.tv_sec) + (double) (usage.ru_utime.tv_usec + usage.ru_stime.tv_usec) / (1000 * 1000); 1158 } else { 1159 // better than nothing, but not much 1160 return elapsedTime(); 1161 } 1162 } 1163 1164 jlong os::javaTimeMillis() { 1165 timeval time; 1166 int status = gettimeofday(&time, NULL); 1167 assert(status != -1, "linux error"); 1168 return jlong(time.tv_sec) * 1000 + jlong(time.tv_usec / 1000); 1169 } 1170 1171 void os::javaTimeSystemUTC(jlong &seconds, jlong &nanos) { 1172 timeval time; 1173 int status = gettimeofday(&time, NULL); 1174 assert(status != -1, "linux error"); 1175 seconds = jlong(time.tv_sec); 1176 nanos = jlong(time.tv_usec) * 1000; 1177 } 1178 1179 1180 #ifndef CLOCK_MONOTONIC 1181 #define CLOCK_MONOTONIC (1) 1182 #endif 1183 1184 void os::Linux::clock_init() { 1185 // we do dlopen's in this particular order due to bug in linux 1186 // dynamical loader (see 6348968) leading to crash on exit 1187 void* handle = dlopen("librt.so.1", RTLD_LAZY); 1188 if (handle == NULL) { 1189 handle = dlopen("librt.so", RTLD_LAZY); 1190 } 1191 1192 if (handle) { 1193 int (*clock_getres_func)(clockid_t, struct timespec*) = 1194 (int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_getres"); 1195 int (*clock_gettime_func)(clockid_t, struct timespec*) = 1196 (int(*)(clockid_t, struct timespec*))dlsym(handle, "clock_gettime"); 1197 if (clock_getres_func && clock_gettime_func) { 1198 // See if monotonic clock is supported by the kernel. Note that some 1199 // early implementations simply return kernel jiffies (updated every 1200 // 1/100 or 1/1000 second). It would be bad to use such a low res clock 1201 // for nano time (though the monotonic property is still nice to have). 1202 // It's fixed in newer kernels, however clock_getres() still returns 1203 // 1/HZ. We check if clock_getres() works, but will ignore its reported 1204 // resolution for now. Hopefully as people move to new kernels, this 1205 // won't be a problem. 
1206 struct timespec res; 1207 struct timespec tp; 1208 if (clock_getres_func (CLOCK_MONOTONIC, &res) == 0 && 1209 clock_gettime_func(CLOCK_MONOTONIC, &tp) == 0) { 1210 // yes, monotonic clock is supported 1211 _clock_gettime = clock_gettime_func; 1212 return; 1213 } else { 1214 // close librt if there is no monotonic clock 1215 dlclose(handle); 1216 } 1217 } 1218 } 1219 warning("No monotonic clock was available - timed services may " \ 1220 "be adversely affected if the time-of-day clock changes"); 1221 } 1222 1223 #ifndef SYS_clock_getres 1224 #if defined(IA32) || defined(AMD64) 1225 #define SYS_clock_getres IA32_ONLY(266) AMD64_ONLY(229) 1226 #define sys_clock_getres(x,y) ::syscall(SYS_clock_getres, x, y) 1227 #else 1228 #warning "SYS_clock_getres not defined for this platform, disabling fast_thread_cpu_time" 1229 #define sys_clock_getres(x,y) -1 1230 #endif 1231 #else 1232 #define sys_clock_getres(x,y) ::syscall(SYS_clock_getres, x, y) 1233 #endif 1234 1235 void os::Linux::fast_thread_clock_init() { 1236 if (!UseLinuxPosixThreadCPUClocks) { 1237 return; 1238 } 1239 clockid_t clockid; 1240 struct timespec tp; 1241 int (*pthread_getcpuclockid_func)(pthread_t, clockid_t *) = 1242 (int(*)(pthread_t, clockid_t *)) dlsym(RTLD_DEFAULT, "pthread_getcpuclockid"); 1243 1244 // Switch to using fast clocks for thread cpu time if 1245 // the sys_clock_getres() returns 0 error code. 1246 // Note, that some kernels may support the current thread 1247 // clock (CLOCK_THREAD_CPUTIME_ID) but not the clocks 1248 // returned by the pthread_getcpuclockid(). 1249 // If the fast Posix clocks are supported then the sys_clock_getres() 1250 // must return at least tp.tv_sec == 0 which means a resolution 1251 // better than 1 sec. This is extra check for reliability. 1252 1253 if (pthread_getcpuclockid_func && 1254 pthread_getcpuclockid_func(_main_thread, &clockid) == 0 && 1255 sys_clock_getres(clockid, &tp) == 0 && tp.tv_sec == 0) { 1256 _supports_fast_thread_cpu_time = true; 1257 _pthread_getcpuclockid = pthread_getcpuclockid_func; 1258 } 1259 } 1260 1261 jlong os::javaTimeNanos() { 1262 if (os::supports_monotonic_clock()) { 1263 struct timespec tp; 1264 int status = Linux::clock_gettime(CLOCK_MONOTONIC, &tp); 1265 assert(status == 0, "gettime error"); 1266 jlong result = jlong(tp.tv_sec) * (1000 * 1000 * 1000) + jlong(tp.tv_nsec); 1267 return result; 1268 } else { 1269 timeval time; 1270 int status = gettimeofday(&time, NULL); 1271 assert(status != -1, "linux error"); 1272 jlong usecs = jlong(time.tv_sec) * (1000 * 1000) + jlong(time.tv_usec); 1273 return 1000 * usecs; 1274 } 1275 } 1276 1277 void os::javaTimeNanos_info(jvmtiTimerInfo *info_ptr) { 1278 if (os::supports_monotonic_clock()) { 1279 info_ptr->max_value = ALL_64_BITS; 1280 1281 // CLOCK_MONOTONIC - amount of time since some arbitrary point in the past 1282 info_ptr->may_skip_backward = false; // not subject to resetting or drifting 1283 info_ptr->may_skip_forward = false; // not subject to resetting or drifting 1284 } else { 1285 // gettimeofday - based on time in seconds since the Epoch thus does not wrap 1286 info_ptr->max_value = ALL_64_BITS; 1287 1288 // gettimeofday is a real time clock so it skips 1289 info_ptr->may_skip_backward = true; 1290 info_ptr->may_skip_forward = true; 1291 } 1292 1293 info_ptr->kind = JVMTI_TIMER_ELAPSED; // elapsed not CPU time 1294 } 1295 1296 // Return the real, user, and system times in seconds from an 1297 // arbitrary fixed point in the past. 
1298 bool os::getTimesSecs(double* process_real_time, 1299 double* process_user_time, 1300 double* process_system_time) { 1301 struct tms ticks; 1302 clock_t real_ticks = times(&ticks); 1303 1304 if (real_ticks == (clock_t) (-1)) { 1305 return false; 1306 } else { 1307 double ticks_per_second = (double) clock_tics_per_sec; 1308 *process_user_time = ((double) ticks.tms_utime) / ticks_per_second; 1309 *process_system_time = ((double) ticks.tms_stime) / ticks_per_second; 1310 *process_real_time = ((double) real_ticks) / ticks_per_second; 1311 1312 return true; 1313 } 1314 } 1315 1316 1317 char * os::local_time_string(char *buf, size_t buflen) { 1318 struct tm t; 1319 time_t long_time; 1320 time(&long_time); 1321 localtime_r(&long_time, &t); 1322 jio_snprintf(buf, buflen, "%d-%02d-%02d %02d:%02d:%02d", 1323 t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, 1324 t.tm_hour, t.tm_min, t.tm_sec); 1325 return buf; 1326 } 1327 1328 struct tm* os::localtime_pd(const time_t* clock, struct tm* res) { 1329 return localtime_r(clock, res); 1330 } 1331 1332 //////////////////////////////////////////////////////////////////////////////// 1333 // runtime exit support 1334 1335 // Note: os::shutdown() might be called very early during initialization, or 1336 // called from signal handler. Before adding something to os::shutdown(), make 1337 // sure it is async-safe and can handle partially initialized VM. 1338 void os::shutdown() { 1339 1340 // allow PerfMemory to attempt cleanup of any persistent resources 1341 perfMemory_exit(); 1342 1343 // needs to remove object in file system 1344 AttachListener::abort(); 1345 1346 // flush buffered output, finish log files 1347 ostream_abort(); 1348 1349 // Check for abort hook 1350 abort_hook_t abort_hook = Arguments::abort_hook(); 1351 if (abort_hook != NULL) { 1352 abort_hook(); 1353 } 1354 1355 } 1356 1357 // Note: os::abort() might be called very early during initialization, or 1358 // called from signal handler. Before adding something to os::abort(), make 1359 // sure it is async-safe and can handle partially initialized VM. 1360 void os::abort(bool dump_core, void* siginfo, const void* context) { 1361 os::shutdown(); 1362 if (dump_core) { 1363 #ifndef PRODUCT 1364 fdStream out(defaultStream::output_fd()); 1365 out.print_raw("Current thread is "); 1366 char buf[16]; 1367 jio_snprintf(buf, sizeof(buf), UINTX_FORMAT, os::current_thread_id()); 1368 out.print_raw_cr(buf); 1369 out.print_raw_cr("Dumping core ..."); 1370 #endif 1371 ::abort(); // dump core 1372 } 1373 1374 ::exit(1); 1375 } 1376 1377 // Die immediately, no exit hook, no abort hook, no cleanup. 1378 void os::die() { 1379 ::abort(); 1380 } 1381 1382 1383 // This method is a copy of JDK's sysGetLastErrorString 1384 // from src/solaris/hpi/src/system_md.c 1385 1386 size_t os::lasterror(char *buf, size_t len) { 1387 if (errno == 0) return 0; 1388 1389 const char *s = ::strerror(errno); 1390 size_t n = ::strlen(s); 1391 if (n >= len) { 1392 n = len - 1; 1393 } 1394 ::strncpy(buf, s, n); 1395 buf[n] = '\0'; 1396 return n; 1397 } 1398 1399 // thread_id is kernel thread id (similar to Solaris LWP id) 1400 intx os::current_thread_id() { return os::Linux::gettid(); } 1401 int os::current_process_id() { 1402 return ::getpid(); 1403 } 1404 1405 // DLL functions 1406 1407 const char* os::dll_file_extension() { return ".so"; } 1408 1409 // This must be hard coded because it's the system's temporary 1410 // directory not the java application's temp directory, ala java.io.tmpdir. 
1411 const char* os::get_temp_directory() { return "/tmp"; } 1412 1413 static bool file_exists(const char* filename) { 1414 struct stat statbuf; 1415 if (filename == NULL || strlen(filename) == 0) { 1416 return false; 1417 } 1418 return os::stat(filename, &statbuf) == 0; 1419 } 1420 1421 bool os::dll_build_name(char* buffer, size_t buflen, 1422 const char* pname, const char* fname) { 1423 bool retval = false; 1424 // Copied from libhpi 1425 const size_t pnamelen = pname ? strlen(pname) : 0; 1426 1427 // Return error on buffer overflow. 1428 if (pnamelen + strlen(fname) + 10 > (size_t) buflen) { 1429 return retval; 1430 } 1431 1432 if (pnamelen == 0) { 1433 snprintf(buffer, buflen, "lib%s.so", fname); 1434 retval = true; 1435 } else if (strchr(pname, *os::path_separator()) != NULL) { 1436 int n; 1437 char** pelements = split_path(pname, &n); 1438 if (pelements == NULL) { 1439 return false; 1440 } 1441 for (int i = 0; i < n; i++) { 1442 // Really shouldn't be NULL, but check can't hurt 1443 if (pelements[i] == NULL || strlen(pelements[i]) == 0) { 1444 continue; // skip the empty path values 1445 } 1446 snprintf(buffer, buflen, "%s/lib%s.so", pelements[i], fname); 1447 if (file_exists(buffer)) { 1448 retval = true; 1449 break; 1450 } 1451 } 1452 // release the storage 1453 for (int i = 0; i < n; i++) { 1454 if (pelements[i] != NULL) { 1455 FREE_C_HEAP_ARRAY(char, pelements[i]); 1456 } 1457 } 1458 if (pelements != NULL) { 1459 FREE_C_HEAP_ARRAY(char*, pelements); 1460 } 1461 } else { 1462 snprintf(buffer, buflen, "%s/lib%s.so", pname, fname); 1463 retval = true; 1464 } 1465 return retval; 1466 } 1467 1468 // check if addr is inside libjvm.so 1469 bool os::address_is_in_vm(address addr) { 1470 static address libjvm_base_addr; 1471 Dl_info dlinfo; 1472 1473 if (libjvm_base_addr == NULL) { 1474 if (dladdr(CAST_FROM_FN_PTR(void *, os::address_is_in_vm), &dlinfo) != 0) { 1475 libjvm_base_addr = (address)dlinfo.dli_fbase; 1476 } 1477 assert(libjvm_base_addr !=NULL, "Cannot obtain base address for libjvm"); 1478 } 1479 1480 if (dladdr((void *)addr, &dlinfo) != 0) { 1481 if (libjvm_base_addr == (address)dlinfo.dli_fbase) return true; 1482 } 1483 1484 return false; 1485 } 1486 1487 bool os::dll_address_to_function_name(address addr, char *buf, 1488 int buflen, int *offset, 1489 bool demangle) { 1490 // buf is not optional, but offset is optional 1491 assert(buf != NULL, "sanity check"); 1492 1493 Dl_info dlinfo; 1494 1495 if (dladdr((void*)addr, &dlinfo) != 0) { 1496 // see if we have a matching symbol 1497 if (dlinfo.dli_saddr != NULL && dlinfo.dli_sname != NULL) { 1498 if (!(demangle && Decoder::demangle(dlinfo.dli_sname, buf, buflen))) { 1499 jio_snprintf(buf, buflen, "%s", dlinfo.dli_sname); 1500 } 1501 if (offset != NULL) *offset = addr - (address)dlinfo.dli_saddr; 1502 return true; 1503 } 1504 // no matching symbol so try for just file info 1505 if (dlinfo.dli_fname != NULL && dlinfo.dli_fbase != NULL) { 1506 if (Decoder::decode((address)(addr - (address)dlinfo.dli_fbase), 1507 buf, buflen, offset, dlinfo.dli_fname, demangle)) { 1508 return true; 1509 } 1510 } 1511 } 1512 1513 buf[0] = '\0'; 1514 if (offset != NULL) *offset = -1; 1515 return false; 1516 } 1517 1518 struct _address_to_library_name { 1519 address addr; // input : memory address 1520 size_t buflen; // size of fname 1521 char* fname; // output: library name 1522 address base; // library base addr 1523 }; 1524 1525 static int address_to_library_name_callback(struct dl_phdr_info *info, 1526 size_t size, void *data) { 1527 int i; 1528 
bool found = false; 1529 address libbase = NULL; 1530 struct _address_to_library_name * d = (struct _address_to_library_name *)data; 1531 1532 // iterate through all loadable segments 1533 for (i = 0; i < info->dlpi_phnum; i++) { 1534 address segbase = (address)(info->dlpi_addr + info->dlpi_phdr[i].p_vaddr); 1535 if (info->dlpi_phdr[i].p_type == PT_LOAD) { 1536 // base address of a library is the lowest address of its loaded 1537 // segments. 1538 if (libbase == NULL || libbase > segbase) { 1539 libbase = segbase; 1540 } 1541 // see if 'addr' is within current segment 1542 if (segbase <= d->addr && 1543 d->addr < segbase + info->dlpi_phdr[i].p_memsz) { 1544 found = true; 1545 } 1546 } 1547 } 1548 1549 // dlpi_name is NULL or empty if the ELF file is executable, return 0 1550 // so dll_address_to_library_name() can fall through to use dladdr() which 1551 // can figure out executable name from argv[0]. 1552 if (found && info->dlpi_name && info->dlpi_name[0]) { 1553 d->base = libbase; 1554 if (d->fname) { 1555 jio_snprintf(d->fname, d->buflen, "%s", info->dlpi_name); 1556 } 1557 return 1; 1558 } 1559 return 0; 1560 } 1561 1562 bool os::dll_address_to_library_name(address addr, char* buf, 1563 int buflen, int* offset) { 1564 // buf is not optional, but offset is optional 1565 assert(buf != NULL, "sanity check"); 1566 1567 Dl_info dlinfo; 1568 struct _address_to_library_name data; 1569 1570 // There is a bug in old glibc dladdr() implementation that it could resolve 1571 // to wrong library name if the .so file has a base address != NULL. Here 1572 // we iterate through the program headers of all loaded libraries to find 1573 // out which library 'addr' really belongs to. This workaround can be 1574 // removed once the minimum requirement for glibc is moved to 2.3.x. 1575 data.addr = addr; 1576 data.fname = buf; 1577 data.buflen = buflen; 1578 data.base = NULL; 1579 int rslt = dl_iterate_phdr(address_to_library_name_callback, (void *)&data); 1580 1581 if (rslt) { 1582 // buf already contains library name 1583 if (offset) *offset = addr - data.base; 1584 return true; 1585 } 1586 if (dladdr((void*)addr, &dlinfo) != 0) { 1587 if (dlinfo.dli_fname != NULL) { 1588 jio_snprintf(buf, buflen, "%s", dlinfo.dli_fname); 1589 } 1590 if (dlinfo.dli_fbase != NULL && offset != NULL) { 1591 *offset = addr - (address)dlinfo.dli_fbase; 1592 } 1593 return true; 1594 } 1595 1596 buf[0] = '\0'; 1597 if (offset) *offset = -1; 1598 return false; 1599 } 1600 1601 // Loads .dll/.so and 1602 // in case of error it checks if .dll/.so was built for the 1603 // same architecture as Hotspot is running on 1604 1605 1606 // Remember the stack's state. The Linux dynamic linker will change 1607 // the stack to 'executable' at most once, so we must safepoint only once. 1608 bool os::Linux::_stack_is_executable = false; 1609 1610 // VM operation that loads a library. This is necessary if stack protection 1611 // of the Java stacks can be lost during loading the library. If we 1612 // do not stop the Java threads, they can stack overflow before the stacks 1613 // are protected again. 
1614 class VM_LinuxDllLoad: public VM_Operation { 1615 private: 1616 const char *_filename; 1617 char *_ebuf; 1618 int _ebuflen; 1619 void *_lib; 1620 public: 1621 VM_LinuxDllLoad(const char *fn, char *ebuf, int ebuflen) : 1622 _filename(fn), _ebuf(ebuf), _ebuflen(ebuflen), _lib(NULL) {} 1623 VMOp_Type type() const { return VMOp_LinuxDllLoad; } 1624 void doit() { 1625 _lib = os::Linux::dll_load_in_vmthread(_filename, _ebuf, _ebuflen); 1626 os::Linux::_stack_is_executable = true; 1627 } 1628 void* loaded_library() { return _lib; } 1629 }; 1630 1631 void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { 1632 void * result = NULL; 1633 bool load_attempted = false; 1634 1635 // Check whether the library to load might change execution rights 1636 // of the stack. If they are changed, the protection of the stack 1637 // guard pages will be lost. We need a safepoint to fix this. 1638 // 1639 // See Linux man page execstack(8) for more info. 1640 if (os::uses_stack_guard_pages() && !os::Linux::_stack_is_executable) { 1641 ElfFile ef(filename); 1642 if (!ef.specifies_noexecstack()) { 1643 if (!is_init_completed()) { 1644 os::Linux::_stack_is_executable = true; 1645 // This is OK - No Java threads have been created yet, and hence no 1646 // stack guard pages to fix. 1647 // 1648 // This should happen only when you are building JDK7 using a very 1649 // old version of JDK6 (e.g., with JPRT) and running test_gamma. 1650 // 1651 // Dynamic loader will make all stacks executable after 1652 // this function returns, and will not do that again. 1653 assert(Threads::first() == NULL, "no Java threads should exist yet."); 1654 } else { 1655 warning("You have loaded library %s which might have disabled stack guard. " 1656 "The VM will try to fix the stack guard now.\n" 1657 "It's highly recommended that you fix the library with " 1658 "'execstack -c <libfile>', or link it with '-z noexecstack'.", 1659 filename); 1660 1661 assert(Thread::current()->is_Java_thread(), "must be Java thread"); 1662 JavaThread *jt = JavaThread::current(); 1663 if (jt->thread_state() != _thread_in_native) { 1664 // This happens when a compiler thread tries to load a hsdis-<arch>.so file 1665 // that requires ExecStack. Cannot enter safe point. Let's give up. 1666 warning("Unable to fix stack guard. Giving up."); 1667 } else { 1668 if (!LoadExecStackDllInVMThread) { 1669 // This is for the case where the DLL has an static 1670 // constructor function that executes JNI code. We cannot 1671 // load such DLLs in the VMThread. 
1672 result = os::Linux::dlopen_helper(filename, ebuf, ebuflen); 1673 } 1674 1675 ThreadInVMfromNative tiv(jt); 1676 debug_only(VMNativeEntryWrapper vew;) 1677 1678 VM_LinuxDllLoad op(filename, ebuf, ebuflen); 1679 VMThread::execute(&op); 1680 if (LoadExecStackDllInVMThread) { 1681 result = op.loaded_library(); 1682 } 1683 load_attempted = true; 1684 } 1685 } 1686 } 1687 } 1688 1689 if (!load_attempted) { 1690 result = os::Linux::dlopen_helper(filename, ebuf, ebuflen); 1691 } 1692 1693 if (result != NULL) { 1694 // Successful loading 1695 return result; 1696 } 1697 1698 Elf32_Ehdr elf_head; 1699 int diag_msg_max_length=ebuflen-strlen(ebuf); 1700 char* diag_msg_buf=ebuf+strlen(ebuf); 1701 1702 if (diag_msg_max_length==0) { 1703 // No more space in ebuf for additional diagnostics message 1704 return NULL; 1705 } 1706 1707 1708 int file_descriptor= ::open(filename, O_RDONLY | O_NONBLOCK); 1709 1710 if (file_descriptor < 0) { 1711 // Can't open library, report dlerror() message 1712 return NULL; 1713 } 1714 1715 bool failed_to_read_elf_head= 1716 (sizeof(elf_head)!= 1717 (::read(file_descriptor, &elf_head,sizeof(elf_head)))); 1718 1719 ::close(file_descriptor); 1720 if (failed_to_read_elf_head) { 1721 // file i/o error - report dlerror() msg 1722 return NULL; 1723 } 1724 1725 typedef struct { 1726 Elf32_Half code; // Actual value as defined in elf.h 1727 Elf32_Half compat_class; // Compatibility of archs from the VM's point of view 1728 char elf_class; // 32 or 64 bit 1729 char endianess; // MSB or LSB 1730 char* name; // String representation 1731 } arch_t; 1732 1733 #ifndef EM_486 1734 #define EM_486 6 /* Intel 80486 */ 1735 #endif 1736 #ifndef EM_AARCH64 1737 #define EM_AARCH64 183 /* ARM AARCH64 */ 1738 #endif 1739 1740 static const arch_t arch_array[]={ 1741 {EM_386, EM_386, ELFCLASS32, ELFDATA2LSB, (char*)"IA 32"}, 1742 {EM_486, EM_386, ELFCLASS32, ELFDATA2LSB, (char*)"IA 32"}, 1743 {EM_IA_64, EM_IA_64, ELFCLASS64, ELFDATA2LSB, (char*)"IA 64"}, 1744 {EM_X86_64, EM_X86_64, ELFCLASS64, ELFDATA2LSB, (char*)"AMD 64"}, 1745 {EM_SPARC, EM_SPARC, ELFCLASS32, ELFDATA2MSB, (char*)"Sparc 32"}, 1746 {EM_SPARC32PLUS, EM_SPARC, ELFCLASS32, ELFDATA2MSB, (char*)"Sparc 32"}, 1747 {EM_SPARCV9, EM_SPARCV9, ELFCLASS64, ELFDATA2MSB, (char*)"Sparc v9 64"}, 1748 {EM_PPC, EM_PPC, ELFCLASS32, ELFDATA2MSB, (char*)"Power PC 32"}, 1749 #if defined(VM_LITTLE_ENDIAN) 1750 {EM_PPC64, EM_PPC64, ELFCLASS64, ELFDATA2LSB, (char*)"Power PC 64 LE"}, 1751 #else 1752 {EM_PPC64, EM_PPC64, ELFCLASS64, ELFDATA2MSB, (char*)"Power PC 64"}, 1753 #endif 1754 {EM_ARM, EM_ARM, ELFCLASS32, ELFDATA2LSB, (char*)"ARM"}, 1755 {EM_S390, EM_S390, ELFCLASSNONE, ELFDATA2MSB, (char*)"IBM System/390"}, 1756 {EM_ALPHA, EM_ALPHA, ELFCLASS64, ELFDATA2LSB, (char*)"Alpha"}, 1757 {EM_MIPS_RS3_LE, EM_MIPS_RS3_LE, ELFCLASS32, ELFDATA2LSB, (char*)"MIPSel"}, 1758 {EM_MIPS, EM_MIPS, ELFCLASS32, ELFDATA2MSB, (char*)"MIPS"}, 1759 {EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"}, 1760 {EM_68K, EM_68K, ELFCLASS32, ELFDATA2MSB, (char*)"M68k"}, 1761 {EM_AARCH64, EM_AARCH64, ELFCLASS64, ELFDATA2LSB, (char*)"AARCH64"}, 1762 }; 1763 1764 #if (defined IA32) 1765 static Elf32_Half running_arch_code=EM_386; 1766 #elif (defined AMD64) 1767 static Elf32_Half running_arch_code=EM_X86_64; 1768 #elif (defined IA64) 1769 static Elf32_Half running_arch_code=EM_IA_64; 1770 #elif (defined __sparc) && (defined _LP64) 1771 static Elf32_Half running_arch_code=EM_SPARCV9; 1772 #elif (defined __sparc) && (!defined _LP64) 1773 static Elf32_Half running_arch_code=EM_SPARC; 1774
#elif (defined __powerpc64__) 1775 static Elf32_Half running_arch_code=EM_PPC64; 1776 #elif (defined __powerpc__) 1777 static Elf32_Half running_arch_code=EM_PPC; 1778 #elif (defined ARM) 1779 static Elf32_Half running_arch_code=EM_ARM; 1780 #elif (defined S390) 1781 static Elf32_Half running_arch_code=EM_S390; 1782 #elif (defined ALPHA) 1783 static Elf32_Half running_arch_code=EM_ALPHA; 1784 #elif (defined MIPSEL) 1785 static Elf32_Half running_arch_code=EM_MIPS_RS3_LE; 1786 #elif (defined PARISC) 1787 static Elf32_Half running_arch_code=EM_PARISC; 1788 #elif (defined MIPS) 1789 static Elf32_Half running_arch_code=EM_MIPS; 1790 #elif (defined M68K) 1791 static Elf32_Half running_arch_code=EM_68K; 1792 #elif (defined AARCH64) 1793 static Elf32_Half running_arch_code=EM_AARCH64; 1794 #else 1795 #error Method os::dll_load requires that one of following is defined:\ 1796 IA32, AMD64, IA64, __sparc, __powerpc__, ARM, S390, ALPHA, MIPS, MIPSEL, PARISC, M68K, AARCH64 1797 #endif 1798 1799 // Identify compatability class for VM's architecture and library's architecture 1800 // Obtain string descriptions for architectures 1801 1802 arch_t lib_arch={elf_head.e_machine,0,elf_head.e_ident[EI_CLASS], elf_head.e_ident[EI_DATA], NULL}; 1803 int running_arch_index=-1; 1804 1805 for (unsigned int i=0; i < ARRAY_SIZE(arch_array); i++) { 1806 if (running_arch_code == arch_array[i].code) { 1807 running_arch_index = i; 1808 } 1809 if (lib_arch.code == arch_array[i].code) { 1810 lib_arch.compat_class = arch_array[i].compat_class; 1811 lib_arch.name = arch_array[i].name; 1812 } 1813 } 1814 1815 assert(running_arch_index != -1, 1816 "Didn't find running architecture code (running_arch_code) in arch_array"); 1817 if (running_arch_index == -1) { 1818 // Even though running architecture detection failed 1819 // we may still continue with reporting dlerror() message 1820 return NULL; 1821 } 1822 1823 if (lib_arch.endianess != arch_array[running_arch_index].endianess) { 1824 ::snprintf(diag_msg_buf, diag_msg_max_length-1," (Possible cause: endianness mismatch)"); 1825 return NULL; 1826 } 1827 1828 #ifndef S390 1829 if (lib_arch.elf_class != arch_array[running_arch_index].elf_class) { 1830 ::snprintf(diag_msg_buf, diag_msg_max_length-1," (Possible cause: architecture word width mismatch)"); 1831 return NULL; 1832 } 1833 #endif // !S390 1834 1835 if (lib_arch.compat_class != arch_array[running_arch_index].compat_class) { 1836 if (lib_arch.name!=NULL) { 1837 ::snprintf(diag_msg_buf, diag_msg_max_length-1, 1838 " (Possible cause: can't load %s-bit .so on a %s-bit platform)", 1839 lib_arch.name, arch_array[running_arch_index].name); 1840 } else { 1841 ::snprintf(diag_msg_buf, diag_msg_max_length-1, 1842 " (Possible cause: can't load this .so (machine code=0x%x) on a %s-bit platform)", 1843 lib_arch.code, 1844 arch_array[running_arch_index].name); 1845 } 1846 } 1847 1848 return NULL; 1849 } 1850 1851 void * os::Linux::dlopen_helper(const char *filename, char *ebuf, 1852 int ebuflen) { 1853 void * result = ::dlopen(filename, RTLD_LAZY); 1854 if (result == NULL) { 1855 ::strncpy(ebuf, ::dlerror(), ebuflen - 1); 1856 ebuf[ebuflen-1] = '\0'; 1857 } 1858 return result; 1859 } 1860 1861 void * os::Linux::dll_load_in_vmthread(const char *filename, char *ebuf, 1862 int ebuflen) { 1863 void * result = NULL; 1864 if (LoadExecStackDllInVMThread) { 1865 result = dlopen_helper(filename, ebuf, ebuflen); 1866 } 1867 1868 // Since 7019808, libjvm.so is linked with -noexecstack. 
If the VM loads a 1869 // library that requires an executable stack, or which does not have this 1870 // stack attribute set, dlopen changes the stack attribute to executable. The 1871 // read protection of the guard pages gets lost. 1872 // 1873 // Need to check _stack_is_executable again as multiple VM_LinuxDllLoad 1874 // may have been queued at the same time. 1875 1876 if (!_stack_is_executable) { 1877 JavaThread *jt = Threads::first(); 1878 1879 while (jt) { 1880 if (!jt->stack_guard_zone_unused() && // Stack not yet fully initialized 1881 jt->stack_guards_enabled()) { // No pending stack overflow exceptions 1882 if (!os::guard_memory((char *)jt->stack_end(), jt->stack_guard_zone_size())) { 1883 warning("Attempt to reguard stack yellow zone failed."); 1884 } 1885 } 1886 jt = jt->next(); 1887 } 1888 } 1889 1890 return result; 1891 } 1892 1893 void* os::dll_lookup(void* handle, const char* name) { 1894 void* res = dlsym(handle, name); 1895 return res; 1896 } 1897 1898 void* os::get_default_process_handle() { 1899 return (void*)::dlopen(NULL, RTLD_LAZY); 1900 } 1901 1902 static bool _print_ascii_file(const char* filename, outputStream* st) { 1903 int fd = ::open(filename, O_RDONLY); 1904 if (fd == -1) { 1905 return false; 1906 } 1907 1908 char buf[32]; 1909 int bytes; 1910 while ((bytes = ::read(fd, buf, sizeof(buf))) > 0) { 1911 st->print_raw(buf, bytes); 1912 } 1913 1914 ::close(fd); 1915 1916 return true; 1917 } 1918 1919 void os::print_dll_info(outputStream *st) { 1920 st->print_cr("Dynamic libraries:"); 1921 1922 char fname[32]; 1923 pid_t pid = os::Linux::gettid(); 1924 1925 jio_snprintf(fname, sizeof(fname), "/proc/%d/maps", pid); 1926 1927 if (!_print_ascii_file(fname, st)) { 1928 st->print("Can not get library information for pid = %d\n", pid); 1929 } 1930 } 1931 1932 int os::get_loaded_modules_info(os::LoadedModulesCallbackFunc callback, void *param) { 1933 FILE *procmapsFile = NULL; 1934 1935 // Open the procfs maps file for the current process 1936 if ((procmapsFile = fopen("/proc/self/maps", "r")) != NULL) { 1937 // Allocate PATH_MAX for file name plus a reasonable size for other fields. 1938 char line[PATH_MAX + 100]; 1939 1940 // Read line by line from 'file' 1941 while (fgets(line, sizeof(line), procmapsFile) != NULL) { 1942 u8 base, top, offset, inode; 1943 char permissions[5]; 1944 char device[6]; 1945 char name[PATH_MAX + 1]; 1946 1947 // Parse fields from line 1948 sscanf(line, UINT64_FORMAT_X "-" UINT64_FORMAT_X " %4s " UINT64_FORMAT_X " %5s " INT64_FORMAT " %s", 1949 &base, &top, permissions, &offset, device, &inode, name); 1950 1951 // Filter by device id '00:00' so that we only get file system mapped files. 1952 if (strcmp(device, "00:00") != 0) { 1953 1954 // Call callback with the fields of interest 1955 if(callback(name, (address)base, (address)top, param)) { 1956 // Oops abort, callback aborted 1957 fclose(procmapsFile); 1958 return 1; 1959 } 1960 } 1961 } 1962 fclose(procmapsFile); 1963 } 1964 return 0; 1965 } 1966 1967 void os::print_os_info_brief(outputStream* st) { 1968 os::Linux::print_distro_info(st); 1969 1970 os::Posix::print_uname_info(st); 1971 1972 os::Linux::print_libversion_info(st); 1973 1974 } 1975 1976 void os::print_os_info(outputStream* st) { 1977 st->print("OS:"); 1978 1979 os::Linux::print_distro_info(st); 1980 1981 os::Posix::print_uname_info(st); 1982 1983 // Print warning if unsafe chroot environment detected 1984 if (unsafe_chroot_detected) { 1985 st->print("WARNING!! 
"); 1986 st->print_cr("%s", unstable_chroot_error); 1987 } 1988 1989 os::Linux::print_libversion_info(st); 1990 1991 os::Posix::print_rlimit_info(st); 1992 1993 os::Posix::print_load_average(st); 1994 1995 os::Linux::print_full_memory_info(st); 1996 } 1997 1998 // Try to identify popular distros. 1999 // Most Linux distributions have a /etc/XXX-release file, which contains 2000 // the OS version string. Newer Linux distributions have a /etc/lsb-release 2001 // file that also contains the OS version string. Some have more than one 2002 // /etc/XXX-release file (e.g. Mandrake has both /etc/mandrake-release and 2003 // /etc/redhat-release.), so the order is important. 2004 // Any Linux that is based on Redhat (i.e. Oracle, Mandrake, Sun JDS...) have 2005 // their own specific XXX-release file as well as a redhat-release file. 2006 // Because of this the XXX-release file needs to be searched for before the 2007 // redhat-release file. 2008 // Since Red Hat has a lsb-release file that is not very descriptive the 2009 // search for redhat-release needs to be before lsb-release. 2010 // Since the lsb-release file is the new standard it needs to be searched 2011 // before the older style release files. 2012 // Searching system-release (Red Hat) and os-release (other Linuxes) are a 2013 // next to last resort. The os-release file is a new standard that contains 2014 // distribution information and the system-release file seems to be an old 2015 // standard that has been replaced by the lsb-release and os-release files. 2016 // Searching for the debian_version file is the last resort. It contains 2017 // an informative string like "6.0.6" or "wheezy/sid". Because of this 2018 // "Debian " is printed before the contents of the debian_version file. 2019 2020 const char* distro_files[] = { 2021 "/etc/oracle-release", 2022 "/etc/mandriva-release", 2023 "/etc/mandrake-release", 2024 "/etc/sun-release", 2025 "/etc/redhat-release", 2026 "/etc/lsb-release", 2027 "/etc/SuSE-release", 2028 "/etc/turbolinux-release", 2029 "/etc/gentoo-release", 2030 "/etc/ltib-release", 2031 "/etc/angstrom-version", 2032 "/etc/system-release", 2033 "/etc/os-release", 2034 NULL }; 2035 2036 void os::Linux::print_distro_info(outputStream* st) { 2037 for (int i = 0;; i++) { 2038 const char* file = distro_files[i]; 2039 if (file == NULL) { 2040 break; // done 2041 } 2042 // If file prints, we found it. 2043 if (_print_ascii_file(file, st)) { 2044 return; 2045 } 2046 } 2047 2048 if (file_exists("/etc/debian_version")) { 2049 st->print("Debian "); 2050 _print_ascii_file("/etc/debian_version", st); 2051 } else { 2052 st->print("Linux"); 2053 } 2054 st->cr(); 2055 } 2056 2057 static void parse_os_info(char* distro, size_t length, const char* file) { 2058 FILE* fp = fopen(file, "r"); 2059 if (fp != NULL) { 2060 char buf[256]; 2061 // get last line of the file. 
2062 while (fgets(buf, sizeof(buf), fp)) { } 2063 // Edit out extra stuff in expected ubuntu format 2064 if (strstr(buf, "DISTRIB_DESCRIPTION=") != NULL) { 2065 char* ptr = strstr(buf, "\""); // the name is in quotes 2066 if (ptr != NULL) { 2067 ptr++; // go beyond first quote 2068 char* nl = strchr(ptr, '\"'); 2069 if (nl != NULL) *nl = '\0'; 2070 strncpy(distro, ptr, length); 2071 } else { 2072 ptr = strstr(buf, "="); 2073 ptr++; // go beyond equals then 2074 char* nl = strchr(ptr, '\n'); 2075 if (nl != NULL) *nl = '\0'; 2076 strncpy(distro, ptr, length); 2077 } 2078 } else { 2079 // if not in expected Ubuntu format, print out whole line minus \n 2080 char* nl = strchr(buf, '\n'); 2081 if (nl != NULL) *nl = '\0'; 2082 strncpy(distro, buf, length); 2083 } 2084 // close distro file 2085 fclose(fp); 2086 } 2087 } 2088 2089 void os::get_summary_os_info(char* buf, size_t buflen) { 2090 for (int i = 0;; i++) { 2091 const char* file = distro_files[i]; 2092 if (file == NULL) { 2093 break; // ran out of distro_files 2094 } 2095 if (file_exists(file)) { 2096 parse_os_info(buf, buflen, file); 2097 return; 2098 } 2099 } 2100 // special case for debian 2101 if (file_exists("/etc/debian_version")) { 2102 strncpy(buf, "Debian ", buflen); 2103 parse_os_info(&buf[7], buflen-7, "/etc/debian_version"); 2104 } else { 2105 strncpy(buf, "Linux", buflen); 2106 } 2107 } 2108 2109 void os::Linux::print_libversion_info(outputStream* st) { 2110 // libc, pthread 2111 st->print("libc:"); 2112 st->print("%s ", os::Linux::glibc_version()); 2113 st->print("%s ", os::Linux::libpthread_version()); 2114 st->cr(); 2115 } 2116 2117 void os::Linux::print_full_memory_info(outputStream* st) { 2118 st->print("\n/proc/meminfo:\n"); 2119 _print_ascii_file("/proc/meminfo", st); 2120 st->cr(); 2121 } 2122 2123 void os::print_memory_info(outputStream* st) { 2124 2125 st->print("Memory:"); 2126 st->print(" %dk page", os::vm_page_size()>>10); 2127 2128 // values in struct sysinfo are "unsigned long" 2129 struct sysinfo si; 2130 sysinfo(&si); 2131 2132 st->print(", physical " UINT64_FORMAT "k", 2133 os::physical_memory() >> 10); 2134 st->print("(" UINT64_FORMAT "k free)", 2135 os::available_memory() >> 10); 2136 st->print(", swap " UINT64_FORMAT "k", 2137 ((jlong)si.totalswap * si.mem_unit) >> 10); 2138 st->print("(" UINT64_FORMAT "k free)", 2139 ((jlong)si.freeswap * si.mem_unit) >> 10); 2140 st->cr(); 2141 } 2142 2143 // Print the first "model name" line and the first "flags" line 2144 // that we find and nothing more. We assume "model name" comes 2145 // before "flags" so if we find a second "model name", then the 2146 // "flags" field is considered missing. 2147 static bool print_model_name_and_flags(outputStream* st, char* buf, size_t buflen) { 2148 #if defined(IA32) || defined(AMD64) 2149 // Other platforms have less repetitive cpuinfo files 2150 FILE *fp = fopen("/proc/cpuinfo", "r"); 2151 if (fp) { 2152 while (!feof(fp)) { 2153 if (fgets(buf, buflen, fp)) { 2154 // Assume model name comes before flags 2155 bool model_name_printed = false; 2156 if (strstr(buf, "model name") != NULL) { 2157 if (!model_name_printed) { 2158 st->print_raw("\nCPU Model and flags from /proc/cpuinfo:\n"); 2159 st->print_raw(buf); 2160 model_name_printed = true; 2161 } else { 2162 // model name printed but not flags? 
Odd, just return 2163 fclose(fp); 2164 return true; 2165 } 2166 } 2167 // print the flags line too 2168 if (strstr(buf, "flags") != NULL) { 2169 st->print_raw(buf); 2170 fclose(fp); 2171 return true; 2172 } 2173 } 2174 } 2175 fclose(fp); 2176 } 2177 #endif // x86 platforms 2178 return false; 2179 } 2180 2181 void os::pd_print_cpu_info(outputStream* st, char* buf, size_t buflen) { 2182 // Only print the model name if the platform provides this as a summary 2183 if (!print_model_name_and_flags(st, buf, buflen)) { 2184 st->print("\n/proc/cpuinfo:\n"); 2185 if (!_print_ascii_file("/proc/cpuinfo", st)) { 2186 st->print_cr(" <Not Available>"); 2187 } 2188 } 2189 } 2190 2191 #if defined(AMD64) || defined(IA32) || defined(X32) 2192 const char* search_string = "model name"; 2193 #elif defined(SPARC) 2194 const char* search_string = "cpu"; 2195 #elif defined(PPC64) 2196 const char* search_string = "cpu"; 2197 #else 2198 const char* search_string = "Processor"; 2199 #endif 2200 2201 // Parses the cpuinfo file for string representing the model name. 2202 void os::get_summary_cpu_info(char* cpuinfo, size_t length) { 2203 FILE* fp = fopen("/proc/cpuinfo", "r"); 2204 if (fp != NULL) { 2205 while (!feof(fp)) { 2206 char buf[256]; 2207 if (fgets(buf, sizeof(buf), fp)) { 2208 char* start = strstr(buf, search_string); 2209 if (start != NULL) { 2210 char *ptr = start + strlen(search_string); 2211 char *end = buf + strlen(buf); 2212 while (ptr != end) { 2213 // skip whitespace and colon for the rest of the name. 2214 if (*ptr != ' ' && *ptr != '\t' && *ptr != ':') { 2215 break; 2216 } 2217 ptr++; 2218 } 2219 if (ptr != end) { 2220 // reasonable string, get rid of newline and keep the rest 2221 char* nl = strchr(buf, '\n'); 2222 if (nl != NULL) *nl = '\0'; 2223 strncpy(cpuinfo, ptr, length); 2224 fclose(fp); 2225 return; 2226 } 2227 } 2228 } 2229 } 2230 fclose(fp); 2231 } 2232 // cpuinfo not found or parsing failed, just print generic string. 
The entire 2233 // /proc/cpuinfo file will be printed later in the file (or enough of it for x86) 2234 #if defined(AMD64) 2235 strncpy(cpuinfo, "x86_64", length); 2236 #elif defined(IA32) 2237 strncpy(cpuinfo, "x86_32", length); 2238 #elif defined(IA64) 2239 strncpy(cpuinfo, "IA64", length); 2240 #elif defined(SPARC) 2241 strncpy(cpuinfo, "sparcv9", length); 2242 #elif defined(AARCH64) 2243 strncpy(cpuinfo, "AArch64", length); 2244 #elif defined(ARM) 2245 strncpy(cpuinfo, "ARM", length); 2246 #elif defined(PPC) 2247 strncpy(cpuinfo, "PPC64", length); 2248 #elif defined(ZERO_LIBARCH) 2249 strncpy(cpuinfo, ZERO_LIBARCH, length); 2250 #else 2251 strncpy(cpuinfo, "unknown", length); 2252 #endif 2253 } 2254 2255 static void print_signal_handler(outputStream* st, int sig, 2256 char* buf, size_t buflen); 2257 2258 void os::print_signal_handlers(outputStream* st, char* buf, size_t buflen) { 2259 st->print_cr("Signal Handlers:"); 2260 print_signal_handler(st, SIGSEGV, buf, buflen); 2261 print_signal_handler(st, SIGBUS , buf, buflen); 2262 print_signal_handler(st, SIGFPE , buf, buflen); 2263 print_signal_handler(st, SIGPIPE, buf, buflen); 2264 print_signal_handler(st, SIGXFSZ, buf, buflen); 2265 print_signal_handler(st, SIGILL , buf, buflen); 2266 print_signal_handler(st, SR_signum, buf, buflen); 2267 print_signal_handler(st, SHUTDOWN1_SIGNAL, buf, buflen); 2268 print_signal_handler(st, SHUTDOWN2_SIGNAL , buf, buflen); 2269 print_signal_handler(st, SHUTDOWN3_SIGNAL , buf, buflen); 2270 print_signal_handler(st, BREAK_SIGNAL, buf, buflen); 2271 #if defined(PPC64) 2272 print_signal_handler(st, SIGTRAP, buf, buflen); 2273 #endif 2274 } 2275 2276 static char saved_jvm_path[MAXPATHLEN] = {0}; 2277 2278 // Find the full path to the current module, libjvm.so 2279 void os::jvm_path(char *buf, jint buflen) { 2280 // Error checking. 2281 if (buflen < MAXPATHLEN) { 2282 assert(false, "must use a large-enough buffer"); 2283 buf[0] = '\0'; 2284 return; 2285 } 2286 // Lazy resolve the path to current module. 2287 if (saved_jvm_path[0] != 0) { 2288 strcpy(buf, saved_jvm_path); 2289 return; 2290 } 2291 2292 char dli_fname[MAXPATHLEN]; 2293 bool ret = dll_address_to_library_name( 2294 CAST_FROM_FN_PTR(address, os::jvm_path), 2295 dli_fname, sizeof(dli_fname), NULL); 2296 assert(ret, "cannot locate libjvm"); 2297 char *rp = NULL; 2298 if (ret && dli_fname[0] != '\0') { 2299 rp = realpath(dli_fname, buf); 2300 } 2301 if (rp == NULL) { 2302 return; 2303 } 2304 2305 if (Arguments::sun_java_launcher_is_altjvm()) { 2306 // Support for the java launcher's '-XXaltjvm=<path>' option. Typical 2307 // value for buf is "<JAVA_HOME>/jre/lib/<arch>/<vmtype>/libjvm.so". 2308 // If "/jre/lib/" appears at the right place in the string, then 2309 // assume we are installed in a JDK and we're done. Otherwise, check 2310 // for a JAVA_HOME environment variable and fix up the path so it 2311 // looks like libjvm.so is installed there (append a fake suffix 2312 // hotspot/libjvm.so). 2313 const char *p = buf + strlen(buf) - 1; 2314 for (int count = 0; p > buf && count < 5; ++count) { 2315 for (--p; p > buf && *p != '/'; --p) 2316 /* empty */ ; 2317 } 2318 2319 if (strncmp(p, "/jre/lib/", 9) != 0) { 2320 // Look for JAVA_HOME in the environment. 2321 char* java_home_var = ::getenv("JAVA_HOME"); 2322 if (java_home_var != NULL && java_home_var[0] != 0) { 2323 char* jrelib_p; 2324 int len; 2325 2326 // Check the current module name "libjvm.so". 
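// buf still holds the realpath of the running libjvm.so at this point;
// p will point at its final path component.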
2327 p = strrchr(buf, '/'); 2328 if (p == NULL) { 2329 return; 2330 } 2331 assert(strstr(p, "/libjvm") == p, "invalid library name"); 2332 2333 rp = realpath(java_home_var, buf); 2334 if (rp == NULL) { 2335 return; 2336 } 2337 2338 // determine if this is a legacy image or modules image 2339 // modules image doesn't have "jre" subdirectory 2340 len = strlen(buf); 2341 assert(len < buflen, "Ran out of buffer room"); 2342 jrelib_p = buf + len; 2343 snprintf(jrelib_p, buflen-len, "/jre/lib/%s", cpu_arch); 2344 if (0 != access(buf, F_OK)) { 2345 snprintf(jrelib_p, buflen-len, "/lib/%s", cpu_arch); 2346 } 2347 2348 if (0 == access(buf, F_OK)) { 2349 // Use current module name "libjvm.so" 2350 len = strlen(buf); 2351 snprintf(buf + len, buflen-len, "/hotspot/libjvm.so"); 2352 } else { 2353 // Go back to path of .so 2354 rp = realpath(dli_fname, buf); 2355 if (rp == NULL) { 2356 return; 2357 } 2358 } 2359 } 2360 } 2361 } 2362 2363 strncpy(saved_jvm_path, buf, MAXPATHLEN); 2364 saved_jvm_path[MAXPATHLEN - 1] = '\0'; 2365 } 2366 2367 void os::print_jni_name_prefix_on(outputStream* st, int args_size) { 2368 // no prefix required, not even "_" 2369 } 2370 2371 void os::print_jni_name_suffix_on(outputStream* st, int args_size) { 2372 // no suffix required 2373 } 2374 2375 //////////////////////////////////////////////////////////////////////////////// 2376 // sun.misc.Signal support 2377 2378 static volatile jint sigint_count = 0; 2379 2380 static void UserHandler(int sig, void *siginfo, void *context) { 2381 // 4511530 - sem_post is serialized and handled by the manager thread. When 2382 // the program is interrupted by Ctrl-C, SIGINT is sent to every thread. We 2383 // don't want to flood the manager thread with sem_post requests. 2384 if (sig == SIGINT && Atomic::add(1, &sigint_count) > 1) { 2385 return; 2386 } 2387 2388 // Ctrl-C is pressed during error reporting, likely because the error 2389 // handler fails to abort. Let VM die immediately. 
2390 if (sig == SIGINT && is_error_reported()) { 2391 os::die(); 2392 } 2393 2394 os::signal_notify(sig); 2395 } 2396 2397 void* os::user_handler() { 2398 return CAST_FROM_FN_PTR(void*, UserHandler); 2399 } 2400 2401 struct timespec PosixSemaphore::create_timespec(unsigned int sec, int nsec) { 2402 struct timespec ts; 2403 // Semaphore's are always associated with CLOCK_REALTIME 2404 os::Linux::clock_gettime(CLOCK_REALTIME, &ts); 2405 // see unpackTime for discussion on overflow checking 2406 if (sec >= MAX_SECS) { 2407 ts.tv_sec += MAX_SECS; 2408 ts.tv_nsec = 0; 2409 } else { 2410 ts.tv_sec += sec; 2411 ts.tv_nsec += nsec; 2412 if (ts.tv_nsec >= NANOSECS_PER_SEC) { 2413 ts.tv_nsec -= NANOSECS_PER_SEC; 2414 ++ts.tv_sec; // note: this must be <= max_secs 2415 } 2416 } 2417 2418 return ts; 2419 } 2420 2421 extern "C" { 2422 typedef void (*sa_handler_t)(int); 2423 typedef void (*sa_sigaction_t)(int, siginfo_t *, void *); 2424 } 2425 2426 void* os::signal(int signal_number, void* handler) { 2427 struct sigaction sigAct, oldSigAct; 2428 2429 sigfillset(&(sigAct.sa_mask)); 2430 sigAct.sa_flags = SA_RESTART|SA_SIGINFO; 2431 sigAct.sa_handler = CAST_TO_FN_PTR(sa_handler_t, handler); 2432 2433 if (sigaction(signal_number, &sigAct, &oldSigAct)) { 2434 // -1 means registration failed 2435 return (void *)-1; 2436 } 2437 2438 return CAST_FROM_FN_PTR(void*, oldSigAct.sa_handler); 2439 } 2440 2441 void os::signal_raise(int signal_number) { 2442 ::raise(signal_number); 2443 } 2444 2445 // The following code is moved from os.cpp for making this 2446 // code platform specific, which it is by its very nature. 2447 2448 // Will be modified when max signal is changed to be dynamic 2449 int os::sigexitnum_pd() { 2450 return NSIG; 2451 } 2452 2453 // a counter for each possible signal value 2454 static volatile jint pending_signals[NSIG+1] = { 0 }; 2455 2456 // Linux(POSIX) specific hand shaking semaphore. 2457 static sem_t sig_sem; 2458 static PosixSemaphore sr_semaphore; 2459 2460 void os::signal_init_pd() { 2461 // Initialize signal structures 2462 ::memset((void*)pending_signals, 0, sizeof(pending_signals)); 2463 2464 // Initialize signal semaphore 2465 ::sem_init(&sig_sem, 0, 0); 2466 } 2467 2468 void os::signal_notify(int sig) { 2469 Atomic::inc(&pending_signals[sig]); 2470 ::sem_post(&sig_sem); 2471 } 2472 2473 static int check_pending_signals(bool wait) { 2474 Atomic::store(0, &sigint_count); 2475 for (;;) { 2476 for (int i = 0; i < NSIG + 1; i++) { 2477 jint n = pending_signals[i]; 2478 if (n > 0 && n == Atomic::cmpxchg(n - 1, &pending_signals[i], n)) { 2479 return i; 2480 } 2481 } 2482 if (!wait) { 2483 return -1; 2484 } 2485 JavaThread *thread = JavaThread::current(); 2486 ThreadBlockInVM tbivm(thread); 2487 2488 bool threadIsSuspended; 2489 do { 2490 thread->set_suspend_equivalent(); 2491 // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self() 2492 ::sem_wait(&sig_sem); 2493 2494 // were we externally suspended while we were waiting? 2495 threadIsSuspended = thread->handle_special_suspend_equivalent_condition(); 2496 if (threadIsSuspended) { 2497 // The semaphore has been incremented, but while we were waiting 2498 // another thread suspended us. We don't want to continue running 2499 // while suspended because that would surprise the thread that 2500 // suspended us. 
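// Re-post the semaphore so the pending signal is not lost while this thread
// self-suspends; it is picked up again on the next loop iteration.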
2501 ::sem_post(&sig_sem); 2502 2503 thread->java_suspend_self(); 2504 } 2505 } while (threadIsSuspended); 2506 } 2507 } 2508 2509 int os::signal_lookup() { 2510 return check_pending_signals(false); 2511 } 2512 2513 int os::signal_wait() { 2514 return check_pending_signals(true); 2515 } 2516 2517 //////////////////////////////////////////////////////////////////////////////// 2518 // Virtual Memory 2519 2520 int os::vm_page_size() { 2521 // Seems redundant as all get out 2522 assert(os::Linux::page_size() != -1, "must call os::init"); 2523 return os::Linux::page_size(); 2524 } 2525 2526 // Solaris allocates memory by pages. 2527 int os::vm_allocation_granularity() { 2528 assert(os::Linux::page_size() != -1, "must call os::init"); 2529 return os::Linux::page_size(); 2530 } 2531 2532 // Rationale behind this function: 2533 // current (Mon Apr 25 20:12:18 MSD 2005) oprofile drops samples without executable 2534 // mapping for address (see lookup_dcookie() in the kernel module), thus we cannot get 2535 // samples for JITted code. Here we create private executable mapping over the code cache 2536 // and then we can use standard (well, almost, as mapping can change) way to provide 2537 // info for the reporting script by storing timestamp and location of symbol 2538 void linux_wrap_code(char* base, size_t size) { 2539 static volatile jint cnt = 0; 2540 2541 if (!UseOprofile) { 2542 return; 2543 } 2544 2545 char buf[PATH_MAX+1]; 2546 int num = Atomic::add(1, &cnt); 2547 2548 snprintf(buf, sizeof(buf), "%s/hs-vm-%d-%d", 2549 os::get_temp_directory(), os::current_process_id(), num); 2550 unlink(buf); 2551 2552 int fd = ::open(buf, O_CREAT | O_RDWR, S_IRWXU); 2553 2554 if (fd != -1) { 2555 off_t rv = ::lseek(fd, size-2, SEEK_SET); 2556 if (rv != (off_t)-1) { 2557 if (::write(fd, "", 1) == 1) { 2558 mmap(base, size, 2559 PROT_READ|PROT_WRITE|PROT_EXEC, 2560 MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE, fd, 0); 2561 } 2562 } 2563 ::close(fd); 2564 unlink(buf); 2565 } 2566 } 2567 2568 static bool recoverable_mmap_error(int err) { 2569 // See if the error is one we can let the caller handle. This 2570 // list of errno values comes from JBS-6843484. I can't find a 2571 // Linux man page that documents this specific set of errno 2572 // values so while this list currently matches Solaris, it may 2573 // change as we gain experience with this failure mode. 2574 switch (err) { 2575 case EBADF: 2576 case EINVAL: 2577 case ENOTSUP: 2578 // let the caller deal with these errors 2579 return true; 2580 2581 default: 2582 // Any remaining errors on this OS can cause our reserved mapping 2583 // to be lost. That can cause confusion where different data 2584 // structures think they have the same memory mapped. The worst 2585 // scenario is if both the VM and a library think they have the 2586 // same memory mapped. 2587 return false; 2588 } 2589 } 2590 2591 static void warn_fail_commit_memory(char* addr, size_t size, bool exec, 2592 int err) { 2593 warning("INFO: os::commit_memory(" PTR_FORMAT ", " SIZE_FORMAT 2594 ", %d) failed; error='%s' (errno=%d)", p2i(addr), size, exec, 2595 strerror(err), err); 2596 } 2597 2598 static void warn_fail_commit_memory(char* addr, size_t size, 2599 size_t alignment_hint, bool exec, 2600 int err) { 2601 warning("INFO: os::commit_memory(" PTR_FORMAT ", " SIZE_FORMAT 2602 ", " SIZE_FORMAT ", %d) failed; error='%s' (errno=%d)", p2i(addr), size, 2603 alignment_hint, exec, strerror(err), err); 2604 } 2605 2606 // NOTE: Linux kernel does not really reserve the pages for us. 
2607 // All it does is to check if there are enough free pages 2608 // left at the time of mmap(). This could be a potential 2609 // problem. 2610 int os::Linux::commit_memory_impl(char* addr, size_t size, bool exec) { 2611 int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; 2612 uintptr_t res = (uintptr_t) ::mmap(addr, size, prot, 2613 MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0); 2614 if (res != (uintptr_t) MAP_FAILED) { 2615 if (UseNUMAInterleaving) { 2616 numa_make_global(addr, size); 2617 } 2618 return 0; 2619 } 2620 2621 int err = errno; // save errno from mmap() call above 2622 2623 if (!recoverable_mmap_error(err)) { 2624 warn_fail_commit_memory(addr, size, exec, err); 2625 vm_exit_out_of_memory(size, OOM_MMAP_ERROR, "committing reserved memory."); 2626 } 2627 2628 return err; 2629 } 2630 2631 bool os::pd_commit_memory(char* addr, size_t size, bool exec) { 2632 return os::Linux::commit_memory_impl(addr, size, exec) == 0; 2633 } 2634 2635 void os::pd_commit_memory_or_exit(char* addr, size_t size, bool exec, 2636 const char* mesg) { 2637 assert(mesg != NULL, "mesg must be specified"); 2638 int err = os::Linux::commit_memory_impl(addr, size, exec); 2639 if (err != 0) { 2640 // the caller wants all commit errors to exit with the specified mesg: 2641 warn_fail_commit_memory(addr, size, exec, err); 2642 vm_exit_out_of_memory(size, OOM_MMAP_ERROR, "%s", mesg); 2643 } 2644 } 2645 2646 // Define MAP_HUGETLB here so we can build HotSpot on old systems. 2647 #ifndef MAP_HUGETLB 2648 #define MAP_HUGETLB 0x40000 2649 #endif 2650 2651 // Define MADV_HUGEPAGE here so we can build HotSpot on old systems. 2652 #ifndef MADV_HUGEPAGE 2653 #define MADV_HUGEPAGE 14 2654 #endif 2655 2656 int os::Linux::commit_memory_impl(char* addr, size_t size, 2657 size_t alignment_hint, bool exec) { 2658 int err = os::Linux::commit_memory_impl(addr, size, exec); 2659 if (err == 0) { 2660 realign_memory(addr, size, alignment_hint); 2661 } 2662 return err; 2663 } 2664 2665 bool os::pd_commit_memory(char* addr, size_t size, size_t alignment_hint, 2666 bool exec) { 2667 return os::Linux::commit_memory_impl(addr, size, alignment_hint, exec) == 0; 2668 } 2669 2670 void os::pd_commit_memory_or_exit(char* addr, size_t size, 2671 size_t alignment_hint, bool exec, 2672 const char* mesg) { 2673 assert(mesg != NULL, "mesg must be specified"); 2674 int err = os::Linux::commit_memory_impl(addr, size, alignment_hint, exec); 2675 if (err != 0) { 2676 // the caller wants all commit errors to exit with the specified mesg: 2677 warn_fail_commit_memory(addr, size, alignment_hint, exec, err); 2678 vm_exit_out_of_memory(size, OOM_MMAP_ERROR, "%s", mesg); 2679 } 2680 } 2681 2682 void os::pd_realign_memory(char *addr, size_t bytes, size_t alignment_hint) { 2683 if (UseTransparentHugePages && alignment_hint > (size_t)vm_page_size()) { 2684 // We don't check the return value: madvise(MADV_HUGEPAGE) may not 2685 // be supported or the memory may already be backed by huge pages. 2686 ::madvise(addr, bytes, MADV_HUGEPAGE); 2687 } 2688 } 2689 2690 void os::pd_free_memory(char *addr, size_t bytes, size_t alignment_hint) { 2691 // This method works by doing an mmap over an existing mmaping and effectively discarding 2692 // the existing pages. However it won't work for SHM-based large pages that cannot be 2693 // uncommitted at all. We don't do anything in this case to avoid creating a segment with 2694 // small pages on top of the SHM segment. This method always works for small pages, so we 2695 // allow that in any case. 
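// Re-committing over the range maps fresh anonymous pages on top of it,
// which is what discards the old contents.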
2696 if (alignment_hint <= (size_t)os::vm_page_size() || can_commit_large_page_memory()) { 2697 commit_memory(addr, bytes, alignment_hint, !ExecMem); 2698 } 2699 } 2700 2701 void os::numa_make_global(char *addr, size_t bytes) { 2702 Linux::numa_interleave_memory(addr, bytes); 2703 } 2704 2705 // Define for numa_set_bind_policy(int). Setting the argument to 0 will set the 2706 // bind policy to MPOL_PREFERRED for the current thread. 2707 #define USE_MPOL_PREFERRED 0 2708 2709 void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) { 2710 // To make NUMA and large pages more robust when both enabled, we need to ease 2711 // the requirements on where the memory should be allocated. MPOL_BIND is the 2712 // default policy and it will force memory to be allocated on the specified 2713 // node. Changing this to MPOL_PREFERRED will prefer to allocate the memory on 2714 // the specified node, but will not force it. Using this policy will prevent 2715 // getting SIGBUS when trying to allocate large pages on NUMA nodes with no 2716 // free large pages. 2717 Linux::numa_set_bind_policy(USE_MPOL_PREFERRED); 2718 Linux::numa_tonode_memory(addr, bytes, lgrp_hint); 2719 } 2720 2721 bool os::numa_topology_changed() { return false; } 2722 2723 size_t os::numa_get_groups_num() { 2724 int max_node = Linux::numa_max_node(); 2725 return max_node > 0 ? max_node + 1 : 1; 2726 } 2727 2728 int os::numa_get_group_id() { 2729 int cpu_id = Linux::sched_getcpu(); 2730 if (cpu_id != -1) { 2731 int lgrp_id = Linux::get_node_by_cpu(cpu_id); 2732 if (lgrp_id != -1) { 2733 return lgrp_id; 2734 } 2735 } 2736 return 0; 2737 } 2738 2739 size_t os::numa_get_leaf_groups(int *ids, size_t size) { 2740 for (size_t i = 0; i < size; i++) { 2741 ids[i] = i; 2742 } 2743 return size; 2744 } 2745 2746 bool os::get_page_info(char *start, page_info* info) { 2747 return false; 2748 } 2749 2750 char *os::scan_pages(char *start, char* end, page_info* page_expected, 2751 page_info* page_found) { 2752 return end; 2753 } 2754 2755 2756 int os::Linux::sched_getcpu_syscall(void) { 2757 unsigned int cpu = 0; 2758 int retval = -1; 2759 2760 #if defined(IA32) 2761 #ifndef SYS_getcpu 2762 #define SYS_getcpu 318 2763 #endif 2764 retval = syscall(SYS_getcpu, &cpu, NULL, NULL); 2765 #elif defined(AMD64) 2766 // Unfortunately we have to bring all these macros here from vsyscall.h 2767 // to be able to compile on old linuxes. 2768 #define __NR_vgetcpu 2 2769 #define VSYSCALL_START (-10UL << 20) 2770 #define VSYSCALL_SIZE 1024 2771 #define VSYSCALL_ADDR(vsyscall_nr) (VSYSCALL_START+VSYSCALL_SIZE*(vsyscall_nr)) 2772 typedef long (*vgetcpu_t)(unsigned int *cpu, unsigned int *node, unsigned long *tcache); 2773 vgetcpu_t vgetcpu = (vgetcpu_t)VSYSCALL_ADDR(__NR_vgetcpu); 2774 retval = vgetcpu(&cpu, NULL, NULL); 2775 #endif 2776 2777 return (retval == -1) ? retval : cpu; 2778 } 2779 2780 // Something to do with the numa-aware allocator needs these symbols 2781 extern "C" JNIEXPORT void numa_warn(int number, char *where, ...) { } 2782 extern "C" JNIEXPORT void numa_error(char *where) { } 2783 2784 2785 // If we are running with libnuma version > 2, then we should 2786 // be trying to use symbols with versions 1.1 2787 // If we are running with earlier version, which did not have symbol versions, 2788 // we should use the base version. 
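// dlvsym() asks for the symbol with an exact version tag ("libnuma_1.1");
// if the library is unversioned we fall back to a plain dlsym() lookup.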
2789 void* os::Linux::libnuma_dlsym(void* handle, const char *name) { 2790 void *f = dlvsym(handle, name, "libnuma_1.1"); 2791 if (f == NULL) { 2792 f = dlsym(handle, name); 2793 } 2794 return f; 2795 } 2796 2797 bool os::Linux::libnuma_init() { 2798 // sched_getcpu() should be in libc. 2799 set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t, 2800 dlsym(RTLD_DEFAULT, "sched_getcpu"))); 2801 2802 // If it's not, try a direct syscall. 2803 if (sched_getcpu() == -1) { 2804 set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t, 2805 (void*)&sched_getcpu_syscall)); 2806 } 2807 2808 if (sched_getcpu() != -1) { // Does it work? 2809 void *handle = dlopen("libnuma.so.1", RTLD_LAZY); 2810 if (handle != NULL) { 2811 set_numa_node_to_cpus(CAST_TO_FN_PTR(numa_node_to_cpus_func_t, 2812 libnuma_dlsym(handle, "numa_node_to_cpus"))); 2813 set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t, 2814 libnuma_dlsym(handle, "numa_max_node"))); 2815 set_numa_available(CAST_TO_FN_PTR(numa_available_func_t, 2816 libnuma_dlsym(handle, "numa_available"))); 2817 set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t, 2818 libnuma_dlsym(handle, "numa_tonode_memory"))); 2819 set_numa_interleave_memory(CAST_TO_FN_PTR(numa_interleave_memory_func_t, 2820 libnuma_dlsym(handle, "numa_interleave_memory"))); 2821 set_numa_set_bind_policy(CAST_TO_FN_PTR(numa_set_bind_policy_func_t, 2822 libnuma_dlsym(handle, "numa_set_bind_policy"))); 2823 2824 2825 if (numa_available() != -1) { 2826 set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes")); 2827 // Create a cpu -> node mapping 2828 _cpu_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray<int>(0, true); 2829 rebuild_cpu_to_node_map(); 2830 return true; 2831 } 2832 } 2833 } 2834 return false; 2835 } 2836 2837 // rebuild_cpu_to_node_map() constructs a table mapping cpud id to node id. 2838 // The table is later used in get_node_by_cpu(). 2839 void os::Linux::rebuild_cpu_to_node_map() { 2840 const size_t NCPUS = 32768; // Since the buffer size computation is very obscure 2841 // in libnuma (possible values are starting from 16, 2842 // and continuing up with every other power of 2, but less 2843 // than the maximum number of CPUs supported by kernel), and 2844 // is a subject to change (in libnuma version 2 the requirements 2845 // are more reasonable) we'll just hardcode the number they use 2846 // in the library. 
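// With NCPUS == 32768 and 64-bit longs, cpu_map_size below works out to
// 512 unsigned longs, i.e. a 4 KB bitmask.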
2847 const size_t BitsPerCLong = sizeof(long) * CHAR_BIT; 2848 2849 size_t cpu_num = os::active_processor_count(); 2850 size_t cpu_map_size = NCPUS / BitsPerCLong; 2851 size_t cpu_map_valid_size = 2852 MIN2((cpu_num + BitsPerCLong - 1) / BitsPerCLong, cpu_map_size); 2853 2854 cpu_to_node()->clear(); 2855 cpu_to_node()->at_grow(cpu_num - 1); 2856 size_t node_num = numa_get_groups_num(); 2857 2858 unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size, mtInternal); 2859 for (size_t i = 0; i < node_num; i++) { 2860 if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) { 2861 for (size_t j = 0; j < cpu_map_valid_size; j++) { 2862 if (cpu_map[j] != 0) { 2863 for (size_t k = 0; k < BitsPerCLong; k++) { 2864 if (cpu_map[j] & (1UL << k)) { 2865 cpu_to_node()->at_put(j * BitsPerCLong + k, i); 2866 } 2867 } 2868 } 2869 } 2870 } 2871 } 2872 FREE_C_HEAP_ARRAY(unsigned long, cpu_map); 2873 } 2874 2875 int os::Linux::get_node_by_cpu(int cpu_id) { 2876 if (cpu_to_node() != NULL && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) { 2877 return cpu_to_node()->at(cpu_id); 2878 } 2879 return -1; 2880 } 2881 2882 GrowableArray<int>* os::Linux::_cpu_to_node; 2883 os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu; 2884 os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus; 2885 os::Linux::numa_max_node_func_t os::Linux::_numa_max_node; 2886 os::Linux::numa_available_func_t os::Linux::_numa_available; 2887 os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory; 2888 os::Linux::numa_interleave_memory_func_t os::Linux::_numa_interleave_memory; 2889 os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy; 2890 unsigned long* os::Linux::_numa_all_nodes; 2891 2892 bool os::pd_uncommit_memory(char* addr, size_t size) { 2893 uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE, 2894 MAP_PRIVATE|MAP_FIXED|MAP_NORESERVE|MAP_ANONYMOUS, -1, 0); 2895 return res != (uintptr_t) MAP_FAILED; 2896 } 2897 2898 static address get_stack_commited_bottom(address bottom, size_t size) { 2899 address nbot = bottom; 2900 address ntop = bottom + size; 2901 2902 size_t page_sz = os::vm_page_size(); 2903 unsigned pages = size / page_sz; 2904 2905 unsigned char vec[1]; 2906 unsigned imin = 1, imax = pages + 1, imid; 2907 int mincore_return_value = 0; 2908 2909 assert(imin <= imax, "Unexpected page size"); 2910 2911 while (imin < imax) { 2912 imid = (imax + imin) / 2; 2913 nbot = ntop - (imid * page_sz); 2914 2915 // Use a trick with mincore to check whether the page is mapped or not. 
2916 // mincore sets vec to 1 if page resides in memory and to 0 if page 2917 // is swapped output but if page we are asking for is unmapped 2918 // it returns -1,ENOMEM 2919 mincore_return_value = mincore(nbot, page_sz, vec); 2920 2921 if (mincore_return_value == -1) { 2922 // Page is not mapped go up 2923 // to find first mapped page 2924 if (errno != EAGAIN) { 2925 assert(errno == ENOMEM, "Unexpected mincore errno"); 2926 imax = imid; 2927 } 2928 } else { 2929 // Page is mapped go down 2930 // to find first not mapped page 2931 imin = imid + 1; 2932 } 2933 } 2934 2935 nbot = nbot + page_sz; 2936 2937 // Adjust stack bottom one page up if last checked page is not mapped 2938 if (mincore_return_value == -1) { 2939 nbot = nbot + page_sz; 2940 } 2941 2942 return nbot; 2943 } 2944 2945 2946 // Linux uses a growable mapping for the stack, and if the mapping for 2947 // the stack guard pages is not removed when we detach a thread the 2948 // stack cannot grow beyond the pages where the stack guard was 2949 // mapped. If at some point later in the process the stack expands to 2950 // that point, the Linux kernel cannot expand the stack any further 2951 // because the guard pages are in the way, and a segfault occurs. 2952 // 2953 // However, it's essential not to split the stack region by unmapping 2954 // a region (leaving a hole) that's already part of the stack mapping, 2955 // so if the stack mapping has already grown beyond the guard pages at 2956 // the time we create them, we have to truncate the stack mapping. 2957 // So, we need to know the extent of the stack mapping when 2958 // create_stack_guard_pages() is called. 2959 2960 // We only need this for stacks that are growable: at the time of 2961 // writing thread stacks don't use growable mappings (i.e. those 2962 // creeated with MAP_GROWSDOWN), and aren't marked "[stack]", so this 2963 // only applies to the main thread. 2964 2965 // If the (growable) stack mapping already extends beyond the point 2966 // where we're going to put our guard pages, truncate the mapping at 2967 // that point by munmap()ping it. This ensures that when we later 2968 // munmap() the guard pages we don't leave a hole in the stack 2969 // mapping. This only affects the main/initial thread 2970 2971 bool os::pd_create_stack_guard_pages(char* addr, size_t size) { 2972 if (os::Linux::is_initial_thread()) { 2973 // As we manually grow stack up to bottom inside create_attached_thread(), 2974 // it's likely that os::Linux::initial_thread_stack_bottom is mapped and 2975 // we don't need to do anything special. 2976 // Check it first, before calling heavy function. 2977 uintptr_t stack_extent = (uintptr_t) os::Linux::initial_thread_stack_bottom(); 2978 unsigned char vec[1]; 2979 2980 if (mincore((address)stack_extent, os::vm_page_size(), vec) == -1) { 2981 // Fallback to slow path on all errors, including EAGAIN 2982 stack_extent = (uintptr_t) get_stack_commited_bottom( 2983 os::Linux::initial_thread_stack_bottom(), 2984 (size_t)addr - stack_extent); 2985 } 2986 2987 if (stack_extent < (uintptr_t)addr) { 2988 ::munmap((void*)stack_extent, (uintptr_t)(addr - stack_extent)); 2989 } 2990 } 2991 2992 return os::commit_memory(addr, size, !ExecMem); 2993 } 2994 2995 // If this is a growable mapping, remove the guard pages entirely by 2996 // munmap()ping them. If not, just call uncommit_memory(). 
This only 2997 // affects the main/initial thread, but guard against future OS changes 2998 // It's safe to always unmap guard pages for initial thread because we 2999 // always place it right after end of the mapped region 3000 3001 bool os::remove_stack_guard_pages(char* addr, size_t size) { 3002 uintptr_t stack_extent, stack_base; 3003 3004 if (os::Linux::is_initial_thread()) { 3005 return ::munmap(addr, size) == 0; 3006 } 3007 3008 return os::uncommit_memory(addr, size); 3009 } 3010 3011 // If 'fixed' is true, anon_mmap() will attempt to reserve anonymous memory 3012 // at 'requested_addr'. If there are existing memory mappings at the same 3013 // location, however, they will be overwritten. If 'fixed' is false, 3014 // 'requested_addr' is only treated as a hint, the return value may or 3015 // may not start from the requested address. Unlike Linux mmap(), this 3016 // function returns NULL to indicate failure. 3017 static char* anon_mmap(char* requested_addr, size_t bytes, bool fixed) { 3018 char * addr; 3019 int flags; 3020 3021 flags = MAP_PRIVATE | MAP_NORESERVE | MAP_ANONYMOUS; 3022 if (fixed) { 3023 assert((uintptr_t)requested_addr % os::Linux::page_size() == 0, "unaligned address"); 3024 flags |= MAP_FIXED; 3025 } 3026 3027 // Map reserved/uncommitted pages PROT_NONE so we fail early if we 3028 // touch an uncommitted page. Otherwise, the read/write might 3029 // succeed if we have enough swap space to back the physical page. 3030 addr = (char*)::mmap(requested_addr, bytes, PROT_NONE, 3031 flags, -1, 0); 3032 3033 return addr == MAP_FAILED ? NULL : addr; 3034 } 3035 3036 static int anon_munmap(char * addr, size_t size) { 3037 return ::munmap(addr, size) == 0; 3038 } 3039 3040 char* os::pd_reserve_memory(size_t bytes, char* requested_addr, 3041 size_t alignment_hint) { 3042 return anon_mmap(requested_addr, bytes, (requested_addr != NULL)); 3043 } 3044 3045 bool os::pd_release_memory(char* addr, size_t size) { 3046 return anon_munmap(addr, size); 3047 } 3048 3049 static bool linux_mprotect(char* addr, size_t size, int prot) { 3050 // Linux wants the mprotect address argument to be page aligned. 3051 char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size()); 3052 3053 // According to SUSv3, mprotect() should only be used with mappings 3054 // established by mmap(), and mmap() always maps whole pages. Unaligned 3055 // 'addr' likely indicates problem in the VM (e.g. trying to change 3056 // protection of malloc'ed or statically allocated memory). Check the 3057 // caller if you hit this assert. 3058 assert(addr == bottom, "sanity check"); 3059 3060 size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size()); 3061 return ::mprotect(bottom, size, prot) == 0; 3062 } 3063 3064 // Set protections specified 3065 bool os::protect_memory(char* addr, size_t bytes, ProtType prot, 3066 bool is_committed) { 3067 unsigned int p = 0; 3068 switch (prot) { 3069 case MEM_PROT_NONE: p = PROT_NONE; break; 3070 case MEM_PROT_READ: p = PROT_READ; break; 3071 case MEM_PROT_RW: p = PROT_READ|PROT_WRITE; break; 3072 case MEM_PROT_RWX: p = PROT_READ|PROT_WRITE|PROT_EXEC; break; 3073 default: 3074 ShouldNotReachHere(); 3075 } 3076 // is_committed is unused. 
3077 return linux_mprotect(addr, bytes, p); 3078 } 3079 3080 bool os::guard_memory(char* addr, size_t size) { 3081 return linux_mprotect(addr, size, PROT_NONE); 3082 } 3083 3084 bool os::unguard_memory(char* addr, size_t size) { 3085 return linux_mprotect(addr, size, PROT_READ|PROT_WRITE); 3086 } 3087 3088 bool os::Linux::transparent_huge_pages_sanity_check(bool warn, 3089 size_t page_size) { 3090 bool result = false; 3091 void *p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE, 3092 MAP_ANONYMOUS|MAP_PRIVATE, 3093 -1, 0); 3094 if (p != MAP_FAILED) { 3095 void *aligned_p = align_ptr_up(p, page_size); 3096 3097 result = madvise(aligned_p, page_size, MADV_HUGEPAGE) == 0; 3098 3099 munmap(p, page_size * 2); 3100 } 3101 3102 if (warn && !result) { 3103 warning("TransparentHugePages is not supported by the operating system."); 3104 } 3105 3106 return result; 3107 } 3108 3109 bool os::Linux::hugetlbfs_sanity_check(bool warn, size_t page_size) { 3110 bool result = false; 3111 void *p = mmap(NULL, page_size, PROT_READ|PROT_WRITE, 3112 MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB, 3113 -1, 0); 3114 3115 if (p != MAP_FAILED) { 3116 // We don't know if this really is a huge page or not. 3117 FILE *fp = fopen("/proc/self/maps", "r"); 3118 if (fp) { 3119 while (!feof(fp)) { 3120 char chars[257]; 3121 long x = 0; 3122 if (fgets(chars, sizeof(chars), fp)) { 3123 if (sscanf(chars, "%lx-%*x", &x) == 1 3124 && x == (long)p) { 3125 if (strstr (chars, "hugepage")) { 3126 result = true; 3127 break; 3128 } 3129 } 3130 } 3131 } 3132 fclose(fp); 3133 } 3134 munmap(p, page_size); 3135 } 3136 3137 if (warn && !result) { 3138 warning("HugeTLBFS is not supported by the operating system."); 3139 } 3140 3141 return result; 3142 } 3143 3144 // Set the coredump_filter bits to include largepages in core dump (bit 6) 3145 // 3146 // From the coredump_filter documentation: 3147 // 3148 // - (bit 0) anonymous private memory 3149 // - (bit 1) anonymous shared memory 3150 // - (bit 2) file-backed private memory 3151 // - (bit 3) file-backed shared memory 3152 // - (bit 4) ELF header pages in file-backed private memory areas (it is 3153 // effective only if the bit 2 is cleared) 3154 // - (bit 5) hugetlb private memory 3155 // - (bit 6) hugetlb shared memory 3156 // 3157 static void set_coredump_filter(void) { 3158 FILE *f; 3159 long cdm; 3160 3161 if ((f = fopen("/proc/self/coredump_filter", "r+")) == NULL) { 3162 return; 3163 } 3164 3165 if (fscanf(f, "%lx", &cdm) != 1) { 3166 fclose(f); 3167 return; 3168 } 3169 3170 rewind(f); 3171 3172 if ((cdm & LARGEPAGES_BIT) == 0) { 3173 cdm |= LARGEPAGES_BIT; 3174 fprintf(f, "%#lx", cdm); 3175 } 3176 3177 fclose(f); 3178 } 3179 3180 // Large page support 3181 3182 static size_t _large_page_size = 0; 3183 3184 size_t os::Linux::find_large_page_size() { 3185 size_t large_page_size = 0; 3186 3187 // large_page_size on Linux is used to round up heap size. x86 uses either 3188 // 2M or 4M page, depending on whether PAE (Physical Address Extensions) 3189 // mode is enabled. AMD64/EM64T uses 2M page in 64bit mode. IA64 can use 3190 // page as large as 256M. 3191 // 3192 // Here we try to figure out page size by parsing /proc/meminfo and looking 3193 // for a line with the following format: 3194 // Hugepagesize: 2048 kB 3195 // 3196 // If we can't determine the value (e.g. /proc is not mounted, or the text 3197 // format has been changed), we'll use the largest page size supported by 3198 // the processor. 
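// For example, a typical x86-64 system reports "Hugepagesize: 2048 kB",
// which yields a large page size of 2M.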
3199 3200 #ifndef ZERO 3201 large_page_size = IA32_ONLY(4 * M) AMD64_ONLY(2 * M) IA64_ONLY(256 * M) SPARC_ONLY(4 * M) 3202 ARM32_ONLY(2 * M) PPC_ONLY(4 * M) AARCH64_ONLY(2 * M); 3203 #endif // ZERO 3204 3205 FILE *fp = fopen("/proc/meminfo", "r"); 3206 if (fp) { 3207 while (!feof(fp)) { 3208 int x = 0; 3209 char buf[16]; 3210 if (fscanf(fp, "Hugepagesize: %d", &x) == 1) { 3211 if (x && fgets(buf, sizeof(buf), fp) && strcmp(buf, " kB\n") == 0) { 3212 large_page_size = x * K; 3213 break; 3214 } 3215 } else { 3216 // skip to next line 3217 for (;;) { 3218 int ch = fgetc(fp); 3219 if (ch == EOF || ch == (int)'\n') break; 3220 } 3221 } 3222 } 3223 fclose(fp); 3224 } 3225 3226 if (!FLAG_IS_DEFAULT(LargePageSizeInBytes) && LargePageSizeInBytes != large_page_size) { 3227 warning("Setting LargePageSizeInBytes has no effect on this OS. Large page size is " 3228 SIZE_FORMAT "%s.", byte_size_in_proper_unit(large_page_size), 3229 proper_unit_for_byte_size(large_page_size)); 3230 } 3231 3232 return large_page_size; 3233 } 3234 3235 size_t os::Linux::setup_large_page_size() { 3236 _large_page_size = Linux::find_large_page_size(); 3237 const size_t default_page_size = (size_t)Linux::page_size(); 3238 if (_large_page_size > default_page_size) { 3239 _page_sizes[0] = _large_page_size; 3240 _page_sizes[1] = default_page_size; 3241 _page_sizes[2] = 0; 3242 } 3243 3244 return _large_page_size; 3245 } 3246 3247 bool os::Linux::setup_large_page_type(size_t page_size) { 3248 if (FLAG_IS_DEFAULT(UseHugeTLBFS) && 3249 FLAG_IS_DEFAULT(UseSHM) && 3250 FLAG_IS_DEFAULT(UseTransparentHugePages)) { 3251 3252 // The type of large pages has not been specified by the user. 3253 3254 // Try UseHugeTLBFS and then UseSHM. 3255 UseHugeTLBFS = UseSHM = true; 3256 3257 // Don't try UseTransparentHugePages since there are known 3258 // performance issues with it turned on. This might change in the future. 3259 UseTransparentHugePages = false; 3260 } 3261 3262 if (UseTransparentHugePages) { 3263 bool warn_on_failure = !FLAG_IS_DEFAULT(UseTransparentHugePages); 3264 if (transparent_huge_pages_sanity_check(warn_on_failure, page_size)) { 3265 UseHugeTLBFS = false; 3266 UseSHM = false; 3267 return true; 3268 } 3269 UseTransparentHugePages = false; 3270 } 3271 3272 if (UseHugeTLBFS) { 3273 bool warn_on_failure = !FLAG_IS_DEFAULT(UseHugeTLBFS); 3274 if (hugetlbfs_sanity_check(warn_on_failure, page_size)) { 3275 UseSHM = false; 3276 return true; 3277 } 3278 UseHugeTLBFS = false; 3279 } 3280 3281 return UseSHM; 3282 } 3283 3284 void os::large_page_init() { 3285 if (!UseLargePages && 3286 !UseTransparentHugePages && 3287 !UseHugeTLBFS && 3288 !UseSHM) { 3289 // Not using large pages. 3290 return; 3291 } 3292 3293 if (!FLAG_IS_DEFAULT(UseLargePages) && !UseLargePages) { 3294 // The user explicitly turned off large pages. 3295 // Ignore the rest of the large pages flags. 3296 UseTransparentHugePages = false; 3297 UseHugeTLBFS = false; 3298 UseSHM = false; 3299 return; 3300 } 3301 3302 size_t large_page_size = Linux::setup_large_page_size(); 3303 UseLargePages = Linux::setup_large_page_type(large_page_size); 3304 3305 set_coredump_filter(); 3306 } 3307 3308 #ifndef SHM_HUGETLB 3309 #define SHM_HUGETLB 04000 3310 #endif 3311 3312 char* os::Linux::reserve_memory_special_shm(size_t bytes, size_t alignment, 3313 char* req_addr, bool exec) { 3314 // "exec" is passed in but not used. Creating the shared image for 3315 // the code cache doesn't have an SHM_X executable permission to check. 
3316 assert(UseLargePages && UseSHM, "only for SHM large pages"); 3317 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); 3318 3319 if (!is_size_aligned(bytes, os::large_page_size()) || alignment > os::large_page_size()) { 3320 return NULL; // Fallback to small pages. 3321 } 3322 3323 key_t key = IPC_PRIVATE; 3324 char *addr; 3325 3326 bool warn_on_failure = UseLargePages && 3327 (!FLAG_IS_DEFAULT(UseLargePages) || 3328 !FLAG_IS_DEFAULT(UseSHM) || 3329 !FLAG_IS_DEFAULT(LargePageSizeInBytes)); 3330 char msg[128]; 3331 3332 // Create a large shared memory region to attach to based on size. 3333 // Currently, size is the total size of the heap 3334 int shmid = shmget(key, bytes, SHM_HUGETLB|IPC_CREAT|SHM_R|SHM_W); 3335 if (shmid == -1) { 3336 // Possible reasons for shmget failure: 3337 // 1. shmmax is too small for Java heap. 3338 // > check shmmax value: cat /proc/sys/kernel/shmmax 3339 // > increase shmmax value: echo "0xffffffff" > /proc/sys/kernel/shmmax 3340 // 2. not enough large page memory. 3341 // > check available large pages: cat /proc/meminfo 3342 // > increase amount of large pages: 3343 // echo new_value > /proc/sys/vm/nr_hugepages 3344 // Note 1: different Linux may use different name for this property, 3345 // e.g. on Redhat AS-3 it is "hugetlb_pool". 3346 // Note 2: it's possible there's enough physical memory available but 3347 // they are so fragmented after a long run that they can't 3348 // coalesce into large pages. Try to reserve large pages when 3349 // the system is still "fresh". 3350 if (warn_on_failure) { 3351 jio_snprintf(msg, sizeof(msg), "Failed to reserve shared memory (errno = %d).", errno); 3352 warning("%s", msg); 3353 } 3354 return NULL; 3355 } 3356 3357 // attach to the region 3358 addr = (char*)shmat(shmid, req_addr, 0); 3359 int err = errno; 3360 3361 // Remove shmid. If shmat() is successful, the actual shared memory segment 3362 // will be deleted when it's detached by shmdt() or when the process 3363 // terminates. If shmat() is not successful this will remove the shared 3364 // segment immediately. 3365 shmctl(shmid, IPC_RMID, NULL); 3366 3367 if ((intptr_t)addr == -1) { 3368 if (warn_on_failure) { 3369 jio_snprintf(msg, sizeof(msg), "Failed to attach shared memory (errno = %d).", err); 3370 warning("%s", msg); 3371 } 3372 return NULL; 3373 } 3374 3375 return addr; 3376 } 3377 3378 static void warn_on_large_pages_failure(char* req_addr, size_t bytes, 3379 int error) { 3380 assert(error == ENOMEM, "Only expect to fail if no memory is available"); 3381 3382 bool warn_on_failure = UseLargePages && 3383 (!FLAG_IS_DEFAULT(UseLargePages) || 3384 !FLAG_IS_DEFAULT(UseHugeTLBFS) || 3385 !FLAG_IS_DEFAULT(LargePageSizeInBytes)); 3386 3387 if (warn_on_failure) { 3388 char msg[128]; 3389 jio_snprintf(msg, sizeof(msg), "Failed to reserve large pages memory req_addr: " 3390 PTR_FORMAT " bytes: " SIZE_FORMAT " (errno = %d).", req_addr, bytes, error); 3391 warning("%s", msg); 3392 } 3393 } 3394 3395 char* os::Linux::reserve_memory_special_huge_tlbfs_only(size_t bytes, 3396 char* req_addr, 3397 bool exec) { 3398 assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages"); 3399 assert(is_size_aligned(bytes, os::large_page_size()), "Unaligned size"); 3400 assert(is_ptr_aligned(req_addr, os::large_page_size()), "Unaligned address"); 3401 3402 int prot = exec ? 
PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; 3403 char* addr = (char*)::mmap(req_addr, bytes, prot, 3404 MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB, 3405 -1, 0); 3406 3407 if (addr == MAP_FAILED) { 3408 warn_on_large_pages_failure(req_addr, bytes, errno); 3409 return NULL; 3410 } 3411 3412 assert(is_ptr_aligned(addr, os::large_page_size()), "Must be"); 3413 3414 return addr; 3415 } 3416 3417 // Helper for os::Linux::reserve_memory_special_huge_tlbfs_mixed(). 3418 // Allocate (using mmap, NO_RESERVE, with small pages) at either a given request address 3419 // (req_addr != NULL) or with a given alignment. 3420 // - bytes shall be a multiple of alignment. 3421 // - req_addr can be NULL. If not NULL, it must be a multiple of alignment. 3422 // - alignment sets the alignment at which memory shall be allocated. 3423 // It must be a multiple of allocation granularity. 3424 // Returns address of memory or NULL. If req_addr was not NULL, will only return 3425 // req_addr or NULL. 3426 static char* anon_mmap_aligned(size_t bytes, size_t alignment, char* req_addr) { 3427 3428 size_t extra_size = bytes; 3429 if (req_addr == NULL && alignment > 0) { 3430 extra_size += alignment; 3431 } 3432 3433 char* start = (char*) ::mmap(req_addr, extra_size, PROT_NONE, 3434 MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, 3435 -1, 0); 3436 if (start == MAP_FAILED) { 3437 start = NULL; 3438 } else { 3439 if (req_addr != NULL) { 3440 if (start != req_addr) { 3441 ::munmap(start, extra_size); 3442 start = NULL; 3443 } 3444 } else { 3445 char* const start_aligned = (char*) align_ptr_up(start, alignment); 3446 char* const end_aligned = start_aligned + bytes; 3447 char* const end = start + extra_size; 3448 if (start_aligned > start) { 3449 ::munmap(start, start_aligned - start); 3450 } 3451 if (end_aligned < end) { 3452 ::munmap(end_aligned, end - end_aligned); 3453 } 3454 start = start_aligned; 3455 } 3456 } 3457 return start; 3458 3459 } 3460 3461 // Reserve memory using mmap(MAP_HUGETLB). 3462 // - bytes shall be a multiple of alignment. 3463 // - req_addr can be NULL. If not NULL, it must be a multiple of alignment. 3464 // - alignment sets the alignment at which memory shall be allocated. 3465 // It must be a multiple of allocation granularity. 3466 // Returns address of memory or NULL. If req_addr was not NULL, will only return 3467 // req_addr or NULL. 3468 char* os::Linux::reserve_memory_special_huge_tlbfs_mixed(size_t bytes, 3469 size_t alignment, 3470 char* req_addr, 3471 bool exec) { 3472 size_t large_page_size = os::large_page_size(); 3473 assert(bytes >= large_page_size, "Shouldn't allocate large pages for small sizes"); 3474 3475 assert(is_ptr_aligned(req_addr, alignment), "Must be"); 3476 assert(is_size_aligned(bytes, alignment), "Must be"); 3477 3478 // First reserve - but not commit - the address range in small pages. 3479 char* const start = anon_mmap_aligned(bytes, alignment, req_addr); 3480 3481 if (start == NULL) { 3482 return NULL; 3483 } 3484 3485 assert(is_ptr_aligned(start, alignment), "Must be"); 3486 3487 char* end = start + bytes; 3488 3489 // Find the regions of the allocated chunk that can be promoted to large pages. 3490 char* lp_start = (char*)align_ptr_up(start, large_page_size); 3491 char* lp_end = (char*)align_ptr_down(end, large_page_size); 3492 3493 size_t lp_bytes = lp_end - lp_start; 3494 3495 assert(is_size_aligned(lp_bytes, large_page_size), "Must be"); 3496 3497 if (lp_bytes == 0) { 3498 // The mapped region doesn't even span the start and the end of a large page. 
3499 // Fall back to allocate a non-special area. 3500 ::munmap(start, end - start); 3501 return NULL; 3502 } 3503 3504 int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; 3505 3506 void* result; 3507 3508 // Commit small-paged leading area. 3509 if (start != lp_start) { 3510 result = ::mmap(start, lp_start - start, prot, 3511 MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 3512 -1, 0); 3513 if (result == MAP_FAILED) { 3514 ::munmap(lp_start, end - lp_start); 3515 return NULL; 3516 } 3517 } 3518 3519 // Commit large-paged area. 3520 result = ::mmap(lp_start, lp_bytes, prot, 3521 MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED|MAP_HUGETLB, 3522 -1, 0); 3523 if (result == MAP_FAILED) { 3524 warn_on_large_pages_failure(lp_start, lp_bytes, errno); 3525 // If the mmap above fails, the large pages region will be unmapped and we 3526 // have regions before and after with small pages. Release these regions. 3527 // 3528 // | mapped | unmapped | mapped | 3529 // ^ ^ ^ ^ 3530 // start lp_start lp_end end 3531 // 3532 ::munmap(start, lp_start - start); 3533 ::munmap(lp_end, end - lp_end); 3534 return NULL; 3535 } 3536 3537 // Commit small-paged trailing area. 3538 if (lp_end != end) { 3539 result = ::mmap(lp_end, end - lp_end, prot, 3540 MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, 3541 -1, 0); 3542 if (result == MAP_FAILED) { 3543 ::munmap(start, lp_end - start); 3544 return NULL; 3545 } 3546 } 3547 3548 return start; 3549 } 3550 3551 char* os::Linux::reserve_memory_special_huge_tlbfs(size_t bytes, 3552 size_t alignment, 3553 char* req_addr, 3554 bool exec) { 3555 assert(UseLargePages && UseHugeTLBFS, "only for Huge TLBFS large pages"); 3556 assert(is_ptr_aligned(req_addr, alignment), "Must be"); 3557 assert(is_size_aligned(alignment, os::vm_allocation_granularity()), "Must be"); 3558 assert(is_power_of_2(os::large_page_size()), "Must be"); 3559 assert(bytes >= os::large_page_size(), "Shouldn't allocate large pages for small sizes"); 3560 3561 if (is_size_aligned(bytes, os::large_page_size()) && alignment <= os::large_page_size()) { 3562 return reserve_memory_special_huge_tlbfs_only(bytes, req_addr, exec); 3563 } else { 3564 return reserve_memory_special_huge_tlbfs_mixed(bytes, alignment, req_addr, exec); 3565 } 3566 } 3567 3568 char* os::reserve_memory_special(size_t bytes, size_t alignment, 3569 char* req_addr, bool exec) { 3570 assert(UseLargePages, "only for large pages"); 3571 3572 char* addr; 3573 if (UseSHM) { 3574 addr = os::Linux::reserve_memory_special_shm(bytes, alignment, req_addr, exec); 3575 } else { 3576 assert(UseHugeTLBFS, "must be"); 3577 addr = os::Linux::reserve_memory_special_huge_tlbfs(bytes, alignment, req_addr, exec); 3578 } 3579 3580 if (addr != NULL) { 3581 if (UseNUMAInterleaving) { 3582 numa_make_global(addr, bytes); 3583 } 3584 3585 // The memory is committed 3586 MemTracker::record_virtual_memory_reserve_and_commit((address)addr, bytes, CALLER_PC); 3587 } 3588 3589 return addr; 3590 } 3591 3592 bool os::Linux::release_memory_special_shm(char* base, size_t bytes) { 3593 // detaching the SHM segment will also delete it, see reserve_memory_special_shm() 3594 return shmdt(base) == 0; 3595 } 3596 3597 bool os::Linux::release_memory_special_huge_tlbfs(char* base, size_t bytes) { 3598 return pd_release_memory(base, bytes); 3599 } 3600 3601 bool os::release_memory_special(char* base, size_t bytes) { 3602 bool res; 3603 if (MemTracker::tracking_level() > NMT_minimal) { 3604 Tracker tkr = MemTracker::get_virtual_memory_release_tracker(); 3605 res = 
os::Linux::release_memory_special_impl(base, bytes); 3606 if (res) { 3607 tkr.record((address)base, bytes); 3608 } 3609 3610 } else { 3611 res = os::Linux::release_memory_special_impl(base, bytes); 3612 } 3613 return res; 3614 } 3615 3616 bool os::Linux::release_memory_special_impl(char* base, size_t bytes) { 3617 assert(UseLargePages, "only for large pages"); 3618 bool res; 3619 3620 if (UseSHM) { 3621 res = os::Linux::release_memory_special_shm(base, bytes); 3622 } else { 3623 assert(UseHugeTLBFS, "must be"); 3624 res = os::Linux::release_memory_special_huge_tlbfs(base, bytes); 3625 } 3626 return res; 3627 } 3628 3629 size_t os::large_page_size() { 3630 return _large_page_size; 3631 } 3632 3633 // With SysV SHM the entire memory region must be allocated as shared 3634 // memory. 3635 // HugeTLBFS allows application to commit large page memory on demand. 3636 // However, when committing memory with HugeTLBFS fails, the region 3637 // that was supposed to be committed will lose the old reservation 3638 // and allow other threads to steal that memory region. Because of this 3639 // behavior we can't commit HugeTLBFS memory. 3640 bool os::can_commit_large_page_memory() { 3641 return UseTransparentHugePages; 3642 } 3643 3644 bool os::can_execute_large_page_memory() { 3645 return UseTransparentHugePages || UseHugeTLBFS; 3646 } 3647 3648 // Reserve memory at an arbitrary address, only if that area is 3649 // available (and not reserved for something else). 3650 3651 char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) { 3652 const int max_tries = 10; 3653 char* base[max_tries]; 3654 size_t size[max_tries]; 3655 const size_t gap = 0x000000; 3656 3657 // Assert only that the size is a multiple of the page size, since 3658 // that's all that mmap requires, and since that's all we really know 3659 // about at this low abstraction level. If we need higher alignment, 3660 // we can either pass an alignment to this method or verify alignment 3661 // in one of the methods further up the call chain. See bug 5044738. 3662 assert(bytes % os::vm_page_size() == 0, "reserving unexpected size block"); 3663 3664 // Repeatedly allocate blocks until the block is allocated at the 3665 // right spot. 3666 3667 // Linux mmap allows caller to pass an address as hint; give it a try first, 3668 // if kernel honors the hint then we can return immediately. 3669 char * addr = anon_mmap(requested_addr, bytes, false); 3670 if (addr == requested_addr) { 3671 return requested_addr; 3672 } 3673 3674 if (addr != NULL) { 3675 // mmap() is successful but it fails to reserve at the requested address 3676 anon_munmap(addr, bytes); 3677 } 3678 3679 int i; 3680 for (i = 0; i < max_tries; ++i) { 3681 base[i] = reserve_memory(bytes); 3682 3683 if (base[i] != NULL) { 3684 // Is this the block we wanted? 3685 if (base[i] == requested_addr) { 3686 size[i] = bytes; 3687 break; 3688 } 3689 3690 // Does this overlap the block we wanted? Give back the overlapped 3691 // parts and try again. 
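//
// Illustrative sketch (addresses assumed; not part of the VM): suppose
// requested_addr == (char*)0x40000, bytes == 0x30000, and reserve_memory()
// happens to return base[i] == (char*)0x60000:
//
//   requested: [0x40000 ............... 0x70000)
//   base[i]:             [0x60000 ............... 0x90000)
//
// Then top_overlap == 0x40000 + 0x30000 - 0x60000 == 0x10000, so the code
// below unmaps the first 0x10000 bytes of base[i] (the part covering the
// tail of the requested range) and keeps the rest, leaving the requested
// range free for a later attempt.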
3692 3693 ptrdiff_t top_overlap = requested_addr + (bytes + gap) - base[i]; 3694 if (top_overlap >= 0 && (size_t)top_overlap < bytes) { 3695 unmap_memory(base[i], top_overlap); 3696 base[i] += top_overlap; 3697 size[i] = bytes - top_overlap; 3698 } else { 3699 ptrdiff_t bottom_overlap = base[i] + bytes - requested_addr; 3700 if (bottom_overlap >= 0 && (size_t)bottom_overlap < bytes) { 3701 unmap_memory(requested_addr, bottom_overlap); 3702 size[i] = bytes - bottom_overlap; 3703 } else { 3704 size[i] = bytes; 3705 } 3706 } 3707 } 3708 } 3709 3710 // Give back the unused reserved pieces. 3711 3712 for (int j = 0; j < i; ++j) { 3713 if (base[j] != NULL) { 3714 unmap_memory(base[j], size[j]); 3715 } 3716 } 3717 3718 if (i < max_tries) { 3719 return requested_addr; 3720 } else { 3721 return NULL; 3722 } 3723 } 3724 3725 size_t os::read(int fd, void *buf, unsigned int nBytes) { 3726 return ::read(fd, buf, nBytes); 3727 } 3728 3729 size_t os::read_at(int fd, void *buf, unsigned int nBytes, jlong offset) { 3730 return ::pread(fd, buf, nBytes, offset); 3731 } 3732 3733 // Short sleep, direct OS call. 3734 // 3735 // Note: certain versions of the Linux CFS scheduler (since 2.6.23) do not guarantee 3736 // sched_yield(2) will actually give up the CPU: 3737 // 3738 // * If the thread is alone on its particular CPU, it keeps running. 3739 // * Before the introduction of "skip_buddy" with "compat_yield" disabled 3740 // (pre 2.6.39). 3741 // 3742 // So calling this function with ms == 0 is used as an alternative to sched_yield(). 3743 // 3744 void os::naked_short_sleep(jlong ms) { 3745 struct timespec req; 3746 3747 assert(ms < 1000, "Un-interruptible sleep, short time use only"); 3748 req.tv_sec = 0; 3749 if (ms > 0) { 3750 req.tv_nsec = (ms % 1000) * 1000000; 3751 } else { 3752 req.tv_nsec = 1; 3753 } 3754 3755 nanosleep(&req, NULL); 3756 3757 return; 3758 } 3759 3760 // Sleep forever; naked call to OS-specific sleep; use with CAUTION 3761 void os::infinite_sleep() { 3762 while (true) { // sleep forever ... 3763 ::sleep(100); // ... 100 seconds at a time 3764 } 3765 } 3766 3767 // Used to convert frequent JVM_Yield() to nops 3768 bool os::dont_yield() { 3769 return DontYieldALot; 3770 } 3771 3772 void os::naked_yield() { 3773 sched_yield(); 3774 } 3775 3776 //////////////////////////////////////////////////////////////////////////////// 3777 // thread priority support 3778 3779 // Note: Normal Linux applications are run with SCHED_OTHER policy. SCHED_OTHER 3780 // only supports dynamic priority; static priority must be zero. For real-time 3781 // applications, Linux supports SCHED_RR which allows static priority (1-99). 3782 // However, for large multi-threaded applications, SCHED_RR is not only slower 3783 // than SCHED_OTHER, but also very unstable (my volano tests hang hard 4 out 3784 // of 5 runs - Sep 2005). 3785 // 3786 // The following code actually changes the niceness of the kernel-thread/LWP. It 3787 // has an assumption that setpriority() only modifies one kernel-thread/LWP, 3788 // not the entire user process, and user level threads are 1:1 mapped to kernel 3789 // threads. It has always been the case, but could change in the future. For 3790 // this reason, the code should not be used as default (ThreadPriorityPolicy=0). 3791 // It is only used when ThreadPriorityPolicy=1 and requires root privilege.
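//
// Illustrative sketch (assumed values; not part of the VM): with
// ThreadPriorityPolicy=1 and sufficient privileges, raising a JavaThread to
// MaxPriority boils down to something like
//
//   int nice_val = os::java_to_os_priority[MaxPriority];          // -5 in the table below
//   setpriority(PRIO_PROCESS, thread->osthread()->thread_id(), nice_val);
//
// which on Linux renices only that one kernel thread/LWP, as described above.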
3792 3793 int os::java_to_os_priority[CriticalPriority + 1] = { 3794 19, // 0 Entry should never be used 3795 3796 4, // 1 MinPriority 3797 3, // 2 3798 2, // 3 3799 3800 1, // 4 3801 0, // 5 NormPriority 3802 -1, // 6 3803 3804 -2, // 7 3805 -3, // 8 3806 -4, // 9 NearMaxPriority 3807 3808 -5, // 10 MaxPriority 3809 3810 -5 // 11 CriticalPriority 3811 }; 3812 3813 static int prio_init() { 3814 if (ThreadPriorityPolicy == 1) { 3815 // Only root can raise thread priority. Don't allow ThreadPriorityPolicy=1 3816 // if effective uid is not root. Perhaps, a more elegant way of doing 3817 // this is to test CAP_SYS_NICE capability, but that will require libcap.so 3818 if (geteuid() != 0) { 3819 if (!FLAG_IS_DEFAULT(ThreadPriorityPolicy)) { 3820 warning("-XX:ThreadPriorityPolicy requires root privilege on Linux"); 3821 } 3822 ThreadPriorityPolicy = 0; 3823 } 3824 } 3825 if (UseCriticalJavaThreadPriority) { 3826 os::java_to_os_priority[MaxPriority] = os::java_to_os_priority[CriticalPriority]; 3827 } 3828 return 0; 3829 } 3830 3831 OSReturn os::set_native_priority(Thread* thread, int newpri) { 3832 if (!UseThreadPriorities || ThreadPriorityPolicy == 0) return OS_OK; 3833 3834 int ret = setpriority(PRIO_PROCESS, thread->osthread()->thread_id(), newpri); 3835 return (ret == 0) ? OS_OK : OS_ERR; 3836 } 3837 3838 OSReturn os::get_native_priority(const Thread* const thread, 3839 int *priority_ptr) { 3840 if (!UseThreadPriorities || ThreadPriorityPolicy == 0) { 3841 *priority_ptr = java_to_os_priority[NormPriority]; 3842 return OS_OK; 3843 } 3844 3845 errno = 0; 3846 *priority_ptr = getpriority(PRIO_PROCESS, thread->osthread()->thread_id()); 3847 return (*priority_ptr != -1 || errno == 0 ? OS_OK : OS_ERR); 3848 } 3849 3850 // Hint to the underlying OS that a task switch would not be good. 3851 // Void return because it's a hint and can fail. 3852 void os::hint_no_preempt() {} 3853 3854 //////////////////////////////////////////////////////////////////////////////// 3855 // suspend/resume support 3856 3857 // the low-level signal-based suspend/resume support is a remnant from the 3858 // old VM-suspension that used to be for java-suspension, safepoints etc, 3859 // within hotspot. Now there is a single use-case for this: 3860 // - calling get_thread_pc() on the VMThread by the flat-profiler task 3861 // that runs in the watcher thread. 3862 // The remaining code is greatly simplified from the more general suspension 3863 // code that used to be used. 3864 // 3865 // The protocol is quite simple: 3866 // - suspend: 3867 // - sends a signal to the target thread 3868 // - polls the suspend state of the osthread using a yield loop 3869 // - target thread signal handler (SR_handler) sets suspend state 3870 // and blocks in sigsuspend until continued 3871 // - resume: 3872 // - sets target osthread state to continue 3873 // - sends signal to end the sigsuspend loop in the SR_handler 3874 // 3875 // Note that the SR_lock plays no role in this suspend/resume protocol. 3876 3877 static void resume_clear_context(OSThread *osthread) { 3878 osthread->set_ucontext(NULL); 3879 osthread->set_siginfo(NULL); 3880 } 3881 3882 static void suspend_save_context(OSThread *osthread, siginfo_t* siginfo, 3883 ucontext_t* context) { 3884 osthread->set_ucontext(context); 3885 osthread->set_siginfo(siginfo); 3886 } 3887 3888 // Handler function invoked when a thread's execution is suspended or 3889 // resumed. 
We have to be careful that only async-safe functions are 3890 // called here (Note: most pthread functions are not async safe and 3891 // should be avoided.) 3892 // 3893 // Note: sigwait() is a more natural fit than sigsuspend() from an 3894 // interface point of view, but sigwait() prevents the signal handler 3895 // from being run. libpthread would get very confused by not having 3896 // its signal handlers run, and this prevents sigwait()'s use with the 3897 // mutex-granting signal. 3898 // 3899 // Currently only ever called on the VMThread and JavaThreads (PC sampling) 3900 // 3901 static void SR_handler(int sig, siginfo_t* siginfo, ucontext_t* context) { 3902 // Save and restore errno to avoid confusing native code with EINTR 3903 // after sigsuspend. 3904 int old_errno = errno; 3905 3906 Thread* thread = Thread::current(); 3907 OSThread* osthread = thread->osthread(); 3908 assert(thread->is_VM_thread() || thread->is_Java_thread(), "Must be VMThread or JavaThread"); 3909 3910 os::SuspendResume::State current = osthread->sr.state(); 3911 if (current == os::SuspendResume::SR_SUSPEND_REQUEST) { 3912 suspend_save_context(osthread, siginfo, context); 3913 3914 // attempt to switch the state; we assume we had a SUSPEND_REQUEST 3915 os::SuspendResume::State state = osthread->sr.suspended(); 3916 if (state == os::SuspendResume::SR_SUSPENDED) { 3917 sigset_t suspend_set; // signals for sigsuspend() 3918 3919 // get current set of blocked signals and unblock resume signal 3920 pthread_sigmask(SIG_BLOCK, NULL, &suspend_set); 3921 sigdelset(&suspend_set, SR_signum); 3922 3923 sr_semaphore.signal(); 3924 // wait here until we are resumed 3925 while (1) { 3926 sigsuspend(&suspend_set); 3927 3928 os::SuspendResume::State result = osthread->sr.running(); 3929 if (result == os::SuspendResume::SR_RUNNING) { 3930 sr_semaphore.signal(); 3931 break; 3932 } 3933 } 3934 3935 } else if (state == os::SuspendResume::SR_RUNNING) { 3936 // request was cancelled, continue 3937 } else { 3938 ShouldNotReachHere(); 3939 } 3940 3941 resume_clear_context(osthread); 3942 } else if (current == os::SuspendResume::SR_RUNNING) { 3943 // request was cancelled, continue 3944 } else if (current == os::SuspendResume::SR_WAKEUP_REQUEST) { 3945 // ignore 3946 } else { 3947 // ignore 3948 } 3949 3950 errno = old_errno; 3951 } 3952 3953 static int SR_initialize() { 3954 struct sigaction act; 3955 char *s; 3956 3957 // Get signal number to use for suspend/resume 3958 if ((s = ::getenv("_JAVA_SR_SIGNUM")) != 0) { 3959 int sig = ::strtol(s, 0, 10); 3960 if (sig > MAX2(SIGSEGV, SIGBUS) && // See 4355769. 3961 sig < NSIG) { // Must be legal signal and fit into sigflags[]. 3962 SR_signum = sig; 3963 } else { 3964 warning("You set _JAVA_SR_SIGNUM=%d. It must be in range [%d, %d]. Using %d instead.", 3965 sig, MAX2(SIGSEGV, SIGBUS)+1, NSIG-1, SR_signum); 3966 } 3967 } 3968 3969 assert(SR_signum > SIGSEGV && SR_signum > SIGBUS, 3970 "SR_signum must be greater than max(SIGSEGV, SIGBUS), see 4355769"); 3971 3972 sigemptyset(&SR_sigset); 3973 sigaddset(&SR_sigset, SR_signum); 3974 3975 // Set up signal handler for suspend/resume 3976 act.sa_flags = SA_RESTART|SA_SIGINFO; 3977 act.sa_handler = (void (*)(int)) SR_handler; 3978 3979 // SR_signum is blocked by default. 3980 // 4528190 - We also need to block pthread restart signal (32 on all 3981 // supported Linux platforms). Note that LinuxThreads need to block 3982 // this signal for all threads to work properly.
So we don't have 3983 // to use a hard-coded signal number when setting up the mask. 3984 pthread_sigmask(SIG_BLOCK, NULL, &act.sa_mask); 3985 3986 if (sigaction(SR_signum, &act, 0) == -1) { 3987 return -1; 3988 } 3989 3990 // Save signal flag 3991 os::Linux::set_our_sigflags(SR_signum, act.sa_flags); 3992 return 0; 3993 } 3994 3995 static int sr_notify(OSThread* osthread) { 3996 int status = pthread_kill(osthread->pthread_id(), SR_signum); 3997 assert_status(status == 0, status, "pthread_kill"); 3998 return status; 3999 } 4000 4001 // "Randomly" selected value for how long we want to spin 4002 // before bailing out on suspending a thread, also how often 4003 // we send a signal to a thread we want to resume 4004 static const int RANDOMLY_LARGE_INTEGER = 1000000; 4005 static const int RANDOMLY_LARGE_INTEGER2 = 100; 4006 4007 // returns true on success and false on error - really an error is fatal 4008 // but this seems the normal response to library errors 4009 static bool do_suspend(OSThread* osthread) { 4010 assert(osthread->sr.is_running(), "thread should be running"); 4011 assert(!sr_semaphore.trywait(), "semaphore has invalid state"); 4012 4013 // mark as suspended and send signal 4014 if (osthread->sr.request_suspend() != os::SuspendResume::SR_SUSPEND_REQUEST) { 4015 // failed to switch, state wasn't running? 4016 ShouldNotReachHere(); 4017 return false; 4018 } 4019 4020 if (sr_notify(osthread) != 0) { 4021 ShouldNotReachHere(); 4022 } 4023 4024 // managed to send the signal and switch to SUSPEND_REQUEST, now wait for SUSPENDED 4025 while (true) { 4026 if (sr_semaphore.timedwait(0, 2 * NANOSECS_PER_MILLISEC)) { 4027 break; 4028 } else { 4029 // timeout 4030 os::SuspendResume::State cancelled = osthread->sr.cancel_suspend(); 4031 if (cancelled == os::SuspendResume::SR_RUNNING) { 4032 return false; 4033 } else if (cancelled == os::SuspendResume::SR_SUSPENDED) { 4034 // make sure that we consume the signal on the semaphore as well 4035 sr_semaphore.wait(); 4036 break; 4037 } else { 4038 ShouldNotReachHere(); 4039 return false; 4040 } 4041 } 4042 } 4043 4044 guarantee(osthread->sr.is_suspended(), "Must be suspended"); 4045 return true; 4046 } 4047 4048 static void do_resume(OSThread* osthread) { 4049 assert(osthread->sr.is_suspended(), "thread should be suspended"); 4050 assert(!sr_semaphore.trywait(), "invalid semaphore state"); 4051 4052 if (osthread->sr.request_wakeup() != os::SuspendResume::SR_WAKEUP_REQUEST) { 4053 // failed to switch to WAKEUP_REQUEST 4054 ShouldNotReachHere(); 4055 return; 4056 } 4057 4058 while (true) { 4059 if (sr_notify(osthread) == 0) { 4060 if (sr_semaphore.timedwait(0, 2 * NANOSECS_PER_MILLISEC)) { 4061 if (osthread->sr.is_running()) { 4062 return; 4063 } 4064 } 4065 } else { 4066 ShouldNotReachHere(); 4067 } 4068 } 4069 4070 guarantee(osthread->sr.is_running(), "Must be running!"); 4071 } 4072 4073 /////////////////////////////////////////////////////////////////////////////////// 4074 // signal handling (except suspend/resume) 4075 4076 // This routine may be used by user applications as a "hook" to catch signals. 4077 // The user-defined signal handler must pass unrecognized signals to this 4078 // routine, and if it returns true (non-zero), then the signal handler must 4079 // return immediately. If the flag "abort_if_unrecognized" is true, then this 4080 // routine will never return false (zero), but instead will execute a VM panic 4081 // routine to kill the process. 4082 // 4083 // If this routine returns false, it is OK to call it again.
This allows 4084 // the user-defined signal handler to perform checks either before or after 4085 // the VM performs its own checks. Naturally, the user code would be making 4086 // a serious error if it tried to handle an exception (such as a null check 4087 // or breakpoint) that the VM was generating for its own correct operation. 4088 // 4089 // This routine may recognize any of the following kinds of signals: 4090 // SIGBUS, SIGSEGV, SIGILL, SIGFPE, SIGQUIT, SIGPIPE, SIGXFSZ, SIGUSR1. 4091 // It should be consulted by handlers for any of those signals. 4092 // 4093 // The caller of this routine must pass in the three arguments supplied 4094 // to the function referred to in the "sa_sigaction" (not the "sa_handler") 4095 // field of the structure passed to sigaction(). This routine assumes that 4096 // the sa_flags field passed to sigaction() includes SA_SIGINFO and SA_RESTART. 4097 // 4098 // Note that the VM will print warnings if it detects conflicting signal 4099 // handlers, unless invoked with the option "-XX:+AllowUserSignalHandlers". 4100 // 4101 extern "C" JNIEXPORT int JVM_handle_linux_signal(int signo, 4102 siginfo_t* siginfo, 4103 void* ucontext, 4104 int abort_if_unrecognized); 4105 4106 void signalHandler(int sig, siginfo_t* info, void* uc) { 4107 assert(info != NULL && uc != NULL, "it must be old kernel"); 4108 int orig_errno = errno; // Preserve errno value over signal handler. 4109 JVM_handle_linux_signal(sig, info, uc, true); 4110 errno = orig_errno; 4111 } 4112 4113 4114 // This boolean allows users to forward their own non-matching signals 4115 // to JVM_handle_linux_signal, harmlessly. 4116 bool os::Linux::signal_handlers_are_installed = false; 4117 4118 // For signal-chaining 4119 struct sigaction sigact[NSIG]; 4120 uint64_t sigs = 0; 4121 #if (64 < NSIG-1) 4122 #error "Not all signals can be encoded in sigs. Adapt its type!" 4123 #endif 4124 bool os::Linux::libjsig_is_loaded = false; 4125 typedef struct sigaction *(*get_signal_t)(int); 4126 get_signal_t os::Linux::get_signal_action = NULL; 4127 4128 struct sigaction* os::Linux::get_chained_signal_action(int sig) { 4129 struct sigaction *actp = NULL; 4130 4131 if (libjsig_is_loaded) { 4132 // Retrieve the old signal handler from libjsig 4133 actp = (*get_signal_action)(sig); 4134 } 4135 if (actp == NULL) { 4136 // Retrieve the preinstalled signal handler from jvm 4137 actp = get_preinstalled_handler(sig); 4138 } 4139 4140 return actp; 4141 } 4142 4143 static bool call_chained_handler(struct sigaction *actp, int sig, 4144 siginfo_t *siginfo, void *context) { 4145 // Call the old signal handler 4146 if (actp->sa_handler == SIG_DFL) { 4147 // It's more reasonable to let jvm treat it as an unexpected exception 4148 // instead of taking the default action. 
4149 return false; 4150 } else if (actp->sa_handler != SIG_IGN) { 4151 if ((actp->sa_flags & SA_NODEFER) == 0) { 4152 // automatically block the signal 4153 sigaddset(&(actp->sa_mask), sig); 4154 } 4155 4156 sa_handler_t hand = NULL; 4157 sa_sigaction_t sa = NULL; 4158 bool siginfo_flag_set = (actp->sa_flags & SA_SIGINFO) != 0; 4159 // retrieve the chained handler 4160 if (siginfo_flag_set) { 4161 sa = actp->sa_sigaction; 4162 } else { 4163 hand = actp->sa_handler; 4164 } 4165 4166 if ((actp->sa_flags & SA_RESETHAND) != 0) { 4167 actp->sa_handler = SIG_DFL; 4168 } 4169 4170 // try to honor the signal mask 4171 sigset_t oset; 4172 pthread_sigmask(SIG_SETMASK, &(actp->sa_mask), &oset); 4173 4174 // call into the chained handler 4175 if (siginfo_flag_set) { 4176 (*sa)(sig, siginfo, context); 4177 } else { 4178 (*hand)(sig); 4179 } 4180 4181 // restore the signal mask 4182 pthread_sigmask(SIG_SETMASK, &oset, 0); 4183 } 4184 // Tell jvm's signal handler the signal is taken care of. 4185 return true; 4186 } 4187 4188 bool os::Linux::chained_handler(int sig, siginfo_t* siginfo, void* context) { 4189 bool chained = false; 4190 // signal-chaining 4191 if (UseSignalChaining) { 4192 struct sigaction *actp = get_chained_signal_action(sig); 4193 if (actp != NULL) { 4194 chained = call_chained_handler(actp, sig, siginfo, context); 4195 } 4196 } 4197 return chained; 4198 } 4199 4200 struct sigaction* os::Linux::get_preinstalled_handler(int sig) { 4201 if ((((uint64_t)1 << (sig-1)) & sigs) != 0) { 4202 return &sigact[sig]; 4203 } 4204 return NULL; 4205 } 4206 4207 void os::Linux::save_preinstalled_handler(int sig, struct sigaction& oldAct) { 4208 assert(sig > 0 && sig < NSIG, "vm signal out of expected range"); 4209 sigact[sig] = oldAct; 4210 sigs |= (uint64_t)1 << (sig-1); 4211 } 4212 4213 // for diagnostic 4214 int sigflags[NSIG]; 4215 4216 int os::Linux::get_our_sigflags(int sig) { 4217 assert(sig > 0 && sig < NSIG, "vm signal out of expected range"); 4218 return sigflags[sig]; 4219 } 4220 4221 void os::Linux::set_our_sigflags(int sig, int flags) { 4222 assert(sig > 0 && sig < NSIG, "vm signal out of expected range"); 4223 if (sig > 0 && sig < NSIG) { 4224 sigflags[sig] = flags; 4225 } 4226 } 4227 4228 void os::Linux::set_signal_handler(int sig, bool set_installed) { 4229 // Check for overwrite. 4230 struct sigaction oldAct; 4231 sigaction(sig, (struct sigaction*)NULL, &oldAct); 4232 4233 void* oldhand = oldAct.sa_sigaction 4234 ? CAST_FROM_FN_PTR(void*, oldAct.sa_sigaction) 4235 : CAST_FROM_FN_PTR(void*, oldAct.sa_handler); 4236 if (oldhand != CAST_FROM_FN_PTR(void*, SIG_DFL) && 4237 oldhand != CAST_FROM_FN_PTR(void*, SIG_IGN) && 4238 oldhand != CAST_FROM_FN_PTR(void*, (sa_sigaction_t)signalHandler)) { 4239 if (AllowUserSignalHandlers || !set_installed) { 4240 // Do not overwrite; user takes responsibility to forward to us. 4241 return; 4242 } else if (UseSignalChaining) { 4243 // save the old handler in jvm 4244 save_preinstalled_handler(sig, oldAct); 4245 // libjsig also interposes the sigaction() call below and saves the 4246 // old sigaction on its own.
4247 } else { 4248 fatal("Encountered unexpected pre-existing sigaction handler " 4249 "%#lx for signal %d.", (long)oldhand, sig); 4250 } 4251 } 4252 4253 struct sigaction sigAct; 4254 sigfillset(&(sigAct.sa_mask)); 4255 sigAct.sa_handler = SIG_DFL; 4256 if (!set_installed) { 4257 sigAct.sa_flags = SA_SIGINFO|SA_RESTART; 4258 } else { 4259 sigAct.sa_sigaction = signalHandler; 4260 sigAct.sa_flags = SA_SIGINFO|SA_RESTART; 4261 } 4262 // Save flags, which are set by ours 4263 assert(sig > 0 && sig < NSIG, "vm signal out of expected range"); 4264 sigflags[sig] = sigAct.sa_flags; 4265 4266 int ret = sigaction(sig, &sigAct, &oldAct); 4267 assert(ret == 0, "check"); 4268 4269 void* oldhand2 = oldAct.sa_sigaction 4270 ? CAST_FROM_FN_PTR(void*, oldAct.sa_sigaction) 4271 : CAST_FROM_FN_PTR(void*, oldAct.sa_handler); 4272 assert(oldhand2 == oldhand, "no concurrent signal handler installation"); 4273 } 4274 4275 // install signal handlers for signals that HotSpot needs to 4276 // handle in order to support Java-level exception handling. 4277 4278 void os::Linux::install_signal_handlers() { 4279 if (!signal_handlers_are_installed) { 4280 signal_handlers_are_installed = true; 4281 4282 // signal-chaining 4283 typedef void (*signal_setting_t)(); 4284 signal_setting_t begin_signal_setting = NULL; 4285 signal_setting_t end_signal_setting = NULL; 4286 begin_signal_setting = CAST_TO_FN_PTR(signal_setting_t, 4287 dlsym(RTLD_DEFAULT, "JVM_begin_signal_setting")); 4288 if (begin_signal_setting != NULL) { 4289 end_signal_setting = CAST_TO_FN_PTR(signal_setting_t, 4290 dlsym(RTLD_DEFAULT, "JVM_end_signal_setting")); 4291 get_signal_action = CAST_TO_FN_PTR(get_signal_t, 4292 dlsym(RTLD_DEFAULT, "JVM_get_signal_action")); 4293 libjsig_is_loaded = true; 4294 assert(UseSignalChaining, "should enable signal-chaining"); 4295 } 4296 if (libjsig_is_loaded) { 4297 // Tell libjsig jvm is setting signal handlers 4298 (*begin_signal_setting)(); 4299 } 4300 4301 set_signal_handler(SIGSEGV, true); 4302 set_signal_handler(SIGPIPE, true); 4303 set_signal_handler(SIGBUS, true); 4304 set_signal_handler(SIGILL, true); 4305 set_signal_handler(SIGFPE, true); 4306 #if defined(PPC64) 4307 set_signal_handler(SIGTRAP, true); 4308 #endif 4309 set_signal_handler(SIGXFSZ, true); 4310 4311 if (libjsig_is_loaded) { 4312 // Tell libjsig jvm finishes setting signal handlers 4313 (*end_signal_setting)(); 4314 } 4315 4316 // We don't activate signal checker if libjsig is in place, we trust ourselves 4317 // and if UserSignalHandler is installed all bets are off. 4318 // Log that signal checking is off only if -verbose:jni is specified. 4319 if (CheckJNICalls) { 4320 if (libjsig_is_loaded) { 4321 if (PrintJNIResolving) { 4322 tty->print_cr("Info: libjsig is activated, all active signal checking is disabled"); 4323 } 4324 check_signals = false; 4325 } 4326 if (AllowUserSignalHandlers) { 4327 if (PrintJNIResolving) { 4328 tty->print_cr("Info: AllowUserSignalHandlers is activated, all active signal checking is disabled"); 4329 } 4330 check_signals = false; 4331 } 4332 } 4333 } 4334 } 4335 4336 // This is the fastest way to get thread cpu time on Linux. 4337 // Returns cpu time (user+sys) for any thread, not only for current. 4338 // POSIX compliant clocks are implemented in the kernels 2.6.16+. 4339 // It might work on 2.6.10+ with a special kernel/glibc patch. 
4340 // For reference, see IEEE Std 1003.1-2004: 4341 // http://www.unix.org/single_unix_specification 4342 4343 jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) { 4344 struct timespec tp; 4345 int rc = os::Linux::clock_gettime(clockid, &tp); 4346 assert(rc == 0, "clock_gettime is expected to return 0 code"); 4347 4348 return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec; 4349 } 4350 4351 ///// 4352 // glibc on Linux uses an undocumented flag 4353 // to indicate that some special sort of signal 4354 // trampoline is used. 4355 // We will never set this flag, and we should 4356 // ignore this flag in our diagnostics. 4357 #ifdef SIGNIFICANT_SIGNAL_MASK 4358 #undef SIGNIFICANT_SIGNAL_MASK 4359 #endif 4360 #define SIGNIFICANT_SIGNAL_MASK (~0x04000000) 4361 4362 static const char* get_signal_handler_name(address handler, 4363 char* buf, int buflen) { 4364 int offset = 0; 4365 bool found = os::dll_address_to_library_name(handler, buf, buflen, &offset); 4366 if (found) { 4367 // skip directory names 4368 const char *p1, *p2; 4369 p1 = buf; 4370 size_t len = strlen(os::file_separator()); 4371 while ((p2 = strstr(p1, os::file_separator())) != NULL) p1 = p2 + len; 4372 jio_snprintf(buf, buflen, "%s+0x%x", p1, offset); 4373 } else { 4374 jio_snprintf(buf, buflen, PTR_FORMAT, handler); 4375 } 4376 return buf; 4377 } 4378 4379 static void print_signal_handler(outputStream* st, int sig, 4380 char* buf, size_t buflen) { 4381 struct sigaction sa; 4382 4383 sigaction(sig, NULL, &sa); 4384 4385 // See comment for SIGNIFICANT_SIGNAL_MASK define 4386 sa.sa_flags &= SIGNIFICANT_SIGNAL_MASK; 4387 4388 st->print("%s: ", os::exception_name(sig, buf, buflen)); 4389 4390 address handler = (sa.sa_flags & SA_SIGINFO) 4391 ? CAST_FROM_FN_PTR(address, sa.sa_sigaction) 4392 : CAST_FROM_FN_PTR(address, sa.sa_handler); 4393 4394 if (handler == CAST_FROM_FN_PTR(address, SIG_DFL)) { 4395 st->print("SIG_DFL"); 4396 } else if (handler == CAST_FROM_FN_PTR(address, SIG_IGN)) { 4397 st->print("SIG_IGN"); 4398 } else { 4399 st->print("[%s]", get_signal_handler_name(handler, buf, buflen)); 4400 } 4401 4402 st->print(", sa_mask[0]="); 4403 os::Posix::print_signal_set_short(st, &sa.sa_mask); 4404 4405 address rh = VMError::get_resetted_sighandler(sig); 4406 // Maybe the handler was reset by VMError? 4407 if (rh != NULL) { 4408 handler = rh; 4409 sa.sa_flags = VMError::get_resetted_sigflags(sig) & SIGNIFICANT_SIGNAL_MASK; 4410 } 4411 4412 st->print(", sa_flags="); 4413 os::Posix::print_sa_flags(st, sa.sa_flags); 4414 4415 // Check: is it our handler? 4416 if (handler == CAST_FROM_FN_PTR(address, (sa_sigaction_t)signalHandler) || 4417 handler == CAST_FROM_FN_PTR(address, (sa_sigaction_t)SR_handler)) { 4418 // It is our signal handler 4419 // check for flags, reset system-used one!
4420 if ((int)sa.sa_flags != os::Linux::get_our_sigflags(sig)) { 4421 st->print( 4422 ", flags were changed from " PTR32_FORMAT ", consider using jsig library", 4423 os::Linux::get_our_sigflags(sig)); 4424 } 4425 } 4426 st->cr(); 4427 } 4428 4429 4430 #define DO_SIGNAL_CHECK(sig) \ 4431 do { \ 4432 if (!sigismember(&check_signal_done, sig)) { \ 4433 os::Linux::check_signal_handler(sig); \ 4434 } \ 4435 } while (0) 4436 4437 // This method is a periodic task to check for misbehaving JNI applications 4438 // under CheckJNI; we can add any periodic checks here 4439 4440 void os::run_periodic_checks() { 4441 if (check_signals == false) return; 4442 4443 // If the SEGV or BUS handlers are overridden, that could potentially prevent 4444 // generation of the hs*.log in the event of a crash; debugging 4445 // such a case can be very challenging, so we absolutely 4446 // check the following for good measure: 4447 DO_SIGNAL_CHECK(SIGSEGV); 4448 DO_SIGNAL_CHECK(SIGILL); 4449 DO_SIGNAL_CHECK(SIGFPE); 4450 DO_SIGNAL_CHECK(SIGBUS); 4451 DO_SIGNAL_CHECK(SIGPIPE); 4452 DO_SIGNAL_CHECK(SIGXFSZ); 4453 #if defined(PPC64) 4454 DO_SIGNAL_CHECK(SIGTRAP); 4455 #endif 4456 4457 // ReduceSignalUsage allows the user to override these handlers 4458 // see comments at the very top and jvm_solaris.h 4459 if (!ReduceSignalUsage) { 4460 DO_SIGNAL_CHECK(SHUTDOWN1_SIGNAL); 4461 DO_SIGNAL_CHECK(SHUTDOWN2_SIGNAL); 4462 DO_SIGNAL_CHECK(SHUTDOWN3_SIGNAL); 4463 DO_SIGNAL_CHECK(BREAK_SIGNAL); 4464 } 4465 4466 DO_SIGNAL_CHECK(SR_signum); 4467 } 4468 4469 typedef int (*os_sigaction_t)(int, const struct sigaction *, struct sigaction *); 4470 4471 static os_sigaction_t os_sigaction = NULL; 4472 4473 void os::Linux::check_signal_handler(int sig) { 4474 char buf[O_BUFLEN]; 4475 address jvmHandler = NULL; 4476 4477 4478 struct sigaction act; 4479 if (os_sigaction == NULL) { 4480 // only trust the default sigaction, in case it has been interposed 4481 os_sigaction = (os_sigaction_t)dlsym(RTLD_DEFAULT, "sigaction"); 4482 if (os_sigaction == NULL) return; 4483 } 4484 4485 os_sigaction(sig, (struct sigaction*)NULL, &act); 4486 4487 4488 act.sa_flags &= SIGNIFICANT_SIGNAL_MASK; 4489 4490 address thisHandler = (act.sa_flags & SA_SIGINFO) 4491 ?
CAST_FROM_FN_PTR(address, act.sa_sigaction) 4492 : CAST_FROM_FN_PTR(address, act.sa_handler); 4493 4494 4495 switch (sig) { 4496 case SIGSEGV: 4497 case SIGBUS: 4498 case SIGFPE: 4499 case SIGPIPE: 4500 case SIGILL: 4501 case SIGXFSZ: 4502 jvmHandler = CAST_FROM_FN_PTR(address, (sa_sigaction_t)signalHandler); 4503 break; 4504 4505 case SHUTDOWN1_SIGNAL: 4506 case SHUTDOWN2_SIGNAL: 4507 case SHUTDOWN3_SIGNAL: 4508 case BREAK_SIGNAL: 4509 jvmHandler = (address)user_handler(); 4510 break; 4511 4512 default: 4513 if (sig == SR_signum) { 4514 jvmHandler = CAST_FROM_FN_PTR(address, (sa_sigaction_t)SR_handler); 4515 } else { 4516 return; 4517 } 4518 break; 4519 } 4520 4521 if (thisHandler != jvmHandler) { 4522 tty->print("Warning: %s handler ", exception_name(sig, buf, O_BUFLEN)); 4523 tty->print("expected:%s", get_signal_handler_name(jvmHandler, buf, O_BUFLEN)); 4524 tty->print_cr(" found:%s", get_signal_handler_name(thisHandler, buf, O_BUFLEN)); 4525 // No need to check this sig any longer 4526 sigaddset(&check_signal_done, sig); 4527 // Running under non-interactive shell, SHUTDOWN2_SIGNAL will be reassigned SIG_IGN 4528 if (sig == SHUTDOWN2_SIGNAL && !isatty(fileno(stdin))) { 4529 tty->print_cr("Running in non-interactive shell, %s handler is replaced by shell", 4530 exception_name(sig, buf, O_BUFLEN)); 4531 } 4532 } else if (os::Linux::get_our_sigflags(sig) != 0 && (int)act.sa_flags != os::Linux::get_our_sigflags(sig)) { 4533 tty->print("Warning: %s handler flags ", exception_name(sig, buf, O_BUFLEN)); 4534 tty->print("expected:"); 4535 os::Posix::print_sa_flags(tty, os::Linux::get_our_sigflags(sig)); 4536 tty->cr(); 4537 tty->print(" found:"); 4538 os::Posix::print_sa_flags(tty, act.sa_flags); 4539 tty->cr(); 4540 // No need to check this sig any longer 4541 sigaddset(&check_signal_done, sig); 4542 } 4543 4544 // Dump all the signal handlers 4545 if (sigismember(&check_signal_done, sig)) { 4546 print_signal_handlers(tty, buf, O_BUFLEN); 4547 } 4548 } 4549 4550 extern void report_error(char* file_name, int line_no, char* title, 4551 char* format, ...); 4552 4553 // this is called _before_ most of the global arguments have been parsed 4554 void os::init(void) { 4555 char dummy; // used to get a guess on initial stack address 4556 // first_hrtime = gethrtime(); 4557 4558 clock_tics_per_sec = sysconf(_SC_CLK_TCK); 4559 4560 init_random(1234567); 4561 4562 ThreadCritical::initialize(); 4563 4564 Linux::set_page_size(sysconf(_SC_PAGESIZE)); 4565 if (Linux::page_size() == -1) { 4566 fatal("os_linux.cpp: os::init: sysconf failed (%s)", 4567 strerror(errno)); 4568 } 4569 init_page_sizes((size_t) Linux::page_size()); 4570 4571 Linux::initialize_system_info(); 4572 4573 // main_thread points to the aboriginal thread 4574 Linux::_main_thread = pthread_self(); 4575 4576 Linux::clock_init(); 4577 initial_time_count = javaTimeNanos(); 4578 4579 // pthread_condattr initialization for monotonic clock 4580 int status; 4581 pthread_condattr_t* _condattr = os::Linux::condAttr(); 4582 if ((status = pthread_condattr_init(_condattr)) != 0) { 4583 fatal("pthread_condattr_init: %s", strerror(status)); 4584 } 4585 // Only set the clock if CLOCK_MONOTONIC is available 4586 if (os::supports_monotonic_clock()) { 4587 if ((status = pthread_condattr_setclock(_condattr, CLOCK_MONOTONIC)) != 0) { 4588 if (status == EINVAL) { 4589 warning("Unable to use monotonic clock with relative timed-waits" \ 4590 " - changes to the time-of-day clock may have adverse effects"); 4591 } else { 4592 fatal("pthread_condattr_setclock: %s",
strerror(status)); 4593 } 4594 } 4595 } 4596 // else it defaults to CLOCK_REALTIME 4597 4598 // retrieve entry point for pthread_setname_np 4599 Linux::_pthread_setname_np = 4600 (int(*)(pthread_t, const char*))dlsym(RTLD_DEFAULT, "pthread_setname_np"); 4601 4602 } 4603 4604 // To install functions for atexit system call 4605 extern "C" { 4606 static void perfMemory_exit_helper() { 4607 perfMemory_exit(); 4608 } 4609 } 4610 4611 // this is called _after_ the global arguments have been parsed 4612 jint os::init_2(void) { 4613 Linux::fast_thread_clock_init(); 4614 4615 // Allocate a single page and mark it as readable for safepoint polling 4616 address polling_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 4617 guarantee(polling_page != MAP_FAILED, "os::init_2: failed to allocate polling page"); 4618 4619 os::set_polling_page(polling_page); 4620 4621 #ifndef PRODUCT 4622 if (Verbose && PrintMiscellaneous) { 4623 tty->print("[SafePoint Polling address: " INTPTR_FORMAT "]\n", 4624 (intptr_t)polling_page); 4625 } 4626 #endif 4627 4628 if (!UseMembar) { 4629 address mem_serialize_page = (address) ::mmap(NULL, Linux::page_size(), PROT_READ | PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 4630 guarantee(mem_serialize_page != MAP_FAILED, "mmap Failed for memory serialize page"); 4631 os::set_memory_serialize_page(mem_serialize_page); 4632 4633 #ifndef PRODUCT 4634 if (Verbose && PrintMiscellaneous) { 4635 tty->print("[Memory Serialize Page address: " INTPTR_FORMAT "]\n", 4636 (intptr_t)mem_serialize_page); 4637 } 4638 #endif 4639 } 4640 4641 // initialize suspend/resume support - must do this before signal_sets_init() 4642 if (SR_initialize() != 0) { 4643 perror("SR_initialize failed"); 4644 return JNI_ERR; 4645 } 4646 4647 Linux::signal_sets_init(); 4648 Linux::install_signal_handlers(); 4649 4650 // Check minimum allowable stack size for thread creation and to initialize 4651 // the java system classes, including StackOverflowError - depends on page 4652 // size. Add a page for compiler2 recursion in main thread. 4653 // Add in 2*BytesPerWord times page size to account for VM stack during 4654 // class initialization depending on 32 or 64 bit VM. 4655 os::Linux::min_stack_allowed = MAX2(os::Linux::min_stack_allowed, 4656 JavaThread::stack_guard_zone_size() + 4657 JavaThread::stack_shadow_zone_size() + 4658 (2*BytesPerWord COMPILER2_PRESENT(+1)) * Linux::vm_default_page_size()); 4659 4660 size_t threadStackSizeInBytes = ThreadStackSize * K; 4661 if (threadStackSizeInBytes != 0 && 4662 threadStackSizeInBytes < os::Linux::min_stack_allowed) { 4663 tty->print_cr("\nThe stack size specified is too small, " 4664 "Specify at least " SIZE_FORMAT "k", 4665 os::Linux::min_stack_allowed/ K); 4666 return JNI_ERR; 4667 } 4668 4669 // Make the stack size a multiple of the page size so that 4670 // the yellow/red zones can be guarded. 
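//
// Worked example (values assumed; not part of the VM): with a 4K page size,
// -XX:ThreadStackSize=513 gives threadStackSizeInBytes == 513 * K == 525312,
// which round_to() below bumps up to the next page multiple, 129 * 4096 ==
// 528384 (516K), so the guard (yellow/red) zones begin and end on page
// boundaries.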
4671 JavaThread::set_stack_size_at_create(round_to(threadStackSizeInBytes, 4672 vm_page_size())); 4673 4674 Linux::capture_initial_stack(JavaThread::stack_size_at_create()); 4675 4676 #if defined(IA32) 4677 workaround_expand_exec_shield_cs_limit(); 4678 #endif 4679 4680 Linux::libpthread_init(); 4681 if (PrintMiscellaneous && (Verbose || WizardMode)) { 4682 tty->print_cr("[HotSpot is running with %s, %s]\n", 4683 Linux::glibc_version(), Linux::libpthread_version()); 4684 } 4685 4686 if (UseNUMA) { 4687 if (!Linux::libnuma_init()) { 4688 UseNUMA = false; 4689 } else { 4690 if ((Linux::numa_max_node() < 1)) { 4691 // There's only one node(they start from 0), disable NUMA. 4692 UseNUMA = false; 4693 } 4694 } 4695 // With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way 4696 // we can make the adaptive lgrp chunk resizing work. If the user specified 4697 // both UseNUMA and UseLargePages (or UseSHM/UseHugeTLBFS) on the command line - warn and 4698 // disable adaptive resizing. 4699 if (UseNUMA && UseLargePages && !can_commit_large_page_memory()) { 4700 if (FLAG_IS_DEFAULT(UseNUMA)) { 4701 UseNUMA = false; 4702 } else { 4703 if (FLAG_IS_DEFAULT(UseLargePages) && 4704 FLAG_IS_DEFAULT(UseSHM) && 4705 FLAG_IS_DEFAULT(UseHugeTLBFS)) { 4706 UseLargePages = false; 4707 } else if (UseAdaptiveSizePolicy || UseAdaptiveNUMAChunkSizing) { 4708 warning("UseNUMA is not fully compatible with SHM/HugeTLBFS large pages, disabling adaptive resizing (-XX:-UseAdaptiveSizePolicy -XX:-UseAdaptiveNUMAChunkSizing)"); 4709 UseAdaptiveSizePolicy = false; 4710 UseAdaptiveNUMAChunkSizing = false; 4711 } 4712 } 4713 } 4714 if (!UseNUMA && ForceNUMA) { 4715 UseNUMA = true; 4716 } 4717 } 4718 4719 if (MaxFDLimit) { 4720 // set the number of file descriptors to max. print out error 4721 // if getrlimit/setrlimit fails but continue regardless. 4722 struct rlimit nbr_files; 4723 int status = getrlimit(RLIMIT_NOFILE, &nbr_files); 4724 if (status != 0) { 4725 if (PrintMiscellaneous && (Verbose || WizardMode)) { 4726 perror("os::init_2 getrlimit failed"); 4727 } 4728 } else { 4729 nbr_files.rlim_cur = nbr_files.rlim_max; 4730 status = setrlimit(RLIMIT_NOFILE, &nbr_files); 4731 if (status != 0) { 4732 if (PrintMiscellaneous && (Verbose || WizardMode)) { 4733 perror("os::init_2 setrlimit failed"); 4734 } 4735 } 4736 } 4737 } 4738 4739 // Initialize lock used to serialize thread creation (see os::create_thread) 4740 Linux::set_createThread_lock(new Mutex(Mutex::leaf, "createThread_lock", false)); 4741 4742 // at-exit methods are called in the reverse order of their registration. 4743 // atexit functions are called on return from main or as a result of a 4744 // call to exit(3C). There can be only 32 of these functions registered 4745 // and atexit() does not set errno. 4746 4747 if (PerfAllowAtExitRegistration) { 4748 // only register atexit functions if PerfAllowAtExitRegistration is set. 4749 // atexit functions can be delayed until process exit time, which 4750 // can be problematic for embedded VM situations. Embedded VMs should 4751 // call DestroyJavaVM() to assure that VM resources are released. 4752 4753 // note: perfMemory_exit_helper atexit function may be removed in 4754 // the future if the appropriate cleanup code can be added to the 4755 // VM_Exit VMOperation's doit method. 
4756 if (atexit(perfMemory_exit_helper) != 0) { 4757 warning("os::init_2 atexit(perfMemory_exit_helper) failed"); 4758 } 4759 } 4760 4761 // initialize thread priority policy 4762 prio_init(); 4763 4764 return JNI_OK; 4765 } 4766 4767 // Mark the polling page as unreadable 4768 void os::make_polling_page_unreadable(void) { 4769 if (!guard_memory((char*)_polling_page, Linux::page_size())) { 4770 fatal("Could not disable polling page"); 4771 } 4772 } 4773 4774 // Mark the polling page as readable 4775 void os::make_polling_page_readable(void) { 4776 if (!linux_mprotect((char *)_polling_page, Linux::page_size(), PROT_READ)) { 4777 fatal("Could not enable polling page"); 4778 } 4779 } 4780 4781 // older glibc versions don't have this macro (which expands to 4782 // an optimized bit-counting function) so we have to roll our own 4783 #ifndef CPU_COUNT 4784 4785 static int _cpu_count(const cpu_set_t* cpus) { 4786 int count = 0; 4787 // only look up to the number of configured processors 4788 for (int i = 0; i < os::processor_count(); i++) { 4789 if (CPU_ISSET(i, cpus)) { 4790 count++; 4791 } 4792 } 4793 return count; 4794 } 4795 4796 #define CPU_COUNT(cpus) _cpu_count(cpus) 4797 4798 #endif // CPU_COUNT 4799 4800 // Get the current number of available processors for this process. 4801 // This value can change at any time during a process's lifetime. 4802 // sched_getaffinity gives an accurate answer as it accounts for cpusets. 4803 // If it appears there may be more than 1024 processors then we do a 4804 // dynamic check - see 6515172 for details. 4805 // If anything goes wrong we fallback to returning the number of online 4806 // processors - which can be greater than the number available to the process. 4807 int os::active_processor_count() { 4808 cpu_set_t cpus; // can represent at most 1024 (CPU_SETSIZE) processors 4809 cpu_set_t* cpus_p = &cpus; 4810 int cpus_size = sizeof(cpu_set_t); 4811 4812 int configured_cpus = processor_count(); // upper bound on available cpus 4813 int cpu_count = 0; 4814 4815 // old build platforms may not support dynamic cpu sets 4816 #ifdef CPU_ALLOC 4817 4818 // To enable easy testing of the dynamic path on different platforms we 4819 // introduce a diagnostic flag: UseCpuAllocPath 4820 if (configured_cpus >= CPU_SETSIZE || UseCpuAllocPath) { 4821 // kernel may use a mask bigger than cpu_set_t 4822 log_trace(os)("active_processor_count: using dynamic path %s" 4823 "- configured processors: %d", 4824 UseCpuAllocPath ? 
"(forced) " : "", 4825 configured_cpus); 4826 cpus_p = CPU_ALLOC(configured_cpus); 4827 if (cpus_p != NULL) { 4828 cpus_size = CPU_ALLOC_SIZE(configured_cpus); 4829 // zero it just to be safe 4830 CPU_ZERO_S(cpus_size, cpus_p); 4831 } 4832 else { 4833 // failed to allocate so fallback to online cpus 4834 int online_cpus = ::sysconf(_SC_NPROCESSORS_ONLN); 4835 log_trace(os)("active_processor_count: " 4836 "CPU_ALLOC failed (%s) - using " 4837 "online processor count: %d", 4838 strerror(errno), online_cpus); 4839 return online_cpus; 4840 } 4841 } 4842 else { 4843 log_trace(os)("active_processor_count: using static path - configured processors: %d", 4844 configured_cpus); 4845 } 4846 #else // CPU_ALLOC 4847 // these stubs won't be executed 4848 #define CPU_COUNT_S(size, cpus) -1 4849 #define CPU_FREE(cpus) 4850 4851 log_trace(os)("active_processor_count: only static path available - configured processors: %d", 4852 configured_cpus); 4853 #endif // CPU_ALLOC 4854 4855 // pid 0 means the current thread - which we have to assume represents the process 4856 if (sched_getaffinity(0, cpus_size, cpus_p) == 0) { 4857 if (cpus_p != &cpus) { // can only be true when CPU_ALLOC used 4858 cpu_count = CPU_COUNT_S(cpus_size, cpus_p); 4859 } 4860 else { 4861 cpu_count = CPU_COUNT(cpus_p); 4862 } 4863 log_trace(os)("active_processor_count: sched_getaffinity processor count: %d", cpu_count); 4864 } 4865 else { 4866 cpu_count = ::sysconf(_SC_NPROCESSORS_ONLN); 4867 warning("sched_getaffinity failed (%s)- using online processor count (%d) " 4868 "which may exceed available processors", strerror(errno), cpu_count); 4869 } 4870 4871 if (cpus_p != &cpus) { // can only be true when CPU_ALLOC used 4872 CPU_FREE(cpus_p); 4873 } 4874 4875 assert(cpu_count > 0 && cpu_count <= processor_count(), "sanity check"); 4876 return cpu_count; 4877 } 4878 4879 void os::set_native_thread_name(const char *name) { 4880 if (Linux::_pthread_setname_np) { 4881 char buf [16]; // according to glibc manpage, 16 chars incl. '/0' 4882 snprintf(buf, sizeof(buf), "%s", name); 4883 buf[sizeof(buf) - 1] = '\0'; 4884 const int rc = Linux::_pthread_setname_np(pthread_self(), buf); 4885 // ERANGE should not happen; all other errors should just be ignored. 4886 assert(rc != ERANGE, "pthread_setname_np failed"); 4887 } 4888 } 4889 4890 bool os::distribute_processes(uint length, uint* distribution) { 4891 // Not yet implemented. 4892 return false; 4893 } 4894 4895 bool os::bind_to_processor(uint processor_id) { 4896 // Not yet implemented. 
4897 return false; 4898 } 4899 4900 /// 4901 4902 void os::SuspendedThreadTask::internal_do_task() { 4903 if (do_suspend(_thread->osthread())) { 4904 SuspendedThreadTaskContext context(_thread, _thread->osthread()->ucontext()); 4905 do_task(context); 4906 do_resume(_thread->osthread()); 4907 } 4908 } 4909 4910 class PcFetcher : public os::SuspendedThreadTask { 4911 public: 4912 PcFetcher(Thread* thread) : os::SuspendedThreadTask(thread) {} 4913 ExtendedPC result(); 4914 protected: 4915 void do_task(const os::SuspendedThreadTaskContext& context); 4916 private: 4917 ExtendedPC _epc; 4918 }; 4919 4920 ExtendedPC PcFetcher::result() { 4921 guarantee(is_done(), "task is not done yet."); 4922 return _epc; 4923 } 4924 4925 void PcFetcher::do_task(const os::SuspendedThreadTaskContext& context) { 4926 Thread* thread = context.thread(); 4927 OSThread* osthread = thread->osthread(); 4928 if (osthread->ucontext() != NULL) { 4929 _epc = os::Linux::ucontext_get_pc((const ucontext_t *) context.ucontext()); 4930 } else { 4931 // NULL context is unexpected, double-check this is the VMThread 4932 guarantee(thread->is_VM_thread(), "can only be called for VMThread"); 4933 } 4934 } 4935 4936 // Suspends the target using the signal mechanism and then grabs the PC before 4937 // resuming the target. Used by the flat-profiler only 4938 ExtendedPC os::get_thread_pc(Thread* thread) { 4939 // Make sure that it is called by the watcher for the VMThread 4940 assert(Thread::current()->is_Watcher_thread(), "Must be watcher"); 4941 assert(thread->is_VM_thread(), "Can only be called for VMThread"); 4942 4943 PcFetcher fetcher(thread); 4944 fetcher.run(); 4945 return fetcher.result(); 4946 } 4947 4948 //////////////////////////////////////////////////////////////////////////////// 4949 // debug support 4950 4951 bool os::find(address addr, outputStream* st) { 4952 Dl_info dlinfo; 4953 memset(&dlinfo, 0, sizeof(dlinfo)); 4954 if (dladdr(addr, &dlinfo) != 0) { 4955 st->print(PTR_FORMAT ": ", p2i(addr)); 4956 if (dlinfo.dli_sname != NULL && dlinfo.dli_saddr != NULL) { 4957 st->print("%s+" PTR_FORMAT, dlinfo.dli_sname, 4958 p2i(addr) - p2i(dlinfo.dli_saddr)); 4959 } else if (dlinfo.dli_fbase != NULL) { 4960 st->print("<offset " PTR_FORMAT ">", p2i(addr) - p2i(dlinfo.dli_fbase)); 4961 } else { 4962 st->print("<absolute address>"); 4963 } 4964 if (dlinfo.dli_fname != NULL) { 4965 st->print(" in %s", dlinfo.dli_fname); 4966 } 4967 if (dlinfo.dli_fbase != NULL) { 4968 st->print(" at " PTR_FORMAT, p2i(dlinfo.dli_fbase)); 4969 } 4970 st->cr(); 4971 4972 if (Verbose) { 4973 // decode some bytes around the PC 4974 address begin = clamp_address_in_page(addr-40, addr, os::vm_page_size()); 4975 address end = clamp_address_in_page(addr+40, addr, os::vm_page_size()); 4976 address lowest = (address) dlinfo.dli_sname; 4977 if (!lowest) lowest = (address) dlinfo.dli_fbase; 4978 if (begin < lowest) begin = lowest; 4979 Dl_info dlinfo2; 4980 if (dladdr(end, &dlinfo2) != 0 && dlinfo2.dli_saddr != dlinfo.dli_saddr 4981 && end > dlinfo2.dli_saddr && dlinfo2.dli_saddr > begin) { 4982 end = (address) dlinfo2.dli_saddr; 4983 } 4984 Disassembler::decode(begin, end, st); 4985 } 4986 return true; 4987 } 4988 return false; 4989 } 4990 4991 //////////////////////////////////////////////////////////////////////////////// 4992 // misc 4993 4994 // This does not do anything on Linux. This is basically a hook for being 4995 // able to use structured exception handling (thread-local exception filters) 4996 // on, e.g., Win32. 
4997 void 4998 os::os_exception_wrapper(java_call_t f, JavaValue* value, const methodHandle& method, 4999 JavaCallArguments* args, Thread* thread) { 5000 f(value, method, args, thread); 5001 } 5002 5003 void os::print_statistics() { 5004 } 5005 5006 bool os::message_box(const char* title, const char* message) { 5007 int i; 5008 fdStream err(defaultStream::error_fd()); 5009 for (i = 0; i < 78; i++) err.print_raw("="); 5010 err.cr(); 5011 err.print_raw_cr(title); 5012 for (i = 0; i < 78; i++) err.print_raw("-"); 5013 err.cr(); 5014 err.print_raw_cr(message); 5015 for (i = 0; i < 78; i++) err.print_raw("="); 5016 err.cr(); 5017 5018 char buf[16]; 5019 // Prevent process from exiting upon "read error" without consuming all CPU 5020 while (::read(0, buf, sizeof(buf)) <= 0) { ::sleep(100); } 5021 5022 return buf[0] == 'y' || buf[0] == 'Y'; 5023 } 5024 5025 int os::stat(const char *path, struct stat *sbuf) { 5026 char pathbuf[MAX_PATH]; 5027 if (strlen(path) > MAX_PATH - 1) { 5028 errno = ENAMETOOLONG; 5029 return -1; 5030 } 5031 os::native_path(strcpy(pathbuf, path)); 5032 return ::stat(pathbuf, sbuf); 5033 } 5034 5035 bool os::check_heap(bool force) { 5036 return true; 5037 } 5038 5039 // Is a (classpath) directory empty? 5040 bool os::dir_is_empty(const char* path) { 5041 DIR *dir = NULL; 5042 struct dirent *ptr; 5043 5044 dir = opendir(path); 5045 if (dir == NULL) return true; 5046 5047 // Scan the directory 5048 bool result = true; 5049 char buf[sizeof(struct dirent) + MAX_PATH]; 5050 while (result && (ptr = ::readdir(dir)) != NULL) { 5051 if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) { 5052 result = false; 5053 } 5054 } 5055 closedir(dir); 5056 return result; 5057 } 5058 5059 // This code originates from JDK's sysOpen and open64_w 5060 // from src/solaris/hpi/src/system_md.c 5061 5062 int os::open(const char *path, int oflag, int mode) { 5063 if (strlen(path) > MAX_PATH - 1) { 5064 errno = ENAMETOOLONG; 5065 return -1; 5066 } 5067 5068 // All file descriptors that are opened in the Java process and not 5069 // specifically destined for a subprocess should have the close-on-exec 5070 // flag set. If we don't set it, then careless 3rd party native code 5071 // might fork and exec without closing all appropriate file descriptors 5072 // (e.g. as we do in closeDescriptors in UNIXProcess.c), and this in 5073 // turn might: 5074 // 5075 // - cause end-of-file to fail to be detected on some file 5076 // descriptors, resulting in mysterious hangs, or 5077 // 5078 // - might cause an fopen in the subprocess to fail on a system 5079 // suffering from bug 1085341. 5080 // 5081 // (Yes, the default setting of the close-on-exec flag is a Unix 5082 // design flaw) 5083 // 5084 // See: 5085 // 1085341: 32-bit stdio routines should support file descriptors >255 5086 // 4843136: (process) pipe file descriptor from Runtime.exec not being closed 5087 // 6339493: (process) Runtime.exec does not close all file descriptors on Solaris 9 5088 // 5089 // Modern Linux kernels (after 2.6.23 2007) support O_CLOEXEC with open(). 5090 // O_CLOEXEC is preferable to using FD_CLOEXEC on an open file descriptor 5091 // because it saves a system call and removes a small window where the flag 5092 // is unset. On ancient Linux kernels the O_CLOEXEC flag will be ignored 5093 // and we fall back to using FD_CLOEXEC (see below). 
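// For comparison (an illustrative sketch only, not used by this code): without O_CLOEXEC
// the flag has to be set after the fact, which costs an extra system call and leaves a
// window in which a concurrent fork()/exec() could inherit the descriptor before the
// flag is set:
//   int fd = ::open64(path, oflag, mode);
//   if (fd != -1) {
//     int flags = ::fcntl(fd, F_GETFD);
//     if (flags != -1) {
//       ::fcntl(fd, F_SETFD, flags | FD_CLOEXEC);  // the window closes only here
//     }
//   }
// The code below opens with O_CLOEXEC when available and uses this pattern only as the
// fallback.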
5094 #ifdef O_CLOEXEC 5095 oflag |= O_CLOEXEC; 5096 #endif 5097 5098 int fd = ::open64(path, oflag, mode); 5099 if (fd == -1) return -1; 5100 5101 //If the open succeeded, the file might still be a directory 5102 { 5103 struct stat64 buf64; 5104 int ret = ::fstat64(fd, &buf64); 5105 int st_mode = buf64.st_mode; 5106 5107 if (ret != -1) { 5108 if ((st_mode & S_IFMT) == S_IFDIR) { 5109 errno = EISDIR; 5110 ::close(fd); 5111 return -1; 5112 } 5113 } else { 5114 ::close(fd); 5115 return -1; 5116 } 5117 } 5118 5119 #ifdef FD_CLOEXEC 5120 // Validate that the use of the O_CLOEXEC flag on open above worked. 5121 // With recent kernels, we will perform this check exactly once. 5122 static sig_atomic_t O_CLOEXEC_is_known_to_work = 0; 5123 if (!O_CLOEXEC_is_known_to_work) { 5124 int flags = ::fcntl(fd, F_GETFD); 5125 if (flags != -1) { 5126 if ((flags & FD_CLOEXEC) != 0) 5127 O_CLOEXEC_is_known_to_work = 1; 5128 else 5129 ::fcntl(fd, F_SETFD, flags | FD_CLOEXEC); 5130 } 5131 } 5132 #endif 5133 5134 return fd; 5135 } 5136 5137 5138 // create binary file, rewriting existing file if required 5139 int os::create_binary_file(const char* path, bool rewrite_existing) { 5140 int oflags = O_WRONLY | O_CREAT; 5141 if (!rewrite_existing) { 5142 oflags |= O_EXCL; 5143 } 5144 return ::open64(path, oflags, S_IREAD | S_IWRITE); 5145 } 5146 5147 // return current position of file pointer 5148 jlong os::current_file_offset(int fd) { 5149 return (jlong)::lseek64(fd, (off64_t)0, SEEK_CUR); 5150 } 5151 5152 // move file pointer to the specified offset 5153 jlong os::seek_to_file_offset(int fd, jlong offset) { 5154 return (jlong)::lseek64(fd, (off64_t)offset, SEEK_SET); 5155 } 5156 5157 // This code originates from JDK's sysAvailable 5158 // from src/solaris/hpi/src/native_threads/src/sys_api_td.c 5159 5160 int os::available(int fd, jlong *bytes) { 5161 jlong cur, end; 5162 int mode; 5163 struct stat64 buf64; 5164 5165 if (::fstat64(fd, &buf64) >= 0) { 5166 mode = buf64.st_mode; 5167 if (S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) { 5168 int n; 5169 if (::ioctl(fd, FIONREAD, &n) >= 0) { 5170 *bytes = n; 5171 return 1; 5172 } 5173 } 5174 } 5175 if ((cur = ::lseek64(fd, 0L, SEEK_CUR)) == -1) { 5176 return 0; 5177 } else if ((end = ::lseek64(fd, 0L, SEEK_END)) == -1) { 5178 return 0; 5179 } else if (::lseek64(fd, cur, SEEK_SET) == -1) { 5180 return 0; 5181 } 5182 *bytes = end - cur; 5183 return 1; 5184 } 5185 5186 // Map a block of memory. 5187 char* os::pd_map_memory(int fd, const char* file_name, size_t file_offset, 5188 char *addr, size_t bytes, bool read_only, 5189 bool allow_exec) { 5190 int prot; 5191 int flags = MAP_PRIVATE; 5192 5193 if (read_only) { 5194 prot = PROT_READ; 5195 } else { 5196 prot = PROT_READ | PROT_WRITE; 5197 } 5198 5199 if (allow_exec) { 5200 prot |= PROT_EXEC; 5201 } 5202 5203 if (addr != NULL) { 5204 flags |= MAP_FIXED; 5205 } 5206 5207 char* mapped_address = (char*)mmap(addr, (size_t)bytes, prot, flags, 5208 fd, file_offset); 5209 if (mapped_address == MAP_FAILED) { 5210 return NULL; 5211 } 5212 return mapped_address; 5213 } 5214 5215 5216 // Remap a block of memory. 5217 char* os::pd_remap_memory(int fd, const char* file_name, size_t file_offset, 5218 char *addr, size_t bytes, bool read_only, 5219 bool allow_exec) { 5220 // same as map_memory() on this OS 5221 return os::map_memory(fd, file_name, file_offset, addr, bytes, read_only, 5222 allow_exec); 5223 } 5224 5225 5226 // Unmap a block of memory. 
5227 bool os::pd_unmap_memory(char* addr, size_t bytes) { 5228 return munmap(addr, bytes) == 0; 5229 } 5230 5231 static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time); 5232 5233 static clockid_t thread_cpu_clockid(Thread* thread) { 5234 pthread_t tid = thread->osthread()->pthread_id(); 5235 clockid_t clockid; 5236 5237 // Get thread clockid 5238 int rc = os::Linux::pthread_getcpuclockid(tid, &clockid); 5239 assert(rc == 0, "pthread_getcpuclockid is expected to return 0 code"); 5240 return clockid; 5241 } 5242 5243 // current_thread_cpu_time(bool) and thread_cpu_time(Thread*, bool) 5244 // are used by JVM M&M and JVMTI to get user+sys or user CPU time 5245 // of a thread. 5246 // 5247 // current_thread_cpu_time() and thread_cpu_time(Thread*) returns 5248 // the fast estimate available on the platform. 5249 5250 jlong os::current_thread_cpu_time() { 5251 if (os::Linux::supports_fast_thread_cpu_time()) { 5252 return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID); 5253 } else { 5254 // return user + sys since the cost is the same 5255 return slow_thread_cpu_time(Thread::current(), true /* user + sys */); 5256 } 5257 } 5258 5259 jlong os::thread_cpu_time(Thread* thread) { 5260 // consistent with what current_thread_cpu_time() returns 5261 if (os::Linux::supports_fast_thread_cpu_time()) { 5262 return os::Linux::fast_thread_cpu_time(thread_cpu_clockid(thread)); 5263 } else { 5264 return slow_thread_cpu_time(thread, true /* user + sys */); 5265 } 5266 } 5267 5268 jlong os::current_thread_cpu_time(bool user_sys_cpu_time) { 5269 if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) { 5270 return os::Linux::fast_thread_cpu_time(CLOCK_THREAD_CPUTIME_ID); 5271 } else { 5272 return slow_thread_cpu_time(Thread::current(), user_sys_cpu_time); 5273 } 5274 } 5275 5276 jlong os::thread_cpu_time(Thread *thread, bool user_sys_cpu_time) { 5277 if (user_sys_cpu_time && os::Linux::supports_fast_thread_cpu_time()) { 5278 return os::Linux::fast_thread_cpu_time(thread_cpu_clockid(thread)); 5279 } else { 5280 return slow_thread_cpu_time(thread, user_sys_cpu_time); 5281 } 5282 } 5283 5284 // -1 on error. 5285 static jlong slow_thread_cpu_time(Thread *thread, bool user_sys_cpu_time) { 5286 pid_t tid = thread->osthread()->thread_id(); 5287 char *s; 5288 char stat[2048]; 5289 int statlen; 5290 char proc_name[64]; 5291 int count; 5292 long sys_time, user_time; 5293 char cdummy; 5294 int idummy; 5295 long ldummy; 5296 FILE *fp; 5297 5298 snprintf(proc_name, 64, "/proc/self/task/%d/stat", tid); 5299 fp = fopen(proc_name, "r"); 5300 if (fp == NULL) return -1; 5301 statlen = fread(stat, 1, 2047, fp); 5302 stat[statlen] = '\0'; 5303 fclose(fp); 5304 5305 // Skip pid and the command string. Note that we could be dealing with 5306 // weird command names, e.g. user could decide to rename java launcher 5307 // to "java 1.4.2 :)", then the stat file would look like 5308 // 1234 (java 1.4.2 :)) R ... ... 5309 // We don't really need to know the command string, just find the last 5310 // occurrence of ")" and then start parsing from there. See bug 4726580. 
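// The fields picked up by the sscanf() below follow proc(5): after the state character
// come five integer fields (ppid, pgrp, session, tty_nr, tpgid) and five unsigned long
// fields (flags plus the four page-fault counters), so user_time and sys_time receive
// utime and stime. Both are reported in clock ticks and converted to nanoseconds below
// using clock_tics_per_sec.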
5311 s = strrchr(stat, ')'); 5312 if (s == NULL) return -1; 5313 5314 // Skip blank chars 5315 do { s++; } while (s && isspace(*s)); 5316 5317 count = sscanf(s,"%c %d %d %d %d %d %lu %lu %lu %lu %lu %lu %lu", 5318 &cdummy, &idummy, &idummy, &idummy, &idummy, &idummy, 5319 &ldummy, &ldummy, &ldummy, &ldummy, &ldummy, 5320 &user_time, &sys_time); 5321 if (count != 13) return -1; 5322 if (user_sys_cpu_time) { 5323 return ((jlong)sys_time + (jlong)user_time) * (1000000000 / clock_tics_per_sec); 5324 } else { 5325 return (jlong)user_time * (1000000000 / clock_tics_per_sec); 5326 } 5327 } 5328 5329 void os::current_thread_cpu_time_info(jvmtiTimerInfo *info_ptr) { 5330 info_ptr->max_value = ALL_64_BITS; // will not wrap in less than 64 bits 5331 info_ptr->may_skip_backward = false; // elapsed time not wall time 5332 info_ptr->may_skip_forward = false; // elapsed time not wall time 5333 info_ptr->kind = JVMTI_TIMER_TOTAL_CPU; // user+system time is returned 5334 } 5335 5336 void os::thread_cpu_time_info(jvmtiTimerInfo *info_ptr) { 5337 info_ptr->max_value = ALL_64_BITS; // will not wrap in less than 64 bits 5338 info_ptr->may_skip_backward = false; // elapsed time not wall time 5339 info_ptr->may_skip_forward = false; // elapsed time not wall time 5340 info_ptr->kind = JVMTI_TIMER_TOTAL_CPU; // user+system time is returned 5341 } 5342 5343 bool os::is_thread_cpu_time_supported() { 5344 return true; 5345 } 5346 5347 // System loadavg support. Returns -1 if load average cannot be obtained. 5348 // Linux doesn't yet have a (official) notion of processor sets, 5349 // so just return the system wide load average. 5350 int os::loadavg(double loadavg[], int nelem) { 5351 return ::getloadavg(loadavg, nelem); 5352 } 5353 5354 void os::pause() { 5355 char filename[MAX_PATH]; 5356 if (PauseAtStartupFile && PauseAtStartupFile[0]) { 5357 jio_snprintf(filename, MAX_PATH, "%s", PauseAtStartupFile); 5358 } else { 5359 jio_snprintf(filename, MAX_PATH, "./vm.paused.%d", current_process_id()); 5360 } 5361 5362 int fd = ::open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666); 5363 if (fd != -1) { 5364 struct stat buf; 5365 ::close(fd); 5366 while (::stat(filename, &buf) == 0) { 5367 (void)::poll(NULL, 0, 100); 5368 } 5369 } else { 5370 jio_fprintf(stderr, 5371 "Could not open pause file '%s', continuing immediately.\n", filename); 5372 } 5373 } 5374 5375 5376 // Refer to the comments in os_solaris.cpp park-unpark. The next two 5377 // comment paragraphs are worth repeating here: 5378 // 5379 // Assumption: 5380 // Only one parker can exist on an event, which is why we allocate 5381 // them per-thread. Multiple unparkers can coexist. 5382 // 5383 // _Event serves as a restricted-range semaphore. 5384 // -1 : thread is blocked, i.e. 
there is a waiter 5385 // 0 : neutral: thread is running or ready, 5386 // could have been signaled after a wait started 5387 // 1 : signaled - thread is running or ready 5388 // 5389 5390 // utility to compute the abstime argument to timedwait: 5391 // millis is the relative timeout time 5392 // abstime will be the absolute timeout time 5393 // TODO: replace compute_abstime() with unpackTime() 5394 5395 static struct timespec* compute_abstime(timespec* abstime, jlong millis) { 5396 if (millis < 0) millis = 0; 5397 5398 jlong seconds = millis / 1000; 5399 millis %= 1000; 5400 if (seconds > 50000000) { // see man cond_timedwait(3T) 5401 seconds = 50000000; 5402 } 5403 5404 if (os::supports_monotonic_clock()) { 5405 struct timespec now; 5406 int status = os::Linux::clock_gettime(CLOCK_MONOTONIC, &now); 5407 assert_status(status == 0, status, "clock_gettime"); 5408 abstime->tv_sec = now.tv_sec + seconds; 5409 long nanos = now.tv_nsec + millis * NANOSECS_PER_MILLISEC; 5410 if (nanos >= NANOSECS_PER_SEC) { 5411 abstime->tv_sec += 1; 5412 nanos -= NANOSECS_PER_SEC; 5413 } 5414 abstime->tv_nsec = nanos; 5415 } else { 5416 struct timeval now; 5417 int status = gettimeofday(&now, NULL); 5418 assert(status == 0, "gettimeofday"); 5419 abstime->tv_sec = now.tv_sec + seconds; 5420 long usec = now.tv_usec + millis * 1000; 5421 if (usec >= 1000000) { 5422 abstime->tv_sec += 1; 5423 usec -= 1000000; 5424 } 5425 abstime->tv_nsec = usec * 1000; 5426 } 5427 return abstime; 5428 } 5429 5430 void os::PlatformEvent::park() { // AKA "down()" 5431 // Transitions for _Event: 5432 // -1 => -1 : illegal 5433 // 1 => 0 : pass - return immediately 5434 // 0 => -1 : block; then set _Event to 0 before returning 5435 5436 // Invariant: Only the thread associated with the Event/PlatformEvent 5437 // may call park(). 5438 // TODO: assert that _Assoc != NULL or _Assoc == Self 5439 assert(_nParked == 0, "invariant"); 5440 5441 int v; 5442 for (;;) { 5443 v = _Event; 5444 if (Atomic::cmpxchg(v-1, &_Event, v) == v) break; 5445 } 5446 guarantee(v >= 0, "invariant"); 5447 if (v == 0) { 5448 // Do this the hard way by blocking ... 5449 int status = pthread_mutex_lock(_mutex); 5450 assert_status(status == 0, status, "mutex_lock"); 5451 guarantee(_nParked == 0, "invariant"); 5452 ++_nParked; 5453 while (_Event < 0) { 5454 status = pthread_cond_wait(_cond, _mutex); 5455 // for some reason, under 2.7 lwp_cond_wait() may return ETIME ... 5456 // Treat this the same as if the wait was interrupted 5457 if (status == ETIME) { status = EINTR; } 5458 assert_status(status == 0 || status == EINTR, status, "cond_wait"); 5459 } 5460 --_nParked; 5461 5462 _Event = 0; 5463 status = pthread_mutex_unlock(_mutex); 5464 assert_status(status == 0, status, "mutex_unlock"); 5465 // Paranoia to ensure our locked and lock-free paths interact 5466 // correctly with each other. 5467 OrderAccess::fence(); 5468 } 5469 guarantee(_Event >= 0, "invariant"); 5470 } 5471 5472 int os::PlatformEvent::park(jlong millis) { 5473 // Transitions for _Event: 5474 // -1 => -1 : illegal 5475 // 1 => 0 : pass - return immediately 5476 // 0 => -1 : block; then set _Event to 0 before returning 5477 5478 guarantee(_nParked == 0, "invariant"); 5479 5480 int v; 5481 for (;;) { 5482 v = _Event; 5483 if (Atomic::cmpxchg(v-1, &_Event, v) == v) break; 5484 } 5485 guarantee(v >= 0, "invariant"); 5486 if (v != 0) return OS_OK; 5487 5488 // We do this the hard way, by blocking the thread. 5489 // Consider enforcing a minimum timeout value. 
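// A worked example of compute_abstime() above, with illustrative numbers: for
// millis == 2500 and a monotonic clock reading of {tv_sec = 100, tv_nsec = 900000000},
// seconds == 2 and the nanosecond sum 900000000 + 500 * NANOSECS_PER_MILLISEC ==
// 1400000000 exceeds one second, so the result is abst == {tv_sec = 103,
// tv_nsec = 400000000}.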
5490 struct timespec abst; 5491 compute_abstime(&abst, millis); 5492 5493 int ret = OS_TIMEOUT; 5494 int status = pthread_mutex_lock(_mutex); 5495 assert_status(status == 0, status, "mutex_lock"); 5496 guarantee(_nParked == 0, "invariant"); 5497 ++_nParked; 5498 5499 // Object.wait(timo) will return because of 5500 // (a) notification 5501 // (b) timeout 5502 // (c) thread.interrupt 5503 // 5504 // Thread.interrupt and object.notify{All} both call Event::set. 5505 // That is, we treat thread.interrupt as a special case of notification. 5506 // We ignore spurious OS wakeups unless FilterSpuriousWakeups is false. 5507 // We assume all ETIME returns are valid. 5508 // 5509 // TODO: properly differentiate simultaneous notify+interrupt. 5510 // In that case, we should propagate the notify to another waiter. 5511 5512 while (_Event < 0) { 5513 status = pthread_cond_timedwait(_cond, _mutex, &abst); 5514 assert_status(status == 0 || status == EINTR || 5515 status == ETIME || status == ETIMEDOUT, 5516 status, "cond_timedwait"); 5517 if (!FilterSpuriousWakeups) break; // previous semantics 5518 if (status == ETIME || status == ETIMEDOUT) break; 5519 // We consume and ignore EINTR and spurious wakeups. 5520 } 5521 --_nParked; 5522 if (_Event >= 0) { 5523 ret = OS_OK; 5524 } 5525 _Event = 0; 5526 status = pthread_mutex_unlock(_mutex); 5527 assert_status(status == 0, status, "mutex_unlock"); 5528 assert(_nParked == 0, "invariant"); 5529 // Paranoia to ensure our locked and lock-free paths interact 5530 // correctly with each other. 5531 OrderAccess::fence(); 5532 return ret; 5533 } 5534 5535 void os::PlatformEvent::unpark() { 5536 // Transitions for _Event: 5537 // 0 => 1 : just return 5538 // 1 => 1 : just return 5539 // -1 => either 0 or 1; must signal target thread 5540 // That is, we can safely transition _Event from -1 to either 5541 // 0 or 1. 5542 // See also: "Semaphores in Plan 9" by Mullender & Cox 5543 // 5544 // Note: Forcing a transition from "-1" to "1" on an unpark() means 5545 // that it will take two back-to-back park() calls for the owning 5546 // thread to block. This has the benefit of forcing a spurious return 5547 // from the first park() call after an unpark() call which will help 5548 // shake out uses of park() and unpark() without condition variables. 5549 5550 if (Atomic::xchg(1, &_Event) >= 0) return; 5551 5552 // Wait for the thread associated with the event to vacate 5553 int status = pthread_mutex_lock(_mutex); 5554 assert_status(status == 0, status, "mutex_lock"); 5555 int AnyWaiters = _nParked; 5556 assert(AnyWaiters == 0 || AnyWaiters == 1, "invariant"); 5557 status = pthread_mutex_unlock(_mutex); 5558 assert_status(status == 0, status, "mutex_unlock"); 5559 if (AnyWaiters != 0) { 5560 // Note that we signal() *after* dropping the lock for "immortal" Events. 5561 // This is safe and avoids a common class of futile wakeups. In rare 5562 // circumstances this can cause a thread to return prematurely from 5563 // cond_{timed}wait() but the spurious wakeup is benign and the victim 5564 // will simply re-test the condition and re-park itself. 5565 // This provides particular benefit if the underlying platform does not 5566 // provide wait morphing. 5567 status = pthread_cond_signal(_cond); 5568 assert_status(status == 0, status, "cond_signal"); 5569 } 5570 } 5571 5572 5573 // JSR166 5574 // ------------------------------------------------------- 5575 5576 // The solaris and linux implementations of park/unpark are fairly 5577 // conservative for now, but can be improved. 
They currently use a 5578 // mutex/condvar pair, plus a count. 5579 // Park decrements count if > 0, else does a condvar wait. Unpark 5580 // sets count to 1 and signals condvar. Only one thread ever waits 5581 // on the condvar. Contention seen when trying to park implies that someone 5582 // is unparking you, so don't wait. And spurious returns are fine, so there 5583 // is no need to track notifications. 5584 5585 // This code is common to linux and solaris and will be moved to a 5586 // common place in dolphin. 5587 // 5588 // The passed in time value is either a relative time in nanoseconds 5589 // or an absolute time in milliseconds. Either way it has to be unpacked 5590 // into suitable seconds and nanoseconds components and stored in the 5591 // given timespec structure. 5592 // Given that time is a 64-bit value and the time_t used in the timespec is only 5593 // a signed 32-bit value (except on 64-bit Linux), we have to watch for 5594 // overflow if times far in the future are given. Further, on Solaris versions 5595 // prior to 10 there is a restriction (see cond_timedwait) that the specified 5596 // number of seconds, in abstime, is less than current_time + 100,000,000. 5597 // As it will be 28 years before "now + 100000000" will overflow we can 5598 // ignore overflow and just impose a hard-limit on seconds using the value 5599 // of "now + 100,000,000". This places a limit on the timeout of about 3.17 5600 // years from "now". 5601 5602 static void unpackTime(timespec* absTime, bool isAbsolute, jlong time) { 5603 assert(time > 0, "convertTime"); 5604 time_t max_secs = 0; 5605 5606 if (!os::supports_monotonic_clock() || isAbsolute) { 5607 struct timeval now; 5608 int status = gettimeofday(&now, NULL); 5609 assert(status == 0, "gettimeofday"); 5610 5611 max_secs = now.tv_sec + MAX_SECS; 5612 5613 if (isAbsolute) { 5614 jlong secs = time / 1000; 5615 if (secs > max_secs) { 5616 absTime->tv_sec = max_secs; 5617 } else { 5618 absTime->tv_sec = secs; 5619 } 5620 absTime->tv_nsec = (time % 1000) * NANOSECS_PER_MILLISEC; 5621 } else { 5622 jlong secs = time / NANOSECS_PER_SEC; 5623 if (secs >= MAX_SECS) { 5624 absTime->tv_sec = max_secs; 5625 absTime->tv_nsec = 0; 5626 } else { 5627 absTime->tv_sec = now.tv_sec + secs; 5628 absTime->tv_nsec = (time % NANOSECS_PER_SEC) + now.tv_usec*1000; 5629 if (absTime->tv_nsec >= NANOSECS_PER_SEC) { 5630 absTime->tv_nsec -= NANOSECS_PER_SEC; 5631 ++absTime->tv_sec; // note: this must be <= max_secs 5632 } 5633 } 5634 } 5635 } else { 5636 // must be relative using monotonic clock 5637 struct timespec now; 5638 int status = os::Linux::clock_gettime(CLOCK_MONOTONIC, &now); 5639 assert_status(status == 0, status, "clock_gettime"); 5640 max_secs = now.tv_sec + MAX_SECS; 5641 jlong secs = time / NANOSECS_PER_SEC; 5642 if (secs >= MAX_SECS) { 5643 absTime->tv_sec = max_secs; 5644 absTime->tv_nsec = 0; 5645 } else { 5646 absTime->tv_sec = now.tv_sec + secs; 5647 absTime->tv_nsec = (time % NANOSECS_PER_SEC) + now.tv_nsec; 5648 if (absTime->tv_nsec >= NANOSECS_PER_SEC) { 5649 absTime->tv_nsec -= NANOSECS_PER_SEC; 5650 ++absTime->tv_sec; // note: this must be <= max_secs 5651 } 5652 } 5653 } 5654 assert(absTime->tv_sec >= 0, "tv_sec < 0"); 5655 assert(absTime->tv_sec <= max_secs, "tv_sec > max_secs"); 5656 assert(absTime->tv_nsec >= 0, "tv_nsec < 0"); 5657 assert(absTime->tv_nsec < NANOSECS_PER_SEC, "tv_nsec >= nanos_per_sec"); 5658 } 5659 5660 void Parker::park(bool isAbsolute, jlong time) { 5661 // Ideally we'd do something useful while spinning, such 5662 // as
calling unpackTime(). 5663 5664 // Optional fast-path check: 5665 // Return immediately if a permit is available. 5666 // We depend on Atomic::xchg() having full barrier semantics 5667 // since we are doing a lock-free update to _counter. 5668 if (Atomic::xchg(0, &_counter) > 0) return; 5669 5670 Thread* thread = Thread::current(); 5671 assert(thread->is_Java_thread(), "Must be JavaThread"); 5672 JavaThread *jt = (JavaThread *)thread; 5673 5674 // Optional optimization -- avoid state transitions if there's an interrupt pending. 5675 // Check interrupt before trying to wait 5676 if (Thread::is_interrupted(thread, false)) { 5677 return; 5678 } 5679 5680 // Next, demultiplex/decode time arguments 5681 timespec absTime; 5682 if (time < 0 || (isAbsolute && time == 0)) { // don't wait at all 5683 return; 5684 } 5685 if (time > 0) { 5686 unpackTime(&absTime, isAbsolute, time); 5687 } 5688 5689 5690 // Enter safepoint region 5691 // Beware of deadlocks such as 6317397. 5692 // The per-thread Parker:: mutex is a classic leaf-lock. 5693 // In particular a thread must never block on the Threads_lock while 5694 // holding the Parker:: mutex. If safepoints are pending, both 5695 // the ThreadBlockInVM() CTOR and DTOR may grab Threads_lock. 5696 ThreadBlockInVM tbivm(jt); 5697 5698 // Don't wait if we cannot get the lock since interference arises from 5699 // unblocking. Also, check interrupt before trying to wait. 5700 if (Thread::is_interrupted(thread, false) || pthread_mutex_trylock(_mutex) != 0) { 5701 return; 5702 } 5703 5704 int status; 5705 if (_counter > 0) { // no wait needed 5706 _counter = 0; 5707 status = pthread_mutex_unlock(_mutex); 5708 assert_status(status == 0, status, "invariant"); 5709 // Paranoia to ensure our locked and lock-free paths interact 5710 // correctly with each other and Java-level accesses. 5711 OrderAccess::fence(); 5712 return; 5713 } 5714 5715 #ifdef ASSERT 5716 // Don't catch signals while blocked; let the running threads have the signals. 5717 // (This allows a debugger to break into the running thread.) 5718 sigset_t oldsigs; 5719 sigset_t* allowdebug_blocked = os::Linux::allowdebug_blocked_signals(); 5720 pthread_sigmask(SIG_BLOCK, allowdebug_blocked, &oldsigs); 5721 #endif 5722 5723 OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */); 5724 jt->set_suspend_equivalent(); 5725 // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self() 5726 5727 assert(_cur_index == -1, "invariant"); 5728 if (time == 0) { 5729 _cur_index = REL_INDEX; // arbitrary choice when not timed 5730 status = pthread_cond_wait(&_cond[_cur_index], _mutex); 5731 } else { 5732 _cur_index = isAbsolute ? ABS_INDEX : REL_INDEX; 5733 status = pthread_cond_timedwait(&_cond[_cur_index], _mutex, &absTime); 5734 } 5735 _cur_index = -1; 5736 assert_status(status == 0 || status == EINTR || 5737 status == ETIME || status == ETIMEDOUT, 5738 status, "cond_timedwait"); 5739 5740 #ifdef ASSERT 5741 pthread_sigmask(SIG_SETMASK, &oldsigs, NULL); 5742 #endif 5743 5744 _counter = 0; 5745 status = pthread_mutex_unlock(_mutex); 5746 assert_status(status == 0, status, "invariant"); 5747 // Paranoia to ensure our locked and lock-free paths interact 5748 // correctly with each other and Java-level accesses.
5749 OrderAccess::fence(); 5750 5751 // If externally suspended while waiting, re-suspend 5752 if (jt->handle_special_suspend_equivalent_condition()) { 5753 jt->java_suspend_self(); 5754 } 5755 } 5756 5757 void Parker::unpark() { 5758 int status = pthread_mutex_lock(_mutex); 5759 assert_status(status == 0, status, "invariant"); 5760 const int s = _counter; 5761 _counter = 1; 5762 // must capture correct index before unlocking 5763 int index = _cur_index; 5764 status = pthread_mutex_unlock(_mutex); 5765 assert_status(status == 0, status, "invariant"); 5766 if (s < 1 && index != -1) { 5767 // thread is definitely parked 5768 status = pthread_cond_signal(&_cond[index]); 5769 assert_status(status == 0, status, "invariant"); 5770 } 5771 } 5772 5773 5774 extern char** environ; 5775 5776 // Run the specified command in a separate process. Return its exit value, 5777 // or -1 on failure (e.g. can't fork a new process). 5778 // Unlike system(), this function can be called from signal handler. It 5779 // doesn't block SIGINT et al. 5780 int os::fork_and_exec(char* cmd) { 5781 const char * argv[4] = {"sh", "-c", cmd, NULL}; 5782 5783 pid_t pid = fork(); 5784 5785 if (pid < 0) { 5786 // fork failed 5787 return -1; 5788 5789 } else if (pid == 0) { 5790 // child process 5791 5792 execve("/bin/sh", (char* const*)argv, environ); 5793 5794 // execve failed 5795 _exit(-1); 5796 5797 } else { 5798 // copied from J2SE ..._waitForProcessExit() in UNIXProcess_md.c; we don't 5799 // care about the actual exit code, for now. 5800 5801 int status; 5802 5803 // Wait for the child process to exit. This returns immediately if 5804 // the child has already exited. */ 5805 while (waitpid(pid, &status, 0) < 0) { 5806 switch (errno) { 5807 case ECHILD: return 0; 5808 case EINTR: break; 5809 default: return -1; 5810 } 5811 } 5812 5813 if (WIFEXITED(status)) { 5814 // The child exited normally; get its exit code. 5815 return WEXITSTATUS(status); 5816 } else if (WIFSIGNALED(status)) { 5817 // The child exited because of a signal 5818 // The best value to return is 0x80 + signal number, 5819 // because that is what all Unix shells do, and because 5820 // it allows callers to distinguish between process exit and 5821 // process death by signal. 
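// Example (illustrative): if the child was killed by SIGSEGV (signal 11),
// WTERMSIG(status) == 11 and the value returned below is 0x80 + 11 == 139,
// matching the exit code a shell such as /bin/sh reports for that child.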
5822 return 0x80 + WTERMSIG(status); 5823 } else { 5824 // Unknown exit code; pass it through 5825 return status; 5826 } 5827 } 5828 } 5829 5830 // is_headless_jre() 5831 // 5832 // Test for the existence of xawt/libmawt.so or libawt_xawt.so 5833 // in order to report if we are running in a headless jre 5834 // 5835 // Since JDK8 xawt/libmawt.so was moved into the same directory 5836 // as libawt.so, and renamed libawt_xawt.so 5837 // 5838 bool os::is_headless_jre() { 5839 struct stat statbuf; 5840 char buf[MAXPATHLEN]; 5841 char libmawtpath[MAXPATHLEN]; 5842 const char *xawtstr = "/xawt/libmawt.so"; 5843 const char *new_xawtstr = "/libawt_xawt.so"; 5844 char *p; 5845 5846 // Get path to libjvm.so 5847 os::jvm_path(buf, sizeof(buf)); 5848 5849 // Get rid of libjvm.so 5850 p = strrchr(buf, '/'); 5851 if (p == NULL) { 5852 return false; 5853 } else { 5854 *p = '\0'; 5855 } 5856 5857 // Get rid of client or server 5858 p = strrchr(buf, '/'); 5859 if (p == NULL) { 5860 return false; 5861 } else { 5862 *p = '\0'; 5863 } 5864 5865 // check xawt/libmawt.so 5866 strcpy(libmawtpath, buf); 5867 strcat(libmawtpath, xawtstr); 5868 if (::stat(libmawtpath, &statbuf) == 0) return false; 5869 5870 // check libawt_xawt.so 5871 strcpy(libmawtpath, buf); 5872 strcat(libmawtpath, new_xawtstr); 5873 if (::stat(libmawtpath, &statbuf) == 0) return false; 5874 5875 return true; 5876 } 5877 5878 // Get the default path to the core file 5879 // Returns the length of the string 5880 int os::get_core_path(char* buffer, size_t bufferSize) { 5881 /* 5882 * Max length of /proc/sys/kernel/core_pattern is 128 characters. 5883 * See https://www.kernel.org/doc/Documentation/sysctl/kernel.txt 5884 */ 5885 const int core_pattern_len = 129; 5886 char core_pattern[core_pattern_len] = {0}; 5887 5888 int core_pattern_file = ::open("/proc/sys/kernel/core_pattern", O_RDONLY); 5889 if (core_pattern_file == -1) { 5890 return -1; 5891 } 5892 5893 ssize_t ret = ::read(core_pattern_file, core_pattern, core_pattern_len); 5894 ::close(core_pattern_file); 5895 if (ret <= 0 || ret >= core_pattern_len || core_pattern[0] == '\n') { 5896 return -1; 5897 } 5898 if (core_pattern[ret-1] == '\n') { 5899 core_pattern[ret-1] = '\0'; 5900 } else { 5901 core_pattern[ret] = '\0'; 5902 } 5903 5904 char *pid_pos = strstr(core_pattern, "%p"); 5905 int written; 5906 5907 if (core_pattern[0] == '/') { 5908 written = jio_snprintf(buffer, bufferSize, "%s", core_pattern); 5909 } else { 5910 char cwd[PATH_MAX]; 5911 5912 const char* p = get_current_directory(cwd, PATH_MAX); 5913 if (p == NULL) { 5914 return -1; 5915 } 5916 5917 if (core_pattern[0] == '|') { 5918 written = jio_snprintf(buffer, bufferSize, 5919 "\"%s\" (or dumping to %s/core.%d)", 5920 &core_pattern[1], p, current_process_id()); 5921 } else { 5922 written = jio_snprintf(buffer, bufferSize, "%s/%s", p, core_pattern); 5923 } 5924 } 5925 5926 if (written < 0) { 5927 return -1; 5928 } 5929 5930 if (((size_t)written < bufferSize) && (pid_pos == NULL) && (core_pattern[0] != '|')) { 5931 int core_uses_pid_file = ::open("/proc/sys/kernel/core_uses_pid", O_RDONLY); 5932 5933 if (core_uses_pid_file != -1) { 5934 char core_uses_pid = 0; 5935 ssize_t ret = ::read(core_uses_pid_file, &core_uses_pid, 1); 5936 ::close(core_uses_pid_file); 5937 5938 if (core_uses_pid == '1') { 5939 jio_snprintf(buffer + written, bufferSize - written, 5940 ".%d", current_process_id()); 5941 } 5942 } 5943 } 5944 5945 return strlen(buffer); 5946 } 5947 5948 bool os::start_debugging(char *buf, int buflen) { 5949 int len = 
(int)strlen(buf); 5950 char *p = &buf[len]; 5951 5952 jio_snprintf(p, buflen-len, 5953 "\n\n" 5954 "Do you want to debug the problem?\n\n" 5955 "To debug, run 'gdb /proc/%d/exe %d'; then switch to thread " UINTX_FORMAT " (" INTPTR_FORMAT ")\n" 5956 "Enter 'yes' to launch gdb automatically (PATH must include gdb)\n" 5957 "Otherwise, press RETURN to abort...", 5958 os::current_process_id(), os::current_process_id(), 5959 os::current_thread_id(), os::current_thread_id()); 5960 5961 bool yes = os::message_box("Unexpected Error", buf); 5962 5963 if (yes) { 5964 // yes, user asked VM to launch debugger 5965 jio_snprintf(buf, buflen, "gdb /proc/%d/exe %d", 5966 os::current_process_id(), os::current_process_id()); 5967 5968 os::fork_and_exec(buf); 5969 yes = false; 5970 } 5971 return yes; 5972 } 5973 5974 5975 5976 /////////////// Unit tests /////////////// 5977 5978 #ifndef PRODUCT 5979 5980 #define test_log(...) \ 5981 do { \ 5982 if (VerboseInternalVMTests) { \ 5983 tty->print_cr(__VA_ARGS__); \ 5984 tty->flush(); \ 5985 } \ 5986 } while (false) 5987 5988 class TestReserveMemorySpecial : AllStatic { 5989 public: 5990 static void small_page_write(void* addr, size_t size) { 5991 size_t page_size = os::vm_page_size(); 5992 5993 char* end = (char*)addr + size; 5994 for (char* p = (char*)addr; p < end; p += page_size) { 5995 *p = 1; 5996 } 5997 } 5998 5999 static void test_reserve_memory_special_huge_tlbfs_only(size_t size) { 6000 if (!UseHugeTLBFS) { 6001 return; 6002 } 6003 6004 test_log("test_reserve_memory_special_huge_tlbfs_only(" SIZE_FORMAT ")", size); 6005 6006 char* addr = os::Linux::reserve_memory_special_huge_tlbfs_only(size, NULL, false); 6007 6008 if (addr != NULL) { 6009 small_page_write(addr, size); 6010 6011 os::Linux::release_memory_special_huge_tlbfs(addr, size); 6012 } 6013 } 6014 6015 static void test_reserve_memory_special_huge_tlbfs_only() { 6016 if (!UseHugeTLBFS) { 6017 return; 6018 } 6019 6020 size_t lp = os::large_page_size(); 6021 6022 for (size_t size = lp; size <= lp * 10; size += lp) { 6023 test_reserve_memory_special_huge_tlbfs_only(size); 6024 } 6025 } 6026 6027 static void test_reserve_memory_special_huge_tlbfs_mixed() { 6028 size_t lp = os::large_page_size(); 6029 size_t ag = os::vm_allocation_granularity(); 6030 6031 // sizes to test 6032 const size_t sizes[] = { 6033 lp, lp + ag, lp + lp / 2, lp * 2, 6034 lp * 2 + ag, lp * 2 - ag, lp * 2 + lp / 2, 6035 lp * 10, lp * 10 + lp / 2 6036 }; 6037 const int num_sizes = sizeof(sizes) / sizeof(size_t); 6038 6039 // For each size/alignment combination, we test three scenarios: 6040 // 1) with req_addr == NULL 6041 // 2) with a non-null req_addr at which we expect to successfully allocate 6042 // 3) with a non-null req_addr which contains a pre-existing mapping, at which we 6043 // expect the allocation to either fail or to ignore req_addr 6044 6045 // Pre-allocate two areas; they shall be as large as the largest allocation 6046 // and aligned to the largest alignment we will be testing.
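// PROT_NONE combined with MAP_NORESERVE (used below) asks the kernel for an
// address-space-only reservation: the pages are inaccessible and no swap space is
// committed, so it is a cheap way to obtain address ranges to use as req_addr values
// without touching real memory.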
6047 const size_t mapping_size = sizes[num_sizes - 1] * 2; 6048 char* const mapping1 = (char*) ::mmap(NULL, mapping_size, 6049 PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, 6050 -1, 0); 6051 assert(mapping1 != MAP_FAILED, "should work"); 6052 6053 char* const mapping2 = (char*) ::mmap(NULL, mapping_size, 6054 PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, 6055 -1, 0); 6056 assert(mapping2 != MAP_FAILED, "should work"); 6057 6058 // Unmap the first mapping, but leave the second mapping intact: the first 6059 // mapping will serve as a value for a "good" req_addr (case 2). The second 6060 // mapping, still intact, as "bad" req_addr (case 3). 6061 ::munmap(mapping1, mapping_size); 6062 6063 // Case 1 6064 test_log("%s, req_addr NULL:", __FUNCTION__); 6065 test_log("size align result"); 6066 6067 for (int i = 0; i < num_sizes; i++) { 6068 const size_t size = sizes[i]; 6069 for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) { 6070 char* p = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, NULL, false); 6071 test_log(SIZE_FORMAT_HEX " " SIZE_FORMAT_HEX " -> " PTR_FORMAT " %s", 6072 size, alignment, p2i(p), (p != NULL ? "" : "(failed)")); 6073 if (p != NULL) { 6074 assert(is_ptr_aligned(p, alignment), "must be"); 6075 small_page_write(p, size); 6076 os::Linux::release_memory_special_huge_tlbfs(p, size); 6077 } 6078 } 6079 } 6080 6081 // Case 2 6082 test_log("%s, req_addr non-NULL:", __FUNCTION__); 6083 test_log("size align req_addr result"); 6084 6085 for (int i = 0; i < num_sizes; i++) { 6086 const size_t size = sizes[i]; 6087 for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) { 6088 char* const req_addr = (char*) align_ptr_up(mapping1, alignment); 6089 char* p = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, req_addr, false); 6090 test_log(SIZE_FORMAT_HEX " " SIZE_FORMAT_HEX " " PTR_FORMAT " -> " PTR_FORMAT " %s", 6091 size, alignment, p2i(req_addr), p2i(p), 6092 ((p != NULL ? (p == req_addr ? "(exact match)" : "") : "(failed)"))); 6093 if (p != NULL) { 6094 assert(p == req_addr, "must be"); 6095 small_page_write(p, size); 6096 os::Linux::release_memory_special_huge_tlbfs(p, size); 6097 } 6098 } 6099 } 6100 6101 // Case 3 6102 test_log("%s, req_addr non-NULL with preexisting mapping:", __FUNCTION__); 6103 test_log("size align req_addr result"); 6104 6105 for (int i = 0; i < num_sizes; i++) { 6106 const size_t size = sizes[i]; 6107 for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) { 6108 char* const req_addr = (char*) align_ptr_up(mapping2, alignment); 6109 char* p = os::Linux::reserve_memory_special_huge_tlbfs_mixed(size, alignment, req_addr, false); 6110 test_log(SIZE_FORMAT_HEX " " SIZE_FORMAT_HEX " " PTR_FORMAT " -> " PTR_FORMAT " %s", 6111 size, alignment, p2i(req_addr), p2i(p), ((p != NULL ? 
"" : "(failed)"))); 6112 // as the area around req_addr contains already existing mappings, the API should always 6113 // return NULL (as per contract, it cannot return another address) 6114 assert(p == NULL, "must be"); 6115 } 6116 } 6117 6118 ::munmap(mapping2, mapping_size); 6119 6120 } 6121 6122 static void test_reserve_memory_special_huge_tlbfs() { 6123 if (!UseHugeTLBFS) { 6124 return; 6125 } 6126 6127 test_reserve_memory_special_huge_tlbfs_only(); 6128 test_reserve_memory_special_huge_tlbfs_mixed(); 6129 } 6130 6131 static void test_reserve_memory_special_shm(size_t size, size_t alignment) { 6132 if (!UseSHM) { 6133 return; 6134 } 6135 6136 test_log("test_reserve_memory_special_shm(" SIZE_FORMAT ", " SIZE_FORMAT ")", size, alignment); 6137 6138 char* addr = os::Linux::reserve_memory_special_shm(size, alignment, NULL, false); 6139 6140 if (addr != NULL) { 6141 assert(is_ptr_aligned(addr, alignment), "Check"); 6142 assert(is_ptr_aligned(addr, os::large_page_size()), "Check"); 6143 6144 small_page_write(addr, size); 6145 6146 os::Linux::release_memory_special_shm(addr, size); 6147 } 6148 } 6149 6150 static void test_reserve_memory_special_shm() { 6151 size_t lp = os::large_page_size(); 6152 size_t ag = os::vm_allocation_granularity(); 6153 6154 for (size_t size = ag; size < lp * 3; size += ag) { 6155 for (size_t alignment = ag; is_size_aligned(size, alignment); alignment *= 2) { 6156 test_reserve_memory_special_shm(size, alignment); 6157 } 6158 } 6159 } 6160 6161 static void test() { 6162 test_reserve_memory_special_huge_tlbfs(); 6163 test_reserve_memory_special_shm(); 6164 } 6165 }; 6166 6167 void TestReserveMemorySpecial_test() { 6168 TestReserveMemorySpecial::test(); 6169 } 6170 6171 #endif