#define MAX_SECS 100000000

// for timer info max values which include all bits
#define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)

#define LARGEPAGES_BIT (1 << 6)
////////////////////////////////////////////////////////////////////////////////
// global variables
julong os::Linux::_physical_memory = 0;

// Bounds of the primordial thread's stack; captured during VM startup.
address os::Linux::_initial_thread_stack_bottom = NULL;
uintptr_t os::Linux::_initial_thread_stack_size = 0;

// Library entry points resolved at runtime; NULL until initialized
// (resolution happens outside this chunk).
int (*os::Linux::_clock_gettime)(clockid_t, struct timespec *) = NULL;
int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL;
int (*os::Linux::_pthread_setname_np)(pthread_t, const char*) = NULL;
Mutex* os::Linux::_createThread_lock = NULL;
pthread_t os::Linux::_main_thread;
int os::Linux::_page_size = -1;  // -1 until os::init() queries sysconf(_SC_PAGESIZE)
const int os::Linux::_vm_default_page_size = (8 * K);
bool os::Linux::_is_floating_stack = false;
bool os::Linux::_is_NPTL = false;
bool os::Linux::_supports_fast_thread_cpu_time = false;
// Version strings captured by libpthread_init(); NULL until then.
const char * os::Linux::_glibc_version = NULL;
const char * os::Linux::_libpthread_version = NULL;
pthread_condattr_t os::Linux::_condattr[1];

// Baseline for elapsed-time calculations; set in os::init().
static jlong initial_time_count=0;

// Overwritten with sysconf(_SC_CLK_TCK) in os::init().
static int clock_tics_per_sec = 100;

// For diagnostics to print a message once. see run_periodic_checks
static sigset_t check_signal_done;
static bool check_signals = true;

// Launcher-thread pid; 0 until os::init() fills it in (see
// os::current_process_id() for why it is cached).
static pid_t _initial_pid = 0;

// Signal number used to suspend/resume a thread

// do not use any signal number less than SIGSEGV, see 4355769
static int SR_signum = SIGUSR2;
sigset_t SR_sigset;

// Declarations
static void unpackTime(timespec* absTime, bool isAbsolute, jlong time);

// utility functions

static int SR_initialize();
167
168 julong os::available_memory() {
169 return Linux::available_memory();
170 }
171
172 julong os::Linux::available_memory() {
173 // values in struct sysinfo are "unsigned long"
174 struct sysinfo si;
206 #define SYS_gettid 186
207 #else
208 #ifdef __sparc__
209 #define SYS_gettid 143
210 #else
211 #error define gettid for the arch
212 #endif
213 #endif
214 #endif
215 #endif
216 #endif
217
// Cpu architecture string
static char cpu_arch[] = HOTSPOT_LIB_ARCH;  // value supplied by the build system
220
221
222 // pid_t gettid()
223 //
224 // Returns the kernel thread id of the currently running thread. Kernel
225 // thread id is used to access /proc.
226 //
227 // (Note that getpid() on LinuxThreads returns kernel thread id too; but
228 // on NPTL, it returns the same pid for all threads, as required by POSIX.)
229 //
230 pid_t os::Linux::gettid() {
231 int rslt = syscall(SYS_gettid);
232 if (rslt == -1) {
233 // old kernel, no NPTL support
234 return getpid();
235 } else {
236 return (pid_t)rslt;
237 }
238 }
239
// Most versions of linux have a bug where the number of processors is
// determined by looking at the /proc file system. In a chroot environment,
242 // the system call returns 1. This causes the VM to act as if it is
243 // a single processor and elide locking (see is_MP() call).
// Set by initialize_system_info() when /proc/<tid> cannot be opened.
static bool unsafe_chroot_detected = false;
static const char *unstable_chroot_error = "/proc file system not found.\n"
                     "Java may be unstable running multithreaded in a chroot "
                     "environment on Linux when /proc filesystem is not mounted.";
248
249 void os::Linux::initialize_system_info() {
250 set_processor_count(sysconf(_SC_NPROCESSORS_CONF));
251 if (processor_count() == 1) {
252 pid_t pid = os::Linux::gettid();
253 char fname[32];
254 jio_snprintf(fname, sizeof(fname), "/proc/%d", pid);
255 FILE *fp = fopen(fname, "r");
256 if (fp == NULL) {
257 unsafe_chroot_detected = true;
491 OSThread* osthread = thread->osthread();
492 osthread->set_caller_sigmask(caller_sigmask);
493
494 pthread_sigmask(SIG_UNBLOCK, os::Linux::unblocked_signals(), NULL);
495
496 if (!ReduceSignalUsage) {
497 if (thread->is_VM_thread()) {
498 // Only the VM thread handles BREAK_SIGNAL ...
499 pthread_sigmask(SIG_UNBLOCK, vm_signals(), NULL);
500 } else {
501 // ... all other threads block BREAK_SIGNAL
502 pthread_sigmask(SIG_BLOCK, vm_signals(), NULL);
503 }
504 }
505 }
506
507 //////////////////////////////////////////////////////////////////////////////
508 // detecting pthread library
509
// Detect which thread library we are running on (NPTL vs. LinuxThreads)
// and record the glibc/libpthread version strings for diagnostics.
void os::Linux::libpthread_init() {
  // Save glibc and pthread version strings. Note that _CS_GNU_LIBC_VERSION
  // and _CS_GNU_LIBPTHREAD_VERSION are supported in glibc >= 2.3.2. Use a
  // generic name for earlier versions.
  // Define macros here so we can build HotSpot on old systems.
#ifndef _CS_GNU_LIBC_VERSION
#define _CS_GNU_LIBC_VERSION 2
#endif
#ifndef _CS_GNU_LIBPTHREAD_VERSION
#define _CS_GNU_LIBPTHREAD_VERSION 3
#endif

  // First query the required buffer size, then fetch the version string.
  size_t n = confstr(_CS_GNU_LIBC_VERSION, NULL, 0);
  if (n > 0) {
    // The buffer is handed to set_glibc_version() and retained for the
    // life of the VM; it is never freed.
    char *str = (char *)malloc(n, mtInternal);
    confstr(_CS_GNU_LIBC_VERSION, str, n);
    os::Linux::set_glibc_version(str);
  } else {
    // _CS_GNU_LIBC_VERSION is not supported, try gnu_get_libc_version()
    static char _gnu_libc_version[32];
    jio_snprintf(_gnu_libc_version, sizeof(_gnu_libc_version),
    "glibc %s %s", gnu_get_libc_version(), gnu_get_libc_release());
    os::Linux::set_glibc_version(_gnu_libc_version);
  }

  n = confstr(_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);
  if (n > 0) {
    char *str = (char *)malloc(n, mtInternal);
    confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n);
    // Vanilla RH-9 (glibc 2.3.2) has a bug that confstr() always tells
    // us "NPTL-0.29" even when we are running with LinuxThreads. Check if
    // this is the case. LinuxThreads has a hard limit on the max number of
    // threads, so sysconf(_SC_THREAD_THREADS_MAX) returns a positive value.
    // NPTL has no such limit: sysconf() returns -1 and leaves errno
    // unchanged. Use that to decide whether it is really NPTL.
    if (strcmp(os::Linux::glibc_version(), "glibc 2.3.2") == 0 &&
    strstr(str, "NPTL") &&
    sysconf(_SC_THREAD_THREADS_MAX) > 0) {
      free(str);  // discard the bogus "NPTL-..." string
      os::Linux::set_libpthread_version("linuxthreads");
    } else {
      os::Linux::set_libpthread_version(str);
    }
  } else {
    // glibc before 2.3.2 only has LinuxThreads.
    os::Linux::set_libpthread_version("linuxthreads");
  }

  if (strstr(libpthread_version(), "NPTL")) {
    os::Linux::set_is_NPTL();
  } else {
    os::Linux::set_is_LinuxThreads();
  }

  // LinuxThreads have two flavors: floating-stack mode, which allows variable
  // stack size; and fixed-stack mode. NPTL is always floating-stack.
  if (os::Linux::is_NPTL() || os::Linux::supports_variable_stack_size()) {
    os::Linux::set_is_floating_stack();
  }
}
570
571 /////////////////////////////////////////////////////////////////////////////
572 // thread stack
573
574 // Force Linux kernel to expand current thread stack. If "bottom" is close
575 // to the stack guard, caller should block all signals.
576 //
577 // MAP_GROWSDOWN:
578 // A special mmap() flag that is used to implement thread stacks. It tells
579 // kernel that the memory region should extend downwards when needed. This
580 // allows early versions of LinuxThreads to only mmap the first few pages
581 // when creating a new thread. Linux kernel will automatically expand thread
582 // stack as needed (on page faults).
583 //
584 // However, because the memory region of a MAP_GROWSDOWN stack can grow on
585 // demand, if a page fault happens outside an already mapped MAP_GROWSDOWN
586 // region, it's hard to tell if the fault is due to a legitimate stack
// access or because of reading/writing non-existent memory (e.g. buffer
588 // overrun). As a rule, if the fault happens below current stack pointer,
589 // Linux kernel does not expand stack, instead a SIGSEGV is sent to the
590 // application (see Linux kernel fault.c).
591 //
592 // This Linux feature can cause SIGSEGV when VM bangs thread stack for
593 // stack overflow detection.
594 //
595 // Newer version of LinuxThreads (since glibc-2.2, or, RH-7.x) and NPTL do
596 // not use this flag. However, the stack of initial thread is not created
597 // by pthread, it is still MAP_GROWSDOWN. Also it's possible (though
598 // unlikely) that user code can create a thread with MAP_GROWSDOWN stack
599 // and then attach the thread to JVM.
600 //
601 // To get around the problem and allow stack banging on Linux, we need to
602 // manually expand thread stack after receiving the SIGSEGV.
603 //
604 // There are two ways to expand thread stack to address "bottom", we used
605 // both of them in JVM before 1.5:
606 // 1. adjust stack pointer first so that it is below "bottom", and then
607 // touch "bottom"
608 // 2. mmap() the page in question
609 //
610 // Now alternate signal stack is gone, it's harder to use 2. For instance,
611 // if current sp is already near the lower end of page 101, and we need to
612 // call mmap() to map page 100, it is possible that part of the mmap() frame
613 // will be placed in page 100. When page 100 is mapped, it is zero-filled.
614 // That will destroy the mmap() frame and cause VM to crash.
615 //
616 // The following code works by adjusting sp first, then accessing the "bottom"
617 // page to force a page fault. Linux kernel will then automatically expand the
618 // stack mapping.
619 //
654
655 bool os::Linux::manually_expand_stack(JavaThread * t, address addr) {
656 assert(t!=NULL, "just checking");
657 assert(t->osthread()->expanding_stack(), "expand should be set");
658 assert(t->stack_base() != NULL, "stack_base was not initialized");
659
660 if (addr < t->stack_base() && addr >= t->stack_yellow_zone_base()) {
661 sigset_t mask_all, old_sigset;
662 sigfillset(&mask_all);
663 pthread_sigmask(SIG_SETMASK, &mask_all, &old_sigset);
664 _expand_stack_to(addr);
665 pthread_sigmask(SIG_SETMASK, &old_sigset, NULL);
666 return true;
667 }
668 return false;
669 }
670
671 //////////////////////////////////////////////////////////////////////////////
672 // create new thread
673
674 static address highest_vm_reserved_address();
675
676 // check if it's safe to start a new thread
677 static bool _thread_safety_check(Thread* thread) {
678 if (os::Linux::is_LinuxThreads() && !os::Linux::is_floating_stack()) {
679 // Fixed stack LinuxThreads (SuSE Linux/x86, and some versions of Redhat)
680 // Heap is mmap'ed at lower end of memory space. Thread stacks are
681 // allocated (MAP_FIXED) from high address space. Every thread stack
682 // occupies a fixed size slot (usually 2Mbytes, but user can change
683 // it to other values if they rebuild LinuxThreads).
684 //
685 // Problem with MAP_FIXED is that mmap() can still succeed even part of
686 // the memory region has already been mmap'ed. That means if we have too
687 // many threads and/or very large heap, eventually thread stack will
688 // collide with heap.
689 //
690 // Here we try to prevent heap/stack collision by comparing current
691 // stack bottom with the highest address that has been mmap'ed by JVM
692 // plus a safety margin for memory maps created by native code.
693 //
694 // This feature can be disabled by setting ThreadSafetyMargin to 0
695 //
696 if (ThreadSafetyMargin > 0) {
697 address stack_bottom = os::current_stack_base() - os::current_stack_size();
698
699 // not safe if our stack extends below the safety margin
700 return stack_bottom - ThreadSafetyMargin >= highest_vm_reserved_address();
701 } else {
702 return true;
703 }
704 } else {
705 // Floating stack LinuxThreads or NPTL:
706 // Unlike fixed stack LinuxThreads, thread stacks are not MAP_FIXED. When
707 // there's not enough space left, pthread_create() will fail. If we come
708 // here, that means enough space has been reserved for stack.
709 return true;
710 }
711 }
712
713 // Thread start routine for all newly created threads
714 static void *java_start(Thread *thread) {
715 // Try to randomize the cache line index of hot stack frames.
716 // This helps when threads of the same stack traces evict each other's
717 // cache lines. The threads can be either from the same JVM instance, or
718 // from different JVM instances. The benefit is especially true for
719 // processors with hyperthreading technology.
720 static int counter = 0;
721 int pid = os::current_process_id();
722 alloca(((pid ^ counter++) & 7) * 128);
723
724 ThreadLocalStorage::set_thread(thread);
725
726 OSThread* osthread = thread->osthread();
727 Monitor* sync = osthread->startThread_lock();
728
729 // non floating stack LinuxThreads needs extra check, see above
730 if (!_thread_safety_check(thread)) {
731 // notify parent thread
732 MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag);
733 osthread->set_state(ZOMBIE);
734 sync->notify_all();
735 return NULL;
736 }
737
738 // thread_id is kernel thread id (similar to Solaris LWP id)
739 osthread->set_thread_id(os::Linux::gettid());
740
741 if (UseNUMA) {
742 int lgrp_id = os::numa_get_group_id();
743 if (lgrp_id != -1) {
744 thread->set_lgrp_id(lgrp_id);
745 }
746 }
747 // initialize signal mask for this thread
748 os::Linux::hotspot_sigmask(thread);
749
750 // initialize floating point control register
751 os::Linux::init_thread_fpu_state();
752
753 // handshaking with parent thread
754 {
755 MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag);
756
757 // notify parent thread
816 case os::pgc_thread:
817 case os::cgc_thread:
818 case os::watcher_thread:
819 if (VMThreadStackSize > 0) stack_size = (size_t)(VMThreadStackSize * K);
820 break;
821 }
822 }
823
824 stack_size = MAX2(stack_size, os::Linux::min_stack_allowed);
825 pthread_attr_setstacksize(&attr, stack_size);
826 } else {
827 // let pthread_create() pick the default value.
828 }
829
830 // glibc guard page
831 pthread_attr_setguardsize(&attr, os::Linux::default_guard_size(thr_type));
832
833 ThreadState state;
834
835 {
836 // Serialize thread creation if we are running with fixed stack LinuxThreads
837 bool lock = os::Linux::is_LinuxThreads() && !os::Linux::is_floating_stack();
838 if (lock) {
839 os::Linux::createThread_lock()->lock_without_safepoint_check();
840 }
841
842 pthread_t tid;
843 int ret = pthread_create(&tid, &attr, (void* (*)(void*)) java_start, thread);
844
845 pthread_attr_destroy(&attr);
846
847 if (ret != 0) {
848 if (PrintMiscellaneous && (Verbose || WizardMode)) {
849 perror("pthread_create()");
850 }
851 // Need to clean up stuff we've allocated so far
852 thread->set_osthread(NULL);
853 delete osthread;
854 if (lock) os::Linux::createThread_lock()->unlock();
855 return false;
856 }
857
858 // Store pthread info into the OSThread
859 osthread->set_pthread_id(tid);
860
861 // Wait until child thread is either initialized or aborted
862 {
863 Monitor* sync_with_child = osthread->startThread_lock();
864 MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag);
865 while ((state = osthread->get_state()) == ALLOCATED) {
866 sync_with_child->wait(Mutex::_no_safepoint_check_flag);
867 }
868 }
869
870 if (lock) {
871 os::Linux::createThread_lock()->unlock();
872 }
873 }
874
875 // Aborted due to thread limit being reached
876 if (state == ZOMBIE) {
877 thread->set_osthread(NULL);
878 delete osthread;
879 return false;
880 }
881
882 // The thread is returned suspended (in state INITIALIZED),
883 // and is started higher up in the call chain
884 assert(state == INITIALIZED, "race condition");
885 return true;
886 }
887
888 /////////////////////////////////////////////////////////////////////////////
889 // attach existing thread
890
891 // bootstrap the main thread
892 bool os::create_main_thread(JavaThread* thread) {
1480 // sure it is async-safe and can handle partially initialized VM.
1481 void os::abort(bool dump_core, void* siginfo, void* context) {
1482 os::shutdown();
1483 if (dump_core) {
1484 #ifndef PRODUCT
1485 fdStream out(defaultStream::output_fd());
1486 out.print_raw("Current thread is ");
1487 char buf[16];
1488 jio_snprintf(buf, sizeof(buf), UINTX_FORMAT, os::current_thread_id());
1489 out.print_raw_cr(buf);
1490 out.print_raw_cr("Dumping core ...");
1491 #endif
1492 ::abort(); // dump core
1493 }
1494
1495 ::exit(1);
1496 }
1497
// Die immediately, no exit hook, no abort hook, no cleanup.
void os::die() {
  // Use ::abort() rather than ::_exit(): _exit() on LinuxThreads only kills
  // the current thread, not the whole process.
  ::abort();
}
1503
1504
1505 // This method is a copy of JDK's sysGetLastErrorString
1506 // from src/solaris/hpi/src/system_md.c
1507
1508 size_t os::lasterror(char *buf, size_t len) {
1509 if (errno == 0) return 0;
1510
1511 const char *s = ::strerror(errno);
1512 size_t n = ::strlen(s);
1513 if (n >= len) {
1514 n = len - 1;
1515 }
1516 ::strncpy(buf, s, n);
1517 buf[n] = '\0';
1518 return n;
1519 }
1520
// Thread id = the pthread_t handle of the calling thread, cast to an integer.
intx os::current_thread_id() { return (intx)pthread_self(); }
1522 int os::current_process_id() {
1523
1524 // Under the old linux thread library, linux gives each thread
1525 // its own process id. Because of this each thread will return
1526 // a different pid if this method were to return the result
1527 // of getpid(2). Linux provides no api that returns the pid
1528 // of the launcher thread for the vm. This implementation
1529 // returns a unique pid, the pid of the launcher thread
1530 // that starts the vm 'process'.
1531
1532 // Under the NPTL, getpid() returns the same pid as the
1533 // launcher thread rather than a unique pid per thread.
1534 // Use gettid() if you want the old pre NPTL behaviour.
1535
1536 // if you are looking for the result of a call to getpid() that
1537 // returns a unique pid for the calling thread, then look at the
1538 // OSThread::thread_id() method in osThread_linux.hpp file
1539
1540 return (int)(_initial_pid ? _initial_pid : getpid());
1541 }
1542
// DLL functions

// Shared libraries on Linux use the ".so" suffix.
const char* os::dll_file_extension() { return ".so"; }
1546
// This must be hard coded because it's the system's temporary
// directory not the java application's temp directory, ala java.io.tmpdir.
const char* os::get_temp_directory() { return "/tmp"; }
1550
1551 static bool file_exists(const char* filename) {
1552 struct stat statbuf;
1553 if (filename == NULL || strlen(filename) == 0) {
1554 return false;
1555 }
1556 return os::stat(filename, &statbuf) == 0;
1557 }
1558
1559 bool os::dll_build_name(char* buffer, size_t buflen,
1560 const char* pname, const char* fname) {
2166 !_print_ascii_file("/etc/ltib-release", st) &&
2167 !_print_ascii_file("/etc/angstrom-version", st) &&
2168 !_print_ascii_file("/etc/system-release", st) &&
2169 !_print_ascii_file("/etc/os-release", st)) {
2170
2171 if (file_exists("/etc/debian_version")) {
2172 st->print("Debian ");
2173 _print_ascii_file("/etc/debian_version", st);
2174 } else {
2175 st->print("Linux");
2176 }
2177 }
2178 st->cr();
2179 }
2180
2181 void os::Linux::print_libversion_info(outputStream* st) {
2182 // libc, pthread
2183 st->print("libc:");
2184 st->print("%s ", os::Linux::glibc_version());
2185 st->print("%s ", os::Linux::libpthread_version());
2186 if (os::Linux::is_LinuxThreads()) {
2187 st->print("(%s stack)", os::Linux::is_floating_stack() ? "floating" : "fixed");
2188 }
2189 st->cr();
2190 }
2191
// Dump the kernel's memory statistics verbatim into the stream.
void os::Linux::print_full_memory_info(outputStream* st) {
  st->print("\n/proc/meminfo:\n");
  _print_ascii_file("/proc/meminfo", st);
  st->cr();
}
2197
2198 void os::print_memory_info(outputStream* st) {
2199
2200 st->print("Memory:");
2201 st->print(" %dk page", os::vm_page_size()>>10);
2202
2203 // values in struct sysinfo are "unsigned long"
2204 struct sysinfo si;
2205 sysinfo(&si);
2206
2207 st->print(", physical " UINT64_FORMAT "k",
2208 os::physical_memory() >> 10);
3027
3028 return os::commit_memory(addr, size, !ExecMem);
3029 }
3030
3031 // If this is a growable mapping, remove the guard pages entirely by
3032 // munmap()ping them. If not, just call uncommit_memory(). This only
3033 // affects the main/initial thread, but guard against future OS changes
3034 // It's safe to always unmap guard pages for initial thread because we
3035 // always place it right after end of the mapped region
3036
3037 bool os::remove_stack_guard_pages(char* addr, size_t size) {
3038 uintptr_t stack_extent, stack_base;
3039
3040 if (os::Linux::is_initial_thread()) {
3041 return ::munmap(addr, size) == 0;
3042 }
3043
3044 return os::uncommit_memory(addr, size);
3045 }
3046
// Highest end-address of any anon_mmap() reservation; a heuristic upper
// bound on non-stack VM mappings, used by the heap/stack collision check.
static address _highest_vm_reserved_address = NULL;
3048
3049 // If 'fixed' is true, anon_mmap() will attempt to reserve anonymous memory
3050 // at 'requested_addr'. If there are existing memory mappings at the same
3051 // location, however, they will be overwritten. If 'fixed' is false,
3052 // 'requested_addr' is only treated as a hint, the return value may or
3053 // may not start from the requested address. Unlike Linux mmap(), this
3054 // function returns NULL to indicate failure.
3055 static char* anon_mmap(char* requested_addr, size_t bytes, bool fixed) {
3056 char * addr;
3057 int flags;
3058
3059 flags = MAP_PRIVATE | MAP_NORESERVE | MAP_ANONYMOUS;
3060 if (fixed) {
3061 assert((uintptr_t)requested_addr % os::Linux::page_size() == 0, "unaligned address");
3062 flags |= MAP_FIXED;
3063 }
3064
3065 // Map reserved/uncommitted pages PROT_NONE so we fail early if we
3066 // touch an uncommitted page. Otherwise, the read/write might
3067 // succeed if we have enough swap space to back the physical page.
3068 addr = (char*)::mmap(requested_addr, bytes, PROT_NONE,
3069 flags, -1, 0);
3070
3071 if (addr != MAP_FAILED) {
3072 // anon_mmap() should only get called during VM initialization,
3073 // don't need lock (actually we can skip locking even it can be called
3074 // from multiple threads, because _highest_vm_reserved_address is just a
3075 // hint about the upper limit of non-stack memory regions.)
3076 if ((address)addr + bytes > _highest_vm_reserved_address) {
3077 _highest_vm_reserved_address = (address)addr + bytes;
3078 }
3079 }
3080
3081 return addr == MAP_FAILED ? NULL : addr;
3082 }
3083
// Don't update _highest_vm_reserved_address, because there might be memory
// regions above addr + size. If so, releasing a memory region only creates
// a hole in the address space, it doesn't help prevent heap-stack collision.
//
// Returns nonzero (true) on success, 0 (false) on failure.
static int anon_munmap(char * addr, size_t size) {
  return ::munmap(addr, size) == 0;
}
3091
3092 char* os::pd_reserve_memory(size_t bytes, char* requested_addr,
3093 size_t alignment_hint) {
3094 return anon_mmap(requested_addr, bytes, (requested_addr != NULL));
3095 }
3096
// Release (unmap) a region previously reserved with pd_reserve_memory().
bool os::pd_release_memory(char* addr, size_t size) {
  return anon_munmap(addr, size);
}
3100
// Accessor for the highest address reserved so far (hint only; see the
// comment on _highest_vm_reserved_address).
static address highest_vm_reserved_address() {
  return _highest_vm_reserved_address;
}
3104
3105 static bool linux_mprotect(char* addr, size_t size, int prot) {
3106 // Linux wants the mprotect address argument to be page aligned.
3107 char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size());
3108
3109 // According to SUSv3, mprotect() should only be used with mappings
3110 // established by mmap(), and mmap() always maps whole pages. Unaligned
3111 // 'addr' likely indicates problem in the VM (e.g. trying to change
3112 // protection of malloc'ed or statically allocated memory). Check the
3113 // caller if you hit this assert.
3114 assert(addr == bottom, "sanity check");
3115
3116 size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size());
3117 return ::mprotect(bottom, size, prot) == 0;
3118 }
3119
3120 // Set protections specified
3121 bool os::protect_memory(char* addr, size_t bytes, ProtType prot,
3122 bool is_committed) {
3123 unsigned int p = 0;
3124 switch (prot) {
3701 return UseTransparentHugePages || UseHugeTLBFS;
3702 }
3703
// Reserve memory at an arbitrary address, only if that area is
// available (and not reserved for something else).

char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
  const int max_tries = 10;
  char* base[max_tries];   // blocks reserved while probing for requested_addr
  size_t size[max_tries];  // usable size remaining in each reserved block
  const size_t gap = 0x000000;

  // Assert only that the size is a multiple of the page size, since
  // that's all that mmap requires, and since that's all we really know
  // about at this low abstraction level. If we need higher alignment,
  // we can either pass an alignment to this method or verify alignment
  // in one of the methods further up the call chain. See bug 5044738.
  assert(bytes % os::vm_page_size() == 0, "reserving unexpected size block");

  // Repeatedly allocate blocks until the block is allocated at the
  // right spot. Give up after max_tries. Note that reserve_memory() will
  // automatically update _highest_vm_reserved_address if the call is
  // successful. The variable tracks the highest memory address every reserved
  // by JVM. It is used to detect heap-stack collision if running with
  // fixed-stack LinuxThreads. Because here we may attempt to reserve more
  // space than needed, it could confuse the collision detecting code. To
  // solve the problem, save current _highest_vm_reserved_address and
  // calculate the correct value before return.
  address old_highest = _highest_vm_reserved_address;

  // Linux mmap allows caller to pass an address as hint; give it a try first,
  // if kernel honors the hint then we can return immediately.
  char * addr = anon_mmap(requested_addr, bytes, false);
  if (addr == requested_addr) {
    return requested_addr;
  }

  if (addr != NULL) {
    // mmap() is successful but it fails to reserve at the requested address
    anon_munmap(addr, bytes);
  }

  int i;
  for (i = 0; i < max_tries; ++i) {
    base[i] = reserve_memory(bytes);

    if (base[i] != NULL) {
      // Is this the block we wanted?
      if (base[i] == requested_addr) {
        ptrdiff_t bottom_overlap = base[i] + bytes - requested_addr;
        if (bottom_overlap >= 0 && (size_t)bottom_overlap < bytes) {
          unmap_memory(requested_addr, bottom_overlap);
          size[i] = bytes - bottom_overlap;
        } else {
          size[i] = bytes;
        }
      }
    }
  }

  // Give back the unused reserved pieces.

  for (int j = 0; j < i; ++j) {
    if (base[j] != NULL) {
      unmap_memory(base[j], size[j]);
    }
  }

  // Success: restore the tracking variable to what the caller actually kept.
  if (i < max_tries) {
    _highest_vm_reserved_address = MAX2(old_highest, (address)requested_addr + bytes);
    return requested_addr;
  } else {
    _highest_vm_reserved_address = old_highest;
    return NULL;
  }
}
3790
3791 size_t os::read(int fd, void *buf, unsigned int nBytes) {
3792 return ::read(fd, buf, nBytes);
3793 }
3794
3795 size_t os::read_at(int fd, void *buf, unsigned int nBytes, jlong offset) {
3796 return ::pread(fd, buf, nBytes, offset);
3797 }
3798
3799 // Short sleep, direct OS call.
3800 //
3801 // Note: certain versions of Linux CFS scheduler (since 2.6.23) do not guarantee
3802 // sched_yield(2) will actually give up the CPU:
3803 //
// * Alone on this particular CPU, keeps running.
3805 // * Before the introduction of "skip_buddy" with "compat_yield" disabled
3806 // (pre 2.6.39).
4610
4611 extern bool signal_name(int signo, char* buf, size_t len);
4612
4613 const char* os::exception_name(int exception_code, char* buf, size_t size) {
4614 if (0 < exception_code && exception_code <= SIGRTMAX) {
4615 // signal
4616 if (!signal_name(exception_code, buf, size)) {
4617 jio_snprintf(buf, size, "SIG%d", exception_code);
4618 }
4619 return buf;
4620 } else {
4621 return NULL;
4622 }
4623 }
4624
4625 // this is called _before_ the most of global arguments have been parsed
4626 void os::init(void) {
4627 char dummy; // used to get a guess on initial stack address
4628 // first_hrtime = gethrtime();
4629
4630 // With LinuxThreads the JavaMain thread pid (primordial thread)
4631 // is different than the pid of the java launcher thread.
4632 // So, on Linux, the launcher thread pid is passed to the VM
4633 // via the sun.java.launcher.pid property.
4634 // Use this property instead of getpid() if it was correctly passed.
4635 // See bug 6351349.
4636 pid_t java_launcher_pid = (pid_t) Arguments::sun_java_launcher_pid();
4637
4638 _initial_pid = (java_launcher_pid > 0) ? java_launcher_pid : getpid();
4639
4640 clock_tics_per_sec = sysconf(_SC_CLK_TCK);
4641
4642 init_random(1234567);
4643
4644 ThreadCritical::initialize();
4645
4646 Linux::set_page_size(sysconf(_SC_PAGESIZE));
4647 if (Linux::page_size() == -1) {
4648 fatal(err_msg("os_linux.cpp: os::init: sysconf failed (%s)",
4649 strerror(errno)));
4650 }
4651 init_page_sizes((size_t) Linux::page_size());
4652
4653 Linux::initialize_system_info();
4654
4655 // main_thread points to the aboriginal thread
4656 Linux::_main_thread = pthread_self();
4657
4658 Linux::clock_init();
4659 initial_time_count = javaTimeNanos();
4752 threadStackSizeInBytes < os::Linux::min_stack_allowed) {
4753 tty->print_cr("\nThe stack size specified is too small, "
4754 "Specify at least %dk",
4755 os::Linux::min_stack_allowed/ K);
4756 return JNI_ERR;
4757 }
4758
4759 // Make the stack size a multiple of the page size so that
4760 // the yellow/red zones can be guarded.
4761 JavaThread::set_stack_size_at_create(round_to(threadStackSizeInBytes,
4762 vm_page_size()));
4763
4764 Linux::capture_initial_stack(JavaThread::stack_size_at_create());
4765
4766 #if defined(IA32)
4767 workaround_expand_exec_shield_cs_limit();
4768 #endif
4769
4770 Linux::libpthread_init();
4771 if (PrintMiscellaneous && (Verbose || WizardMode)) {
4772 tty->print_cr("[HotSpot is running with %s, %s(%s)]\n",
4773 Linux::glibc_version(), Linux::libpthread_version(),
4774 Linux::is_floating_stack() ? "floating stack" : "fixed stack");
4775 }
4776
4777 if (UseNUMA) {
4778 if (!Linux::libnuma_init()) {
4779 UseNUMA = false;
4780 } else {
4781 if ((Linux::numa_max_node() < 1)) {
4782 // There's only one node(they start from 0), disable NUMA.
4783 UseNUMA = false;
4784 }
4785 }
4786 // With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way
4787 // we can make the adaptive lgrp chunk resizing work. If the user specified
4788 // both UseNUMA and UseLargePages (or UseSHM/UseHugeTLBFS) on the command line - warn and
4789 // disable adaptive resizing.
4790 if (UseNUMA && UseLargePages && !can_commit_large_page_memory()) {
4791 if (FLAG_IS_DEFAULT(UseNUMA)) {
4792 UseNUMA = false;
4793 } else {
4794 if (FLAG_IS_DEFAULT(UseLargePages) &&
4929 if (osthread->ucontext() != NULL) {
4930 _epc = os::Linux::ucontext_get_pc((ucontext_t *) context.ucontext());
4931 } else {
4932 // NULL context is unexpected, double-check this is the VMThread
4933 guarantee(thread->is_VM_thread(), "can only be called for VMThread");
4934 }
4935 }
4936
4937 // Suspends the target using the signal mechanism and then grabs the PC before
4938 // resuming the target. Used by the flat-profiler only
4939 ExtendedPC os::get_thread_pc(Thread* thread) {
4940 // Make sure that it is called by the watcher for the VMThread
4941 assert(Thread::current()->is_Watcher_thread(), "Must be watcher");
4942 assert(thread->is_VM_thread(), "Can only be called for VMThread");
4943
4944 PcFetcher fetcher(thread);
4945 fetcher.run();
4946 return fetcher.result();
4947 }
4948
4949 int os::Linux::safe_cond_timedwait(pthread_cond_t *_cond,
4950 pthread_mutex_t *_mutex,
4951 const struct timespec *_abstime) {
4952 if (is_NPTL()) {
4953 return pthread_cond_timedwait(_cond, _mutex, _abstime);
4954 } else {
4955 // 6292965: LinuxThreads pthread_cond_timedwait() resets FPU control
4956 // word back to default 64bit precision if condvar is signaled. Java
4957 // wants 53bit precision. Save and restore current value.
4958 int fpu = get_fpu_control_word();
4959 int status = pthread_cond_timedwait(_cond, _mutex, _abstime);
4960 set_fpu_control_word(fpu);
4961 return status;
4962 }
4963 }
4964
4965 ////////////////////////////////////////////////////////////////////////////////
4966 // debug support
4967
4968 bool os::find(address addr, outputStream* st) {
4969 Dl_info dlinfo;
4970 memset(&dlinfo, 0, sizeof(dlinfo));
4971 if (dladdr(addr, &dlinfo) != 0) {
4972 st->print(PTR_FORMAT ": ", addr);
4973 if (dlinfo.dli_sname != NULL && dlinfo.dli_saddr != NULL) {
4974 st->print("%s+%#x", dlinfo.dli_sname,
4975 addr - (intptr_t)dlinfo.dli_saddr);
4976 } else if (dlinfo.dli_fbase != NULL) {
4977 st->print("<offset %#x>", addr - (intptr_t)dlinfo.dli_fbase);
4978 } else {
4979 st->print("<absolute address>");
4980 }
4981 if (dlinfo.dli_fname != NULL) {
4982 st->print(" in %s", dlinfo.dli_fname);
4983 }
4984 if (dlinfo.dli_fbase != NULL) {
5568 int ret = OS_TIMEOUT;
5569 int status = pthread_mutex_lock(_mutex);
5570 assert_status(status == 0, status, "mutex_lock");
5571 guarantee(_nParked == 0, "invariant");
5572 ++_nParked;
5573
5574 // Object.wait(timo) will return because of
5575 // (a) notification
5576 // (b) timeout
5577 // (c) thread.interrupt
5578 //
5579 // Thread.interrupt and object.notify{All} both call Event::set.
5580 // That is, we treat thread.interrupt as a special case of notification.
5581 // We ignore spurious OS wakeups unless FilterSpuriousWakeups is false.
5582 // We assume all ETIME returns are valid.
5583 //
5584 // TODO: properly differentiate simultaneous notify+interrupt.
5585 // In that case, we should propagate the notify to another waiter.
5586
5587 while (_Event < 0) {
5588 status = os::Linux::safe_cond_timedwait(_cond, _mutex, &abst);
5589 if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5590 pthread_cond_destroy(_cond);
5591 pthread_cond_init(_cond, os::Linux::condAttr());
5592 }
5593 assert_status(status == 0 || status == EINTR ||
5594 status == ETIME || status == ETIMEDOUT,
5595 status, "cond_timedwait");
5596 if (!FilterSpuriousWakeups) break; // previous semantics
5597 if (status == ETIME || status == ETIMEDOUT) break;
5598 // We consume and ignore EINTR and spurious wakeups.
5599 }
5600 --_nParked;
5601 if (_Event >= 0) {
5602 ret = OS_OK;
5603 }
5604 _Event = 0;
5605 status = pthread_mutex_unlock(_mutex);
5606 assert_status(status == 0, status, "mutex_unlock");
5607 assert(_nParked == 0, "invariant");
5608 // Paranoia to ensure our locked and lock-free paths interact
5796 }
5797
5798 #ifdef ASSERT
5799 // Don't catch signals while blocked; let the running threads have the signals.
5800 // (This allows a debugger to break into the running thread.)
5801 sigset_t oldsigs;
5802 sigset_t* allowdebug_blocked = os::Linux::allowdebug_blocked_signals();
5803 pthread_sigmask(SIG_BLOCK, allowdebug_blocked, &oldsigs);
5804 #endif
5805
5806 OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
5807 jt->set_suspend_equivalent();
5808 // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self()
5809
5810 assert(_cur_index == -1, "invariant");
5811 if (time == 0) {
5812 _cur_index = REL_INDEX; // arbitrary choice when not timed
5813 status = pthread_cond_wait(&_cond[_cur_index], _mutex);
5814 } else {
5815 _cur_index = isAbsolute ? ABS_INDEX : REL_INDEX;
5816 status = os::Linux::safe_cond_timedwait(&_cond[_cur_index], _mutex, &absTime);
5817 if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5818 pthread_cond_destroy(&_cond[_cur_index]);
5819 pthread_cond_init(&_cond[_cur_index], isAbsolute ? NULL : os::Linux::condAttr());
5820 }
5821 }
5822 _cur_index = -1;
5823 assert_status(status == 0 || status == EINTR ||
5824 status == ETIME || status == ETIMEDOUT,
5825 status, "cond_timedwait");
5826
5827 #ifdef ASSERT
5828 pthread_sigmask(SIG_SETMASK, &oldsigs, NULL);
5829 #endif
5830
5831 _counter = 0;
5832 status = pthread_mutex_unlock(_mutex);
5833 assert_status(status == 0, status, "invariant");
5834 // Paranoia to ensure our locked and lock-free paths interact
5835 // correctly with each other and Java-level accesses.
5836 OrderAccess::fence();
|
118 #define MAX_SECS 100000000
119
120 // for timer info max values which include all bits
121 #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
122
123 #define LARGEPAGES_BIT (1 << 6)
124 ////////////////////////////////////////////////////////////////////////////////
125 // global variables
// Cached amount of physical memory; set during initialization.
julong os::Linux::_physical_memory = 0;

// Bounds of the primordial thread's stack; 0/NULL until captured.
address os::Linux::_initial_thread_stack_bottom = NULL;
uintptr_t os::Linux::_initial_thread_stack_size = 0;

// Function pointers filled in during initialization; NULL until resolved.
int (*os::Linux::_clock_gettime)(clockid_t, struct timespec *) = NULL;
int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL;
int (*os::Linux::_pthread_setname_np)(pthread_t, const char*) = NULL;
// NOTE(review): presumably serializes parts of thread creation — confirm at use sites.
Mutex* os::Linux::_createThread_lock = NULL;
// pthread handle of the thread that ran os::init() (set there via pthread_self()).
pthread_t os::Linux::_main_thread;
// System page size; -1 until queried (os::init() uses sysconf(_SC_PAGESIZE)).
int os::Linux::_page_size = -1;
const int os::Linux::_vm_default_page_size = (8 * K);
bool os::Linux::_supports_fast_thread_cpu_time = false;
// Version strings captured by libpthread_init(); NULL before that.
const char * os::Linux::_glibc_version = NULL;
const char * os::Linux::_libpthread_version = NULL;
// Condition-variable attributes shared by condvars created in this file.
pthread_condattr_t os::Linux::_condattr[1];

// Timestamp taken in os::init() (javaTimeNanos()); basis for elapsed time.
static jlong initial_time_count=0;

// Clock ticks per second; 100 is a placeholder until os::init() reads _SC_CLK_TCK.
static int clock_tics_per_sec = 100;

// For diagnostics to print a message once. see run_periodic_checks
static sigset_t check_signal_done;
static bool check_signals = true;

// Signal number used to suspend/resume a thread

// do not use any signal number less than SIGSEGV, see 4355769
static int SR_signum = SIGUSR2;
sigset_t SR_sigset;

// Declarations
static void unpackTime(timespec* absTime, bool isAbsolute, jlong time);

// utility functions

static int SR_initialize();
163
164 julong os::available_memory() {
165 return Linux::available_memory();
166 }
167
168 julong os::Linux::available_memory() {
169 // values in struct sysinfo are "unsigned long"
170 struct sysinfo si;
202 #define SYS_gettid 186
203 #else
204 #ifdef __sparc__
205 #define SYS_gettid 143
206 #else
207 #error define gettid for the arch
208 #endif
209 #endif
210 #endif
211 #endif
212 #endif
213
// Cpu architecture string, taken from the build-time HOTSPOT_LIB_ARCH define.
static char cpu_arch[] = HOTSPOT_LIB_ARCH;
216
217
218 // pid_t gettid()
219 //
220 // Returns the kernel thread id of the currently running thread. Kernel
221 // thread id is used to access /proc.
222 pid_t os::Linux::gettid() {
223 int rslt = syscall(SYS_gettid);
224 assert(rslt != -1, "must be."); // old linuxthreads implementation?
225 return (pid_t)rslt;
226 }
227
// Most versions of linux have a bug where the number of processors are
// determined by looking at the /proc file system. In a chroot environment,
// the system call returns 1. This causes the VM to act as if it is
// a single processor and elide locking (see is_MP() call).
// unsafe_chroot_detected is set by initialize_system_info() when
// /proc/<tid> cannot be opened; unstable_chroot_error is the warning text.
static bool unsafe_chroot_detected = false;
static const char *unstable_chroot_error = "/proc file system not found.\n"
                     "Java may be unstable running multithreaded in a chroot "
                     "environment on Linux when /proc filesystem is not mounted.";
236
237 void os::Linux::initialize_system_info() {
238 set_processor_count(sysconf(_SC_NPROCESSORS_CONF));
239 if (processor_count() == 1) {
240 pid_t pid = os::Linux::gettid();
241 char fname[32];
242 jio_snprintf(fname, sizeof(fname), "/proc/%d", pid);
243 FILE *fp = fopen(fname, "r");
244 if (fp == NULL) {
245 unsafe_chroot_detected = true;
479 OSThread* osthread = thread->osthread();
480 osthread->set_caller_sigmask(caller_sigmask);
481
482 pthread_sigmask(SIG_UNBLOCK, os::Linux::unblocked_signals(), NULL);
483
484 if (!ReduceSignalUsage) {
485 if (thread->is_VM_thread()) {
486 // Only the VM thread handles BREAK_SIGNAL ...
487 pthread_sigmask(SIG_UNBLOCK, vm_signals(), NULL);
488 } else {
489 // ... all other threads block BREAK_SIGNAL
490 pthread_sigmask(SIG_BLOCK, vm_signals(), NULL);
491 }
492 }
493 }
494
495 //////////////////////////////////////////////////////////////////////////////
496 // detecting pthread library
497
498 void os::Linux::libpthread_init() {
499 // Save glibc and pthread version strings.
500 #if !defined(_CS_GNU_LIBC_VERSION) || \
501 !defined(_CS_GNU_LIBPTHREAD_VERSION)
502 #error "glibc too old (< 2.3.2)"
503 #endif
504
505 size_t n = confstr(_CS_GNU_LIBC_VERSION, NULL, 0);
506 assert(n > 0, "cannot retrieve glibc version");
507 char *str = (char *)malloc(n, mtInternal);
508 confstr(_CS_GNU_LIBC_VERSION, str, n);
509 os::Linux::set_glibc_version(str);
510
511 n = confstr(_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);
512 assert(n > 0, "cannot retrieve pthread version");
513 str = (char *)malloc(n, mtInternal);
514 confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n);
515 os::Linux::set_libpthread_version(str);
516 }
517
518 /////////////////////////////////////////////////////////////////////////////
519 // thread stack expansion
520
521 // os::Linux::manually_expand_stack() takes care of expanding the thread
522 // stack. Note that this is normally not needed: pthread stacks allocate
523 // thread stack using mmap() without MAP_NORESERVE, so the stack is already
524 // committed. Therefore it is not necessary to expand the stack manually.
525 //
526 // Manually expanding the stack was historically needed on LinuxThreads
527 // thread stacks, which were allocated with mmap(MAP_GROWSDOWN). Nowadays
528 // it is kept to deal with very rare corner cases:
529 //
530 // For one, user may run the VM on an own implementation of threads
531 // whose stacks are - like the old LinuxThreads - implemented using
532 // mmap(MAP_GROWSDOWN).
533 //
534 // Also, this coding may be needed if the VM is running on the primordial
535 // thread. Normally we avoid running on the primordial thread; however,
536 // user may still invoke the VM on the primordial thread.
537 //
538 // The following historical comment describes the details about running
539 // on a thread stack allocated with mmap(MAP_GROWSDOWN):
540
541
542 // Force Linux kernel to expand current thread stack. If "bottom" is close
543 // to the stack guard, caller should block all signals.
544 //
545 // MAP_GROWSDOWN:
546 // A special mmap() flag that is used to implement thread stacks. It tells
547 // kernel that the memory region should extend downwards when needed. This
548 // allows early versions of LinuxThreads to only mmap the first few pages
549 // when creating a new thread. Linux kernel will automatically expand thread
550 // stack as needed (on page faults).
551 //
552 // However, because the memory region of a MAP_GROWSDOWN stack can grow on
553 // demand, if a page fault happens outside an already mapped MAP_GROWSDOWN
554 // region, it's hard to tell if the fault is due to a legitimate stack
// access or because of reading/writing non-existent memory (e.g. buffer
556 // overrun). As a rule, if the fault happens below current stack pointer,
557 // Linux kernel does not expand stack, instead a SIGSEGV is sent to the
558 // application (see Linux kernel fault.c).
559 //
560 // This Linux feature can cause SIGSEGV when VM bangs thread stack for
561 // stack overflow detection.
562 //
563 // Newer version of LinuxThreads (since glibc-2.2, or, RH-7.x) and NPTL do
564 // not use MAP_GROWSDOWN.
565 //
566 // To get around the problem and allow stack banging on Linux, we need to
567 // manually expand thread stack after receiving the SIGSEGV.
568 //
569 // There are two ways to expand thread stack to address "bottom", we used
570 // both of them in JVM before 1.5:
571 // 1. adjust stack pointer first so that it is below "bottom", and then
572 // touch "bottom"
573 // 2. mmap() the page in question
574 //
575 // Now alternate signal stack is gone, it's harder to use 2. For instance,
576 // if current sp is already near the lower end of page 101, and we need to
577 // call mmap() to map page 100, it is possible that part of the mmap() frame
578 // will be placed in page 100. When page 100 is mapped, it is zero-filled.
579 // That will destroy the mmap() frame and cause VM to crash.
580 //
581 // The following code works by adjusting sp first, then accessing the "bottom"
582 // page to force a page fault. Linux kernel will then automatically expand the
583 // stack mapping.
584 //
619
620 bool os::Linux::manually_expand_stack(JavaThread * t, address addr) {
621 assert(t!=NULL, "just checking");
622 assert(t->osthread()->expanding_stack(), "expand should be set");
623 assert(t->stack_base() != NULL, "stack_base was not initialized");
624
625 if (addr < t->stack_base() && addr >= t->stack_yellow_zone_base()) {
626 sigset_t mask_all, old_sigset;
627 sigfillset(&mask_all);
628 pthread_sigmask(SIG_SETMASK, &mask_all, &old_sigset);
629 _expand_stack_to(addr);
630 pthread_sigmask(SIG_SETMASK, &old_sigset, NULL);
631 return true;
632 }
633 return false;
634 }
635
636 //////////////////////////////////////////////////////////////////////////////
637 // create new thread
638
639 // Thread start routine for all newly created threads
640 static void *java_start(Thread *thread) {
641 // Try to randomize the cache line index of hot stack frames.
642 // This helps when threads of the same stack traces evict each other's
643 // cache lines. The threads can be either from the same JVM instance, or
644 // from different JVM instances. The benefit is especially true for
645 // processors with hyperthreading technology.
646 static int counter = 0;
647 int pid = os::current_process_id();
648 alloca(((pid ^ counter++) & 7) * 128);
649
650 ThreadLocalStorage::set_thread(thread);
651
652 OSThread* osthread = thread->osthread();
653 Monitor* sync = osthread->startThread_lock();
654
655 // thread_id is kernel thread id (similar to Solaris LWP id)
656 osthread->set_thread_id(os::Linux::gettid());
657
658 if (UseNUMA) {
659 int lgrp_id = os::numa_get_group_id();
660 if (lgrp_id != -1) {
661 thread->set_lgrp_id(lgrp_id);
662 }
663 }
664 // initialize signal mask for this thread
665 os::Linux::hotspot_sigmask(thread);
666
667 // initialize floating point control register
668 os::Linux::init_thread_fpu_state();
669
670 // handshaking with parent thread
671 {
672 MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag);
673
674 // notify parent thread
733 case os::pgc_thread:
734 case os::cgc_thread:
735 case os::watcher_thread:
736 if (VMThreadStackSize > 0) stack_size = (size_t)(VMThreadStackSize * K);
737 break;
738 }
739 }
740
741 stack_size = MAX2(stack_size, os::Linux::min_stack_allowed);
742 pthread_attr_setstacksize(&attr, stack_size);
743 } else {
744 // let pthread_create() pick the default value.
745 }
746
747 // glibc guard page
748 pthread_attr_setguardsize(&attr, os::Linux::default_guard_size(thr_type));
749
750 ThreadState state;
751
752 {
753 pthread_t tid;
754 int ret = pthread_create(&tid, &attr, (void* (*)(void*)) java_start, thread);
755
756 pthread_attr_destroy(&attr);
757
758 if (ret != 0) {
759 if (PrintMiscellaneous && (Verbose || WizardMode)) {
760 perror("pthread_create()");
761 }
762 // Need to clean up stuff we've allocated so far
763 thread->set_osthread(NULL);
764 delete osthread;
765 return false;
766 }
767
768 // Store pthread info into the OSThread
769 osthread->set_pthread_id(tid);
770
771 // Wait until child thread is either initialized or aborted
772 {
773 Monitor* sync_with_child = osthread->startThread_lock();
774 MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag);
775 while ((state = osthread->get_state()) == ALLOCATED) {
776 sync_with_child->wait(Mutex::_no_safepoint_check_flag);
777 }
778 }
779 }
780
781 // Aborted due to thread limit being reached
782 if (state == ZOMBIE) {
783 thread->set_osthread(NULL);
784 delete osthread;
785 return false;
786 }
787
788 // The thread is returned suspended (in state INITIALIZED),
789 // and is started higher up in the call chain
790 assert(state == INITIALIZED, "race condition");
791 return true;
792 }
793
794 /////////////////////////////////////////////////////////////////////////////
795 // attach existing thread
796
797 // bootstrap the main thread
798 bool os::create_main_thread(JavaThread* thread) {
1386 // sure it is async-safe and can handle partially initialized VM.
1387 void os::abort(bool dump_core, void* siginfo, void* context) {
1388 os::shutdown();
1389 if (dump_core) {
1390 #ifndef PRODUCT
1391 fdStream out(defaultStream::output_fd());
1392 out.print_raw("Current thread is ");
1393 char buf[16];
1394 jio_snprintf(buf, sizeof(buf), UINTX_FORMAT, os::current_thread_id());
1395 out.print_raw_cr(buf);
1396 out.print_raw_cr("Dumping core ...");
1397 #endif
1398 ::abort(); // dump core
1399 }
1400
1401 ::exit(1);
1402 }
1403
1404 // Die immediately, no exit hook, no abort hook, no cleanup.
1405 void os::die() {
1406 ::abort();
1407 }
1408
1409
1410 // This method is a copy of JDK's sysGetLastErrorString
1411 // from src/solaris/hpi/src/system_md.c
1412
1413 size_t os::lasterror(char *buf, size_t len) {
1414 if (errno == 0) return 0;
1415
1416 const char *s = ::strerror(errno);
1417 size_t n = ::strlen(s);
1418 if (n >= len) {
1419 n = len - 1;
1420 }
1421 ::strncpy(buf, s, n);
1422 buf[n] = '\0';
1423 return n;
1424 }
1425
1426 intx os::current_thread_id() { return (intx)pthread_self(); }
1427 int os::current_process_id() {
1428 return ::getpid();
1429 }
1430
1431 // DLL functions
1432
1433 const char* os::dll_file_extension() { return ".so"; }
1434
1435 // This must be hard coded because it's the system's temporary
1436 // directory not the java application's temp directory, ala java.io.tmpdir.
1437 const char* os::get_temp_directory() { return "/tmp"; }
1438
1439 static bool file_exists(const char* filename) {
1440 struct stat statbuf;
1441 if (filename == NULL || strlen(filename) == 0) {
1442 return false;
1443 }
1444 return os::stat(filename, &statbuf) == 0;
1445 }
1446
1447 bool os::dll_build_name(char* buffer, size_t buflen,
1448 const char* pname, const char* fname) {
2054 !_print_ascii_file("/etc/ltib-release", st) &&
2055 !_print_ascii_file("/etc/angstrom-version", st) &&
2056 !_print_ascii_file("/etc/system-release", st) &&
2057 !_print_ascii_file("/etc/os-release", st)) {
2058
2059 if (file_exists("/etc/debian_version")) {
2060 st->print("Debian ");
2061 _print_ascii_file("/etc/debian_version", st);
2062 } else {
2063 st->print("Linux");
2064 }
2065 }
2066 st->cr();
2067 }
2068
2069 void os::Linux::print_libversion_info(outputStream* st) {
2070 // libc, pthread
2071 st->print("libc:");
2072 st->print("%s ", os::Linux::glibc_version());
2073 st->print("%s ", os::Linux::libpthread_version());
2074 st->cr();
2075 }
2076
2077 void os::Linux::print_full_memory_info(outputStream* st) {
2078 st->print("\n/proc/meminfo:\n");
2079 _print_ascii_file("/proc/meminfo", st);
2080 st->cr();
2081 }
2082
2083 void os::print_memory_info(outputStream* st) {
2084
2085 st->print("Memory:");
2086 st->print(" %dk page", os::vm_page_size()>>10);
2087
2088 // values in struct sysinfo are "unsigned long"
2089 struct sysinfo si;
2090 sysinfo(&si);
2091
2092 st->print(", physical " UINT64_FORMAT "k",
2093 os::physical_memory() >> 10);
2912
2913 return os::commit_memory(addr, size, !ExecMem);
2914 }
2915
2916 // If this is a growable mapping, remove the guard pages entirely by
2917 // munmap()ping them. If not, just call uncommit_memory(). This only
2918 // affects the main/initial thread, but guard against future OS changes
2919 // It's safe to always unmap guard pages for initial thread because we
2920 // always place it right after end of the mapped region
2921
2922 bool os::remove_stack_guard_pages(char* addr, size_t size) {
2923 uintptr_t stack_extent, stack_base;
2924
2925 if (os::Linux::is_initial_thread()) {
2926 return ::munmap(addr, size) == 0;
2927 }
2928
2929 return os::uncommit_memory(addr, size);
2930 }
2931
2932 // If 'fixed' is true, anon_mmap() will attempt to reserve anonymous memory
2933 // at 'requested_addr'. If there are existing memory mappings at the same
2934 // location, however, they will be overwritten. If 'fixed' is false,
2935 // 'requested_addr' is only treated as a hint, the return value may or
2936 // may not start from the requested address. Unlike Linux mmap(), this
2937 // function returns NULL to indicate failure.
2938 static char* anon_mmap(char* requested_addr, size_t bytes, bool fixed) {
2939 char * addr;
2940 int flags;
2941
2942 flags = MAP_PRIVATE | MAP_NORESERVE | MAP_ANONYMOUS;
2943 if (fixed) {
2944 assert((uintptr_t)requested_addr % os::Linux::page_size() == 0, "unaligned address");
2945 flags |= MAP_FIXED;
2946 }
2947
2948 // Map reserved/uncommitted pages PROT_NONE so we fail early if we
2949 // touch an uncommitted page. Otherwise, the read/write might
2950 // succeed if we have enough swap space to back the physical page.
2951 addr = (char*)::mmap(requested_addr, bytes, PROT_NONE,
2952 flags, -1, 0);
2953
2954 return addr == MAP_FAILED ? NULL : addr;
2955 }
2956
// Release a region obtained from anon_mmap(); nonzero on success.
static int anon_munmap(char * addr, size_t size) {
  const int rc = ::munmap(addr, size);
  return rc == 0;
}
2960
2961 char* os::pd_reserve_memory(size_t bytes, char* requested_addr,
2962 size_t alignment_hint) {
2963 return anon_mmap(requested_addr, bytes, (requested_addr != NULL));
2964 }
2965
2966 bool os::pd_release_memory(char* addr, size_t size) {
2967 return anon_munmap(addr, size);
2968 }
2969
2970 static bool linux_mprotect(char* addr, size_t size, int prot) {
2971 // Linux wants the mprotect address argument to be page aligned.
2972 char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size());
2973
2974 // According to SUSv3, mprotect() should only be used with mappings
2975 // established by mmap(), and mmap() always maps whole pages. Unaligned
2976 // 'addr' likely indicates problem in the VM (e.g. trying to change
2977 // protection of malloc'ed or statically allocated memory). Check the
2978 // caller if you hit this assert.
2979 assert(addr == bottom, "sanity check");
2980
2981 size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size());
2982 return ::mprotect(bottom, size, prot) == 0;
2983 }
2984
2985 // Set protections specified
2986 bool os::protect_memory(char* addr, size_t bytes, ProtType prot,
2987 bool is_committed) {
2988 unsigned int p = 0;
2989 switch (prot) {
3566 return UseTransparentHugePages || UseHugeTLBFS;
3567 }
3568
3569 // Reserve memory at an arbitrary address, only if that area is
3570 // available (and not reserved for something else).
3571
3572 char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
3573 const int max_tries = 10;
3574 char* base[max_tries];
3575 size_t size[max_tries];
3576 const size_t gap = 0x000000;
3577
3578 // Assert only that the size is a multiple of the page size, since
3579 // that's all that mmap requires, and since that's all we really know
3580 // about at this low abstraction level. If we need higher alignment,
3581 // we can either pass an alignment to this method or verify alignment
3582 // in one of the methods further up the call chain. See bug 5044738.
3583 assert(bytes % os::vm_page_size() == 0, "reserving unexpected size block");
3584
3585 // Repeatedly allocate blocks until the block is allocated at the
3586 // right spot.
3587
3588 // Linux mmap allows caller to pass an address as hint; give it a try first,
3589 // if kernel honors the hint then we can return immediately.
3590 char * addr = anon_mmap(requested_addr, bytes, false);
3591 if (addr == requested_addr) {
3592 return requested_addr;
3593 }
3594
3595 if (addr != NULL) {
3596 // mmap() is successful but it fails to reserve at the requested address
3597 anon_munmap(addr, bytes);
3598 }
3599
3600 int i;
3601 for (i = 0; i < max_tries; ++i) {
3602 base[i] = reserve_memory(bytes);
3603
3604 if (base[i] != NULL) {
3605 // Is this the block we wanted?
3606 if (base[i] == requested_addr) {
3620 ptrdiff_t bottom_overlap = base[i] + bytes - requested_addr;
3621 if (bottom_overlap >= 0 && (size_t)bottom_overlap < bytes) {
3622 unmap_memory(requested_addr, bottom_overlap);
3623 size[i] = bytes - bottom_overlap;
3624 } else {
3625 size[i] = bytes;
3626 }
3627 }
3628 }
3629 }
3630
3631 // Give back the unused reserved pieces.
3632
3633 for (int j = 0; j < i; ++j) {
3634 if (base[j] != NULL) {
3635 unmap_memory(base[j], size[j]);
3636 }
3637 }
3638
3639 if (i < max_tries) {
3640 return requested_addr;
3641 } else {
3642 return NULL;
3643 }
3644 }
3645
3646 size_t os::read(int fd, void *buf, unsigned int nBytes) {
3647 return ::read(fd, buf, nBytes);
3648 }
3649
3650 size_t os::read_at(int fd, void *buf, unsigned int nBytes, jlong offset) {
3651 return ::pread(fd, buf, nBytes, offset);
3652 }
3653
3654 // Short sleep, direct OS call.
3655 //
3656 // Note: certain versions of Linux CFS scheduler (since 2.6.23) do not guarantee
3657 // sched_yield(2) will actually give up the CPU:
3658 //
//   * Alone on this particular CPU, keeps running.
3660 // * Before the introduction of "skip_buddy" with "compat_yield" disabled
3661 // (pre 2.6.39).
4465
4466 extern bool signal_name(int signo, char* buf, size_t len);
4467
4468 const char* os::exception_name(int exception_code, char* buf, size_t size) {
4469 if (0 < exception_code && exception_code <= SIGRTMAX) {
4470 // signal
4471 if (!signal_name(exception_code, buf, size)) {
4472 jio_snprintf(buf, size, "SIG%d", exception_code);
4473 }
4474 return buf;
4475 } else {
4476 return NULL;
4477 }
4478 }
4479
4480 // this is called _before_ the most of global arguments have been parsed
4481 void os::init(void) {
4482 char dummy; // used to get a guess on initial stack address
4483 // first_hrtime = gethrtime();
4484
4485 clock_tics_per_sec = sysconf(_SC_CLK_TCK);
4486
4487 init_random(1234567);
4488
4489 ThreadCritical::initialize();
4490
4491 Linux::set_page_size(sysconf(_SC_PAGESIZE));
4492 if (Linux::page_size() == -1) {
4493 fatal(err_msg("os_linux.cpp: os::init: sysconf failed (%s)",
4494 strerror(errno)));
4495 }
4496 init_page_sizes((size_t) Linux::page_size());
4497
4498 Linux::initialize_system_info();
4499
4500 // main_thread points to the aboriginal thread
4501 Linux::_main_thread = pthread_self();
4502
4503 Linux::clock_init();
4504 initial_time_count = javaTimeNanos();
4597 threadStackSizeInBytes < os::Linux::min_stack_allowed) {
4598 tty->print_cr("\nThe stack size specified is too small, "
4599 "Specify at least %dk",
4600 os::Linux::min_stack_allowed/ K);
4601 return JNI_ERR;
4602 }
4603
4604 // Make the stack size a multiple of the page size so that
4605 // the yellow/red zones can be guarded.
4606 JavaThread::set_stack_size_at_create(round_to(threadStackSizeInBytes,
4607 vm_page_size()));
4608
4609 Linux::capture_initial_stack(JavaThread::stack_size_at_create());
4610
4611 #if defined(IA32)
4612 workaround_expand_exec_shield_cs_limit();
4613 #endif
4614
4615 Linux::libpthread_init();
4616 if (PrintMiscellaneous && (Verbose || WizardMode)) {
4617 tty->print_cr("[HotSpot is running with %s, %s]\n",
4618 Linux::glibc_version(), Linux::libpthread_version());
4619 }
4620
4621 if (UseNUMA) {
4622 if (!Linux::libnuma_init()) {
4623 UseNUMA = false;
4624 } else {
4625 if ((Linux::numa_max_node() < 1)) {
4626 // There's only one node(they start from 0), disable NUMA.
4627 UseNUMA = false;
4628 }
4629 }
4630 // With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way
4631 // we can make the adaptive lgrp chunk resizing work. If the user specified
4632 // both UseNUMA and UseLargePages (or UseSHM/UseHugeTLBFS) on the command line - warn and
4633 // disable adaptive resizing.
4634 if (UseNUMA && UseLargePages && !can_commit_large_page_memory()) {
4635 if (FLAG_IS_DEFAULT(UseNUMA)) {
4636 UseNUMA = false;
4637 } else {
4638 if (FLAG_IS_DEFAULT(UseLargePages) &&
4773 if (osthread->ucontext() != NULL) {
4774 _epc = os::Linux::ucontext_get_pc((ucontext_t *) context.ucontext());
4775 } else {
4776 // NULL context is unexpected, double-check this is the VMThread
4777 guarantee(thread->is_VM_thread(), "can only be called for VMThread");
4778 }
4779 }
4780
4781 // Suspends the target using the signal mechanism and then grabs the PC before
4782 // resuming the target. Used by the flat-profiler only
4783 ExtendedPC os::get_thread_pc(Thread* thread) {
4784 // Make sure that it is called by the watcher for the VMThread
4785 assert(Thread::current()->is_Watcher_thread(), "Must be watcher");
4786 assert(thread->is_VM_thread(), "Can only be called for VMThread");
4787
4788 PcFetcher fetcher(thread);
4789 fetcher.run();
4790 return fetcher.result();
4791 }
4792
4793 ////////////////////////////////////////////////////////////////////////////////
4794 // debug support
4795
4796 bool os::find(address addr, outputStream* st) {
4797 Dl_info dlinfo;
4798 memset(&dlinfo, 0, sizeof(dlinfo));
4799 if (dladdr(addr, &dlinfo) != 0) {
4800 st->print(PTR_FORMAT ": ", addr);
4801 if (dlinfo.dli_sname != NULL && dlinfo.dli_saddr != NULL) {
4802 st->print("%s+%#x", dlinfo.dli_sname,
4803 addr - (intptr_t)dlinfo.dli_saddr);
4804 } else if (dlinfo.dli_fbase != NULL) {
4805 st->print("<offset %#x>", addr - (intptr_t)dlinfo.dli_fbase);
4806 } else {
4807 st->print("<absolute address>");
4808 }
4809 if (dlinfo.dli_fname != NULL) {
4810 st->print(" in %s", dlinfo.dli_fname);
4811 }
4812 if (dlinfo.dli_fbase != NULL) {
5396 int ret = OS_TIMEOUT;
5397 int status = pthread_mutex_lock(_mutex);
5398 assert_status(status == 0, status, "mutex_lock");
5399 guarantee(_nParked == 0, "invariant");
5400 ++_nParked;
5401
5402 // Object.wait(timo) will return because of
5403 // (a) notification
5404 // (b) timeout
5405 // (c) thread.interrupt
5406 //
5407 // Thread.interrupt and object.notify{All} both call Event::set.
5408 // That is, we treat thread.interrupt as a special case of notification.
5409 // We ignore spurious OS wakeups unless FilterSpuriousWakeups is false.
5410 // We assume all ETIME returns are valid.
5411 //
5412 // TODO: properly differentiate simultaneous notify+interrupt.
5413 // In that case, we should propagate the notify to another waiter.
5414
5415 while (_Event < 0) {
5416 status = pthread_cond_timedwait(_cond, _mutex, &abst);
5417 if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5418 pthread_cond_destroy(_cond);
5419 pthread_cond_init(_cond, os::Linux::condAttr());
5420 }
5421 assert_status(status == 0 || status == EINTR ||
5422 status == ETIME || status == ETIMEDOUT,
5423 status, "cond_timedwait");
5424 if (!FilterSpuriousWakeups) break; // previous semantics
5425 if (status == ETIME || status == ETIMEDOUT) break;
5426 // We consume and ignore EINTR and spurious wakeups.
5427 }
5428 --_nParked;
5429 if (_Event >= 0) {
5430 ret = OS_OK;
5431 }
5432 _Event = 0;
5433 status = pthread_mutex_unlock(_mutex);
5434 assert_status(status == 0, status, "mutex_unlock");
5435 assert(_nParked == 0, "invariant");
5436 // Paranoia to ensure our locked and lock-free paths interact
5624 }
5625
5626 #ifdef ASSERT
5627 // Don't catch signals while blocked; let the running threads have the signals.
5628 // (This allows a debugger to break into the running thread.)
5629 sigset_t oldsigs;
5630 sigset_t* allowdebug_blocked = os::Linux::allowdebug_blocked_signals();
5631 pthread_sigmask(SIG_BLOCK, allowdebug_blocked, &oldsigs);
5632 #endif
5633
5634 OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
5635 jt->set_suspend_equivalent();
5636 // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self()
5637
5638 assert(_cur_index == -1, "invariant");
5639 if (time == 0) {
5640 _cur_index = REL_INDEX; // arbitrary choice when not timed
5641 status = pthread_cond_wait(&_cond[_cur_index], _mutex);
5642 } else {
5643 _cur_index = isAbsolute ? ABS_INDEX : REL_INDEX;
5644 status = pthread_cond_timedwait(&_cond[_cur_index], _mutex, &absTime);
5645 if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5646 pthread_cond_destroy(&_cond[_cur_index]);
5647 pthread_cond_init(&_cond[_cur_index], isAbsolute ? NULL : os::Linux::condAttr());
5648 }
5649 }
5650 _cur_index = -1;
5651 assert_status(status == 0 || status == EINTR ||
5652 status == ETIME || status == ETIMEDOUT,
5653 status, "cond_timedwait");
5654
5655 #ifdef ASSERT
5656 pthread_sigmask(SIG_SETMASK, &oldsigs, NULL);
5657 #endif
5658
5659 _counter = 0;
5660 status = pthread_mutex_unlock(_mutex);
5661 assert_status(status == 0, status, "invariant");
5662 // Paranoia to ensure our locked and lock-free paths interact
5663 // correctly with each other and Java-level accesses.
5664 OrderAccess::fence();
|