< prev index next >

src/os/linux/vm/os_linux.cpp

Print this page
rev 8517 : 8078513: [linux] Clean up code relevant to LinuxThreads implementation
Reviewed-by: dholmes, sla, coleenp


#define MAX_SECS 100000000

// for timer info max values which include all bits
#define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)

// Bit in the kernel's page-flags word used for large-page detection —
// presumably matches the kernel's huge-page flag; confirm against usage.
#define LARGEPAGES_BIT (1 << 6)
////////////////////////////////////////////////////////////////////////////////
// global variables
julong os::Linux::_physical_memory = 0;

address   os::Linux::_initial_thread_stack_bottom = NULL;
uintptr_t os::Linux::_initial_thread_stack_size   = 0;

// Dynamically resolved library entry points; NULL until looked up.
int (*os::Linux::_clock_gettime)(clockid_t, struct timespec *) = NULL;
int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL;
int (*os::Linux::_pthread_setname_np)(pthread_t, const char*) = NULL;
// Serializes thread creation on fixed-stack LinuxThreads (see os::create_thread).
Mutex* os::Linux::_createThread_lock = NULL;
pthread_t os::Linux::_main_thread;
int os::Linux::_page_size = -1;
const int os::Linux::_vm_default_page_size = (8 * K);
// Thread-library flavor flags, set once by os::Linux::libpthread_init().
bool os::Linux::_is_floating_stack = false;
bool os::Linux::_is_NPTL = false;
bool os::Linux::_supports_fast_thread_cpu_time = false;
const char * os::Linux::_glibc_version = NULL;
const char * os::Linux::_libpthread_version = NULL;
pthread_condattr_t os::Linux::_condattr[1];

static jlong initial_time_count=0;

static int clock_tics_per_sec = 100;

// For diagnostics to print a message once. see run_periodic_checks
static sigset_t check_signal_done;
static bool check_signals = true;

// Pid of the launcher thread (or getpid()), captured in os::init();
// reported by os::current_process_id().
static pid_t _initial_pid = 0;

// Signal number used to suspend/resume a thread

// do not use any signal number less than SIGSEGV, see 4355769
static int SR_signum = SIGUSR2;
sigset_t SR_sigset;

// Declarations
static void unpackTime(timespec* absTime, bool isAbsolute, jlong time);

// utility functions

static int SR_initialize();
 167 
// Platform-independent entry point: delegates to the Linux-specific
// implementation (os::Linux::available_memory).
julong os::available_memory() {
  return Linux::available_memory();
}
 171 
 172 julong os::Linux::available_memory() {
 173   // values in struct sysinfo are "unsigned long"
 174   struct sysinfo si;


 206         #define SYS_gettid 186
 207       #else
 208         #ifdef __sparc__
 209           #define SYS_gettid 143
 210         #else
 211           #error define gettid for the arch
 212         #endif
 213       #endif
 214     #endif
 215   #endif
 216 #endif
 217 
// Cpu architecture string; value is supplied at build time via the
// HOTSPOT_LIB_ARCH define.
static char cpu_arch[] = HOTSPOT_LIB_ARCH;
 220 
 221 
 222 // pid_t gettid()
 223 //
 224 // Returns the kernel thread id of the currently running thread. Kernel
 225 // thread id is used to access /proc.
 226 //
 227 // (Note that getpid() on LinuxThreads returns kernel thread id too; but
 228 // on NPTL, it returns the same pid for all threads, as required by POSIX.)
 229 //
 230 pid_t os::Linux::gettid() {
 231   int rslt = syscall(SYS_gettid);
 232   if (rslt == -1) {
 233     // old kernel, no NPTL support
 234     return getpid();
 235   } else {
 236     return (pid_t)rslt;
 237   }
 238 }
 239 
// Most versions of linux have a bug where the number of processors are
// determined by looking at the /proc file system.  In a chroot environment,
// the system call returns 1.  This causes the VM to act as if it is
// a single processor and elide locking (see is_MP() call).
// Set by initialize_system_info() when /proc cannot be opened; the warning
// text below is emitted in that case.
static bool unsafe_chroot_detected = false;
static const char *unstable_chroot_error = "/proc file system not found.\n"
                     "Java may be unstable running multithreaded in a chroot "
                     "environment on Linux when /proc filesystem is not mounted.";
 248 
 249 void os::Linux::initialize_system_info() {
 250   set_processor_count(sysconf(_SC_NPROCESSORS_CONF));
 251   if (processor_count() == 1) {
 252     pid_t pid = os::Linux::gettid();
 253     char fname[32];
 254     jio_snprintf(fname, sizeof(fname), "/proc/%d", pid);
 255     FILE *fp = fopen(fname, "r");
 256     if (fp == NULL) {
 257       unsafe_chroot_detected = true;


 491   OSThread* osthread = thread->osthread();
 492   osthread->set_caller_sigmask(caller_sigmask);
 493 
 494   pthread_sigmask(SIG_UNBLOCK, os::Linux::unblocked_signals(), NULL);
 495 
 496   if (!ReduceSignalUsage) {
 497     if (thread->is_VM_thread()) {
 498       // Only the VM thread handles BREAK_SIGNAL ...
 499       pthread_sigmask(SIG_UNBLOCK, vm_signals(), NULL);
 500     } else {
 501       // ... all other threads block BREAK_SIGNAL
 502       pthread_sigmask(SIG_BLOCK, vm_signals(), NULL);
 503     }
 504   }
 505 }
 506 
 507 //////////////////////////////////////////////////////////////////////////////
 508 // detecting pthread library
 509 
// Detect which C library and thread library the VM is running on, record
// their version strings, and derive the thread-library flavor flags
// (_is_NPTL / LinuxThreads, _is_floating_stack) used throughout this file.
// Called once during VM initialization.
void os::Linux::libpthread_init() {
  // Save glibc and pthread version strings. Note that _CS_GNU_LIBC_VERSION
  // and _CS_GNU_LIBPTHREAD_VERSION are supported in glibc >= 2.3.2. Use a
  // generic name for earlier versions.
  // Define macros here so we can build HotSpot on old systems.
#ifndef _CS_GNU_LIBC_VERSION
  #define _CS_GNU_LIBC_VERSION 2
#endif
#ifndef _CS_GNU_LIBPTHREAD_VERSION
  #define _CS_GNU_LIBPTHREAD_VERSION 3
#endif

  // First query the required buffer size, then fetch the string.
  size_t n = confstr(_CS_GNU_LIBC_VERSION, NULL, 0);
  if (n > 0) {
    // NOTE(review): two-argument malloc — presumably mapped to HotSpot's
    // NMT-tracked allocator via macro; the string is intentionally kept
    // alive for the life of the VM (glibc_version() hands it out).
    char *str = (char *)malloc(n, mtInternal);
    confstr(_CS_GNU_LIBC_VERSION, str, n);
    os::Linux::set_glibc_version(str);
  } else {
    // _CS_GNU_LIBC_VERSION is not supported, try gnu_get_libc_version()
    static char _gnu_libc_version[32];
    jio_snprintf(_gnu_libc_version, sizeof(_gnu_libc_version),
                 "glibc %s %s", gnu_get_libc_version(), gnu_get_libc_release());
    os::Linux::set_glibc_version(_gnu_libc_version);
  }

  n = confstr(_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);
  if (n > 0) {
    char *str = (char *)malloc(n, mtInternal);
    confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n);
    // Vanilla RH-9 (glibc 2.3.2) has a bug that confstr() always tells
    // us "NPTL-0.29" even we are running with LinuxThreads. Check if this
    // is the case. LinuxThreads has a hard limit on max number of threads.
    // So sysconf(_SC_THREAD_THREADS_MAX) will return a positive value.
    // On the other hand, NPTL does not have such a limit, sysconf()
    // will return -1 and errno is not changed. Check if it is really NPTL.
    if (strcmp(os::Linux::glibc_version(), "glibc 2.3.2") == 0 &&
        strstr(str, "NPTL") &&
        sysconf(_SC_THREAD_THREADS_MAX) > 0) {
      free(str);
      os::Linux::set_libpthread_version("linuxthreads");
    } else {
      os::Linux::set_libpthread_version(str);
    }
  } else {
    // glibc before 2.3.2 only has LinuxThreads.
    os::Linux::set_libpthread_version("linuxthreads");
  }

  // Classify: any version string containing "NPTL" means NPTL; everything
  // else is treated as LinuxThreads.
  if (strstr(libpthread_version(), "NPTL")) {
    os::Linux::set_is_NPTL();
  } else {
    os::Linux::set_is_LinuxThreads();
  }

  // LinuxThreads have two flavors: floating-stack mode, which allows variable
  // stack size; and fixed-stack mode. NPTL is always floating-stack.
  if (os::Linux::is_NPTL() || os::Linux::supports_variable_stack_size()) {
    os::Linux::set_is_floating_stack();
  }
}
 570 
 571 /////////////////////////////////////////////////////////////////////////////
 572 // thread stack





















 573 
 574 // Force Linux kernel to expand current thread stack. If "bottom" is close
 575 // to the stack guard, caller should block all signals.
 576 //
 577 // MAP_GROWSDOWN:
 578 //   A special mmap() flag that is used to implement thread stacks. It tells
 579 //   kernel that the memory region should extend downwards when needed. This
 580 //   allows early versions of LinuxThreads to only mmap the first few pages
 581 //   when creating a new thread. Linux kernel will automatically expand thread
 582 //   stack as needed (on page faults).
 583 //
 584 //   However, because the memory region of a MAP_GROWSDOWN stack can grow on
 585 //   demand, if a page fault happens outside an already mapped MAP_GROWSDOWN
 586 //   region, it's hard to tell if the fault is due to a legitimate stack
 587 //   access or because of reading/writing non-exist memory (e.g. buffer
 588 //   overrun). As a rule, if the fault happens below current stack pointer,
 589 //   Linux kernel does not expand stack, instead a SIGSEGV is sent to the
 590 //   application (see Linux kernel fault.c).
 591 //
 592 //   This Linux feature can cause SIGSEGV when VM bangs thread stack for
 593 //   stack overflow detection.
 594 //
 595 //   Newer version of LinuxThreads (since glibc-2.2, or, RH-7.x) and NPTL do
 596 //   not use this flag. However, the stack of initial thread is not created
 597 //   by pthread, it is still MAP_GROWSDOWN. Also it's possible (though
 598 //   unlikely) that user code can create a thread with MAP_GROWSDOWN stack
 599 //   and then attach the thread to JVM.
 600 //
 601 // To get around the problem and allow stack banging on Linux, we need to
 602 // manually expand thread stack after receiving the SIGSEGV.
 603 //
 604 // There are two ways to expand thread stack to address "bottom", we used
 605 // both of them in JVM before 1.5:
 606 //   1. adjust stack pointer first so that it is below "bottom", and then
 607 //      touch "bottom"
 608 //   2. mmap() the page in question
 609 //
 610 // Now alternate signal stack is gone, it's harder to use 2. For instance,
 611 // if current sp is already near the lower end of page 101, and we need to
 612 // call mmap() to map page 100, it is possible that part of the mmap() frame
 613 // will be placed in page 100. When page 100 is mapped, it is zero-filled.
 614 // That will destroy the mmap() frame and cause VM to crash.
 615 //
 616 // The following code works by adjusting sp first, then accessing the "bottom"
 617 // page to force a page fault. Linux kernel will then automatically expand the
 618 // stack mapping.
 619 //


 654 
 655 bool os::Linux::manually_expand_stack(JavaThread * t, address addr) {
 656   assert(t!=NULL, "just checking");
 657   assert(t->osthread()->expanding_stack(), "expand should be set");
 658   assert(t->stack_base() != NULL, "stack_base was not initialized");
 659 
 660   if (addr <  t->stack_base() && addr >= t->stack_yellow_zone_base()) {
 661     sigset_t mask_all, old_sigset;
 662     sigfillset(&mask_all);
 663     pthread_sigmask(SIG_SETMASK, &mask_all, &old_sigset);
 664     _expand_stack_to(addr);
 665     pthread_sigmask(SIG_SETMASK, &old_sigset, NULL);
 666     return true;
 667   }
 668   return false;
 669 }
 670 
 671 //////////////////////////////////////////////////////////////////////////////
 672 // create new thread
 673 
// Forward declaration; definition lives next to _highest_vm_reserved_address.
static address highest_vm_reserved_address();

 676 // check if it's safe to start a new thread
 677 static bool _thread_safety_check(Thread* thread) {
 678   if (os::Linux::is_LinuxThreads() && !os::Linux::is_floating_stack()) {
 679     // Fixed stack LinuxThreads (SuSE Linux/x86, and some versions of Redhat)
 680     //   Heap is mmap'ed at lower end of memory space. Thread stacks are
 681     //   allocated (MAP_FIXED) from high address space. Every thread stack
 682     //   occupies a fixed size slot (usually 2Mbytes, but user can change
 683     //   it to other values if they rebuild LinuxThreads).
 684     //
 685     // Problem with MAP_FIXED is that mmap() can still succeed even part of
 686     // the memory region has already been mmap'ed. That means if we have too
 687     // many threads and/or very large heap, eventually thread stack will
 688     // collide with heap.
 689     //
 690     // Here we try to prevent heap/stack collision by comparing current
 691     // stack bottom with the highest address that has been mmap'ed by JVM
 692     // plus a safety margin for memory maps created by native code.
 693     //
 694     // This feature can be disabled by setting ThreadSafetyMargin to 0
 695     //
 696     if (ThreadSafetyMargin > 0) {
 697       address stack_bottom = os::current_stack_base() - os::current_stack_size();
 698 
 699       // not safe if our stack extends below the safety margin
 700       return stack_bottom - ThreadSafetyMargin >= highest_vm_reserved_address();
 701     } else {
 702       return true;
 703     }
 704   } else {
 705     // Floating stack LinuxThreads or NPTL:
 706     //   Unlike fixed stack LinuxThreads, thread stacks are not MAP_FIXED. When
 707     //   there's not enough space left, pthread_create() will fail. If we come
 708     //   here, that means enough space has been reserved for stack.
 709     return true;
 710   }
 711 }
 712 
 713 // Thread start routine for all newly created threads
 714 static void *java_start(Thread *thread) {
 715   // Try to randomize the cache line index of hot stack frames.
 716   // This helps when threads of the same stack traces evict each other's
 717   // cache lines. The threads can be either from the same JVM instance, or
 718   // from different JVM instances. The benefit is especially true for
 719   // processors with hyperthreading technology.
 720   static int counter = 0;
 721   int pid = os::current_process_id();
 722   alloca(((pid ^ counter++) & 7) * 128);
 723 
 724   ThreadLocalStorage::set_thread(thread);
 725 
 726   OSThread* osthread = thread->osthread();
 727   Monitor* sync = osthread->startThread_lock();
 728 
 729   // non floating stack LinuxThreads needs extra check, see above
 730   if (!_thread_safety_check(thread)) {
 731     // notify parent thread
 732     MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag);
 733     osthread->set_state(ZOMBIE);
 734     sync->notify_all();
 735     return NULL;
 736   }
 737 
 738   // thread_id is kernel thread id (similar to Solaris LWP id)
 739   osthread->set_thread_id(os::Linux::gettid());
 740 
 741   if (UseNUMA) {
 742     int lgrp_id = os::numa_get_group_id();
 743     if (lgrp_id != -1) {
 744       thread->set_lgrp_id(lgrp_id);
 745     }
 746   }
 747   // initialize signal mask for this thread
 748   os::Linux::hotspot_sigmask(thread);
 749 
 750   // initialize floating point control register
 751   os::Linux::init_thread_fpu_state();
 752 
 753   // handshaking with parent thread
 754   {
 755     MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag);
 756 
 757     // notify parent thread


 816       case os::pgc_thread:
 817       case os::cgc_thread:
 818       case os::watcher_thread:
 819         if (VMThreadStackSize > 0) stack_size = (size_t)(VMThreadStackSize * K);
 820         break;
 821       }
 822     }
 823 
 824     stack_size = MAX2(stack_size, os::Linux::min_stack_allowed);
 825     pthread_attr_setstacksize(&attr, stack_size);
 826   } else {
 827     // let pthread_create() pick the default value.
 828   }
 829 
 830   // glibc guard page
 831   pthread_attr_setguardsize(&attr, os::Linux::default_guard_size(thr_type));
 832 
 833   ThreadState state;
 834 
 835   {
 836     // Serialize thread creation if we are running with fixed stack LinuxThreads
 837     bool lock = os::Linux::is_LinuxThreads() && !os::Linux::is_floating_stack();
 838     if (lock) {
 839       os::Linux::createThread_lock()->lock_without_safepoint_check();
 840     }
 841 
 842     pthread_t tid;
 843     int ret = pthread_create(&tid, &attr, (void* (*)(void*)) java_start, thread);
 844 
 845     pthread_attr_destroy(&attr);
 846 
 847     if (ret != 0) {
 848       if (PrintMiscellaneous && (Verbose || WizardMode)) {
 849         perror("pthread_create()");
 850       }
 851       // Need to clean up stuff we've allocated so far
 852       thread->set_osthread(NULL);
 853       delete osthread;
 854       if (lock) os::Linux::createThread_lock()->unlock();
 855       return false;
 856     }
 857 
 858     // Store pthread info into the OSThread
 859     osthread->set_pthread_id(tid);
 860 
 861     // Wait until child thread is either initialized or aborted
 862     {
 863       Monitor* sync_with_child = osthread->startThread_lock();
 864       MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag);
 865       while ((state = osthread->get_state()) == ALLOCATED) {
 866         sync_with_child->wait(Mutex::_no_safepoint_check_flag);
 867       }
 868     }
 869 
 870     if (lock) {
 871       os::Linux::createThread_lock()->unlock();
 872     }
 873   }
 874 
 875   // Aborted due to thread limit being reached
 876   if (state == ZOMBIE) {
 877     thread->set_osthread(NULL);
 878     delete osthread;
 879     return false;
 880   }
 881 
 882   // The thread is returned suspended (in state INITIALIZED),
 883   // and is started higher up in the call chain
 884   assert(state == INITIALIZED, "race condition");
 885   return true;
 886 }
 887 
 888 /////////////////////////////////////////////////////////////////////////////
 889 // attach existing thread
 890 
 891 // bootstrap the main thread
 892 bool os::create_main_thread(JavaThread* thread) {


1480 // sure it is async-safe and can handle partially initialized VM.
// Terminate the VM. os::shutdown() has already been given a chance to
// flush buffers; after that we either ::abort() to produce a core file
// (when dump_core is set) or ::exit(1). siginfo/context are unused here.
void os::abort(bool dump_core, void* siginfo, void* context) {
  os::shutdown();
  if (dump_core) {
#ifndef PRODUCT
    // In non-product builds, announce which thread aborts before dumping.
    fdStream out(defaultStream::output_fd());
    out.print_raw("Current thread is ");
    char buf[16];
    jio_snprintf(buf, sizeof(buf), UINTX_FORMAT, os::current_thread_id());
    out.print_raw_cr(buf);
    out.print_raw_cr("Dumping core ...");
#endif
    ::abort(); // dump core
  }

  ::exit(1);
}
1497 
1498 // Die immediately, no exit hook, no abort hook, no cleanup.
// Die immediately, no exit hook, no abort hook, no cleanup.
void os::die() {
  // _exit() on LinuxThreads only kills current thread; ::abort() takes
  // down the whole process on either thread library.
  ::abort();
}
1503 
1504 
1505 // This method is a copy of JDK's sysGetLastErrorString
1506 // from src/solaris/hpi/src/system_md.c
1507 
1508 size_t os::lasterror(char *buf, size_t len) {
1509   if (errno == 0)  return 0;
1510 
1511   const char *s = ::strerror(errno);
1512   size_t n = ::strlen(s);
1513   if (n >= len) {
1514     n = len - 1;
1515   }
1516   ::strncpy(buf, s, n);
1517   buf[n] = '\0';
1518   return n;
1519 }
1520 
1521 intx os::current_thread_id() { return (intx)pthread_self(); }
1522 int os::current_process_id() {
1523 
1524   // Under the old linux thread library, linux gives each thread
1525   // its own process id. Because of this each thread will return
1526   // a different pid if this method were to return the result
1527   // of getpid(2). Linux provides no api that returns the pid
1528   // of the launcher thread for the vm. This implementation
1529   // returns a unique pid, the pid of the launcher thread
1530   // that starts the vm 'process'.
1531 
1532   // Under the NPTL, getpid() returns the same pid as the
1533   // launcher thread rather than a unique pid per thread.
1534   // Use gettid() if you want the old pre NPTL behaviour.
1535 
1536   // if you are looking for the result of a call to getpid() that
1537   // returns a unique pid for the calling thread, then look at the
1538   // OSThread::thread_id() method in osThread_linux.hpp file
1539 
1540   return (int)(_initial_pid ? _initial_pid : getpid());
1541 }
1542 
1543 // DLL functions
1544 
1545 const char* os::dll_file_extension() { return ".so"; }
1546 
// This must be hard coded because it's the system's temporary
// directory not the java application's temp directory, ala java.io.tmpdir.
const char* os::get_temp_directory() { return "/tmp"; }
1550 
1551 static bool file_exists(const char* filename) {
1552   struct stat statbuf;
1553   if (filename == NULL || strlen(filename) == 0) {
1554     return false;
1555   }
1556   return os::stat(filename, &statbuf) == 0;
1557 }
1558 
1559 bool os::dll_build_name(char* buffer, size_t buflen,
1560                         const char* pname, const char* fname) {


2166       !_print_ascii_file("/etc/ltib-release", st) &&
2167       !_print_ascii_file("/etc/angstrom-version", st) &&
2168       !_print_ascii_file("/etc/system-release", st) &&
2169       !_print_ascii_file("/etc/os-release", st)) {
2170 
2171     if (file_exists("/etc/debian_version")) {
2172       st->print("Debian ");
2173       _print_ascii_file("/etc/debian_version", st);
2174     } else {
2175       st->print("Linux");
2176     }
2177   }
2178   st->cr();
2179 }
2180 
2181 void os::Linux::print_libversion_info(outputStream* st) {
2182   // libc, pthread
2183   st->print("libc:");
2184   st->print("%s ", os::Linux::glibc_version());
2185   st->print("%s ", os::Linux::libpthread_version());
2186   if (os::Linux::is_LinuxThreads()) {
2187     st->print("(%s stack)", os::Linux::is_floating_stack() ? "floating" : "fixed");
2188   }
2189   st->cr();
2190 }
2191 
// Dump the kernel's /proc/meminfo verbatim to the given stream.
void os::Linux::print_full_memory_info(outputStream* st) {
  st->print("\n/proc/meminfo:\n");
  _print_ascii_file("/proc/meminfo", st);
  st->cr();
}
2197 
2198 void os::print_memory_info(outputStream* st) {
2199 
2200   st->print("Memory:");
2201   st->print(" %dk page", os::vm_page_size()>>10);
2202 
2203   // values in struct sysinfo are "unsigned long"
2204   struct sysinfo si;
2205   sysinfo(&si);
2206 
2207   st->print(", physical " UINT64_FORMAT "k",
2208             os::physical_memory() >> 10);


3027 
3028   return os::commit_memory(addr, size, !ExecMem);
3029 }
3030 
3031 // If this is a growable mapping, remove the guard pages entirely by
3032 // munmap()ping them.  If not, just call uncommit_memory(). This only
3033 // affects the main/initial thread, but guard against future OS changes
3034 // It's safe to always unmap guard pages for initial thread because we
3035 // always place it right after end of the mapped region
3036 
3037 bool os::remove_stack_guard_pages(char* addr, size_t size) {
3038   uintptr_t stack_extent, stack_base;
3039 
3040   if (os::Linux::is_initial_thread()) {
3041     return ::munmap(addr, size) == 0;
3042   }
3043 
3044   return os::uncommit_memory(addr, size);
3045 }
3046 
3047 static address _highest_vm_reserved_address = NULL;
3048 
3049 // If 'fixed' is true, anon_mmap() will attempt to reserve anonymous memory
3050 // at 'requested_addr'. If there are existing memory mappings at the same
3051 // location, however, they will be overwritten. If 'fixed' is false,
3052 // 'requested_addr' is only treated as a hint, the return value may or
3053 // may not start from the requested address. Unlike Linux mmap(), this
3054 // function returns NULL to indicate failure.
3055 static char* anon_mmap(char* requested_addr, size_t bytes, bool fixed) {
3056   char * addr;
3057   int flags;
3058 
3059   flags = MAP_PRIVATE | MAP_NORESERVE | MAP_ANONYMOUS;
3060   if (fixed) {
3061     assert((uintptr_t)requested_addr % os::Linux::page_size() == 0, "unaligned address");
3062     flags |= MAP_FIXED;
3063   }
3064 
3065   // Map reserved/uncommitted pages PROT_NONE so we fail early if we
3066   // touch an uncommitted page. Otherwise, the read/write might
3067   // succeed if we have enough swap space to back the physical page.
3068   addr = (char*)::mmap(requested_addr, bytes, PROT_NONE,
3069                        flags, -1, 0);
3070 
3071   if (addr != MAP_FAILED) {
3072     // anon_mmap() should only get called during VM initialization,
3073     // don't need lock (actually we can skip locking even it can be called
3074     // from multiple threads, because _highest_vm_reserved_address is just a
3075     // hint about the upper limit of non-stack memory regions.)
3076     if ((address)addr + bytes > _highest_vm_reserved_address) {
3077       _highest_vm_reserved_address = (address)addr + bytes;
3078     }
3079   }
3080 
3081   return addr == MAP_FAILED ? NULL : addr;
3082 }
3083 
3084 // Don't update _highest_vm_reserved_address, because there might be memory
3085 // regions above addr + size. If so, releasing a memory region only creates
3086 // a hole in the address space, it doesn't help prevent heap-stack collision.
3087 //
// Unmap [addr, addr+size). Intentionally leaves _highest_vm_reserved_address
// untouched: mappings may still exist above the released region, so lowering
// the hint would not help heap/stack collision detection anyway.
static int anon_munmap(char * addr, size_t size) {
  const int status = ::munmap(addr, size);
  return status == 0;
}
3091 
// Reserve 'bytes' of uncommitted address space. A non-NULL requested_addr
// makes the reservation MAP_FIXED at that spot (see anon_mmap, which will
// clobber existing mappings there). alignment_hint is unused here.
char* os::pd_reserve_memory(size_t bytes, char* requested_addr,
                            size_t alignment_hint) {
  return anon_mmap(requested_addr, bytes, (requested_addr != NULL));
}
3096 
// Release a region previously reserved with pd_reserve_memory().
bool os::pd_release_memory(char* addr, size_t size) {
  return anon_munmap(addr, size);
}
3100 
// Accessor for the file-static hint maintained by anon_mmap().
static address highest_vm_reserved_address() {
  return _highest_vm_reserved_address;
}
3104 
// mprotect() the smallest page-aligned region covering [addr, addr+size).
// Returns true on success.
static bool linux_mprotect(char* addr, size_t size, int prot) {
  // Linux wants the mprotect address argument to be page aligned.
  char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size());

  // According to SUSv3, mprotect() should only be used with mappings
  // established by mmap(), and mmap() always maps whole pages. Unaligned
  // 'addr' likely indicates problem in the VM (e.g. trying to change
  // protection of malloc'ed or statically allocated memory). Check the
  // caller if you hit this assert.
  assert(addr == bottom, "sanity check");

  // Round the size up so the whole of the original range stays covered
  // after aligning the start down to 'bottom'.
  size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size());
  return ::mprotect(bottom, size, prot) == 0;
}
3119 
3120 // Set protections specified
3121 bool os::protect_memory(char* addr, size_t bytes, ProtType prot,
3122                         bool is_committed) {
3123   unsigned int p = 0;
3124   switch (prot) {


3701   return UseTransparentHugePages || UseHugeTLBFS;
3702 }
3703 
3704 // Reserve memory at an arbitrary address, only if that area is
3705 // available (and not reserved for something else).
3706 
3707 char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
3708   const int max_tries = 10;
3709   char* base[max_tries];
3710   size_t size[max_tries];
3711   const size_t gap = 0x000000;
3712 
3713   // Assert only that the size is a multiple of the page size, since
3714   // that's all that mmap requires, and since that's all we really know
3715   // about at this low abstraction level.  If we need higher alignment,
3716   // we can either pass an alignment to this method or verify alignment
3717   // in one of the methods further up the call chain.  See bug 5044738.
3718   assert(bytes % os::vm_page_size() == 0, "reserving unexpected size block");
3719 
3720   // Repeatedly allocate blocks until the block is allocated at the
3721   // right spot. Give up after max_tries. Note that reserve_memory() will
3722   // automatically update _highest_vm_reserved_address if the call is
3723   // successful. The variable tracks the highest memory address every reserved
3724   // by JVM. It is used to detect heap-stack collision if running with
3725   // fixed-stack LinuxThreads. Because here we may attempt to reserve more
3726   // space than needed, it could confuse the collision detecting code. To
3727   // solve the problem, save current _highest_vm_reserved_address and
3728   // calculate the correct value before return.
3729   address old_highest = _highest_vm_reserved_address;
3730 
3731   // Linux mmap allows caller to pass an address as hint; give it a try first,
3732   // if kernel honors the hint then we can return immediately.
3733   char * addr = anon_mmap(requested_addr, bytes, false);
3734   if (addr == requested_addr) {
3735     return requested_addr;
3736   }
3737 
3738   if (addr != NULL) {
3739     // mmap() is successful but it fails to reserve at the requested address
3740     anon_munmap(addr, bytes);
3741   }
3742 
3743   int i;
3744   for (i = 0; i < max_tries; ++i) {
3745     base[i] = reserve_memory(bytes);
3746 
3747     if (base[i] != NULL) {
3748       // Is this the block we wanted?
3749       if (base[i] == requested_addr) {


3763         ptrdiff_t bottom_overlap = base[i] + bytes - requested_addr;
3764         if (bottom_overlap >= 0 && (size_t)bottom_overlap < bytes) {
3765           unmap_memory(requested_addr, bottom_overlap);
3766           size[i] = bytes - bottom_overlap;
3767         } else {
3768           size[i] = bytes;
3769         }
3770       }
3771     }
3772   }
3773 
3774   // Give back the unused reserved pieces.
3775 
3776   for (int j = 0; j < i; ++j) {
3777     if (base[j] != NULL) {
3778       unmap_memory(base[j], size[j]);
3779     }
3780   }
3781 
3782   if (i < max_tries) {
3783     _highest_vm_reserved_address = MAX2(old_highest, (address)requested_addr + bytes);
3784     return requested_addr;
3785   } else {
3786     _highest_vm_reserved_address = old_highest;
3787     return NULL;
3788   }
3789 }
3790 
// Read up to nBytes from fd into buf; thin wrapper over read(2).
// Returns bytes read, or read(2)'s -1 converted to size_t on error.
size_t os::read(int fd, void *buf, unsigned int nBytes) {
  return ::read(fd, buf, nBytes);
}
3794 
// Positional read via pread(2); does not move fd's file offset.
size_t os::read_at(int fd, void *buf, unsigned int nBytes, jlong offset) {
  return ::pread(fd, buf, nBytes, offset);
}
3798 
3799 // Short sleep, direct OS call.
3800 //
3801 // Note: certain versions of Linux CFS scheduler (since 2.6.23) do not guarantee
3802 // sched_yield(2) will actually give up the CPU:
3803 //
3804 //   * Alone on this pariticular CPU, keeps running.
3805 //   * Before the introduction of "skip_buddy" with "compat_yield" disabled
3806 //     (pre 2.6.39).


4610 
4611 extern bool signal_name(int signo, char* buf, size_t len);
4612 
4613 const char* os::exception_name(int exception_code, char* buf, size_t size) {
4614   if (0 < exception_code && exception_code <= SIGRTMAX) {
4615     // signal
4616     if (!signal_name(exception_code, buf, size)) {
4617       jio_snprintf(buf, size, "SIG%d", exception_code);
4618     }
4619     return buf;
4620   } else {
4621     return NULL;
4622   }
4623 }
4624 
4625 // this is called _before_ the most of global arguments have been parsed
4626 void os::init(void) {
4627   char dummy;   // used to get a guess on initial stack address
4628 //  first_hrtime = gethrtime();
4629 
4630   // With LinuxThreads the JavaMain thread pid (primordial thread)
4631   // is different than the pid of the java launcher thread.
4632   // So, on Linux, the launcher thread pid is passed to the VM
4633   // via the sun.java.launcher.pid property.
4634   // Use this property instead of getpid() if it was correctly passed.
4635   // See bug 6351349.
4636   pid_t java_launcher_pid = (pid_t) Arguments::sun_java_launcher_pid();
4637 
4638   _initial_pid = (java_launcher_pid > 0) ? java_launcher_pid : getpid();
4639 
4640   clock_tics_per_sec = sysconf(_SC_CLK_TCK);
4641 
4642   init_random(1234567);
4643 
4644   ThreadCritical::initialize();
4645 
4646   Linux::set_page_size(sysconf(_SC_PAGESIZE));
4647   if (Linux::page_size() == -1) {
4648     fatal(err_msg("os_linux.cpp: os::init: sysconf failed (%s)",
4649                   strerror(errno)));
4650   }
4651   init_page_sizes((size_t) Linux::page_size());
4652 
4653   Linux::initialize_system_info();
4654 
4655   // main_thread points to the aboriginal thread
4656   Linux::_main_thread = pthread_self();
4657 
4658   Linux::clock_init();
4659   initial_time_count = javaTimeNanos();


4752       threadStackSizeInBytes < os::Linux::min_stack_allowed) {
4753     tty->print_cr("\nThe stack size specified is too small, "
4754                   "Specify at least %dk",
4755                   os::Linux::min_stack_allowed/ K);
4756     return JNI_ERR;
4757   }
4758 
4759   // Make the stack size a multiple of the page size so that
4760   // the yellow/red zones can be guarded.
4761   JavaThread::set_stack_size_at_create(round_to(threadStackSizeInBytes,
4762                                                 vm_page_size()));
4763 
4764   Linux::capture_initial_stack(JavaThread::stack_size_at_create());
4765 
4766 #if defined(IA32)
4767   workaround_expand_exec_shield_cs_limit();
4768 #endif
4769 
4770   Linux::libpthread_init();
4771   if (PrintMiscellaneous && (Verbose || WizardMode)) {
4772     tty->print_cr("[HotSpot is running with %s, %s(%s)]\n",
4773                   Linux::glibc_version(), Linux::libpthread_version(),
4774                   Linux::is_floating_stack() ? "floating stack" : "fixed stack");
4775   }
4776 
4777   if (UseNUMA) {
4778     if (!Linux::libnuma_init()) {
4779       UseNUMA = false;
4780     } else {
4781       if ((Linux::numa_max_node() < 1)) {
4782         // There's only one node(they start from 0), disable NUMA.
4783         UseNUMA = false;
4784       }
4785     }
4786     // With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way
4787     // we can make the adaptive lgrp chunk resizing work. If the user specified
4788     // both UseNUMA and UseLargePages (or UseSHM/UseHugeTLBFS) on the command line - warn and
4789     // disable adaptive resizing.
4790     if (UseNUMA && UseLargePages && !can_commit_large_page_memory()) {
4791       if (FLAG_IS_DEFAULT(UseNUMA)) {
4792         UseNUMA = false;
4793       } else {
4794         if (FLAG_IS_DEFAULT(UseLargePages) &&


4929   if (osthread->ucontext() != NULL) {
4930     _epc = os::Linux::ucontext_get_pc((ucontext_t *) context.ucontext());
4931   } else {
4932     // NULL context is unexpected, double-check this is the VMThread
4933     guarantee(thread->is_VM_thread(), "can only be called for VMThread");
4934   }
4935 }
4936 
4937 // Suspends the target using the signal mechanism and then grabs the PC before
4938 // resuming the target. Used by the flat-profiler only
4939 ExtendedPC os::get_thread_pc(Thread* thread) {
4940   // Make sure that it is called by the watcher for the VMThread
4941   assert(Thread::current()->is_Watcher_thread(), "Must be watcher");
4942   assert(thread->is_VM_thread(), "Can only be called for VMThread");
4943 
4944   PcFetcher fetcher(thread);
4945   fetcher.run();
4946   return fetcher.result();
4947 }
4948 
4949 int os::Linux::safe_cond_timedwait(pthread_cond_t *_cond,
4950                                    pthread_mutex_t *_mutex,
4951                                    const struct timespec *_abstime) {
4952   if (is_NPTL()) {
4953     return pthread_cond_timedwait(_cond, _mutex, _abstime);
4954   } else {
4955     // 6292965: LinuxThreads pthread_cond_timedwait() resets FPU control
4956     // word back to default 64bit precision if condvar is signaled. Java
4957     // wants 53bit precision.  Save and restore current value.
4958     int fpu = get_fpu_control_word();
4959     int status = pthread_cond_timedwait(_cond, _mutex, _abstime);
4960     set_fpu_control_word(fpu);
4961     return status;
4962   }
4963 }
4964 
4965 ////////////////////////////////////////////////////////////////////////////////
4966 // debug support
4967 
4968 bool os::find(address addr, outputStream* st) {
4969   Dl_info dlinfo;
4970   memset(&dlinfo, 0, sizeof(dlinfo));
4971   if (dladdr(addr, &dlinfo) != 0) {
4972     st->print(PTR_FORMAT ": ", addr);
4973     if (dlinfo.dli_sname != NULL && dlinfo.dli_saddr != NULL) {
4974       st->print("%s+%#x", dlinfo.dli_sname,
4975                 addr - (intptr_t)dlinfo.dli_saddr);
4976     } else if (dlinfo.dli_fbase != NULL) {
4977       st->print("<offset %#x>", addr - (intptr_t)dlinfo.dli_fbase);
4978     } else {
4979       st->print("<absolute address>");
4980     }
4981     if (dlinfo.dli_fname != NULL) {
4982       st->print(" in %s", dlinfo.dli_fname);
4983     }
4984     if (dlinfo.dli_fbase != NULL) {


5568   int ret = OS_TIMEOUT;
5569   int status = pthread_mutex_lock(_mutex);
5570   assert_status(status == 0, status, "mutex_lock");
5571   guarantee(_nParked == 0, "invariant");
5572   ++_nParked;
5573 
5574   // Object.wait(timo) will return because of
5575   // (a) notification
5576   // (b) timeout
5577   // (c) thread.interrupt
5578   //
5579   // Thread.interrupt and object.notify{All} both call Event::set.
5580   // That is, we treat thread.interrupt as a special case of notification.
5581   // We ignore spurious OS wakeups unless FilterSpuriousWakeups is false.
5582   // We assume all ETIME returns are valid.
5583   //
5584   // TODO: properly differentiate simultaneous notify+interrupt.
5585   // In that case, we should propagate the notify to another waiter.
5586 
5587   while (_Event < 0) {
5588     status = os::Linux::safe_cond_timedwait(_cond, _mutex, &abst);
5589     if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5590       pthread_cond_destroy(_cond);
5591       pthread_cond_init(_cond, os::Linux::condAttr());
5592     }
5593     assert_status(status == 0 || status == EINTR ||
5594                   status == ETIME || status == ETIMEDOUT,
5595                   status, "cond_timedwait");
5596     if (!FilterSpuriousWakeups) break;                 // previous semantics
5597     if (status == ETIME || status == ETIMEDOUT) break;
5598     // We consume and ignore EINTR and spurious wakeups.
5599   }
5600   --_nParked;
5601   if (_Event >= 0) {
5602     ret = OS_OK;
5603   }
5604   _Event = 0;
5605   status = pthread_mutex_unlock(_mutex);
5606   assert_status(status == 0, status, "mutex_unlock");
5607   assert(_nParked == 0, "invariant");
5608   // Paranoia to ensure our locked and lock-free paths interact


5796   }
5797 
5798 #ifdef ASSERT
5799   // Don't catch signals while blocked; let the running threads have the signals.
5800   // (This allows a debugger to break into the running thread.)
5801   sigset_t oldsigs;
5802   sigset_t* allowdebug_blocked = os::Linux::allowdebug_blocked_signals();
5803   pthread_sigmask(SIG_BLOCK, allowdebug_blocked, &oldsigs);
5804 #endif
5805 
5806   OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
5807   jt->set_suspend_equivalent();
5808   // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self()
5809 
5810   assert(_cur_index == -1, "invariant");
5811   if (time == 0) {
5812     _cur_index = REL_INDEX; // arbitrary choice when not timed
5813     status = pthread_cond_wait(&_cond[_cur_index], _mutex);
5814   } else {
5815     _cur_index = isAbsolute ? ABS_INDEX : REL_INDEX;
5816     status = os::Linux::safe_cond_timedwait(&_cond[_cur_index], _mutex, &absTime);
5817     if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5818       pthread_cond_destroy(&_cond[_cur_index]);
5819       pthread_cond_init(&_cond[_cur_index], isAbsolute ? NULL : os::Linux::condAttr());
5820     }
5821   }
5822   _cur_index = -1;
5823   assert_status(status == 0 || status == EINTR ||
5824                 status == ETIME || status == ETIMEDOUT,
5825                 status, "cond_timedwait");
5826 
5827 #ifdef ASSERT
5828   pthread_sigmask(SIG_SETMASK, &oldsigs, NULL);
5829 #endif
5830 
5831   _counter = 0;
5832   status = pthread_mutex_unlock(_mutex);
5833   assert_status(status == 0, status, "invariant");
5834   // Paranoia to ensure our locked and lock-free paths interact
5835   // correctly with each other and Java-level accesses.
5836   OrderAccess::fence();




 118 #define MAX_SECS 100000000
 119 
 120 // for timer info max values which include all bits
 121 #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
 122 
 123 #define LARGEPAGES_BIT (1 << 6)
 124 ////////////////////////////////////////////////////////////////////////////////
 125 // global variables
 126 julong os::Linux::_physical_memory = 0;
 127 
 128 address   os::Linux::_initial_thread_stack_bottom = NULL;
 129 uintptr_t os::Linux::_initial_thread_stack_size   = 0;
 130 
 131 int (*os::Linux::_clock_gettime)(clockid_t, struct timespec *) = NULL;
 132 int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL;
 133 int (*os::Linux::_pthread_setname_np)(pthread_t, const char*) = NULL;
 134 Mutex* os::Linux::_createThread_lock = NULL;
 135 pthread_t os::Linux::_main_thread;
 136 int os::Linux::_page_size = -1;
 137 const int os::Linux::_vm_default_page_size = (8 * K);


 138 bool os::Linux::_supports_fast_thread_cpu_time = false;
 139 const char * os::Linux::_glibc_version = NULL;
 140 const char * os::Linux::_libpthread_version = NULL;
 141 pthread_condattr_t os::Linux::_condattr[1];
 142 
 143 static jlong initial_time_count=0;
 144 
 145 static int clock_tics_per_sec = 100;
 146 
 147 // For diagnostics to print a message once. see run_periodic_checks
 148 static sigset_t check_signal_done;
 149 static bool check_signals = true;
 150 


 151 // Signal number used to suspend/resume a thread
 152 
 153 // do not use any signal number less than SIGSEGV, see 4355769
 154 static int SR_signum = SIGUSR2;
 155 sigset_t SR_sigset;
 156 
 157 // Declarations
 158 static void unpackTime(timespec* absTime, bool isAbsolute, jlong time);
 159 
 160 // utility functions
 161 
 162 static int SR_initialize();
 163 
// Returns the amount of currently available physical memory, in bytes.
// Thin forwarder to the platform-specific implementation.
julong os::available_memory() {
  return Linux::available_memory();
}
 167 
 168 julong os::Linux::available_memory() {
 169   // values in struct sysinfo are "unsigned long"
 170   struct sysinfo si;


 202         #define SYS_gettid 186
 203       #else
 204         #ifdef __sparc__
 205           #define SYS_gettid 143
 206         #else
 207           #error define gettid for the arch
 208         #endif
 209       #endif
 210     #endif
 211   #endif
 212 #endif
 213 
 214 // Cpu architecture string
 215 static char cpu_arch[] = HOTSPOT_LIB_ARCH;
 216 
 217 
 218 // pid_t gettid()
 219 //
 220 // Returns the kernel thread id of the currently running thread. Kernel
 221 // thread id is used to access /proc.




 222 pid_t os::Linux::gettid() {
 223   int rslt = syscall(SYS_gettid);
 224   assert(rslt != -1, "must be."); // old linuxthreads implementation?



 225   return (pid_t)rslt;

 226 }
 227 
// Most versions of linux have a bug where the number of processors is
// determined by looking at the /proc file system.  In a chroot environment,
// the system call returns 1.  This causes the VM to act as if it is
// a single processor and elide locking (see is_MP() call).
 232 static bool unsafe_chroot_detected = false;
 233 static const char *unstable_chroot_error = "/proc file system not found.\n"
 234                      "Java may be unstable running multithreaded in a chroot "
 235                      "environment on Linux when /proc filesystem is not mounted.";
 236 
 237 void os::Linux::initialize_system_info() {
 238   set_processor_count(sysconf(_SC_NPROCESSORS_CONF));
 239   if (processor_count() == 1) {
 240     pid_t pid = os::Linux::gettid();
 241     char fname[32];
 242     jio_snprintf(fname, sizeof(fname), "/proc/%d", pid);
 243     FILE *fp = fopen(fname, "r");
 244     if (fp == NULL) {
 245       unsafe_chroot_detected = true;


 479   OSThread* osthread = thread->osthread();
 480   osthread->set_caller_sigmask(caller_sigmask);
 481 
 482   pthread_sigmask(SIG_UNBLOCK, os::Linux::unblocked_signals(), NULL);
 483 
 484   if (!ReduceSignalUsage) {
 485     if (thread->is_VM_thread()) {
 486       // Only the VM thread handles BREAK_SIGNAL ...
 487       pthread_sigmask(SIG_UNBLOCK, vm_signals(), NULL);
 488     } else {
 489       // ... all other threads block BREAK_SIGNAL
 490       pthread_sigmask(SIG_BLOCK, vm_signals(), NULL);
 491     }
 492   }
 493 }
 494 
 495 //////////////////////////////////////////////////////////////////////////////
 496 // detecting pthread library
 497 
 498 void os::Linux::libpthread_init() {
 499   // Save glibc and pthread version strings.
 500 #if !defined(_CS_GNU_LIBC_VERSION) || \
 501     !defined(_CS_GNU_LIBPTHREAD_VERSION)
 502   #error "glibc too old (< 2.3.2)"





 503 #endif
 504 
 505   size_t n = confstr(_CS_GNU_LIBC_VERSION, NULL, 0);
 506   assert(n > 0, "cannot retrieve glibc version");
 507   char *str = (char *)malloc(n, mtInternal);
 508   confstr(_CS_GNU_LIBC_VERSION, str, n);
 509   os::Linux::set_glibc_version(str);







 510 
 511   n = confstr(_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);
 512   assert(n > 0, "cannot retrieve pthread version");
 513   str = (char *)malloc(n, mtInternal);
 514   confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n);












 515   os::Linux::set_libpthread_version(str);

















 516 }
 517 
 518 /////////////////////////////////////////////////////////////////////////////
 519 // thread stack expansion
 520 
 521 // os::Linux::manually_expand_stack() takes care of expanding the thread
 522 // stack. Note that this is normally not needed: pthread stacks allocate
 523 // thread stack using mmap() without MAP_NORESERVE, so the stack is already
 524 // committed. Therefore it is not necessary to expand the stack manually.
 525 //
 526 // Manually expanding the stack was historically needed on LinuxThreads
 527 // thread stacks, which were allocated with mmap(MAP_GROWSDOWN). Nowadays
 528 // it is kept to deal with very rare corner cases:
 529 //
 530 // For one, user may run the VM on an own implementation of threads
 531 // whose stacks are - like the old LinuxThreads - implemented using
 532 // mmap(MAP_GROWSDOWN).
 533 //
 534 // Also, this coding may be needed if the VM is running on the primordial
 535 // thread. Normally we avoid running on the primordial thread; however,
 536 // user may still invoke the VM on the primordial thread.
 537 //
 538 // The following historical comment describes the details about running
 539 // on a thread stack allocated with mmap(MAP_GROWSDOWN):
 540 
 541 
 542 // Force Linux kernel to expand current thread stack. If "bottom" is close
 543 // to the stack guard, caller should block all signals.
 544 //
 545 // MAP_GROWSDOWN:
 546 //   A special mmap() flag that is used to implement thread stacks. It tells
 547 //   kernel that the memory region should extend downwards when needed. This
 548 //   allows early versions of LinuxThreads to only mmap the first few pages
 549 //   when creating a new thread. Linux kernel will automatically expand thread
 550 //   stack as needed (on page faults).
 551 //
 552 //   However, because the memory region of a MAP_GROWSDOWN stack can grow on
 553 //   demand, if a page fault happens outside an already mapped MAP_GROWSDOWN
 554 //   region, it's hard to tell if the fault is due to a legitimate stack
 555 //   access or because of reading/writing non-exist memory (e.g. buffer
 556 //   overrun). As a rule, if the fault happens below current stack pointer,
 557 //   Linux kernel does not expand stack, instead a SIGSEGV is sent to the
 558 //   application (see Linux kernel fault.c).
 559 //
 560 //   This Linux feature can cause SIGSEGV when VM bangs thread stack for
 561 //   stack overflow detection.
 562 //
 563 //   Newer version of LinuxThreads (since glibc-2.2, or, RH-7.x) and NPTL do
 564 //   not use MAP_GROWSDOWN.



 565 //
 566 // To get around the problem and allow stack banging on Linux, we need to
 567 // manually expand thread stack after receiving the SIGSEGV.
 568 //
 569 // There are two ways to expand thread stack to address "bottom", we used
 570 // both of them in JVM before 1.5:
 571 //   1. adjust stack pointer first so that it is below "bottom", and then
 572 //      touch "bottom"
 573 //   2. mmap() the page in question
 574 //
 575 // Now alternate signal stack is gone, it's harder to use 2. For instance,
 576 // if current sp is already near the lower end of page 101, and we need to
 577 // call mmap() to map page 100, it is possible that part of the mmap() frame
 578 // will be placed in page 100. When page 100 is mapped, it is zero-filled.
 579 // That will destroy the mmap() frame and cause VM to crash.
 580 //
 581 // The following code works by adjusting sp first, then accessing the "bottom"
 582 // page to force a page fault. Linux kernel will then automatically expand the
 583 // stack mapping.
 584 //


 619 
 620 bool os::Linux::manually_expand_stack(JavaThread * t, address addr) {
 621   assert(t!=NULL, "just checking");
 622   assert(t->osthread()->expanding_stack(), "expand should be set");
 623   assert(t->stack_base() != NULL, "stack_base was not initialized");
 624 
 625   if (addr <  t->stack_base() && addr >= t->stack_yellow_zone_base()) {
 626     sigset_t mask_all, old_sigset;
 627     sigfillset(&mask_all);
 628     pthread_sigmask(SIG_SETMASK, &mask_all, &old_sigset);
 629     _expand_stack_to(addr);
 630     pthread_sigmask(SIG_SETMASK, &old_sigset, NULL);
 631     return true;
 632   }
 633   return false;
 634 }
 635 
 636 //////////////////////////////////////////////////////////////////////////////
 637 // create new thread
 638 







































 639 // Thread start routine for all newly created threads
 640 static void *java_start(Thread *thread) {
 641   // Try to randomize the cache line index of hot stack frames.
 642   // This helps when threads of the same stack traces evict each other's
 643   // cache lines. The threads can be either from the same JVM instance, or
 644   // from different JVM instances. The benefit is especially true for
 645   // processors with hyperthreading technology.
 646   static int counter = 0;
 647   int pid = os::current_process_id();
 648   alloca(((pid ^ counter++) & 7) * 128);
 649 
 650   ThreadLocalStorage::set_thread(thread);
 651 
 652   OSThread* osthread = thread->osthread();
 653   Monitor* sync = osthread->startThread_lock();
 654 









 655   // thread_id is kernel thread id (similar to Solaris LWP id)
 656   osthread->set_thread_id(os::Linux::gettid());
 657 
 658   if (UseNUMA) {
 659     int lgrp_id = os::numa_get_group_id();
 660     if (lgrp_id != -1) {
 661       thread->set_lgrp_id(lgrp_id);
 662     }
 663   }
 664   // initialize signal mask for this thread
 665   os::Linux::hotspot_sigmask(thread);
 666 
 667   // initialize floating point control register
 668   os::Linux::init_thread_fpu_state();
 669 
 670   // handshaking with parent thread
 671   {
 672     MutexLockerEx ml(sync, Mutex::_no_safepoint_check_flag);
 673 
 674     // notify parent thread


 733       case os::pgc_thread:
 734       case os::cgc_thread:
 735       case os::watcher_thread:
 736         if (VMThreadStackSize > 0) stack_size = (size_t)(VMThreadStackSize * K);
 737         break;
 738       }
 739     }
 740 
 741     stack_size = MAX2(stack_size, os::Linux::min_stack_allowed);
 742     pthread_attr_setstacksize(&attr, stack_size);
 743   } else {
 744     // let pthread_create() pick the default value.
 745   }
 746 
 747   // glibc guard page
 748   pthread_attr_setguardsize(&attr, os::Linux::default_guard_size(thr_type));
 749 
 750   ThreadState state;
 751 
 752   {






 753     pthread_t tid;
 754     int ret = pthread_create(&tid, &attr, (void* (*)(void*)) java_start, thread);
 755 
 756     pthread_attr_destroy(&attr);
 757 
 758     if (ret != 0) {
 759       if (PrintMiscellaneous && (Verbose || WizardMode)) {
 760         perror("pthread_create()");
 761       }
 762       // Need to clean up stuff we've allocated so far
 763       thread->set_osthread(NULL);
 764       delete osthread;

 765       return false;
 766     }
 767 
 768     // Store pthread info into the OSThread
 769     osthread->set_pthread_id(tid);
 770 
 771     // Wait until child thread is either initialized or aborted
 772     {
 773       Monitor* sync_with_child = osthread->startThread_lock();
 774       MutexLockerEx ml(sync_with_child, Mutex::_no_safepoint_check_flag);
 775       while ((state = osthread->get_state()) == ALLOCATED) {
 776         sync_with_child->wait(Mutex::_no_safepoint_check_flag);
 777       }
 778     }




 779   }
 780 
 781   // Aborted due to thread limit being reached
 782   if (state == ZOMBIE) {
 783     thread->set_osthread(NULL);
 784     delete osthread;
 785     return false;
 786   }
 787 
 788   // The thread is returned suspended (in state INITIALIZED),
 789   // and is started higher up in the call chain
 790   assert(state == INITIALIZED, "race condition");
 791   return true;
 792 }
 793 
 794 /////////////////////////////////////////////////////////////////////////////
 795 // attach existing thread
 796 
 797 // bootstrap the main thread
 798 bool os::create_main_thread(JavaThread* thread) {


1386 // sure it is async-safe and can handle partially initialized VM.
1387 void os::abort(bool dump_core, void* siginfo, void* context) {
1388   os::shutdown();
1389   if (dump_core) {
1390 #ifndef PRODUCT
1391     fdStream out(defaultStream::output_fd());
1392     out.print_raw("Current thread is ");
1393     char buf[16];
1394     jio_snprintf(buf, sizeof(buf), UINTX_FORMAT, os::current_thread_id());
1395     out.print_raw_cr(buf);
1396     out.print_raw_cr("Dumping core ...");
1397 #endif
1398     ::abort(); // dump core
1399   }
1400 
1401   ::exit(1);
1402 }
1403 
// Die immediately, no exit hook, no abort hook, no cleanup.
void os::die() {
  // abort(2) terminates the process; depending on system settings it
  // also produces a core dump.
  ::abort();
}
1408 
1409 
1410 // This method is a copy of JDK's sysGetLastErrorString
1411 // from src/solaris/hpi/src/system_md.c
1412 
1413 size_t os::lasterror(char *buf, size_t len) {
1414   if (errno == 0)  return 0;
1415 
1416   const char *s = ::strerror(errno);
1417   size_t n = ::strlen(s);
1418   if (n >= len) {
1419     n = len - 1;
1420   }
1421   ::strncpy(buf, s, n);
1422   buf[n] = '\0';
1423   return n;
1424 }
1425 
// The thread id is the pthread id, cast to an integer.
intx os::current_thread_id() { return (intx)pthread_self(); }
int os::current_process_id() {
  // getpid() is cheap and async-signal-safe; no caching required.
  return ::getpid();
}
1430 
1431 // DLL functions
1432 
// Shared-library file extension on Linux.
const char* os::dll_file_extension() { return ".so"; }
1434 
// This must be hard coded because it's the system's temporary
// directory not the java application's temp directory, ala java.io.tmpdir.
// On Linux the system temporary directory is conventionally /tmp.
const char* os::get_temp_directory() { return "/tmp"; }
1438 
1439 static bool file_exists(const char* filename) {
1440   struct stat statbuf;
1441   if (filename == NULL || strlen(filename) == 0) {
1442     return false;
1443   }
1444   return os::stat(filename, &statbuf) == 0;
1445 }
1446 
1447 bool os::dll_build_name(char* buffer, size_t buflen,
1448                         const char* pname, const char* fname) {


2054       !_print_ascii_file("/etc/ltib-release", st) &&
2055       !_print_ascii_file("/etc/angstrom-version", st) &&
2056       !_print_ascii_file("/etc/system-release", st) &&
2057       !_print_ascii_file("/etc/os-release", st)) {
2058 
2059     if (file_exists("/etc/debian_version")) {
2060       st->print("Debian ");
2061       _print_ascii_file("/etc/debian_version", st);
2062     } else {
2063       st->print("Linux");
2064     }
2065   }
2066   st->cr();
2067 }
2068 
2069 void os::Linux::print_libversion_info(outputStream* st) {
2070   // libc, pthread
2071   st->print("libc:");
2072   st->print("%s ", os::Linux::glibc_version());
2073   st->print("%s ", os::Linux::libpthread_version());



2074   st->cr();
2075 }
2076 
// Dumps the kernel's /proc/meminfo verbatim to the given stream.
void os::Linux::print_full_memory_info(outputStream* st) {
  st->print("\n/proc/meminfo:\n");
  _print_ascii_file("/proc/meminfo", st);
  st->cr();
}
2082 
2083 void os::print_memory_info(outputStream* st) {
2084 
2085   st->print("Memory:");
2086   st->print(" %dk page", os::vm_page_size()>>10);
2087 
2088   // values in struct sysinfo are "unsigned long"
2089   struct sysinfo si;
2090   sysinfo(&si);
2091 
2092   st->print(", physical " UINT64_FORMAT "k",
2093             os::physical_memory() >> 10);


2912 
2913   return os::commit_memory(addr, size, !ExecMem);
2914 }
2915 
2916 // If this is a growable mapping, remove the guard pages entirely by
2917 // munmap()ping them.  If not, just call uncommit_memory(). This only
2918 // affects the main/initial thread, but guard against future OS changes
2919 // It's safe to always unmap guard pages for initial thread because we
2920 // always place it right after end of the mapped region
2921 
2922 bool os::remove_stack_guard_pages(char* addr, size_t size) {
2923   uintptr_t stack_extent, stack_base;
2924 
2925   if (os::Linux::is_initial_thread()) {
2926     return ::munmap(addr, size) == 0;
2927   }
2928 
2929   return os::uncommit_memory(addr, size);
2930 }
2931 


2932 // If 'fixed' is true, anon_mmap() will attempt to reserve anonymous memory
2933 // at 'requested_addr'. If there are existing memory mappings at the same
2934 // location, however, they will be overwritten. If 'fixed' is false,
2935 // 'requested_addr' is only treated as a hint, the return value may or
2936 // may not start from the requested address. Unlike Linux mmap(), this
2937 // function returns NULL to indicate failure.
2938 static char* anon_mmap(char* requested_addr, size_t bytes, bool fixed) {
2939   char * addr;
2940   int flags;
2941 
2942   flags = MAP_PRIVATE | MAP_NORESERVE | MAP_ANONYMOUS;
2943   if (fixed) {
2944     assert((uintptr_t)requested_addr % os::Linux::page_size() == 0, "unaligned address");
2945     flags |= MAP_FIXED;
2946   }
2947 
2948   // Map reserved/uncommitted pages PROT_NONE so we fail early if we
2949   // touch an uncommitted page. Otherwise, the read/write might
2950   // succeed if we have enough swap space to back the physical page.
2951   addr = (char*)::mmap(requested_addr, bytes, PROT_NONE,
2952                        flags, -1, 0);
2953 










2954   return addr == MAP_FAILED ? NULL : addr;
2955 }
2956 




// Counterpart of anon_mmap(): returns nonzero (true) if the unmap succeeded.
static int anon_munmap(char * addr, size_t size) {
  return ::munmap(addr, size) == 0;
}
2960 
// Reserves (but does not commit) 'bytes' of address space. A non-NULL
// requested_addr is mapped MAP_FIXED by anon_mmap() and will replace any
// existing mappings there; alignment_hint is unused on Linux.
char* os::pd_reserve_memory(size_t bytes, char* requested_addr,
                            size_t alignment_hint) {
  return anon_mmap(requested_addr, bytes, (requested_addr != NULL));
}
2965 
// Releases a range previously reserved with pd_reserve_memory().
bool os::pd_release_memory(char* addr, size_t size) {
  return anon_munmap(addr, size);
}
2969 




// Changes the protection of [addr, addr+size) to 'prot'. 'addr' must be
// page aligned (asserted below); 'size' is rounded up to whole pages.
// Returns true on success.
static bool linux_mprotect(char* addr, size_t size, int prot) {
  // Linux wants the mprotect address argument to be page aligned.
  char* bottom = (char*)align_size_down((intptr_t)addr, os::Linux::page_size());

  // According to SUSv3, mprotect() should only be used with mappings
  // established by mmap(), and mmap() always maps whole pages. Unaligned
  // 'addr' likely indicates problem in the VM (e.g. trying to change
  // protection of malloc'ed or statically allocated memory). Check the
  // caller if you hit this assert.
  assert(addr == bottom, "sanity check");

  size = align_size_up(pointer_delta(addr, bottom, 1) + size, os::Linux::page_size());
  return ::mprotect(bottom, size, prot) == 0;
}
2984 
2985 // Set protections specified
2986 bool os::protect_memory(char* addr, size_t bytes, ProtType prot,
2987                         bool is_committed) {
2988   unsigned int p = 0;
2989   switch (prot) {


3566   return UseTransparentHugePages || UseHugeTLBFS;
3567 }
3568 
3569 // Reserve memory at an arbitrary address, only if that area is
3570 // available (and not reserved for something else).
3571 
3572 char* os::pd_attempt_reserve_memory_at(size_t bytes, char* requested_addr) {
3573   const int max_tries = 10;
3574   char* base[max_tries];
3575   size_t size[max_tries];
3576   const size_t gap = 0x000000;
3577 
3578   // Assert only that the size is a multiple of the page size, since
3579   // that's all that mmap requires, and since that's all we really know
3580   // about at this low abstraction level.  If we need higher alignment,
3581   // we can either pass an alignment to this method or verify alignment
3582   // in one of the methods further up the call chain.  See bug 5044738.
3583   assert(bytes % os::vm_page_size() == 0, "reserving unexpected size block");
3584 
3585   // Repeatedly allocate blocks until the block is allocated at the
3586   // right spot.








3587 
3588   // Linux mmap allows caller to pass an address as hint; give it a try first,
3589   // if kernel honors the hint then we can return immediately.
3590   char * addr = anon_mmap(requested_addr, bytes, false);
3591   if (addr == requested_addr) {
3592     return requested_addr;
3593   }
3594 
3595   if (addr != NULL) {
3596     // mmap() is successful but it fails to reserve at the requested address
3597     anon_munmap(addr, bytes);
3598   }
3599 
3600   int i;
3601   for (i = 0; i < max_tries; ++i) {
3602     base[i] = reserve_memory(bytes);
3603 
3604     if (base[i] != NULL) {
3605       // Is this the block we wanted?
3606       if (base[i] == requested_addr) {


3620         ptrdiff_t bottom_overlap = base[i] + bytes - requested_addr;
3621         if (bottom_overlap >= 0 && (size_t)bottom_overlap < bytes) {
3622           unmap_memory(requested_addr, bottom_overlap);
3623           size[i] = bytes - bottom_overlap;
3624         } else {
3625           size[i] = bytes;
3626         }
3627       }
3628     }
3629   }
3630 
3631   // Give back the unused reserved pieces.
3632 
3633   for (int j = 0; j < i; ++j) {
3634     if (base[j] != NULL) {
3635       unmap_memory(base[j], size[j]);
3636     }
3637   }
3638 
3639   if (i < max_tries) {

3640     return requested_addr;
3641   } else {

3642     return NULL;
3643   }
3644 }
3645 
// Thin wrapper over read(2).
// NOTE(review): ::read returns ssize_t; an error (-1) wraps to a huge
// size_t here — callers must be prepared for that. Confirm with callers.
size_t os::read(int fd, void *buf, unsigned int nBytes) {
  return ::read(fd, buf, nBytes);
}
3649 
// Positioned read: reads nBytes at 'offset' without moving the file
// cursor (pread(2)). Same ssize_t->size_t caveat as os::read().
size_t os::read_at(int fd, void *buf, unsigned int nBytes, jlong offset) {
  return ::pread(fd, buf, nBytes, offset);
}
3653 
3654 // Short sleep, direct OS call.
3655 //
3656 // Note: certain versions of Linux CFS scheduler (since 2.6.23) do not guarantee
3657 // sched_yield(2) will actually give up the CPU:
3658 //
//   * Alone on this particular CPU, keeps running.
3660 //   * Before the introduction of "skip_buddy" with "compat_yield" disabled
3661 //     (pre 2.6.39).


4465 
4466 extern bool signal_name(int signo, char* buf, size_t len);
4467 
4468 const char* os::exception_name(int exception_code, char* buf, size_t size) {
4469   if (0 < exception_code && exception_code <= SIGRTMAX) {
4470     // signal
4471     if (!signal_name(exception_code, buf, size)) {
4472       jio_snprintf(buf, size, "SIG%d", exception_code);
4473     }
4474     return buf;
4475   } else {
4476     return NULL;
4477   }
4478 }
4479 
// This is called _before_ most of the global arguments have been parsed.
4481 void os::init(void) {
4482   char dummy;   // used to get a guess on initial stack address
4483 //  first_hrtime = gethrtime();
4484 










4485   clock_tics_per_sec = sysconf(_SC_CLK_TCK);
4486 
4487   init_random(1234567);
4488 
4489   ThreadCritical::initialize();
4490 
4491   Linux::set_page_size(sysconf(_SC_PAGESIZE));
4492   if (Linux::page_size() == -1) {
4493     fatal(err_msg("os_linux.cpp: os::init: sysconf failed (%s)",
4494                   strerror(errno)));
4495   }
4496   init_page_sizes((size_t) Linux::page_size());
4497 
4498   Linux::initialize_system_info();
4499 
4500   // main_thread points to the aboriginal thread
4501   Linux::_main_thread = pthread_self();
4502 
4503   Linux::clock_init();
4504   initial_time_count = javaTimeNanos();


4597       threadStackSizeInBytes < os::Linux::min_stack_allowed) {
4598     tty->print_cr("\nThe stack size specified is too small, "
4599                   "Specify at least %dk",
4600                   os::Linux::min_stack_allowed/ K);
4601     return JNI_ERR;
4602   }
4603 
4604   // Make the stack size a multiple of the page size so that
4605   // the yellow/red zones can be guarded.
4606   JavaThread::set_stack_size_at_create(round_to(threadStackSizeInBytes,
4607                                                 vm_page_size()));
4608 
4609   Linux::capture_initial_stack(JavaThread::stack_size_at_create());
4610 
4611 #if defined(IA32)
4612   workaround_expand_exec_shield_cs_limit();
4613 #endif
4614 
4615   Linux::libpthread_init();
4616   if (PrintMiscellaneous && (Verbose || WizardMode)) {
4617     tty->print_cr("[HotSpot is running with %s, %s]\n",
4618                   Linux::glibc_version(), Linux::libpthread_version());

4619   }
4620 
4621   if (UseNUMA) {
4622     if (!Linux::libnuma_init()) {
4623       UseNUMA = false;
4624     } else {
4625       if ((Linux::numa_max_node() < 1)) {
4626         // There's only one node(they start from 0), disable NUMA.
4627         UseNUMA = false;
4628       }
4629     }
4630     // With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way
4631     // we can make the adaptive lgrp chunk resizing work. If the user specified
4632     // both UseNUMA and UseLargePages (or UseSHM/UseHugeTLBFS) on the command line - warn and
4633     // disable adaptive resizing.
4634     if (UseNUMA && UseLargePages && !can_commit_large_page_memory()) {
4635       if (FLAG_IS_DEFAULT(UseNUMA)) {
4636         UseNUMA = false;
4637       } else {
4638         if (FLAG_IS_DEFAULT(UseLargePages) &&


4773   if (osthread->ucontext() != NULL) {
4774     _epc = os::Linux::ucontext_get_pc((ucontext_t *) context.ucontext());
4775   } else {
4776     // NULL context is unexpected, double-check this is the VMThread
4777     guarantee(thread->is_VM_thread(), "can only be called for VMThread");
4778   }
4779 }
4780 
4781 // Suspends the target using the signal mechanism and then grabs the PC before
4782 // resuming the target. Used by the flat-profiler only
4783 ExtendedPC os::get_thread_pc(Thread* thread) {
4784   // Make sure that it is called by the watcher for the VMThread
4785   assert(Thread::current()->is_Watcher_thread(), "Must be watcher");
4786   assert(thread->is_VM_thread(), "Can only be called for VMThread");
4787 
     // PcFetcher (declared earlier in this file) encapsulates the
     // suspend/sample/resume handshake; run() blocks until the sample has
     // been taken, after which result() holds the captured PC.
     // NOTE(review): presumably run() tolerates a NULL ucontext on the
     // target (see the guarantee in the fragment above) — confirm against
     // PcFetcher::do_task.
4788   PcFetcher fetcher(thread);
4789   fetcher.run();
4790   return fetcher.result();
4791 }
4792 
















4793 ////////////////////////////////////////////////////////////////////////////////
4794 // debug support
4795 
4796 bool os::find(address addr, outputStream* st) {
4797   Dl_info dlinfo;
4798   memset(&dlinfo, 0, sizeof(dlinfo));
4799   if (dladdr(addr, &dlinfo) != 0) {
4800     st->print(PTR_FORMAT ": ", addr);
4801     if (dlinfo.dli_sname != NULL && dlinfo.dli_saddr != NULL) {
4802       st->print("%s+%#x", dlinfo.dli_sname,
4803                 addr - (intptr_t)dlinfo.dli_saddr);
4804     } else if (dlinfo.dli_fbase != NULL) {
4805       st->print("<offset %#x>", addr - (intptr_t)dlinfo.dli_fbase);
4806     } else {
4807       st->print("<absolute address>");
4808     }
4809     if (dlinfo.dli_fname != NULL) {
4810       st->print(" in %s", dlinfo.dli_fname);
4811     }
4812     if (dlinfo.dli_fbase != NULL) {


5396   int ret = OS_TIMEOUT;
5397   int status = pthread_mutex_lock(_mutex);
5398   assert_status(status == 0, status, "mutex_lock");
5399   guarantee(_nParked == 0, "invariant");
5400   ++_nParked;
5401 
5402   // Object.wait(timo) will return because of
5403   // (a) notification
5404   // (b) timeout
5405   // (c) thread.interrupt
5406   //
5407   // Thread.interrupt and object.notify{All} both call Event::set.
5408   // That is, we treat thread.interrupt as a special case of notification.
5409   // We ignore spurious OS wakeups unless FilterSpuriousWakeups is false.
5410   // We assume all ETIME returns are valid.
5411   //
5412   // TODO: properly differentiate simultaneous notify+interrupt.
5413   // In that case, we should propagate the notify to another waiter.
5414 
5415   while (_Event < 0) {
5416     status = pthread_cond_timedwait(_cond, _mutex, &abst);
5417     if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5418       pthread_cond_destroy(_cond);
5419       pthread_cond_init(_cond, os::Linux::condAttr());
5420     }
5421     assert_status(status == 0 || status == EINTR ||
5422                   status == ETIME || status == ETIMEDOUT,
5423                   status, "cond_timedwait");
5424     if (!FilterSpuriousWakeups) break;                 // previous semantics
5425     if (status == ETIME || status == ETIMEDOUT) break;
5426     // We consume and ignore EINTR and spurious wakeups.
5427   }
5428   --_nParked;
5429   if (_Event >= 0) {
5430     ret = OS_OK;
5431   }
5432   _Event = 0;
5433   status = pthread_mutex_unlock(_mutex);
5434   assert_status(status == 0, status, "mutex_unlock");
5435   assert(_nParked == 0, "invariant");
5436   // Paranoia to ensure our locked and lock-free paths interact


5624   }
5625 
5626 #ifdef ASSERT
5627   // Don't catch signals while blocked; let the running threads have the signals.
5628   // (This allows a debugger to break into the running thread.)
5629   sigset_t oldsigs;
5630   sigset_t* allowdebug_blocked = os::Linux::allowdebug_blocked_signals();
5631   pthread_sigmask(SIG_BLOCK, allowdebug_blocked, &oldsigs);
5632 #endif
5633 
5634   OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
5635   jt->set_suspend_equivalent();
5636   // cleared by handle_special_suspend_equivalent_condition() or java_suspend_self()
5637 
5638   assert(_cur_index == -1, "invariant");
5639   if (time == 0) {
5640     _cur_index = REL_INDEX; // arbitrary choice when not timed
5641     status = pthread_cond_wait(&_cond[_cur_index], _mutex);
5642   } else {
5643     _cur_index = isAbsolute ? ABS_INDEX : REL_INDEX;
5644     status = pthread_cond_timedwait(&_cond[_cur_index], _mutex, &absTime);
5645     if (status != 0 && WorkAroundNPTLTimedWaitHang) {
5646       pthread_cond_destroy(&_cond[_cur_index]);
5647       pthread_cond_init(&_cond[_cur_index], isAbsolute ? NULL : os::Linux::condAttr());
5648     }
5649   }
5650   _cur_index = -1;
5651   assert_status(status == 0 || status == EINTR ||
5652                 status == ETIME || status == ETIMEDOUT,
5653                 status, "cond_timedwait");
5654 
5655 #ifdef ASSERT
5656   pthread_sigmask(SIG_SETMASK, &oldsigs, NULL);
5657 #endif
5658 
5659   _counter = 0;
5660   status = pthread_mutex_unlock(_mutex);
5661   assert_status(status == 0, status, "invariant");
5662   // Paranoia to ensure our locked and lock-free paths interact
5663   // correctly with each other and Java-level accesses.
5664   OrderAccess::fence();


< prev index next >