src/os/linux/vm/os_linux.cpp

Print this page
rev 2869 : 7117303: VM uses non-monotonic time source and complains that it is non-monotonic
Summary: Replaces calls to os::javaTimeMillis(), which does not guarantee monotonicity, in GC code with calls to os::javaTimeNanos() and a suitable conversion factor. os::javaTimeNanos() mostly guarantees monotonicity, depending on the underlying OS implementation, and is therefore a better alternative. Changes in OS files are to make use of the newly defined constants in globalDefinitions.hpp.
Reviewed-by:


 110 # include <sys/wait.h>
 111 # include <pwd.h>
 112 # include <poll.h>
 113 # include <semaphore.h>
 114 # include <fcntl.h>
 115 # include <string.h>
 116 # include <syscall.h>
 117 # include <sys/sysinfo.h>
 118 # include <gnu/libc-version.h>
 119 # include <sys/ipc.h>
 120 # include <sys/shm.h>
 121 # include <link.h>
 122 # include <stdint.h>
 123 # include <inttypes.h>
 124 # include <sys/ioctl.h>
 125 
 126 #define MAX_PATH    (2 * K)  // 2 * K; K is defined outside this excerpt (presumably 1024 -- confirm)
 127 
 128 // for timer info max values which include all bits
 129 #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
 130 #define SEC_IN_NANOSECS  1000000000LL  // nanoseconds per second; the LL suffix makes arithmetic that uses it 64-bit
 131 
 132 #define LARGEPAGES_BIT (1 << 6)  // bit 6 (0x40); NOTE(review): its use is not shown in this excerpt -- confirm
 133 ////////////////////////////////////////////////////////////////////////////////
 134 // global variables
 135 julong os::Linux::_physical_memory = 0;
 136 
 137 address   os::Linux::_initial_thread_stack_bottom = NULL;
 138 uintptr_t os::Linux::_initial_thread_stack_size   = 0;
 139 
 140 int (*os::Linux::_clock_gettime)(clockid_t, struct timespec *) = NULL;  // function pointer, NULL here; NOTE(review): presumably resolved at runtime -- confirm
 141 int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL;  // function pointer, NULL here; NOTE(review): presumably resolved at runtime -- confirm
 142 Mutex* os::Linux::_createThread_lock = NULL;
 143 pthread_t os::Linux::_main_thread;
 144 int os::Linux::_page_size = -1;  // starts at -1; NOTE(review): presumably replaced with the real page size during initialization -- confirm
 145 bool os::Linux::_is_floating_stack = false;
 146 bool os::Linux::_is_NPTL = false;
 147 bool os::Linux::_supports_fast_thread_cpu_time = false;
 148 const char * os::Linux::_glibc_version = NULL;
 149 const char * os::Linux::_libpthread_version = NULL;
 150 


3242 
3243   if (i < max_tries) {
3244     _highest_vm_reserved_address = MAX2(old_highest, (address)requested_addr + bytes);
3245     return requested_addr;
3246   } else {
3247     _highest_vm_reserved_address = old_highest;
3248     return NULL;
3249   }
3250 }
3251 
3252 size_t os::read(int fd, void *buf, unsigned int nBytes) {  // Forwards to the global ::read() with the same descriptor, buffer and byte count.
3253   return ::read(fd, buf, nBytes);  // NOTE(review): POSIX read() returns ssize_t (-1 on error); converting to size_t turns -1 into a very large value -- confirm callers test for (size_t)-1
3254 }
3255 
3256 // TODO-FIXME: reconcile Solaris' os::sleep with the linux variation.
3257 // Solaris uses poll(), linux uses park().
3258 // Poll() is likely a better choice, assuming that Thread.interrupt()
3259 // generates a SIGUSRx signal. Note that SIGUSR1 can interfere with
3260 // SIGSEGV, see 4355769.
3261 
3262 const int NANOSECS_PER_MILLISECS = 1000000;  // divisor used below to turn javaTimeNanos() differences into milliseconds
3263 
3264 int os::sleep(Thread* thread, jlong millis, bool interruptible) {  // Parks the calling thread until 'millis' ms have elapsed; returns OS_INTRPT if interrupted (interruptible mode only), otherwise OS_OK.
3265   assert(thread == Thread::current(),  "thread consistency check");  // a thread may only put itself to sleep
3266 
3267   ParkEvent * const slp = thread->_SleepEvent ;  // the event this thread parks on below
3268   slp->reset() ;  // NOTE(review): presumably discards a stale wakeup so the first park() blocks -- confirm
3269   OrderAccess::fence() ;
3270 
3271   if (interruptible) {  // interruptible path: checks the interrupt state each iteration and treats 'thread' as a JavaThread
3272     jlong prevtime = javaTimeNanos();
3273 
3274     for (;;) {
3275       if (os::is_interrupted(thread, true)) {  // NOTE(review): the 'true' argument presumably clears the interrupt state -- confirm
3276         return OS_INTRPT;
3277       }
3278 
3279       jlong newtime = javaTimeNanos();
3280 
3281       if (newtime - prevtime < 0) {
3282         // time moving backwards, should only happen if no monotonic clock
3283         // not a guarantee() because JVM should not abort on kernel/glibc bugs
3284         assert(!Linux::supports_monotonic_clock(), "time moving backwards");
3285       } else {
3286         millis -= (newtime - prevtime) / NANOSECS_PER_MILLISECS;  // charge whole elapsed ms; NOTE(review): the sub-millisecond remainder is dropped because prevtime is reset to newtime below
3287       }
3288 
3289       if(millis <= 0) {  // requested duration has been used up
3290         return OS_OK;
3291       }
3292 
3293       prevtime = newtime;
3294 
3295       {  // tbivm and osts are scoped objects that live until the closing brace of this block
3296         assert(thread->is_Java_thread(), "sanity check");
3297         JavaThread *jt = (JavaThread *) thread;
3298         ThreadBlockInVM tbivm(jt);
3299         OSThreadWaitState osts(jt->osthread(), false /* not Object.wait() */);
3300 
3301         jt->set_suspend_equivalent();
3302         // cleared by handle_special_suspend_equivalent_condition() or
3303         // java_suspend_self() via check_and_wait_while_suspended()
3304 
3305         slp->park(millis);  // wait with the remaining budget as timeout; the loop re-evaluates elapsed time after every return
3306 
3307         // were we externally suspended while we were waiting?
3308         jt->check_and_wait_while_suspended();
3309       }
3310     }
3311   } else {  // non-interruptible path: same countdown, without the interrupt check or the suspend handling
3312     OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
3313     jlong prevtime = javaTimeNanos();
3314 
3315     for (;;) {
3316       // It'd be nice to avoid the back-to-back javaTimeNanos() calls on
3317       // the 1st iteration ...
3318       jlong newtime = javaTimeNanos();
3319 
3320       if (newtime - prevtime < 0) {
3321         // time moving backwards, should only happen if no monotonic clock
3322         // not a guarantee() because JVM should not abort on kernel/glibc bugs
3323         assert(!Linux::supports_monotonic_clock(), "time moving backwards");
3324       } else {
3325         millis -= (newtime - prevtime) / NANOSECS_PER_MILLISECS;  // whole elapsed ms only; same truncation as in the interruptible path
3326       }
3327 
3328       if(millis <= 0) break ;
3329 
3330       prevtime = newtime;
3331       slp->park(millis);
3332     }
3333     return OS_OK ;  // this path has no OS_INTRPT exit
3334   }
3335 }
3336 
3337 int os::naked_sleep() {  // Sleeps the current thread for 1 ms and returns the result of os::sleep().
3338   // %% make the sleep time an integer flag. for now use 1 millisec.
3339   return os::sleep(Thread::current(), 1, false);  // arguments: current thread, 1 ms, interruptible = false
3340 }
3341 
3342 // Sleep forever; naked call to OS-specific sleep; use with CAUTION
3343 void os::infinite_sleep() {
3344   while (true) {    // sleep forever ...
3345     ::sleep(100);   // ... 100 seconds at a time


3907           tty->print_cr("Info: AllowUserSignalHandlers is activated, all active signal checking is disabled");
3908         }
3909         check_signals = false;
3910       }
3911     }
3912   }
3913 }
3914 
3915 // This is the fastest way to get thread cpu time on Linux.
3916 // Returns cpu time (user+sys) for any thread, not only for current.
3917 // POSIX compliant clocks are implemented in the kernels 2.6.16+.
3918 // It might work on 2.6.10+ with a special kernel/glibc patch.
3919 // For reference, please, see IEEE Std 1003.1-2004:
3920 //   http://www.unix.org/single_unix_specification
3921 
3922 jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) {  // Reads the clock identified by 'clockid' and returns its value in nanoseconds.
3923   struct timespec tp;
3924   int rc = os::Linux::clock_gettime(clockid, &tp);
3925   assert(rc == 0, "clock_gettime is expected to return 0 code");  // rc is examined only by this assert
3926 
3927   return (tp.tv_sec * SEC_IN_NANOSECS) + tp.tv_nsec;  // SEC_IN_NANOSECS is 1000000000LL, so the product is computed in 64 bits
3928 }
3929 
3930 /////
3931 // glibc on Linux platform uses non-documented flag
3932 // to indicate, that some special sort of signal
3933 // trampoline is used.
3934 // We will never set this flag, and we should
3935 // ignore this flag in our diagnostic
3936 #ifdef SIGNIFICANT_SIGNAL_MASK
3937 #undef SIGNIFICANT_SIGNAL_MASK
3938 #endif
3939 #define SIGNIFICANT_SIGNAL_MASK (~0x04000000)
3940 
3941 static const char* get_signal_handler_name(address handler,
3942                                            char* buf, int buflen) {
3943   int offset;
3944   bool found = os::dll_address_to_library_name(handler, buf, buflen, &offset);
3945   if (found) {
3946     // skip directory names
3947     const char *p1, *p2;


5148   // circumstances this can cause a thread to return prematurely from
5149   // cond_{timed}wait() but the spurious wakeup is benign and the victim will
5150   // simply re-test the condition and re-park itself.
5151 }
5152 
5153 
5154 // JSR166
5155 // -------------------------------------------------------
5156 
5157 /*
5158  * The solaris and linux implementations of park/unpark are fairly
5159  * conservative for now, but can be improved. They currently use a
5160  * mutex/condvar pair, plus a count.
5161  * Park decrements count if > 0, else does a condvar wait.  Unpark
5162  * sets count to 1 and signals condvar.  Only one thread ever waits
5163  * on the condvar. Contention seen when trying to park implies that someone
5164  * is unparking you, so don't wait. And spurious returns are fine, so there
5165  * is no need to track notifications.
5166  */
5167 
5168 
5169 #define NANOSECS_PER_SEC 1000000000
5170 #define NANOSECS_PER_MILLISEC 1000000
5171 #define MAX_SECS 100000000
5172 /*
5173  * This code is common to linux and solaris and will be moved to a
5174  * common place in dolphin.
5175  *
5176  * The passed in time value is either a relative time in nanoseconds
5177  * or an absolute time in milliseconds. Either way it has to be unpacked
5178  * into suitable seconds and nanoseconds components and stored in the
5179  * given timespec structure.
5180  * Given time is a 64-bit value and the time_t used in the timespec is only
5181  * a signed-32-bit value (except on 64-bit Linux) we have to watch for
5182  * overflow if times way in the future are given. Further on Solaris versions
5183  * prior to 10 there is a restriction (see cond_timedwait) that the specified
5184  * number of seconds, in abstime, is less than current_time  + 100,000,000.
5185  * As it will be 28 years before "now + 100000000" will overflow we can
5186  * ignore overflow and just impose a hard-limit on seconds using the value
5187  * of "now + 100,000,000". This places a limit on the timeout of about 3.17
5188  * years from "now".
5189  */
5190 




 110 # include <sys/wait.h>
 111 # include <pwd.h>
 112 # include <poll.h>
 113 # include <semaphore.h>
 114 # include <fcntl.h>
 115 # include <string.h>
 116 # include <syscall.h>
 117 # include <sys/sysinfo.h>
 118 # include <gnu/libc-version.h>
 119 # include <sys/ipc.h>
 120 # include <sys/shm.h>
 121 # include <link.h>
 122 # include <stdint.h>
 123 # include <inttypes.h>
 124 # include <sys/ioctl.h>
 125 
 126 #define MAX_PATH    (2 * K)  // 2 * K; K is defined outside this excerpt (presumably 1024 -- confirm)
 127 
 128 // for timer info max values which include all bits
 129 #define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)

 130 
 131 #define LARGEPAGES_BIT (1 << 6)  // bit 6 (0x40); NOTE(review): its use is not shown in this excerpt -- confirm
 132 ////////////////////////////////////////////////////////////////////////////////
 133 // global variables
 134 julong os::Linux::_physical_memory = 0;
 135 
 136 address   os::Linux::_initial_thread_stack_bottom = NULL;
 137 uintptr_t os::Linux::_initial_thread_stack_size   = 0;
 138 
 139 int (*os::Linux::_clock_gettime)(clockid_t, struct timespec *) = NULL;  // function pointer, NULL here; NOTE(review): presumably resolved at runtime -- confirm
 140 int (*os::Linux::_pthread_getcpuclockid)(pthread_t, clockid_t *) = NULL;  // function pointer, NULL here; NOTE(review): presumably resolved at runtime -- confirm
 141 Mutex* os::Linux::_createThread_lock = NULL;
 142 pthread_t os::Linux::_main_thread;
 143 int os::Linux::_page_size = -1;  // starts at -1; NOTE(review): presumably replaced with the real page size during initialization -- confirm
 144 bool os::Linux::_is_floating_stack = false;
 145 bool os::Linux::_is_NPTL = false;
 146 bool os::Linux::_supports_fast_thread_cpu_time = false;
 147 const char * os::Linux::_glibc_version = NULL;
 148 const char * os::Linux::_libpthread_version = NULL;
 149 


3241 
3242   if (i < max_tries) {
3243     _highest_vm_reserved_address = MAX2(old_highest, (address)requested_addr + bytes);
3244     return requested_addr;
3245   } else {
3246     _highest_vm_reserved_address = old_highest;
3247     return NULL;
3248   }
3249 }
3250 
3251 size_t os::read(int fd, void *buf, unsigned int nBytes) {  // Forwards to the global ::read() with the same descriptor, buffer and byte count.
3252   return ::read(fd, buf, nBytes);  // NOTE(review): POSIX read() returns ssize_t (-1 on error); converting to size_t turns -1 into a very large value -- confirm callers test for (size_t)-1
3253 }
3254 
3255 // TODO-FIXME: reconcile Solaris' os::sleep with the linux variation.
3256 // Solaris uses poll(), linux uses park().
3257 // Poll() is likely a better choice, assuming that Thread.interrupt()
3258 // generates a SIGUSRx signal. Note that SIGUSR1 can interfere with
3259 // SIGSEGV, see 4355769.
3260 


3261 int os::sleep(Thread* thread, jlong millis, bool interruptible) {  // Parks the calling thread until 'millis' ms have elapsed; returns OS_INTRPT if interrupted (interruptible mode only), otherwise OS_OK.
3262   assert(thread == Thread::current(),  "thread consistency check");  // a thread may only put itself to sleep
3263 
3264   ParkEvent * const slp = thread->_SleepEvent ;  // the event this thread parks on below
3265   slp->reset() ;  // NOTE(review): presumably discards a stale wakeup so the first park() blocks -- confirm
3266   OrderAccess::fence() ;
3267 
3268   if (interruptible) {  // interruptible path: checks the interrupt state each iteration and treats 'thread' as a JavaThread
3269     jlong prevtime = javaTimeNanos();
3270 
3271     for (;;) {
3272       if (os::is_interrupted(thread, true)) {  // NOTE(review): the 'true' argument presumably clears the interrupt state -- confirm
3273         return OS_INTRPT;
3274       }
3275 
3276       jlong newtime = javaTimeNanos();
3277 
3278       if (newtime - prevtime < 0) {
3279         // time moving backwards, should only happen if no monotonic clock
3280         // not a guarantee() because JVM should not abort on kernel/glibc bugs
3281         assert(!Linux::supports_monotonic_clock(), "time moving backwards");
3282       } else {
3283         millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;  // charge whole elapsed ms; NOTE(review): the sub-millisecond remainder is dropped because prevtime is reset to newtime below
3284       }
3285 
3286       if(millis <= 0) {  // requested duration has been used up
3287         return OS_OK;
3288       }
3289 
3290       prevtime = newtime;
3291 
3292       {  // tbivm and osts are scoped objects that live until the closing brace of this block
3293         assert(thread->is_Java_thread(), "sanity check");
3294         JavaThread *jt = (JavaThread *) thread;
3295         ThreadBlockInVM tbivm(jt);
3296         OSThreadWaitState osts(jt->osthread(), false /* not Object.wait() */);
3297 
3298         jt->set_suspend_equivalent();
3299         // cleared by handle_special_suspend_equivalent_condition() or
3300         // java_suspend_self() via check_and_wait_while_suspended()
3301 
3302         slp->park(millis);  // wait with the remaining budget as timeout; the loop re-evaluates elapsed time after every return
3303 
3304         // were we externally suspended while we were waiting?
3305         jt->check_and_wait_while_suspended();
3306       }
3307     }
3308   } else {  // non-interruptible path: same countdown, without the interrupt check or the suspend handling
3309     OSThreadWaitState osts(thread->osthread(), false /* not Object.wait() */);
3310     jlong prevtime = javaTimeNanos();
3311 
3312     for (;;) {
3313       // It'd be nice to avoid the back-to-back javaTimeNanos() calls on
3314       // the 1st iteration ...
3315       jlong newtime = javaTimeNanos();
3316 
3317       if (newtime - prevtime < 0) {
3318         // time moving backwards, should only happen if no monotonic clock
3319         // not a guarantee() because JVM should not abort on kernel/glibc bugs
3320         assert(!Linux::supports_monotonic_clock(), "time moving backwards");
3321       } else {
3322         millis -= (newtime - prevtime) / NANOSECS_PER_MILLISEC;  // whole elapsed ms only; same truncation as in the interruptible path
3323       }
3324 
3325       if(millis <= 0) break ;
3326 
3327       prevtime = newtime;
3328       slp->park(millis);
3329     }
3330     return OS_OK ;  // this path has no OS_INTRPT exit
3331   }
3332 }
3333 
3334 int os::naked_sleep() {  // Sleeps the current thread for 1 ms and returns the result of os::sleep().
3335   // %% make the sleep time an integer flag. for now use 1 millisec.
3336   return os::sleep(Thread::current(), 1, false);  // arguments: current thread, 1 ms, interruptible = false
3337 }
3338 
3339 // Sleep forever; naked call to OS-specific sleep; use with CAUTION
3340 void os::infinite_sleep() {
3341   while (true) {    // sleep forever ...
3342     ::sleep(100);   // ... 100 seconds at a time


3904           tty->print_cr("Info: AllowUserSignalHandlers is activated, all active signal checking is disabled");
3905         }
3906         check_signals = false;
3907       }
3908     }
3909   }
3910 }
3911 
3912 // This is the fastest way to get thread cpu time on Linux.
3913 // Returns cpu time (user+sys) for any thread, not only for current.
3914 // POSIX compliant clocks are implemented in the kernels 2.6.16+.
3915 // It might work on 2.6.10+ with a special kernel/glibc patch.
3916 // For reference, please, see IEEE Std 1003.1-2004:
3917 //   http://www.unix.org/single_unix_specification
3918 
3919 jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) {  // Reads the clock identified by 'clockid' and returns its value in nanoseconds.
3920   struct timespec tp;
3921   int rc = os::Linux::clock_gettime(clockid, &tp);
3922   assert(rc == 0, "clock_gettime is expected to return 0 code");  // rc is examined only by this assert
3923 
3924   return ((jlong)tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec;  // widen tv_sec first: keeps the product 64-bit even if time_t and NANOSECS_PER_SEC are both 32-bit
3925 }
3926 
3927 /////
3928 // glibc on Linux platform uses non-documented flag
3929 // to indicate, that some special sort of signal
3930 // trampoline is used.
3931 // We will never set this flag, and we should
3932 // ignore this flag in our diagnostic
3933 #ifdef SIGNIFICANT_SIGNAL_MASK
3934 #undef SIGNIFICANT_SIGNAL_MASK
3935 #endif
3936 #define SIGNIFICANT_SIGNAL_MASK (~0x04000000)
3937 
3938 static const char* get_signal_handler_name(address handler,
3939                                            char* buf, int buflen) {
3940   int offset;
3941   bool found = os::dll_address_to_library_name(handler, buf, buflen, &offset);
3942   if (found) {
3943     // skip directory names
3944     const char *p1, *p2;


5145   // circumstances this can cause a thread to return prematurely from
5146   // cond_{timed}wait() but the spurious wakeup is benign and the victim will
5147   // simply re-test the condition and re-park itself.
5148 }
5149 
5150 
5151 // JSR166
5152 // -------------------------------------------------------
5153 
5154 /*
5155  * The solaris and linux implementations of park/unpark are fairly
5156  * conservative for now, but can be improved. They currently use a
5157  * mutex/condvar pair, plus a count.
5158  * Park decrements count if > 0, else does a condvar wait.  Unpark
5159  * sets count to 1 and signals condvar.  Only one thread ever waits
5160  * on the condvar. Contention seen when trying to park implies that someone
5161  * is unparking you, so don't wait. And spurious returns are fine, so there
5162  * is no need to track notifications.
5163  */
5164 



5165 #define MAX_SECS 100000000
5166 /*
5167  * This code is common to linux and solaris and will be moved to a
5168  * common place in dolphin.
5169  *
5170  * The passed in time value is either a relative time in nanoseconds
5171  * or an absolute time in milliseconds. Either way it has to be unpacked
5172  * into suitable seconds and nanoseconds components and stored in the
5173  * given timespec structure.
5174  * Given time is a 64-bit value and the time_t used in the timespec is only
5175  * a signed-32-bit value (except on 64-bit Linux) we have to watch for
5176  * overflow if times way in the future are given. Further on Solaris versions
5177  * prior to 10 there is a restriction (see cond_timedwait) that the specified
5178  * number of seconds, in abstime, is less than current_time  + 100,000,000.
5179  * As it will be 28 years before "now + 100000000" will overflow we can
5180  * ignore overflow and just impose a hard-limit on seconds using the value
5181  * of "now + 100,000,000". This places a limit on the timeout of about 3.17
5182  * years from "now".
5183  */
5184