1 /*
   2  * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef OS_LINUX_OS_LINUX_HPP
  26 #define OS_LINUX_OS_LINUX_HPP
  27 
// The Linux class defines the interface to Linux operating systems
  29 
  30 // Information about the protection of the page at address '0' on this os.
  31 static bool zero_page_read_protected() { return true; }
  32 
  33 class Linux {
  34   friend class CgroupSubsystem;
  35   friend class os;
  36   friend class OSContainer;
  37   friend class TestReserveMemorySpecial;
  38 
  39   static bool libjsig_is_loaded;        // libjsig that interposes sigaction(),
  40                                         // __sigaction(), signal() is loaded
  41   static struct sigaction *(*get_signal_action)(int);
  42 
  43   static void check_signal_handler(int sig);
  44 
  45   static int (*_pthread_getcpuclockid)(pthread_t, clockid_t *);
  46   static int (*_pthread_setname_np)(pthread_t, const char*);
  47 
  48   static address   _initial_thread_stack_bottom;
  49   static uintptr_t _initial_thread_stack_size;
  50 
  51   static const char *_glibc_version;
  52   static const char *_libpthread_version;
  53 
  54   static bool _supports_fast_thread_cpu_time;
  55 
  56   static GrowableArray<int>* _cpu_to_node;
  57   static GrowableArray<int>* _nindex_to_node;
  58 
  59  protected:
  60 
  61   static julong _physical_memory;
  62   static pthread_t _main_thread;
  63   static int _page_size;
  64 
  65   static julong available_memory();
  66   static julong physical_memory() { return _physical_memory; }
  67   static void set_physical_memory(julong phys_mem) { _physical_memory = phys_mem; }
  68   static int active_processor_count();
  69 
  70   static void initialize_system_info();
  71 
  72   static int commit_memory_impl(char* addr, size_t bytes, bool exec);
  73   static int commit_memory_impl(char* addr, size_t bytes,
  74                                 size_t alignment_hint, bool exec);
  75 
  76   static void set_glibc_version(const char *s)      { _glibc_version = s; }
  77   static void set_libpthread_version(const char *s) { _libpthread_version = s; }
  78 
  79   static void rebuild_cpu_to_node_map();
  80   static void rebuild_nindex_to_node_map();
  81   static GrowableArray<int>* cpu_to_node()    { return _cpu_to_node; }
  82   static GrowableArray<int>* nindex_to_node()  { return _nindex_to_node; }
  83 
  84   static size_t find_large_page_size();
  85   static size_t setup_large_page_size();
  86 
  87   static bool setup_large_page_type(size_t page_size);
  88   static bool transparent_huge_pages_sanity_check(bool warn, size_t pages_size);
  89   static bool hugetlbfs_sanity_check(bool warn, size_t page_size);
  90 
  91   static char* reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec);
  92   static char* reserve_memory_special_huge_tlbfs(size_t bytes, size_t alignment, char* req_addr, bool exec);
  93   static char* reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec);
  94   static char* reserve_memory_special_huge_tlbfs_mixed(size_t bytes, size_t alignment, char* req_addr, bool exec);
  95 
  96   static bool release_memory_special_impl(char* base, size_t bytes);
  97   static bool release_memory_special_shm(char* base, size_t bytes);
  98   static bool release_memory_special_huge_tlbfs(char* base, size_t bytes);
  99 
 100   static void print_full_memory_info(outputStream* st);
 101   static void print_container_info(outputStream* st);
 102   static void print_steal_info(outputStream* st);
 103   static void print_distro_info(outputStream* st);
 104   static void print_libversion_info(outputStream* st);
 105   static void print_proc_sys_info(outputStream* st);
 106   static void print_ld_preload_file(outputStream* st);
 107   static void print_uptime_info(outputStream* st);
 108 
 109  public:
 110   struct CPUPerfTicks {
 111     uint64_t used;
 112     uint64_t usedKernel;
 113     uint64_t total;
 114     uint64_t steal;
 115     bool     has_steal_ticks;
 116   };
 117 
 118   // which_logical_cpu=-1 returns accumulated ticks for all cpus.
 119   static bool get_tick_information(CPUPerfTicks* pticks, int which_logical_cpu);
 120   static bool _stack_is_executable;
 121   static void *dlopen_helper(const char *name, char *ebuf, int ebuflen);
 122   static void *dll_load_in_vmthread(const char *name, char *ebuf, int ebuflen);
 123 
 124   static void init_thread_fpu_state();
 125   static int  get_fpu_control_word();
 126   static void set_fpu_control_word(int fpu_control);
 127   static pthread_t main_thread(void)                                { return _main_thread; }
 128   // returns kernel thread id (similar to LWP id on Solaris), which can be
 129   // used to access /proc
 130   static pid_t gettid();
 131   static void hotspot_sigmask(Thread* thread);
 132 
 133   static address   initial_thread_stack_bottom(void)                { return _initial_thread_stack_bottom; }
 134   static uintptr_t initial_thread_stack_size(void)                  { return _initial_thread_stack_size; }
 135 
 136   static int page_size(void)                                        { return _page_size; }
 137   static void set_page_size(int val)                                { _page_size = val; }
 138 
 139   static address   ucontext_get_pc(const ucontext_t* uc);
 140   static void ucontext_set_pc(ucontext_t* uc, address pc);
 141   static intptr_t* ucontext_get_sp(const ucontext_t* uc);
 142   static intptr_t* ucontext_get_fp(const ucontext_t* uc);
 143 
 144   // For Analyzer Forte AsyncGetCallTrace profiling support:
 145   //
 146   // This interface should be declared in os_linux_i486.hpp, but
 147   // that file provides extensions to the os class and not the
 148   // Linux class.
 149   static ExtendedPC fetch_frame_from_ucontext(Thread* thread, const ucontext_t* uc,
 150                                               intptr_t** ret_sp, intptr_t** ret_fp);
 151 
 152   static bool get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr);
 153 
 154   // This boolean allows users to forward their own non-matching signals
 155   // to JVM_handle_linux_signal, harmlessly.
 156   static bool signal_handlers_are_installed;
 157 
 158   static int get_our_sigflags(int);
 159   static void set_our_sigflags(int, int);
 160   static void signal_sets_init();
 161   static void install_signal_handlers();
 162   static void set_signal_handler(int, bool);
 163 
 164   static sigset_t* unblocked_signals();
 165   static sigset_t* vm_signals();
 166 
 167   // For signal-chaining
 168   static struct sigaction *get_chained_signal_action(int sig);
 169   static bool chained_handler(int sig, siginfo_t* siginfo, void* context);
 170 
 171   // GNU libc and libpthread version strings
 172   static const char *glibc_version()          { return _glibc_version; }
 173   static const char *libpthread_version()     { return _libpthread_version; }
 174 
 175   static void libpthread_init();
 176   static void sched_getcpu_init();
 177   static bool libnuma_init();
 178   static void* libnuma_dlsym(void* handle, const char* name);
 179   // libnuma v2 (libnuma_1.2) symbols
 180   static void* libnuma_v2_dlsym(void* handle, const char* name);
 181 
 182   // Return default guard size for the specified thread type
 183   static size_t default_guard_size(os::ThreadType thr_type);
 184 
 185   static void capture_initial_stack(size_t max_size);
 186 
 187   // Stack overflow handling
 188   static bool manually_expand_stack(JavaThread * t, address addr);
 189 
 190   // fast POSIX clocks support
 191   static void fast_thread_clock_init(void);
 192 
 193   static int pthread_getcpuclockid(pthread_t tid, clockid_t *clock_id) {
 194     return _pthread_getcpuclockid ? _pthread_getcpuclockid(tid, clock_id) : -1;
 195   }
 196 
 197   static bool supports_fast_thread_cpu_time() {
 198     return _supports_fast_thread_cpu_time;
 199   }
 200 
 201   static jlong fast_thread_cpu_time(clockid_t clockid);
 202 
 203   // Stack repair handling
 204 
 205   // none present
 206 
 207  private:
 208   static void numa_init();
 209   static void expand_stack_to(address bottom);
 210 
 211   typedef int (*sched_getcpu_func_t)(void);
 212   typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
 213   typedef int (*numa_max_node_func_t)(void);
 214   typedef int (*numa_num_configured_nodes_func_t)(void);
 215   typedef int (*numa_available_func_t)(void);
 216   typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
 217   typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask);
 218   typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t size, struct bitmask* mask);
 219   typedef struct bitmask* (*numa_get_membind_func_t)(void);
 220   typedef struct bitmask* (*numa_get_interleave_mask_func_t)(void);
 221   typedef long (*numa_move_pages_func_t)(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags);
 222   typedef void (*numa_set_preferred_func_t)(int node);
 223   typedef void (*numa_set_bind_policy_func_t)(int policy);
 224   typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n);
 225   typedef int (*numa_distance_func_t)(int node1, int node2);
 226 
 227   static sched_getcpu_func_t _sched_getcpu;
 228   static numa_node_to_cpus_func_t _numa_node_to_cpus;
 229   static numa_max_node_func_t _numa_max_node;
 230   static numa_num_configured_nodes_func_t _numa_num_configured_nodes;
 231   static numa_available_func_t _numa_available;
 232   static numa_tonode_memory_func_t _numa_tonode_memory;
 233   static numa_interleave_memory_func_t _numa_interleave_memory;
 234   static numa_interleave_memory_v2_func_t _numa_interleave_memory_v2;
 235   static numa_set_bind_policy_func_t _numa_set_bind_policy;
 236   static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset;
 237   static numa_distance_func_t _numa_distance;
 238   static numa_get_membind_func_t _numa_get_membind;
 239   static numa_get_interleave_mask_func_t _numa_get_interleave_mask;
 240   static numa_move_pages_func_t _numa_move_pages;
 241   static numa_set_preferred_func_t _numa_set_preferred;
 242   static unsigned long* _numa_all_nodes;
 243   static struct bitmask* _numa_all_nodes_ptr;
 244   static struct bitmask* _numa_nodes_ptr;
 245   static struct bitmask* _numa_interleave_bitmask;
 246   static struct bitmask* _numa_membind_bitmask;
 247 
 248   static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
 249   static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; }
 250   static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; }
 251   static void set_numa_num_configured_nodes(numa_num_configured_nodes_func_t func) { _numa_num_configured_nodes = func; }
 252   static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
 253   static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
 254   static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; }
 255   static void set_numa_interleave_memory_v2(numa_interleave_memory_v2_func_t func) { _numa_interleave_memory_v2 = func; }
 256   static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; }
 257   static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; }
 258   static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; }
 259   static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; }
 260   static void set_numa_get_interleave_mask(numa_get_interleave_mask_func_t func) { _numa_get_interleave_mask = func; }
 261   static void set_numa_move_pages(numa_move_pages_func_t func) { _numa_move_pages = func; }
 262   static void set_numa_set_preferred(numa_set_preferred_func_t func) { _numa_set_preferred = func; }
 263   static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; }
 264   static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
 265   static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == NULL ? NULL : *ptr); }
 266   static void set_numa_interleave_bitmask(struct bitmask* ptr)     { _numa_interleave_bitmask = ptr ;   }
 267   static void set_numa_membind_bitmask(struct bitmask* ptr)        { _numa_membind_bitmask = ptr ;      }
 268   static int sched_getcpu_syscall(void);
 269 
 270   enum NumaAllocationPolicy{
 271     NotInitialized,
 272     Membind,
 273     Interleave
 274   };
 275   static NumaAllocationPolicy _current_numa_policy;
 276 
 277  public:
 278   static int sched_getcpu()  { return _sched_getcpu != NULL ? _sched_getcpu() : -1; }
 279   static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) {
 280     return _numa_node_to_cpus != NULL ? _numa_node_to_cpus(node, buffer, bufferlen) : -1;
 281   }
 282   static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; }
 283   static int numa_num_configured_nodes() {
 284     return _numa_num_configured_nodes != NULL ? _numa_num_configured_nodes() : -1;
 285   }
 286   static int numa_available() { return _numa_available != NULL ? _numa_available() : -1; }
 287   static int numa_tonode_memory(void *start, size_t size, int node) {
 288     return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1;
 289   }
 290 
 291   static bool is_running_in_interleave_mode() {
 292     return _current_numa_policy == Interleave;
 293   }
 294 
 295   static void set_configured_numa_policy(NumaAllocationPolicy numa_policy) {
 296     _current_numa_policy = numa_policy;
 297   }
 298 
 299   static NumaAllocationPolicy identify_numa_policy() {
 300     for (int node = 0; node <= Linux::numa_max_node(); node++) {
 301       if (Linux::_numa_bitmask_isbitset(Linux::_numa_interleave_bitmask, node)) {
 302         return Interleave;
 303       }
 304     }
 305     return Membind;
 306   }
 307 
 308   static void numa_interleave_memory(void *start, size_t size) {
 309     // Prefer v2 API
 310     if (_numa_interleave_memory_v2 != NULL) {
 311       if (is_running_in_interleave_mode()) {
 312         _numa_interleave_memory_v2(start, size, _numa_interleave_bitmask);
 313       } else if (_numa_membind_bitmask != NULL) {
 314         _numa_interleave_memory_v2(start, size, _numa_membind_bitmask);
 315       }
 316     } else if (_numa_interleave_memory != NULL) {
 317       _numa_interleave_memory(start, size, _numa_all_nodes);
 318     }
 319   }
 320   static void numa_set_preferred(int node) {
 321     if (_numa_set_preferred != NULL) {
 322       _numa_set_preferred(node);
 323     }
 324   }
 325   static void numa_set_bind_policy(int policy) {
 326     if (_numa_set_bind_policy != NULL) {
 327       _numa_set_bind_policy(policy);
 328     }
 329   }
 330   static int numa_distance(int node1, int node2) {
 331     return _numa_distance != NULL ? _numa_distance(node1, node2) : -1;
 332   }
 333   static long numa_move_pages(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags) {
 334     return _numa_move_pages != NULL ? _numa_move_pages(pid, count, pages, nodes, status, flags) : -1;
 335   }
 336   static int get_node_by_cpu(int cpu_id);
 337   static int get_existing_num_nodes();
 338   // Check if numa node is configured (non-zero memory node).
 339   static bool is_node_in_configured_nodes(unsigned int n) {
 340     if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) {
 341       return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n);
 342     } else
 343       return false;
 344   }
 345   // Check if numa node exists in the system (including zero memory nodes).
 346   static bool is_node_in_existing_nodes(unsigned int n) {
 347     if (_numa_bitmask_isbitset != NULL && _numa_nodes_ptr != NULL) {
 348       return _numa_bitmask_isbitset(_numa_nodes_ptr, n);
 349     } else if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) {
 350       // Not all libnuma API v2 implement numa_nodes_ptr, so it's not possible
 351       // to trust the API version for checking its absence. On the other hand,
 352       // numa_nodes_ptr found in libnuma 2.0.9 and above is the only way to get
 353       // a complete view of all numa nodes in the system, hence numa_nodes_ptr
 354       // is used to handle CPU and nodes on architectures (like PowerPC) where
 355       // there can exist nodes with CPUs but no memory or vice-versa and the
 356       // nodes may be non-contiguous. For most of the architectures, like
 357       // x86_64, numa_node_ptr presents the same node set as found in
 358       // numa_all_nodes_ptr so it's possible to use numa_all_nodes_ptr as a
 359       // substitute.
 360       return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n);
 361     } else
 362       return false;
 363   }
 364   // Check if node is in bound node set.
 365   static bool is_node_in_bound_nodes(int node) {
 366     if (_numa_bitmask_isbitset != NULL) {
 367       if (is_running_in_interleave_mode()) {
 368         return _numa_bitmask_isbitset(_numa_interleave_bitmask, node);
 369       } else {
 370         return _numa_membind_bitmask != NULL ? _numa_bitmask_isbitset(_numa_membind_bitmask, node) : false;
 371       }
 372     }
 373     return false;
 374   }
 375   // Check if bound to only one numa node.
 376   // Returns true if bound to a single numa node, otherwise returns false.
 377   static bool is_bound_to_single_node() {
 378     int nodes = 0;
 379     struct bitmask* bmp = NULL;
 380     unsigned int node = 0;
 381     unsigned int highest_node_number = 0;
 382 
 383     if (_numa_get_membind != NULL && _numa_max_node != NULL && _numa_bitmask_isbitset != NULL) {
 384       bmp = _numa_get_membind();
 385       highest_node_number = _numa_max_node();
 386     } else {
 387       return false;
 388     }
 389 
 390     for (node = 0; node <= highest_node_number; node++) {
 391       if (_numa_bitmask_isbitset(bmp, node)) {
 392         nodes++;
 393       }
 394     }
 395 
 396     if (nodes == 1) {
 397       return true;
 398     } else {
 399       return false;
 400     }
 401   }
 402 
 403   static const GrowableArray<int>* numa_nindex_to_node() {
 404     return _nindex_to_node;
 405   }
 406 };
 407 
 408 #endif // OS_LINUX_OS_LINUX_HPP