1 /* 2 * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef OS_LINUX_OS_LINUX_HPP 26 #define OS_LINUX_OS_LINUX_HPP 27 28 // Linux_OS defines the interface to Linux operating systems 29 30 // Information about the protection of the page at address '0' on this os. 31 static bool zero_page_read_protected() { return true; } 32 33 class Linux { 34 friend class CgroupSubsystem; 35 friend class os; 36 friend class OSContainer; 37 friend class TestReserveMemorySpecial; 38 39 static bool libjsig_is_loaded; // libjsig that interposes sigaction(), 40 // __sigaction(), signal() is loaded 41 static struct sigaction *(*get_signal_action)(int); 42 43 static void check_signal_handler(int sig); 44 45 static int (*_pthread_getcpuclockid)(pthread_t, clockid_t *); 46 static int (*_pthread_setname_np)(pthread_t, const char*); 47 48 static address _initial_thread_stack_bottom; 49 static uintptr_t _initial_thread_stack_size; 50 51 static const char *_glibc_version; 52 static const char *_libpthread_version; 53 54 static bool _supports_fast_thread_cpu_time; 55 56 static GrowableArray<int>* _cpu_to_node; 57 static GrowableArray<int>* _nindex_to_node; 58 59 protected: 60 61 static julong _physical_memory; 62 static pthread_t _main_thread; 63 static int _page_size; 64 65 static julong available_memory(); 66 static julong physical_memory() { return _physical_memory; } 67 static void set_physical_memory(julong phys_mem) { _physical_memory = phys_mem; } 68 static int active_processor_count(); 69 70 static void initialize_system_info(); 71 72 static int commit_memory_impl(char* addr, size_t bytes, bool exec); 73 static int commit_memory_impl(char* addr, size_t bytes, 74 size_t alignment_hint, bool exec); 75 76 static void set_glibc_version(const char *s) { _glibc_version = s; } 77 static void set_libpthread_version(const char *s) { _libpthread_version = s; } 78 79 static void rebuild_cpu_to_node_map(); 80 static void rebuild_nindex_to_node_map(); 81 static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; } 82 static GrowableArray<int>* nindex_to_node() { return _nindex_to_node; } 83 84 static size_t find_large_page_size(); 85 static size_t setup_large_page_size(); 86 87 static bool setup_large_page_type(size_t page_size); 88 static bool transparent_huge_pages_sanity_check(bool warn, size_t pages_size); 89 static bool hugetlbfs_sanity_check(bool warn, size_t page_size); 90 91 static char* reserve_memory_special_shm(size_t bytes, size_t alignment, char* req_addr, bool exec); 92 static char* reserve_memory_special_huge_tlbfs(size_t bytes, size_t alignment, char* req_addr, bool exec); 93 static char* reserve_memory_special_huge_tlbfs_only(size_t bytes, char* req_addr, bool exec); 94 static char* reserve_memory_special_huge_tlbfs_mixed(size_t bytes, size_t alignment, char* req_addr, bool exec); 95 96 static bool release_memory_special_impl(char* base, size_t bytes); 97 static bool release_memory_special_shm(char* base, size_t bytes); 98 static bool release_memory_special_huge_tlbfs(char* base, size_t bytes); 99 100 static void print_full_memory_info(outputStream* st); 101 static void print_container_info(outputStream* st); 102 static void print_steal_info(outputStream* st); 103 static void print_distro_info(outputStream* st); 104 static void print_libversion_info(outputStream* st); 105 static void print_proc_sys_info(outputStream* st); 106 static void print_ld_preload_file(outputStream* st); 107 static void print_uptime_info(outputStream* st); 108 109 public: 110 struct CPUPerfTicks { 111 uint64_t used; 112 uint64_t usedKernel; 113 uint64_t total; 114 uint64_t steal; 115 bool has_steal_ticks; 116 }; 117 118 // which_logical_cpu=-1 returns accumulated ticks for all cpus. 119 static bool get_tick_information(CPUPerfTicks* pticks, int which_logical_cpu); 120 static bool _stack_is_executable; 121 static void *dlopen_helper(const char *name, char *ebuf, int ebuflen); 122 static void *dll_load_in_vmthread(const char *name, char *ebuf, int ebuflen); 123 124 static void init_thread_fpu_state(); 125 static int get_fpu_control_word(); 126 static void set_fpu_control_word(int fpu_control); 127 static pthread_t main_thread(void) { return _main_thread; } 128 // returns kernel thread id (similar to LWP id on Solaris), which can be 129 // used to access /proc 130 static pid_t gettid(); 131 static void hotspot_sigmask(Thread* thread); 132 133 static address initial_thread_stack_bottom(void) { return _initial_thread_stack_bottom; } 134 static uintptr_t initial_thread_stack_size(void) { return _initial_thread_stack_size; } 135 136 static int page_size(void) { return _page_size; } 137 static void set_page_size(int val) { _page_size = val; } 138 139 static address ucontext_get_pc(const ucontext_t* uc); 140 static void ucontext_set_pc(ucontext_t* uc, address pc); 141 static intptr_t* ucontext_get_sp(const ucontext_t* uc); 142 static intptr_t* ucontext_get_fp(const ucontext_t* uc); 143 144 // For Analyzer Forte AsyncGetCallTrace profiling support: 145 // 146 // This interface should be declared in os_linux_i486.hpp, but 147 // that file provides extensions to the os class and not the 148 // Linux class. 149 static ExtendedPC fetch_frame_from_ucontext(Thread* thread, const ucontext_t* uc, 150 intptr_t** ret_sp, intptr_t** ret_fp); 151 152 static bool get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr); 153 154 // This boolean allows users to forward their own non-matching signals 155 // to JVM_handle_linux_signal, harmlessly. 156 static bool signal_handlers_are_installed; 157 158 static int get_our_sigflags(int); 159 static void set_our_sigflags(int, int); 160 static void signal_sets_init(); 161 static void install_signal_handlers(); 162 static void set_signal_handler(int, bool); 163 164 static sigset_t* unblocked_signals(); 165 static sigset_t* vm_signals(); 166 167 // For signal-chaining 168 static struct sigaction *get_chained_signal_action(int sig); 169 static bool chained_handler(int sig, siginfo_t* siginfo, void* context); 170 171 // GNU libc and libpthread version strings 172 static const char *glibc_version() { return _glibc_version; } 173 static const char *libpthread_version() { return _libpthread_version; } 174 175 static void libpthread_init(); 176 static void sched_getcpu_init(); 177 static bool libnuma_init(); 178 static void* libnuma_dlsym(void* handle, const char* name); 179 // libnuma v2 (libnuma_1.2) symbols 180 static void* libnuma_v2_dlsym(void* handle, const char* name); 181 182 // Return default guard size for the specified thread type 183 static size_t default_guard_size(os::ThreadType thr_type); 184 185 static void capture_initial_stack(size_t max_size); 186 187 // Stack overflow handling 188 static bool manually_expand_stack(JavaThread * t, address addr); 189 190 // fast POSIX clocks support 191 static void fast_thread_clock_init(void); 192 193 static int pthread_getcpuclockid(pthread_t tid, clockid_t *clock_id) { 194 return _pthread_getcpuclockid ? _pthread_getcpuclockid(tid, clock_id) : -1; 195 } 196 197 static bool supports_fast_thread_cpu_time() { 198 return _supports_fast_thread_cpu_time; 199 } 200 201 static jlong fast_thread_cpu_time(clockid_t clockid); 202 203 // Stack repair handling 204 205 // none present 206 207 private: 208 static void numa_init(); 209 static void expand_stack_to(address bottom); 210 211 typedef int (*sched_getcpu_func_t)(void); 212 typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen); 213 typedef int (*numa_max_node_func_t)(void); 214 typedef int (*numa_num_configured_nodes_func_t)(void); 215 typedef int (*numa_available_func_t)(void); 216 typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node); 217 typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask); 218 typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t size, struct bitmask* mask); 219 typedef struct bitmask* (*numa_get_membind_func_t)(void); 220 typedef struct bitmask* (*numa_get_interleave_mask_func_t)(void); 221 typedef long (*numa_move_pages_func_t)(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags); 222 typedef void (*numa_set_preferred_func_t)(int node); 223 typedef void (*numa_set_bind_policy_func_t)(int policy); 224 typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n); 225 typedef int (*numa_distance_func_t)(int node1, int node2); 226 227 static sched_getcpu_func_t _sched_getcpu; 228 static numa_node_to_cpus_func_t _numa_node_to_cpus; 229 static numa_max_node_func_t _numa_max_node; 230 static numa_num_configured_nodes_func_t _numa_num_configured_nodes; 231 static numa_available_func_t _numa_available; 232 static numa_tonode_memory_func_t _numa_tonode_memory; 233 static numa_interleave_memory_func_t _numa_interleave_memory; 234 static numa_interleave_memory_v2_func_t _numa_interleave_memory_v2; 235 static numa_set_bind_policy_func_t _numa_set_bind_policy; 236 static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset; 237 static numa_distance_func_t _numa_distance; 238 static numa_get_membind_func_t _numa_get_membind; 239 static numa_get_interleave_mask_func_t _numa_get_interleave_mask; 240 static numa_move_pages_func_t _numa_move_pages; 241 static numa_set_preferred_func_t _numa_set_preferred; 242 static unsigned long* _numa_all_nodes; 243 static struct bitmask* _numa_all_nodes_ptr; 244 static struct bitmask* _numa_nodes_ptr; 245 static struct bitmask* _numa_interleave_bitmask; 246 static struct bitmask* _numa_membind_bitmask; 247 248 static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; } 249 static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; } 250 static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; } 251 static void set_numa_num_configured_nodes(numa_num_configured_nodes_func_t func) { _numa_num_configured_nodes = func; } 252 static void set_numa_available(numa_available_func_t func) { _numa_available = func; } 253 static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; } 254 static void set_numa_interleave_memory(numa_interleave_memory_func_t func) { _numa_interleave_memory = func; } 255 static void set_numa_interleave_memory_v2(numa_interleave_memory_v2_func_t func) { _numa_interleave_memory_v2 = func; } 256 static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { _numa_set_bind_policy = func; } 257 static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; } 258 static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; } 259 static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; } 260 static void set_numa_get_interleave_mask(numa_get_interleave_mask_func_t func) { _numa_get_interleave_mask = func; } 261 static void set_numa_move_pages(numa_move_pages_func_t func) { _numa_move_pages = func; } 262 static void set_numa_set_preferred(numa_set_preferred_func_t func) { _numa_set_preferred = func; } 263 static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; } 264 static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); } 265 static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == NULL ? NULL : *ptr); } 266 static void set_numa_interleave_bitmask(struct bitmask* ptr) { _numa_interleave_bitmask = ptr ; } 267 static void set_numa_membind_bitmask(struct bitmask* ptr) { _numa_membind_bitmask = ptr ; } 268 static int sched_getcpu_syscall(void); 269 270 enum NumaAllocationPolicy{ 271 NotInitialized, 272 Membind, 273 Interleave 274 }; 275 static NumaAllocationPolicy _current_numa_policy; 276 277 public: 278 static int sched_getcpu() { return _sched_getcpu != NULL ? _sched_getcpu() : -1; } 279 static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) { 280 return _numa_node_to_cpus != NULL ? _numa_node_to_cpus(node, buffer, bufferlen) : -1; 281 } 282 static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; } 283 static int numa_num_configured_nodes() { 284 return _numa_num_configured_nodes != NULL ? _numa_num_configured_nodes() : -1; 285 } 286 static int numa_available() { return _numa_available != NULL ? _numa_available() : -1; } 287 static int numa_tonode_memory(void *start, size_t size, int node) { 288 return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1; 289 } 290 291 static bool is_running_in_interleave_mode() { 292 return _current_numa_policy == Interleave; 293 } 294 295 static void set_configured_numa_policy(NumaAllocationPolicy numa_policy) { 296 _current_numa_policy = numa_policy; 297 } 298 299 static NumaAllocationPolicy identify_numa_policy() { 300 for (int node = 0; node <= Linux::numa_max_node(); node++) { 301 if (Linux::_numa_bitmask_isbitset(Linux::_numa_interleave_bitmask, node)) { 302 return Interleave; 303 } 304 } 305 return Membind; 306 } 307 308 static void numa_interleave_memory(void *start, size_t size) { 309 // Prefer v2 API 310 if (_numa_interleave_memory_v2 != NULL) { 311 if (is_running_in_interleave_mode()) { 312 _numa_interleave_memory_v2(start, size, _numa_interleave_bitmask); 313 } else if (_numa_membind_bitmask != NULL) { 314 _numa_interleave_memory_v2(start, size, _numa_membind_bitmask); 315 } 316 } else if (_numa_interleave_memory != NULL) { 317 _numa_interleave_memory(start, size, _numa_all_nodes); 318 } 319 } 320 static void numa_set_preferred(int node) { 321 if (_numa_set_preferred != NULL) { 322 _numa_set_preferred(node); 323 } 324 } 325 static void numa_set_bind_policy(int policy) { 326 if (_numa_set_bind_policy != NULL) { 327 _numa_set_bind_policy(policy); 328 } 329 } 330 static int numa_distance(int node1, int node2) { 331 return _numa_distance != NULL ? _numa_distance(node1, node2) : -1; 332 } 333 static long numa_move_pages(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags) { 334 return _numa_move_pages != NULL ? _numa_move_pages(pid, count, pages, nodes, status, flags) : -1; 335 } 336 static int get_node_by_cpu(int cpu_id); 337 static int get_existing_num_nodes(); 338 // Check if numa node is configured (non-zero memory node). 339 static bool is_node_in_configured_nodes(unsigned int n) { 340 if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) { 341 return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n); 342 } else 343 return false; 344 } 345 // Check if numa node exists in the system (including zero memory nodes). 346 static bool is_node_in_existing_nodes(unsigned int n) { 347 if (_numa_bitmask_isbitset != NULL && _numa_nodes_ptr != NULL) { 348 return _numa_bitmask_isbitset(_numa_nodes_ptr, n); 349 } else if (_numa_bitmask_isbitset != NULL && _numa_all_nodes_ptr != NULL) { 350 // Not all libnuma API v2 implement numa_nodes_ptr, so it's not possible 351 // to trust the API version for checking its absence. On the other hand, 352 // numa_nodes_ptr found in libnuma 2.0.9 and above is the only way to get 353 // a complete view of all numa nodes in the system, hence numa_nodes_ptr 354 // is used to handle CPU and nodes on architectures (like PowerPC) where 355 // there can exist nodes with CPUs but no memory or vice-versa and the 356 // nodes may be non-contiguous. For most of the architectures, like 357 // x86_64, numa_node_ptr presents the same node set as found in 358 // numa_all_nodes_ptr so it's possible to use numa_all_nodes_ptr as a 359 // substitute. 360 return _numa_bitmask_isbitset(_numa_all_nodes_ptr, n); 361 } else 362 return false; 363 } 364 // Check if node is in bound node set. 365 static bool is_node_in_bound_nodes(int node) { 366 if (_numa_bitmask_isbitset != NULL) { 367 if (is_running_in_interleave_mode()) { 368 return _numa_bitmask_isbitset(_numa_interleave_bitmask, node); 369 } else { 370 return _numa_membind_bitmask != NULL ? _numa_bitmask_isbitset(_numa_membind_bitmask, node) : false; 371 } 372 } 373 return false; 374 } 375 // Check if bound to only one numa node. 376 // Returns true if bound to a single numa node, otherwise returns false. 377 static bool is_bound_to_single_node() { 378 int nodes = 0; 379 struct bitmask* bmp = NULL; 380 unsigned int node = 0; 381 unsigned int highest_node_number = 0; 382 383 if (_numa_get_membind != NULL && _numa_max_node != NULL && _numa_bitmask_isbitset != NULL) { 384 bmp = _numa_get_membind(); 385 highest_node_number = _numa_max_node(); 386 } else { 387 return false; 388 } 389 390 for (node = 0; node <= highest_node_number; node++) { 391 if (_numa_bitmask_isbitset(bmp, node)) { 392 nodes++; 393 } 394 } 395 396 if (nodes == 1) { 397 return true; 398 } else { 399 return false; 400 } 401 } 402 403 static const GrowableArray<int>* numa_nindex_to_node() { 404 return _nindex_to_node; 405 } 406 }; 407 408 #endif // OS_LINUX_OS_LINUX_HPP