1 /* 2 * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 #include <assert.h> 27 #include <limits.h> 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <signal.h> 31 #include <pthread.h> 32 #include <sys/types.h> 33 #include <sys/socket.h> 34 #include <sys/time.h> 35 #include <sys/resource.h> 36 #include <sys/uio.h> 37 #include <unistd.h> 38 #include <errno.h> 39 #include <poll.h> 40 #include "jvm.h" 41 #include "net_util.h" 42 43 /* 44 * Stack allocated by thread when doing blocking operation 45 */ 46 typedef struct threadEntry { 47 pthread_t thr; /* this thread */ 48 struct threadEntry *next; /* next thread */ 49 int intr; /* interrupted */ 50 } threadEntry_t; 51 52 /* 53 * Heap allocated during initialized - one entry per fd 54 */ 55 typedef struct { 56 pthread_mutex_t lock; /* fd lock */ 57 threadEntry_t *threads; /* threads blocked on fd */ 58 } fdEntry_t; 59 60 /* 61 * Signal to unblock thread 62 */ 63 #define WAKEUP_SIGNAL (SIGRTMAX - 2) 64 65 /* 66 * fdTable holds one entry per file descriptor, up to a certain 67 * maximum. 68 * Theoretically, the number of possible file descriptors can get 69 * large, though usually it does not. Entries for small value file 70 * descriptors are kept in a simple table, which covers most scenarios. 71 * Entries for large value file descriptors are kept in an overflow 72 * table, which is organized as a sparse two dimensional array whose 73 * slabs are allocated on demand. This covers all corner cases while 74 * keeping memory consumption reasonable. 75 */ 76 77 /* Base table for low value file descriptors */ 78 static fdEntry_t* fdTable = NULL; 79 /* Maximum size of base table (in number of entries). */ 80 static const int fdTableMaxSize = 0x1000; /* 4K */ 81 /* Actual size of base table (in number of entries) */ 82 static int fdTableLen = 0; 83 /* Max. theoretical number of file descriptors on system. */ 84 static int fdLimit = 0; 85 86 /* Overflow table, should base table not be large enough. Organized as 87 * an array of n slabs, each holding 64k entries. 88 */ 89 static fdEntry_t** fdOverflowTable = NULL; 90 /* Number of slabs in the overflow table */ 91 static int fdOverflowTableLen = 0; 92 /* Number of entries in one slab */ 93 static const int fdOverflowTableSlabSize = 0x10000; /* 64k */ 94 pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER; 95 96 /* 97 * Null signal handler 98 */ 99 static void sig_wakeup(int sig) { 100 } 101 102 /* 103 * Initialization routine (executed when library is loaded) 104 * Allocate fd tables and sets up signal handler. 105 */ 106 static void __attribute((constructor)) init() { 107 struct rlimit nbr_files; 108 sigset_t sigset; 109 struct sigaction sa; 110 int i = 0; 111 112 /* Determine the maximum number of possible file descriptors. */ 113 if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) { 114 fprintf(stderr, "library initialization failed - " 115 "unable to get max # of allocated fds\n"); 116 abort(); 117 } 118 if (nbr_files.rlim_max != RLIM_INFINITY) { 119 fdLimit = nbr_files.rlim_max; 120 } else { 121 /* We just do not know. */ 122 fdLimit = INT_MAX; 123 } 124 125 /* Allocate table for low value file descriptors. */ 126 fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize; 127 fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t)); 128 if (fdTable == NULL) { 129 fprintf(stderr, "library initialization failed - " 130 "unable to allocate file descriptor table - out of memory"); 131 abort(); 132 } else { 133 for (i = 0; i < fdTableLen; i ++) { 134 pthread_mutex_init(&fdTable[i].lock, NULL); 135 } 136 } 137 138 /* Allocate overflow table, if needed */ 139 if (fdLimit > fdTableMaxSize) { 140 fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1; 141 fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*)); 142 if (fdOverflowTable == NULL) { 143 fprintf(stderr, "library initialization failed - " 144 "unable to allocate file descriptor overflow table - out of memory"); 145 abort(); 146 } 147 } 148 149 /* 150 * Setup the signal handler 151 */ 152 sa.sa_handler = sig_wakeup; 153 sa.sa_flags = 0; 154 sigemptyset(&sa.sa_mask); 155 sigaction(WAKEUP_SIGNAL, &sa, NULL); 156 157 sigemptyset(&sigset); 158 sigaddset(&sigset, WAKEUP_SIGNAL); 159 sigprocmask(SIG_UNBLOCK, &sigset, NULL); 160 } 161 162 /* 163 * Return the fd table for this fd. 164 */ 165 static inline fdEntry_t *getFdEntry(int fd) 166 { 167 fdEntry_t* result = NULL; 168 169 if (fd < 0) { 170 return NULL; 171 } 172 173 /* This should not happen. If it does, our assumption about 174 * max. fd value was wrong. */ 175 assert(fd < fdLimit); 176 177 if (fd < fdTableMaxSize) { 178 /* fd is in base table. */ 179 assert(fd < fdTableLen); 180 result = &fdTable[fd]; 181 } else { 182 /* fd is in overflow table. */ 183 const int indexInOverflowTable = fd - fdTableMaxSize; 184 const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize; 185 const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize; 186 fdEntry_t* slab = NULL; 187 assert(rootindex < fdOverflowTableLen); 188 assert(slabindex < fdOverflowTableSlabSize); 189 pthread_mutex_lock(&fdOverflowTableLock); 190 /* Allocate new slab in overflow table if needed */ 191 if (fdOverflowTable[rootindex] == NULL) { 192 fdEntry_t* const newSlab = 193 (fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t)); 194 if (newSlab == NULL) { 195 fprintf(stderr, "Unable to allocate file descriptor overflow" 196 " table slab - out of memory"); 197 pthread_mutex_unlock(&fdOverflowTableLock); 198 abort(); 199 } else { 200 int i; 201 for (i = 0; i < fdOverflowTableSlabSize; i ++) { 202 pthread_mutex_init(&newSlab[i].lock, NULL); 203 } 204 fdOverflowTable[rootindex] = newSlab; 205 } 206 } 207 pthread_mutex_unlock(&fdOverflowTableLock); 208 slab = fdOverflowTable[rootindex]; 209 result = &slab[slabindex]; 210 } 211 212 return result; 213 214 } 215 216 /* 217 * Start a blocking operation :- 218 * Insert thread onto thread list for the fd. 219 */ 220 static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self) 221 { 222 self->thr = pthread_self(); 223 self->intr = 0; 224 225 pthread_mutex_lock(&(fdEntry->lock)); 226 { 227 self->next = fdEntry->threads; 228 fdEntry->threads = self; 229 } 230 pthread_mutex_unlock(&(fdEntry->lock)); 231 } 232 233 /* 234 * End a blocking operation :- 235 * Remove thread from thread list for the fd 236 * If fd has been interrupted then set errno to EBADF 237 */ 238 static inline void endOp 239 (fdEntry_t *fdEntry, threadEntry_t *self) 240 { 241 int orig_errno = errno; 242 pthread_mutex_lock(&(fdEntry->lock)); 243 { 244 threadEntry_t *curr, *prev=NULL; 245 curr = fdEntry->threads; 246 while (curr != NULL) { 247 if (curr == self) { 248 if (curr->intr) { 249 orig_errno = EBADF; 250 } 251 if (prev == NULL) { 252 fdEntry->threads = curr->next; 253 } else { 254 prev->next = curr->next; 255 } 256 break; 257 } 258 prev = curr; 259 curr = curr->next; 260 } 261 } 262 pthread_mutex_unlock(&(fdEntry->lock)); 263 errno = orig_errno; 264 } 265 266 /* 267 * Close or dup2 a file descriptor ensuring that all threads blocked on 268 * the file descriptor are notified via a wakeup signal. 269 * 270 * fd1 < 0 => close(fd2) 271 * fd1 >= 0 => dup2(fd1, fd2) 272 * 273 * Returns -1 with errno set if operation fails. 274 */ 275 static int closefd(int fd1, int fd2) { 276 int rv, orig_errno; 277 fdEntry_t *fdEntry = getFdEntry(fd2); 278 if (fdEntry == NULL) { 279 errno = EBADF; 280 return -1; 281 } 282 283 /* 284 * Lock the fd to hold-off additional I/O on this fd. 285 */ 286 pthread_mutex_lock(&(fdEntry->lock)); 287 288 { 289 /* 290 * And close/dup the file descriptor 291 * (restart if interrupted by signal) 292 */ 293 if (fd1 < 0) { 294 rv = close(fd2); 295 } else { 296 do { 297 rv = dup2(fd1, fd2); 298 } while (rv == -1 && errno == EINTR); 299 } 300 301 /* 302 * Send a wakeup signal to all threads blocked on this 303 * file descriptor. 304 */ 305 threadEntry_t *curr = fdEntry->threads; 306 while (curr != NULL) { 307 curr->intr = 1; 308 pthread_kill( curr->thr, WAKEUP_SIGNAL); 309 curr = curr->next; 310 } 311 } 312 313 /* 314 * Unlock without destroying errno 315 */ 316 orig_errno = errno; 317 pthread_mutex_unlock(&(fdEntry->lock)); 318 errno = orig_errno; 319 320 return rv; 321 } 322 323 /* 324 * Wrapper for dup2 - same semantics as dup2 system call except 325 * that any threads blocked in an I/O system call on fd2 will be 326 * preempted and return -1/EBADF; 327 */ 328 int NET_Dup2(int fd, int fd2) { 329 if (fd < 0) { 330 errno = EBADF; 331 return -1; 332 } 333 return closefd(fd, fd2); 334 } 335 336 /* 337 * Wrapper for close - same semantics as close system call 338 * except that any threads blocked in an I/O on fd will be 339 * preempted and the I/O system call will return -1/EBADF. 340 */ 341 int NET_SocketClose(int fd) { 342 return closefd(-1, fd); 343 } 344 345 /************** Basic I/O operations here ***************/ 346 347 /* 348 * Macro to perform a blocking IO operation. Restarts 349 * automatically if interrupted by signal (other than 350 * our wakeup signal) 351 */ 352 #define BLOCKING_IO_RETURN_INT(FD, FUNC) { \ 353 int ret; \ 354 threadEntry_t self; \ 355 fdEntry_t *fdEntry = getFdEntry(FD); \ 356 if (fdEntry == NULL) { \ 357 errno = EBADF; \ 358 return -1; \ 359 } \ 360 do { \ 361 startOp(fdEntry, &self); \ 362 ret = FUNC; \ 363 endOp(fdEntry, &self); \ 364 } while (ret == -1 && errno == EINTR); \ 365 return ret; \ 366 } 367 368 int NET_Read(int s, void* buf, size_t len) { 369 BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) ); 370 } 371 372 int NET_NonBlockingRead(int s, void* buf, size_t len) { 373 BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, MSG_DONTWAIT) ); 374 } 375 376 int NET_RecvFrom(int s, void *buf, int len, unsigned int flags, 377 struct sockaddr *from, socklen_t *fromlen) { 378 BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) ); 379 } 380 381 int NET_Send(int s, void *msg, int len, unsigned int flags) { 382 BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) ); 383 } 384 385 int NET_SendTo(int s, const void *msg, int len, unsigned int 386 flags, const struct sockaddr *to, int tolen) { 387 BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) ); 388 } 389 390 int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) { 391 BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) ); 392 } 393 394 int NET_Connect(int s, struct sockaddr *addr, int addrlen) { 395 BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) ); 396 } 397 398 int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) { 399 BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) ); 400 } 401 402 /* 403 * Wrapper for poll(s, timeout). 404 * Auto restarts with adjusted timeout if interrupted by 405 * signal other than our wakeup signal. 406 */ 407 int NET_Timeout(JNIEnv *env, int s, long timeout, jlong nanoTimeStamp) { 408 jlong prevNanoTime = nanoTimeStamp; 409 jlong nanoTimeout = (jlong)timeout * NET_NSEC_PER_MSEC; 410 fdEntry_t *fdEntry = getFdEntry(s); 411 412 /* 413 * Check that fd hasn't been closed. 414 */ 415 if (fdEntry == NULL) { 416 errno = EBADF; 417 return -1; 418 } 419 420 for(;;) { 421 struct pollfd pfd; 422 int rv; 423 threadEntry_t self; 424 425 /* 426 * Poll the fd. If interrupted by our wakeup signal 427 * errno will be set to EBADF. 428 */ 429 pfd.fd = s; 430 pfd.events = POLLIN | POLLERR; 431 432 startOp(fdEntry, &self); 433 rv = poll(&pfd, 1, nanoTimeout / NET_NSEC_PER_MSEC); 434 endOp(fdEntry, &self); 435 /* 436 * If interrupted then adjust timeout. If timeout 437 * has expired return 0 (indicating timeout expired). 438 */ 439 if (rv < 0 && errno == EINTR) { 440 if (timeout > 0) { 441 jlong newNanoTime = JVM_NanoTime(env, 0); 442 nanoTimeout -= newNanoTime - prevNanoTime; 443 if (nanoTimeout < NET_NSEC_PER_MSEC) { 444 return 0; 445 } 446 prevNanoTime = newNanoTime; 447 } else { 448 continue; // timeout is -1, so loop again. 449 } 450 } else { 451 return rv; 452 } 453 } 454 }