1 /* 2 * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
 */

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/uio.h>
#include <unistd.h>
#include <errno.h>
#include <sys/poll.h>

/*
 * Stack allocated by a thread for the duration of one blocking
 * operation; links the thread into the per-fd blocked-threads list
 * so that closefd() can find and interrupt it.
 */
typedef struct threadEntry {
    pthread_t thr;                      /* this thread */
    struct threadEntry *next;           /* next thread blocked on same fd */
    int intr;                           /* set to 1 when interrupted by closefd() */
} threadEntry_t;

/*
 * Heap allocated during initialization - one entry per fd.
 */
typedef struct {
    pthread_mutex_t lock;               /* fd lock, serializes close against I/O */
    threadEntry_t *threads;             /* threads currently blocked on this fd */
} fdEntry_t;

/*
 * Signal used to unblock a thread stuck in a blocking syscall.
 * NOTE(review): __SIGRTMAX is a glibc-internal constant; the public
 * SIGRTMAX expands to a function call and therefore cannot be used
 * in a static initializer - presumably why it is used here.
 */
static int sigWakeup = (__SIGRTMAX - 2);

/*
 * fdTable holds one entry per file descriptor, up to a certain
 * maximum.
 * Theoretically, the number of possible file descriptors can get
 * large, though usually it does not. To save memory, we keep file
 * descriptors with large numerical values in an overflow table. That
 * table is organized as a two-dimensional sparse array, allocated
 * on demand.
 */

static fdEntry_t* fdTable;
/* Max. number of file descriptors in fdTable. */
static const int fdTableMaxSize = 0x1000; /* 4K */
/* Max. theoretical number of file descriptors on the system. */
static int fdLimit;
/* Length of fdTable, in number of entries. */
static int fdTableLen;

/* Overflow table: organized as array of n slabs, each holding
 * 64k entries.
83 */ 84 static fdEntry_t** fdOverflowTable; 85 /* Number of slabs in the overflow table */ 86 static int fdOverflowTableLen; 87 /* Number of entries in one slab */ 88 static const int fdOverflowTableSlabSize = 0x10000; /* 64k */ 89 pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER; 90 91 /* 92 * Null signal handler 93 */ 94 static void sig_wakeup(int sig) { 95 } 96 97 /* 98 * Initialization routine (executed when library is loaded) 99 * Allocate fd tables and sets up signal handler. 100 */ 101 static void __attribute((constructor)) init() { 102 struct rlimit nbr_files; 103 sigset_t sigset; 104 struct sigaction sa; 105 int i = 0; 106 107 /* Determine the maximum number of possible file descriptors. */ 108 getrlimit(RLIMIT_NOFILE, &nbr_files); 109 if (nbr_files.rlim_max != RLIM_INFINITY) { 110 fdLimit = nbr_files.rlim_max; 111 } else { 112 /* We just do not know. */ 113 fdLimit = INT_MAX; 114 } 115 116 /* Allocate table for low value file descriptors. */ 117 fdTableLen = fdLimit < fdTableMaxSize ? 
fdLimit : fdTableMaxSize; 118 fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t)); 119 if (fdTable == NULL) { 120 fprintf(stderr, "library initialization failed - " 121 "unable to allocate file descriptor table - out of memory"); 122 abort(); 123 } else { 124 for (i = 0; i < fdTableLen; i ++) { 125 pthread_mutex_init(&fdTable[i].lock, NULL); 126 } 127 } 128 129 /* Allocate overflow table, if needed */ 130 if (fdLimit > fdTableMaxSize) { 131 fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1; 132 fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*)); 133 if (fdOverflowTable == NULL) { 134 fprintf(stderr, "library initialization failed - " 135 "unable to allocate file descriptor overflow table - out of memory"); 136 abort(); 137 } 138 } 139 140 /* 141 * Setup the signal handler 142 */ 143 sa.sa_handler = sig_wakeup; 144 sa.sa_flags = 0; 145 sigemptyset(&sa.sa_mask); 146 sigaction(sigWakeup, &sa, NULL); 147 148 sigemptyset(&sigset); 149 sigaddset(&sigset, sigWakeup); 150 sigprocmask(SIG_UNBLOCK, &sigset, NULL); 151 } 152 153 /* 154 * Return the fd table for this fd. 155 */ 156 static inline fdEntry_t *getFdEntry(int fd) 157 { 158 fdEntry_t* result = NULL; 159 160 if (fd < 0) { 161 return NULL; 162 } 163 164 /* This should not happen. If it does, our assumption about 165 * max. fd value was wrong. 
*/ 166 assert(fd < fdLimit); 167 168 if (fd < fdTableMaxSize) { 169 assert(fd < fdTableLen); 170 result = fdTable + fd; 171 } else { 172 const int indexInOverflowTable = fd - fdTableMaxSize; 173 const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize; 174 const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize; 175 assert(rootindex < fdOverflowTableLen); 176 assert(slabindex < fdOverflowTableSlabSize); 177 pthread_mutex_lock(&fdOverflowTableLock); 178 if (fdOverflowTable[rootindex] == NULL) { 179 fdEntry_t* const newSlab = 180 (fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t)); 181 if (newSlab == NULL) { 182 fprintf(stderr, "Unable to allocate file descriptor table - out of memory"); 183 pthread_mutex_unlock(&fdOverflowTableLock); 184 abort(); 185 } else { 186 int i; 187 for (i = 0; i < fdOverflowTableSlabSize; i ++) { 188 pthread_mutex_init(&newSlab[i].lock, NULL); 189 } 190 fdOverflowTable[rootindex] = newSlab; 191 } 192 } 193 pthread_mutex_unlock(&fdOverflowTableLock); 194 result = fdOverflowTable[rootindex] + slabindex; 195 } 196 197 return result; 198 199 } 200 201 /* 202 * Start a blocking operation :- 203 * Insert thread onto thread list for the fd. 
 */
static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    /* Record who we are and clear the interruption flag before
     * publishing ourselves on the fd's blocked-threads list. */
    self->thr = pthread_self();
    self->intr = 0;

    pthread_mutex_lock(&(fdEntry->lock));
    {
        /* Push onto the head of the singly linked list. */
        self->next = fdEntry->threads;
        fdEntry->threads = self;
    }
    pthread_mutex_unlock(&(fdEntry->lock));
}

/*
 * End a blocking operation :-
 * Remove thread from thread list for the fd.
 * If the fd has been interrupted (closed under us) then set errno
 * to EBADF so the caller's retry loop terminates.
 */
static inline void endOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    /* Save errno first: the list manipulation below must not clobber
     * the error reported by the interrupted syscall. */
    int orig_errno = errno;
    pthread_mutex_lock(&(fdEntry->lock));
    {
        /* Unlink self from the fd's blocked-threads list. */
        threadEntry_t *curr, *prev=NULL;
        curr = fdEntry->threads;
        while (curr != NULL) {
            if (curr == self) {
                if (curr->intr) {
                    /* closefd() marked us interrupted: report EBADF. */
                    orig_errno = EBADF;
                }
                if (prev == NULL) {
                    fdEntry->threads = curr->next;
                } else {
                    prev->next = curr->next;
                }
                break;
            }
            prev = curr;
            curr = curr->next;
        }
    }
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;
}

/*
 * Close or dup2 a file descriptor ensuring that all threads blocked on
 * the file descriptor are notified via a wakeup signal.
 *
 *      fd1 < 0    => close(fd2)
 *      fd1 >= 0   => dup2(fd1, fd2)
 *
 * Returns -1 with errno set if operation fails.
 */
static int closefd(int fd1, int fd2) {
    int rv, orig_errno;
    fdEntry_t *fdEntry = getFdEntry(fd2);
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Lock the fd to hold-off additional I/O on this fd. The lock is
     * held across both the close/dup2 and the signalling loop so no
     * blocked thread can deregister while we walk the list.
     */
    pthread_mutex_lock(&(fdEntry->lock));

    {
        /*
         * And close/dup the file descriptor
         * (restart if interrupted by signal)
         */
        do {
            if (fd1 < 0) {
                rv = close(fd2);
            } else {
                rv = dup2(fd1, fd2);
            }
        } while (rv == -1 && errno == EINTR);

        /*
         * Send a wakeup signal to all threads blocked on this
         * file descriptor. Each will see intr == 1 in endOp() and
         * report EBADF to its caller.
         */
        threadEntry_t *curr = fdEntry->threads;
        while (curr != NULL) {
            curr->intr = 1;
            pthread_kill( curr->thr, sigWakeup );
            curr = curr->next;
        }
    }

    /*
     * Unlock without destroying errno
     */
    orig_errno = errno;
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;

    return rv;
}

/*
 * Wrapper for dup2 - same semantics as dup2 system call except
 * that any threads blocked in an I/O system call on fd2 will be
 * preempted and return -1/EBADF;
 */
int NET_Dup2(int fd, int fd2) {
    if (fd < 0) {
        errno = EBADF;
        return -1;
    }
    return closefd(fd, fd2);
}

/*
 * Wrapper for close - same semantics as close system call
 * except that any threads blocked in an I/O on fd will be
 * preempted and the I/O system call will return -1/EBADF.
 */
int NET_SocketClose(int fd) {
    return closefd(-1, fd);
}

/************** Basic I/O operations here ***************/

/*
 * Macro to perform a blocking IO operation.
Restarts
 * automatically if interrupted by signal (other than
 * our wakeup signal).
 *
 * Note: the macro expands to a statement that RETURNS from the
 * enclosing function; it registers the calling thread via startOp()
 * so a concurrent NET_SocketClose/NET_Dup2 can interrupt it, in
 * which case endOp() turns the EINTR into EBADF and the loop exits.
 */
#define BLOCKING_IO_RETURN_INT(FD, FUNC) {      \
    int ret;                                    \
    threadEntry_t self;                         \
    fdEntry_t *fdEntry = getFdEntry(FD);        \
    if (fdEntry == NULL) {                      \
        errno = EBADF;                          \
        return -1;                              \
    }                                           \
    do {                                        \
        startOp(fdEntry, &self);                \
        ret = FUNC;                             \
        endOp(fdEntry, &self);                  \
    } while (ret == -1 && errno == EINTR);      \
    return ret;                                 \
}

/* Interruptible recv(). */
int NET_Read(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
}

/* Interruptible readv(). */
int NET_ReadV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
}

/* Interruptible recvfrom(). */
int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
                 struct sockaddr *from, socklen_t *fromlen) {
    BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) );
}

/* Interruptible send(). */
int NET_Send(int s, void *msg, int len, unsigned int flags) {
    BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
}

/* Interruptible writev(). */
int NET_WriteV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) );
}

/* Interruptible sendto(). */
int NET_SendTo(int s, const void *msg, int len, unsigned int flags,
               const struct sockaddr *to, int tolen) {
    BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
}

/* Interruptible accept(). */
int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) {
    BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) );
}

/* Interruptible connect(). */
int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
    BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) );
}

/* Interruptible poll(); registration uses the first pollfd's fd. */
int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
    BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
}

/*
 * Wrapper for poll(s, timeout).
 * Auto restarts with adjusted timeout if interrupted by
 * signal other than our wakeup signal.
395 */ 396 int NET_Timeout(int s, long timeout) { 397 long prevtime = 0, newtime; 398 struct timeval t; 399 fdEntry_t *fdEntry = getFdEntry(s); 400 401 /* 402 * Check that fd hasn't been closed. 403 */ 404 if (fdEntry == NULL) { 405 errno = EBADF; 406 return -1; 407 } 408 409 /* 410 * Pick up current time as may need to adjust timeout 411 */ 412 if (timeout > 0) { 413 gettimeofday(&t, NULL); 414 prevtime = t.tv_sec * 1000 + t.tv_usec / 1000; 415 } 416 417 for(;;) { 418 struct pollfd pfd; 419 int rv; 420 threadEntry_t self; 421 422 /* 423 * Poll the fd. If interrupted by our wakeup signal 424 * errno will be set to EBADF. 425 */ 426 pfd.fd = s; 427 pfd.events = POLLIN | POLLERR; 428 429 startOp(fdEntry, &self); 430 rv = poll(&pfd, 1, timeout); 431 endOp(fdEntry, &self); 432 433 /* 434 * If interrupted then adjust timeout. If timeout 435 * has expired return 0 (indicating timeout expired). 436 */ 437 if (rv < 0 && errno == EINTR) { 438 if (timeout > 0) { 439 gettimeofday(&t, NULL); 440 newtime = t.tv_sec * 1000 + t.tv_usec / 1000; 441 timeout -= newtime - prevtime; 442 if (timeout <= 0) { 443 return 0; 444 } 445 prevtime = newtime; 446 } 447 } else { 448 return rv; 449 } 450 451 } 452 }