/*
 * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/uio.h>
#include <unistd.h>
#include <errno.h>
#include <sys/poll.h>

/*
 * Stack allocated by thread when doing blocking operation
 */
typedef struct threadEntry {
    pthread_t thr;                      /* this thread */
    struct threadEntry *next;           /* next thread */
    int intr;                           /* interrupted */
} threadEntry_t;

/*
 * Heap allocated during initialization - one entry per fd
 */
typedef struct {
    pthread_mutex_t lock;               /* fd lock */
    threadEntry_t *threads;             /* threads blocked on fd */
} fdEntry_t;

/*
 * Signal to unblock thread
 */
static int sigWakeup = (__SIGRTMAX - 2);

/*
 * fdTable holds one entry per file descriptor, up to a certain
 * maximum.
 * Theoretically, the number of possible file descriptors can get
 * large, though usually it does not. Entries for small value file
 * descriptors are kept in a simple table, which covers most scenarios.
 * Entries for large value file descriptors are kept in an overflow
 * table, which is organized as a sparse two dimensional array whose
 * slabs are allocated on demand. This covers all corner cases while
 * keeping memory consumption reasonable.
 */

/* Base table for low value file descriptors */
static fdEntry_t* fdTable = NULL;
/* Maximum size of base table (in number of entries). */
static const int fdTableMaxSize = 0x1000; /* 4K */
/* Actual size of base table (in number of entries) */
static int fdTableLen = 0;
/* Max. theoretical number of file descriptors on system. */
static int fdLimit = 0;

/* Overflow table, should base table not be large enough. Organized as
 *   an array of n slabs, each holding 64k entries.
 */
static fdEntry_t** fdOverflowTable = NULL;
/* Number of slabs in the overflow table */
static int fdOverflowTableLen = 0;
/* Number of entries in one slab */
static const int fdOverflowTableSlabSize = 0x10000; /* 64k */
/* Guards on-demand allocation of (and access to) the overflow slabs. */
pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Null signal handler. It does nothing itself; it is installed by init()
 * as the handler for sigWakeup.
 */
static void sig_wakeup(int sig) {
    (void)sig;                          /* intentionally unused */
}

/*
 * Initialization routine (executed when library is loaded)
 * Allocate fd tables and sets up signal handler.
 *
 * Aborts the process if the fd limit cannot be queried or if a table
 * cannot be allocated.
 */
static void __attribute((constructor)) init(void) {
    struct rlimit nbr_files;
    sigset_t sigset;
    struct sigaction sa;
    int i = 0;

    /* Determine the maximum number of possible file descriptors. */
    if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) {
        fprintf(stderr, "library initialization failed - "
                "unable to get max # of allocated fds\n");
        abort();
    }
    /*
     * fdLimit is an int while rlim_max is an rlim_t: clamp instead of
     * narrowing blindly, so that a finite limit above INT_MAX cannot turn
     * into a truncated or negative fdLimit.
     */
    if (nbr_files.rlim_max != RLIM_INFINITY &&
        nbr_files.rlim_max <= (rlim_t)INT_MAX) {
        fdLimit = (int)nbr_files.rlim_max;
    } else {
        /* We just do not know (or it does not fit into an int). */
        fdLimit = INT_MAX;
    }

    /* Allocate table for low value file descriptors. */
    fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize;
    fdTable = calloc(fdTableLen, sizeof(fdEntry_t));
    if (fdTable == NULL) {
        fprintf(stderr, "library initialization failed - "
                "unable to allocate file descriptor table - out of memory");
        abort();
    } else {
        for (i = 0; i < fdTableLen; i ++) {
            pthread_mutex_init(&fdTable[i].lock, NULL);
        }
    }

    /* Allocate overflow table, if needed */
    if (fdLimit > fdTableMaxSize) {
        fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1;
        fdOverflowTable = calloc(fdOverflowTableLen, sizeof(fdEntry_t*));
        if (fdOverflowTable == NULL) {
            fprintf(stderr, "library initialization failed - "
                    "unable to allocate file descriptor overflow table - out of memory");
            abort();
        }
    }

    /*
     * Setup the signal handler
     */
    sa.sa_handler = sig_wakeup;
    sa.sa_flags   = 0;
    sigemptyset(&sa.sa_mask);
    sigaction(sigWakeup, &sa, NULL);

    sigemptyset(&sigset);
    sigaddset(&sigset, sigWakeup);
    sigprocmask(SIG_UNBLOCK, &sigset, NULL);
}

/*
 * Return the fd table entry for this fd.
 *
 * Returns NULL if fd is negative or not below fdLimit; every caller in
 * this file reports that as EBADF. An fd at or above fdTableMaxSize lives
 * in the overflow table, whose slab is allocated on first use; the process
 * is aborted if that allocation fails.
 */
static inline fdEntry_t *getFdEntry(int fd)
{
    fdEntry_t* result = NULL;

    /*
     * Reject anything outside [0, fdLimit). This is a runtime check rather
     * than an assert so that builds with NDEBUG cannot index past the
     * tables if the assumption about the max. fd value turns out wrong.
     */
    if (fd < 0 || fd >= fdLimit) {
        return NULL;
    }

    if (fd < fdTableMaxSize) {
        /* fd is in base table. */
        assert(fd < fdTableLen);
        result = &fdTable[fd];
    } else {
        /* fd is in overflow table. */
        const int indexInOverflowTable = fd - fdTableMaxSize;
        const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize;
        const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize;
        fdEntry_t* slab = NULL;
        assert(rootindex < fdOverflowTableLen);
        assert(slabindex < fdOverflowTableSlabSize);
        pthread_mutex_lock(&fdOverflowTableLock);
        /* Allocate new slab in overflow table if needed */
        if (fdOverflowTable[rootindex] == NULL) {
            fdEntry_t* const newSlab =
                calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t));
            if (newSlab == NULL) {
                fprintf(stderr, "Unable to allocate file descriptor overflow"
                        " table slab - out of memory");
                pthread_mutex_unlock(&fdOverflowTableLock);
                abort();
            } else {
                int i;
                for (i = 0; i < fdOverflowTableSlabSize; i ++) {
                    pthread_mutex_init(&newSlab[i].lock, NULL);
                }
                fdOverflowTable[rootindex] = newSlab;
            }
        }
        /* Read the slab pointer while still holding the lock. */
        slab = fdOverflowTable[rootindex];
        pthread_mutex_unlock(&fdOverflowTableLock);
        result = &slab[slabindex];
    }

    return result;

}

/*
 * Start a blocking operation :-
 *    Insert thread onto thread list for the fd.
 *
 * 'self' is filled in with the calling thread and a cleared interrupt
 * flag, then pushed on the head of the list under the fd lock. The
 * callers in this file pass the address of a local variable and remove
 * it again with endOp() before returning.
 */
static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    self->thr = pthread_self();
    self->intr = 0;

    pthread_mutex_lock(&(fdEntry->lock));
    {
        self->next = fdEntry->threads;
        fdEntry->threads = self;
    }
    pthread_mutex_unlock(&(fdEntry->lock));
}

/*
 * End a blocking operation :-
 *     Remove thread from thread list for the fd
 *     If fd has been interrupted then set errno to EBADF
 *
 * Otherwise errno is left exactly as it was on entry, i.e. as the
 * blocking call set it.
 */
static inline void endOp
    (fdEntry_t *fdEntry, threadEntry_t *self)
{
    int orig_errno = errno;
    pthread_mutex_lock(&(fdEntry->lock));
    {
        threadEntry_t *curr, *prev=NULL;
        curr = fdEntry->threads;
        while (curr != NULL) {
            if (curr == self) {
                if (curr->intr) {
                    orig_errno = EBADF;
                }
                /* Unlink 'self' from the singly linked list. */
                if (prev == NULL) {
                    fdEntry->threads = curr->next;
                } else {
                    prev->next = curr->next;
                }
                break;
            }
            prev = curr;
            curr = curr->next;
        }
    }
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;
}

/*
 * Close or dup2 a file descriptor ensuring that all threads blocked on
 * the file descriptor are notified via a wakeup signal.
 *
 *      fd1 < 0    => close(fd2)
 *      fd1 >= 0   => dup2(fd1, fd2)
 *
 * Returns -1 with errno set if operation fails.
 */
static int closefd(int fd1, int fd2) {
    int rv, orig_errno;
    fdEntry_t *fdEntry = getFdEntry(fd2);
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Lock the fd to hold-off additional I/O on this fd.
     */
    pthread_mutex_lock(&(fdEntry->lock));

    {
        /*
         * And close/dup the file descriptor
         * (restart if interrupted by signal)
         */
        do {
            if (fd1 < 0) {
                rv = close(fd2);
            } else {
                rv = dup2(fd1, fd2);
            }
        } while (rv == -1 && errno == EINTR);

        /*
         * Send a wakeup signal to all threads blocked on this
         * file descriptor.
         */
        threadEntry_t *curr = fdEntry->threads;
        while (curr != NULL) {
            curr->intr = 1;
            pthread_kill( curr->thr, sigWakeup );
            curr = curr->next;
        }
    }

    /*
     * Unlock without destroying errno
     */
    orig_errno = errno;
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;

    return rv;
}

/*
 * Wrapper for dup2 - same semantics as dup2 system call except
 * that any threads blocked in an I/O system call on fd2 will be
 * preempted and return -1/EBADF;
 */
int NET_Dup2(int fd, int fd2) {
    /* A negative source fd would make closefd() close fd2 instead. */
    if (fd < 0) {
        errno = EBADF;
        return -1;
    }
    return closefd(fd, fd2);
}

/*
 * Wrapper for close - same semantics as close system call
 * except that any threads blocked in an I/O on fd will be
 * preempted and the I/O system call will return -1/EBADF.
 */
int NET_SocketClose(int fd) {
    return closefd(-1, fd);
}

/************** Basic I/O operations here ***************/

/*
 * Macro to perform a blocking IO operation. Restarts
 * automatically if interrupted by signal (other than
 * our wakeup signal)
 *
 * Expands to the complete body of the enclosing function and returns
 * from it: -1/EBADF if FD has no table entry, otherwise the result of
 * FUNC. The thread is registered on the fd around every attempt so that
 * closefd() can find and signal it.
 */
#define BLOCKING_IO_RETURN_INT(FD, FUNC) {      \
    int ret;                                    \
    threadEntry_t self;                         \
    fdEntry_t *fdEntry = getFdEntry(FD);        \
    if (fdEntry == NULL) {                      \
        errno = EBADF;                          \
        return -1;                              \
    }                                           \
    do {                                        \
        startOp(fdEntry, &self);                \
        ret = FUNC;                             \
        endOp(fdEntry, &self);                  \
    } while (ret == -1 && errno == EINTR);      \
    return ret;                                 \
}

/* Interruptible recv(s, buf, len, 0). */
int NET_Read(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
}

/* Interruptible recv(s, buf, len, MSG_DONTWAIT). */
int NET_NonBlockingRead(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, MSG_DONTWAIT) );
}

/* Interruptible readv(). */
int NET_ReadV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
}

/* Interruptible recvfrom(). */
int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
       struct sockaddr *from, socklen_t *fromlen) {
    BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) );
}

/* Interruptible send(). */
int NET_Send(int s, void *msg, int len, unsigned int flags) {
    BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
}

/* Interruptible writev(). */
int NET_WriteV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) );
}

/* Interruptible sendto(). */
int NET_SendTo(int s, const void *msg, int len,  unsigned  int
       flags, const struct sockaddr *to, int tolen) {
    BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
}

/* Interruptible accept(). */
int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) {
    BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) );
}

/* Interruptible connect(). */
int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
    BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) );
}

/*
 * Interruptible poll(). The thread is registered on ufds[0].fd only, so
 * ufds must reference at least one entry.
 */
int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
    BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
}

/*
 * Wrapper for poll(s, timeout).
 * Auto restarts with adjusted timeout if interrupted by
 * signal other than our wakeup signal.
 *
 * timeout and currentTime are in milliseconds; currentTime is the
 * reference point from which elapsed time is measured (compared against
 * gettimeofday()) when the poll is restarted.
 *
 * Returns the poll() result, 0 if the timeout expired while restarting,
 * or -1 with errno set (EBADF if s has no table entry or was closed
 * through closefd() while this thread was blocked).
 */
int NET_Timeout0(int s, long timeout,long currentTime) {
    long prevtime = currentTime, newtime;
    struct timeval t;
    fdEntry_t *fdEntry = getFdEntry(s);

    /*
     * Check that fd hasn't been closed.
     */
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    for(;;) {
        struct pollfd pfd;
        int rv;
        threadEntry_t self;

        /*
         * Poll the fd. If interrupted by our wakeup signal
         * errno will be set to EBADF.
         */
        pfd.fd = s;
        pfd.events = POLLIN | POLLERR;
        pfd.revents = 0;

        startOp(fdEntry, &self);
        rv = poll(&pfd, 1, timeout);
        endOp(fdEntry, &self);

        /*
         * If interrupted then adjust timeout. If timeout
         * has expired return 0 (indicating timeout expired).
         */
        if (rv < 0 && errno == EINTR) {
            if (timeout > 0) {
                gettimeofday(&t, NULL);
                newtime = t.tv_sec * 1000  +  t.tv_usec / 1000;
                timeout -= newtime - prevtime;
                if (timeout <= 0) {
                    return 0;
                }
                prevtime = newtime;
            }
        } else {
            return rv;
        }

    }
}

/*
 * Same as NET_Timeout0(): the caller supplies the current time in
 * milliseconds.
 */
int NET_TimeoutWithCurrentTime(int s, long timeout, long currentTime) {
    return NET_Timeout0(s, timeout, currentTime);
}

/*
 * Wait up to 'timeout' milliseconds for s to become readable. The
 * current time is sampled here only when timeout > 0, since it is only
 * needed to adjust a positive timeout after an interruption.
 */
int NET_Timeout(int s, long timeout) {
    long currentTime = 0;
    struct timeval t;
    if (timeout > 0) {
        gettimeofday(&t, NULL);
        currentTime = t.tv_sec * 1000  +  t.tv_usec / 1000;
    }
    return NET_Timeout0(s, timeout, currentTime);
}