1 /*
   2  * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #include <assert.h>
  27 #include <limits.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <signal.h>
  31 #include <pthread.h>
  32 #include <sys/types.h>
  33 #include <sys/socket.h>
  34 #include <sys/time.h>
  35 #include <sys/resource.h>
  36 #include <sys/uio.h>
  37 #include <unistd.h>
  38 #include <errno.h>
  39 #include <sys/poll.h>
  40 
  41 /*
  42  * Stack allocated by thread when doing blocking operation
  43  */
  44 typedef struct threadEntry {
  45     pthread_t thr;                      /* this thread */
  46     struct threadEntry *next;           /* next thread */
  47     int intr;                           /* interrupted */
  48 } threadEntry_t;
  49 
  50 /*
  51  * Heap allocated during initialized - one entry per fd
  52  */
  53 typedef struct {
  54     pthread_mutex_t lock;               /* fd lock */
  55     threadEntry_t *threads;             /* threads blocked on fd */
  56 } fdEntry_t;
  57 
  58 /*
  59  * Signal to unblock thread
  60  */
  61 static int sigWakeup = (__SIGRTMAX - 2);
  62 
  63 /*
  64  * fdTable holds one entry per file descriptor, up to a certain
  65  * maximum.
  66  * Theoretically, the number of possible file descriptors can get
  67  * large, though usually it does not. Entries for small value file
  68  * descriptors are kept in a simple table, which covers most scenarios.
  69  * Entries for large value file descriptors are kept in an overflow
  70  * table, which is organized as a sparse two dimensional array whose
  71  * slabs are allocated on demand. This covers all corner cases while
  72  * keeping memory consumption reasonable.
  73  */
  74 
  75 /* Base table for low value file descriptors */
  76 static fdEntry_t* fdTable = NULL;
  77 /* Maximum size of base table (in number of entries). */
  78 static const int fdTableMaxSize = 0x1000; /* 4K */
  79 /* Actual size of base table (in number of entries) */
  80 static int fdTableLen = 0;
  81 /* Max. theoretical number of file descriptors on system. */
  82 static int fdLimit = 0;
  83 
  84 /* Overflow table, should base table not be large enough. Organized as
  85  *   an array of n slabs, each holding 64k entries.
  86  */
  87 static fdEntry_t** fdOverflowTable = NULL;
  88 /* Number of slabs in the overflow table */
  89 static int fdOverflowTableLen = 0;
  90 /* Number of entries in one slab */
  91 static const int fdOverflowTableSlabSize = 0x10000; /* 64k */
  92 pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER;
  93 
  94 /*
  95  * Null signal handler
  96  */
  97 static void sig_wakeup(int sig) {
  98 }
  99 
 100 /*
 101  * Initialization routine (executed when library is loaded)
 102  * Allocate fd tables and sets up signal handler.
 103  */
 104 static void __attribute((constructor)) init() {
 105     struct rlimit nbr_files;
 106     sigset_t sigset;
 107     struct sigaction sa;
 108     int i = 0;
 109 
 110     /* Determine the maximum number of possible file descriptors. */
 111     if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) {
 112         fprintf(stderr, "library initialization failed - "
 113                 "unable to get max # of allocated fds\n");
 114         abort();
 115     }
 116     if (nbr_files.rlim_max != RLIM_INFINITY) {
 117         fdLimit = nbr_files.rlim_max;
 118     } else {
 119         /* We just do not know. */
 120         fdLimit = INT_MAX;
 121     }
 122 
 123     /* Allocate table for low value file descriptors. */
 124     fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize;
 125     fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t));
 126     if (fdTable == NULL) {
 127         fprintf(stderr, "library initialization failed - "
 128                 "unable to allocate file descriptor table - out of memory");
 129         abort();
 130     } else {
 131         for (i = 0; i < fdTableLen; i ++) {
 132             pthread_mutex_init(&fdTable[i].lock, NULL);
 133         }
 134     }
 135 
 136     /* Allocate overflow table, if needed */
 137     if (fdLimit > fdTableMaxSize) {
 138         fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1;
 139         fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*));
 140         if (fdOverflowTable == NULL) {
 141             fprintf(stderr, "library initialization failed - "
 142                     "unable to allocate file descriptor overflow table - out of memory");
 143             abort();
 144         }
 145     }
 146 
 147     /*
 148      * Setup the signal handler
 149      */
 150     sa.sa_handler = sig_wakeup;
 151     sa.sa_flags   = 0;
 152     sigemptyset(&sa.sa_mask);
 153     sigaction(sigWakeup, &sa, NULL);
 154 
 155     sigemptyset(&sigset);
 156     sigaddset(&sigset, sigWakeup);
 157     sigprocmask(SIG_UNBLOCK, &sigset, NULL);
 158 }
 159 
 160 /*
 161  * Return the fd table for this fd.
 162  */
 163 static inline fdEntry_t *getFdEntry(int fd)
 164 {
 165     fdEntry_t* result = NULL;
 166 
 167     if (fd < 0) {
 168         return NULL;
 169     }
 170 
 171     /* This should not happen. If it does, our assumption about
 172      * max. fd value was wrong. */
 173     assert(fd < fdLimit);
 174 
 175     if (fd < fdTableMaxSize) {
 176         /* fd is in base table. */
 177         assert(fd < fdTableLen);
 178         result = &fdTable[fd];
 179     } else {
 180         /* fd is in overflow table. */
 181         const int indexInOverflowTable = fd - fdTableMaxSize;
 182         const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize;
 183         const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize;
 184         fdEntry_t* slab = NULL;
 185         assert(rootindex < fdOverflowTableLen);
 186         assert(slabindex < fdOverflowTableSlabSize);
 187         pthread_mutex_lock(&fdOverflowTableLock);
 188         /* Allocate new slab in overflow table if needed */
 189         if (fdOverflowTable[rootindex] == NULL) {
 190             fdEntry_t* const newSlab =
 191                 (fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t));
 192             if (newSlab == NULL) {
 193                 fprintf(stderr, "Unable to allocate file descriptor overflow"
 194                         " table slab - out of memory");
 195                 pthread_mutex_unlock(&fdOverflowTableLock);
 196                 abort();
 197             } else {
 198                 int i;
 199                 for (i = 0; i < fdOverflowTableSlabSize; i ++) {
 200                     pthread_mutex_init(&newSlab[i].lock, NULL);
 201                 }
 202                 fdOverflowTable[rootindex] = newSlab;
 203             }
 204         }
 205         pthread_mutex_unlock(&fdOverflowTableLock);
 206         slab = fdOverflowTable[rootindex];
 207         result = &slab[slabindex];
 208     }
 209 
 210     return result;
 211 
 212 }
 213 
 214 /*
 215  * Start a blocking operation :-
 216  *    Insert thread onto thread list for the fd.
 217  */
 218 static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
 219 {
 220     self->thr = pthread_self();
 221     self->intr = 0;
 222 
 223     pthread_mutex_lock(&(fdEntry->lock));
 224     {
 225         self->next = fdEntry->threads;
 226         fdEntry->threads = self;
 227     }
 228     pthread_mutex_unlock(&(fdEntry->lock));
 229 }
 230 
 231 /*
 232  * End a blocking operation :-
 233  *     Remove thread from thread list for the fd
 234  *     If fd has been interrupted then set errno to EBADF
 235  */
 236 static inline void endOp
 237     (fdEntry_t *fdEntry, threadEntry_t *self)
 238 {
 239     int orig_errno = errno;
 240     pthread_mutex_lock(&(fdEntry->lock));
 241     {
 242         threadEntry_t *curr, *prev=NULL;
 243         curr = fdEntry->threads;
 244         while (curr != NULL) {
 245             if (curr == self) {
 246                 if (curr->intr) {
 247                     orig_errno = EBADF;
 248                 }
 249                 if (prev == NULL) {
 250                     fdEntry->threads = curr->next;
 251                 } else {
 252                     prev->next = curr->next;
 253                 }
 254                 break;
 255             }
 256             prev = curr;
 257             curr = curr->next;
 258         }
 259     }
 260     pthread_mutex_unlock(&(fdEntry->lock));
 261     errno = orig_errno;
 262 }
 263 
 264 /*
 265  * Close or dup2 a file descriptor ensuring that all threads blocked on
 266  * the file descriptor are notified via a wakeup signal.
 267  *
 268  *      fd1 < 0    => close(fd2)
 269  *      fd1 >= 0   => dup2(fd1, fd2)
 270  *
 271  * Returns -1 with errno set if operation fails.
 272  */
 273 static int closefd(int fd1, int fd2) {
 274     int rv, orig_errno;
 275     fdEntry_t *fdEntry = getFdEntry(fd2);
 276     if (fdEntry == NULL) {
 277         errno = EBADF;
 278         return -1;
 279     }
 280 
 281     /*
 282      * Lock the fd to hold-off additional I/O on this fd.
 283      */
 284     pthread_mutex_lock(&(fdEntry->lock));
 285 
 286     {
 287         /*
 288          * And close/dup the file descriptor
 289          * (restart if interrupted by signal)
 290          */
 291         do {
 292             if (fd1 < 0) {
 293                 rv = close(fd2);
 294             } else {
 295                 rv = dup2(fd1, fd2);
 296             }
 297         } while (rv == -1 && errno == EINTR);
 298 
 299         /*
 300          * Send a wakeup signal to all threads blocked on this
 301          * file descriptor.
 302          */
 303         threadEntry_t *curr = fdEntry->threads;
 304         while (curr != NULL) {
 305             curr->intr = 1;
 306             pthread_kill( curr->thr, sigWakeup );
 307             curr = curr->next;
 308         }
 309     }
 310 
 311     /*
 312      * Unlock without destroying errno
 313      */
 314     orig_errno = errno;
 315     pthread_mutex_unlock(&(fdEntry->lock));
 316     errno = orig_errno;
 317 
 318     return rv;
 319 }
 320 
 321 /*
 322  * Wrapper for dup2 - same semantics as dup2 system call except
 323  * that any threads blocked in an I/O system call on fd2 will be
 324  * preempted and return -1/EBADF;
 325  */
 326 int NET_Dup2(int fd, int fd2) {
 327     if (fd < 0) {
 328         errno = EBADF;
 329         return -1;
 330     }
 331     return closefd(fd, fd2);
 332 }
 333 
 334 /*
 335  * Wrapper for close - same semantics as close system call
 336  * except that any threads blocked in an I/O on fd will be
 337  * preempted and the I/O system call will return -1/EBADF.
 338  */
 339 int NET_SocketClose(int fd) {
 340     return closefd(-1, fd);
 341 }
 342 
 343 /************** Basic I/O operations here ***************/
 344 
 345 /*
 346  * Macro to perform a blocking IO operation. Restarts
 347  * automatically if interrupted by signal (other than
 348  * our wakeup signal)
 349  */
 350 #define BLOCKING_IO_RETURN_INT(FD, FUNC) {      \
 351     int ret;                                    \
 352     threadEntry_t self;                         \
 353     fdEntry_t *fdEntry = getFdEntry(FD);        \
 354     if (fdEntry == NULL) {                      \
 355         errno = EBADF;                          \
 356         return -1;                              \
 357     }                                           \
 358     do {                                        \
 359         startOp(fdEntry, &self);                \
 360         ret = FUNC;                             \
 361         endOp(fdEntry, &self);                  \
 362     } while (ret == -1 && errno == EINTR);      \
 363     return ret;                                 \
 364 }
 365 
 366 int NET_Read(int s, void* buf, size_t len) {
 367     BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
 368 }
 369 
 370 int NET_NonBlockingRead(int s, void* buf, size_t len) { 
 371     BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, MSG_DONTWAIT) );
 372 }
 373 
 374 int NET_ReadV(int s, const struct iovec * vector, int count) {
 375     BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
 376 }
 377 
 378 int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
 379        struct sockaddr *from, socklen_t *fromlen) {
 380     BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) );
 381 }
 382 
 383 int NET_Send(int s, void *msg, int len, unsigned int flags) {
 384     BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
 385 }
 386 
 387 int NET_WriteV(int s, const struct iovec * vector, int count) {
 388     BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) );
 389 }
 390 
 391 int NET_SendTo(int s, const void *msg, int len,  unsigned  int
 392        flags, const struct sockaddr *to, int tolen) {
 393     BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
 394 }
 395 
 396 int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) {
 397     BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) );
 398 }
 399 
 400 int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
 401     BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) );
 402 }
 403 
 404 int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
 405     BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
 406 }
 407 
 408 /*
 409  * Wrapper for poll(s, timeout).
 410  * Auto restarts with adjusted timeout if interrupted by
 411  * signal other than our wakeup signal.
 412  */
 413 int NET_Timeout0(int s, long timeout,long currentTime) {
 414     long prevtime = currentTime, newtime;
 415     struct timeval t;
 416     fdEntry_t *fdEntry = getFdEntry(s);
 417 
 418     /*
 419      * Check that fd hasn't been closed.
 420      */
 421     if (fdEntry == NULL) {
 422         errno = EBADF;
 423         return -1;
 424     }
 425     
 426     for(;;) {
 427         struct pollfd pfd;
 428         int rv;
 429         threadEntry_t self;
 430 
 431         /*
 432          * Poll the fd. If interrupted by our wakeup signal
 433          * errno will be set to EBADF.
 434          */
 435         pfd.fd = s;
 436         pfd.events = POLLIN | POLLERR;
 437 
 438         startOp(fdEntry, &self);
 439         rv = poll(&pfd, 1, timeout);
 440         endOp(fdEntry, &self);
 441 
 442         /*
 443          * If interrupted then adjust timeout. If timeout
 444          * has expired return 0 (indicating timeout expired).
 445          */
 446         if (rv < 0 && errno == EINTR) {
 447             if (timeout > 0) {
 448                 gettimeofday(&t, NULL);
 449                 newtime = t.tv_sec * 1000  +  t.tv_usec / 1000;
 450                 timeout -= newtime - prevtime;
 451                 if (timeout <= 0) {
 452                     return 0;
 453                 }
 454                 prevtime = newtime;
 455             }
 456         } else {
 457             return rv;
 458         }
 459 
 460     }
 461 }
 462 
 463 int NET_TimeoutWithCurrentTime(int s, long timeout, long currentTime) {
 464     return NET_Timeout0(s, timeout, currentTime);
 465 }
 466 
 467 int NET_Timeout(int s, long timeout) {
 468     long currentTime = 0;
 469     struct timeval t;
 470     if (timeout > 0) {
 471         gettimeofday(&t, NULL);
 472         currentTime = t.tv_sec * 1000 + t.tv_usec / 1000;
 473     }
 474     return NET_Timeout0(s, timeout, currentTime);
 475 }