1 /*
   2  * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #include <assert.h>
  27 #include <limits.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <signal.h>
  31 #include <pthread.h>
  32 #include <sys/types.h>
  33 #include <sys/socket.h>
  34 #include <sys/time.h>
  35 #include <sys/resource.h>
  36 #include <sys/uio.h>
  37 #include <unistd.h>
  38 #include <errno.h>
  39 #include <sys/poll.h>
  40 
  41 /*
  42  * Stack allocated by thread when doing blocking operation
  43  */
  44 typedef struct threadEntry {
  45     pthread_t thr;                      /* this thread */
  46     struct threadEntry *next;           /* next thread */
  47     int intr;                           /* interrupted */
  48 } threadEntry_t;
  49 
  50 /*
  51  * Heap allocated during initialized - one entry per fd
  52  */
  53 typedef struct {
  54     pthread_mutex_t lock;               /* fd lock */
  55     threadEntry_t *threads;             /* threads blocked on fd */
  56 } fdEntry_t;
  57 
  58 /*
  59  * Signal to unblock thread
  60  */
  61 static int sigWakeup = (__SIGRTMAX - 2);
  62 
  63 /*
  64  * fdTable holds one entry per file descriptor, up to a certain
  65  * maximum.
  66  * Theoretically, the number of possible file descriptors can get
  67  * large, though usually it does not. To save memory, we keep file
  68  * descriptors with large numerical values in an overflow table. That
  69  * table is organized as a two-dimensional sparse array, allocated
  70  * on demand.
  71  */
  72 
  73 static fdEntry_t* fdTable;
  74 /* Max. number of file descriptors in fdTable. */
  75 static const int fdTableMaxSize = 0x1000; /* 4K */
  76 /* Max. theoretical number of file descriptor on system. */
  77 static int fdLimit;
  78 /* Length of fdTable, in number of entries. */
  79 static int fdTableLen;
  80 
  81 /* Overflow table: organized as array of n slabs, each holding
  82  *   64k entries.
  83  */
  84 static fdEntry_t** fdOverflowTable;
  85 /* Number of slabs in the overflow table */
  86 static int fdOverflowTableLen;
  87 /* Number of entries in one slab */
  88 static const int fdOverflowTableSlabSize = 0x10000; /* 64k */
  89 pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER;
  90 
  91 /*
  92  * Null signal handler
  93  */
  94 static void sig_wakeup(int sig) {
  95 }
  96 
  97 /*
  98  * Initialization routine (executed when library is loaded)
  99  * Allocate fd tables and sets up signal handler.
 100  */
 101 static void __attribute((constructor)) init() {
 102     struct rlimit nbr_files;
 103     sigset_t sigset;
 104     struct sigaction sa;
 105     int i = 0;
 106 
 107     /* Determine the maximum number of possible file descriptors. */
 108     getrlimit(RLIMIT_NOFILE, &nbr_files);
 109     if (nbr_files.rlim_max != RLIM_INFINITY) {
 110         fdLimit = nbr_files.rlim_max;
 111     } else {
 112         /* We just do not know. */
 113         fdLimit = INT_MAX;
 114     }
 115 
 116     /* Allocate table for low value file descriptors. */
 117     fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize;
 118     fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t));
 119     if (fdTable == NULL) {
 120         fprintf(stderr, "library initialization failed - "
 121                 "unable to allocate file descriptor table - out of memory");
 122         abort();
 123     } else {
 124         for (i = 0; i < fdTableLen; i ++) {
 125             pthread_mutex_init(&fdTable[i].lock, NULL);
 126         }
 127     }
 128 
 129     /* Allocate overflow table, if needed */
 130     if (fdLimit > fdTableMaxSize) {
 131         fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1;
 132         fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*));
 133         if (fdOverflowTable == NULL) {
 134             fprintf(stderr, "library initialization failed - "
 135                     "unable to allocate file descriptor overflow table - out of memory");
 136             abort();
 137         }
 138     }
 139 
 140     /*
 141      * Setup the signal handler
 142      */
 143     sa.sa_handler = sig_wakeup;
 144     sa.sa_flags   = 0;
 145     sigemptyset(&sa.sa_mask);
 146     sigaction(sigWakeup, &sa, NULL);
 147 
 148     sigemptyset(&sigset);
 149     sigaddset(&sigset, sigWakeup);
 150     sigprocmask(SIG_UNBLOCK, &sigset, NULL);
 151 }
 152 
 153 /*
 154  * Return the fd table for this fd.
 155  */
 156 static inline fdEntry_t *getFdEntry(int fd)
 157 {
 158     fdEntry_t* result = NULL;
 159 
 160     if (fd < 0) {
 161         return NULL;
 162     }
 163 
 164     /* This should not happen. If it does, our assumption about
 165      * max. fd value was wrong. */
 166     assert(fd < fdLimit);
 167 
 168     if (fd < fdTableMaxSize) {
 169         assert(fd < fdTableLen);
 170         result = fdTable + fd;
 171     } else {
 172         const int indexInOverflowTable = fd - fdTableMaxSize;
 173         const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize;
 174         const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize;
 175         assert(rootindex < fdOverflowTableLen);
 176         assert(slabindex < fdOverflowTableSlabSize);
 177         pthread_mutex_lock(&fdOverflowTableLock);
 178         if (fdOverflowTable[rootindex] == NULL) {
 179             fdEntry_t* const newSlab =
 180                 (fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t));
 181             if (newSlab == NULL) {
 182                 fprintf(stderr, "Unable to allocate file descriptor table - out of memory");
 183                 pthread_mutex_unlock(&fdOverflowTableLock);
 184                 abort();
 185             } else {
 186                 int i;
 187                 for (i = 0; i < fdOverflowTableSlabSize; i ++) {
 188                     pthread_mutex_init(&newSlab[i].lock, NULL);
 189                 }
 190                 fdOverflowTable[rootindex] = newSlab;
 191             }
 192         }
 193         pthread_mutex_unlock(&fdOverflowTableLock);
 194         result = fdOverflowTable[rootindex] + slabindex;
 195     }
 196 
 197     return result;
 198 
 199 }
 200 
 201 /*
 202  * Start a blocking operation :-
 203  *    Insert thread onto thread list for the fd.
 204  */
 205 static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
 206 {
 207     self->thr = pthread_self();
 208     self->intr = 0;
 209 
 210     pthread_mutex_lock(&(fdEntry->lock));
 211     {
 212         self->next = fdEntry->threads;
 213         fdEntry->threads = self;
 214     }
 215     pthread_mutex_unlock(&(fdEntry->lock));
 216 }
 217 
 218 /*
 219  * End a blocking operation :-
 220  *     Remove thread from thread list for the fd
 221  *     If fd has been interrupted then set errno to EBADF
 222  */
 223 static inline void endOp
 224     (fdEntry_t *fdEntry, threadEntry_t *self)
 225 {
 226     int orig_errno = errno;
 227     pthread_mutex_lock(&(fdEntry->lock));
 228     {
 229         threadEntry_t *curr, *prev=NULL;
 230         curr = fdEntry->threads;
 231         while (curr != NULL) {
 232             if (curr == self) {
 233                 if (curr->intr) {
 234                     orig_errno = EBADF;
 235                 }
 236                 if (prev == NULL) {
 237                     fdEntry->threads = curr->next;
 238                 } else {
 239                     prev->next = curr->next;
 240                 }
 241                 break;
 242             }
 243             prev = curr;
 244             curr = curr->next;
 245         }
 246     }
 247     pthread_mutex_unlock(&(fdEntry->lock));
 248     errno = orig_errno;
 249 }
 250 
 251 /*
 252  * Close or dup2 a file descriptor ensuring that all threads blocked on
 253  * the file descriptor are notified via a wakeup signal.
 254  *
 255  *      fd1 < 0    => close(fd2)
 256  *      fd1 >= 0   => dup2(fd1, fd2)
 257  *
 258  * Returns -1 with errno set if operation fails.
 259  */
 260 static int closefd(int fd1, int fd2) {
 261     int rv, orig_errno;
 262     fdEntry_t *fdEntry = getFdEntry(fd2);
 263     if (fdEntry == NULL) {
 264         errno = EBADF;
 265         return -1;
 266     }
 267 
 268     /*
 269      * Lock the fd to hold-off additional I/O on this fd.
 270      */
 271     pthread_mutex_lock(&(fdEntry->lock));
 272 
 273     {
 274         /*
 275          * And close/dup the file descriptor
 276          * (restart if interrupted by signal)
 277          */
 278         do {
 279             if (fd1 < 0) {
 280                 rv = close(fd2);
 281             } else {
 282                 rv = dup2(fd1, fd2);
 283             }
 284         } while (rv == -1 && errno == EINTR);
 285 
 286         /*
 287          * Send a wakeup signal to all threads blocked on this
 288          * file descriptor.
 289          */
 290         threadEntry_t *curr = fdEntry->threads;
 291         while (curr != NULL) {
 292             curr->intr = 1;
 293             pthread_kill( curr->thr, sigWakeup );
 294             curr = curr->next;
 295         }
 296     }
 297 
 298     /*
 299      * Unlock without destroying errno
 300      */
 301     orig_errno = errno;
 302     pthread_mutex_unlock(&(fdEntry->lock));
 303     errno = orig_errno;
 304 
 305     return rv;
 306 }
 307 
 308 /*
 309  * Wrapper for dup2 - same semantics as dup2 system call except
 310  * that any threads blocked in an I/O system call on fd2 will be
 311  * preempted and return -1/EBADF;
 312  */
 313 int NET_Dup2(int fd, int fd2) {
 314     if (fd < 0) {
 315         errno = EBADF;
 316         return -1;
 317     }
 318     return closefd(fd, fd2);
 319 }
 320 
 321 /*
 322  * Wrapper for close - same semantics as close system call
 323  * except that any threads blocked in an I/O on fd will be
 324  * preempted and the I/O system call will return -1/EBADF.
 325  */
 326 int NET_SocketClose(int fd) {
 327     return closefd(-1, fd);
 328 }
 329 
 330 /************** Basic I/O operations here ***************/
 331 
 332 /*
 333  * Macro to perform a blocking IO operation. Restarts
 334  * automatically if interrupted by signal (other than
 335  * our wakeup signal)
 336  */
 337 #define BLOCKING_IO_RETURN_INT(FD, FUNC) {      \
 338     int ret;                                    \
 339     threadEntry_t self;                         \
 340     fdEntry_t *fdEntry = getFdEntry(FD);        \
 341     if (fdEntry == NULL) {                      \
 342         errno = EBADF;                          \
 343         return -1;                              \
 344     }                                           \
 345     do {                                        \
 346         startOp(fdEntry, &self);                \
 347         ret = FUNC;                             \
 348         endOp(fdEntry, &self);                  \
 349     } while (ret == -1 && errno == EINTR);      \
 350     return ret;                                 \
 351 }
 352 
 353 int NET_Read(int s, void* buf, size_t len) {
 354     BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
 355 }
 356 
 357 int NET_ReadV(int s, const struct iovec * vector, int count) {
 358     BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
 359 }
 360 
 361 int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
 362        struct sockaddr *from, socklen_t *fromlen) {
 363     BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) );
 364 }
 365 
 366 int NET_Send(int s, void *msg, int len, unsigned int flags) {
 367     BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
 368 }
 369 
 370 int NET_WriteV(int s, const struct iovec * vector, int count) {
 371     BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) );
 372 }
 373 
 374 int NET_SendTo(int s, const void *msg, int len,  unsigned  int
 375        flags, const struct sockaddr *to, int tolen) {
 376     BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
 377 }
 378 
 379 int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) {
 380     BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) );
 381 }
 382 
 383 int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
 384     BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) );
 385 }
 386 
 387 int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
 388     BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
 389 }
 390 
 391 /*
 392  * Wrapper for poll(s, timeout).
 393  * Auto restarts with adjusted timeout if interrupted by
 394  * signal other than our wakeup signal.
 395  */
 396 int NET_Timeout(int s, long timeout) {
 397     long prevtime = 0, newtime;
 398     struct timeval t;
 399     fdEntry_t *fdEntry = getFdEntry(s);
 400 
 401     /*
 402      * Check that fd hasn't been closed.
 403      */
 404     if (fdEntry == NULL) {
 405         errno = EBADF;
 406         return -1;
 407     }
 408 
 409     /*
 410      * Pick up current time as may need to adjust timeout
 411      */
 412     if (timeout > 0) {
 413         gettimeofday(&t, NULL);
 414         prevtime = t.tv_sec * 1000  +  t.tv_usec / 1000;
 415     }
 416 
 417     for(;;) {
 418         struct pollfd pfd;
 419         int rv;
 420         threadEntry_t self;
 421 
 422         /*
 423          * Poll the fd. If interrupted by our wakeup signal
 424          * errno will be set to EBADF.
 425          */
 426         pfd.fd = s;
 427         pfd.events = POLLIN | POLLERR;
 428 
 429         startOp(fdEntry, &self);
 430         rv = poll(&pfd, 1, timeout);
 431         endOp(fdEntry, &self);
 432 
 433         /*
 434          * If interrupted then adjust timeout. If timeout
 435          * has expired return 0 (indicating timeout expired).
 436          */
 437         if (rv < 0 && errno == EINTR) {
 438             if (timeout > 0) {
 439                 gettimeofday(&t, NULL);
 440                 newtime = t.tv_sec * 1000  +  t.tv_usec / 1000;
 441                 timeout -= newtime - prevtime;
 442                 if (timeout <= 0) {
 443                     return 0;
 444                 }
 445                 prevtime = newtime;
 446             }
 447         } else {
 448             return rv;
 449         }
 450 
 451     }
 452 }