1 /*
   2  * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #include <assert.h>
  27 #include <limits.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <sys/param.h>
  31 #include <signal.h>
  32 #include <pthread.h>
  33 #include <sys/types.h>
  34 #include <sys/socket.h>
  35 #include <sys/select.h>
  36 #include <sys/time.h>
  37 #include <sys/resource.h>
  38 #include <sys/uio.h>
  39 #include <unistd.h>
  40 #include <errno.h>
  41 #include <sys/poll.h>
  42 
  43 /*
  44  * Stack allocated by thread when doing blocking operation
  45  */
  46 typedef struct threadEntry {
  47     pthread_t thr;                      /* this thread */
  48     struct threadEntry *next;           /* next thread */
  49     int intr;                           /* interrupted */
  50 } threadEntry_t;
  51 
  52 /*
  53  * Heap allocated during initialized - one entry per fd
  54  */
  55 typedef struct {
  56     pthread_mutex_t lock;               /* fd lock */
  57     threadEntry_t *threads;             /* threads blocked on fd */
  58 } fdEntry_t;
  59 
  60 /*
  61  * Signal to unblock thread
  62  */
  63 static int sigWakeup = SIGIO;
  64 
  65 /*
  66  * fdTable holds one entry per file descriptor, up to a certain
  67  * maximum.
  68  * Theoretically, the number of possible file descriptors can get
  69  * large, though usually it does not. Entries for small value file
  70  * descriptors are kept in a simple table, which covers most scenarios.
  71  * Entries for large value file descriptors are kept in an overflow
  72  * table, which is organized as a sparse two dimensional array whose
  73  * slabs are allocated on demand. This covers all corner cases while
  74  * keeping memory consumption reasonable.
  75  */
  76 
  77 /* Base table for low value file descriptors */
  78 static fdEntry_t* fdTable = NULL;
  79 /* Maximum size of base table (in number of entries). */
  80 static const int fdTableMaxSize = 0x1000; /* 4K */
  81 /* Actual size of base table (in number of entries) */
  82 static int fdTableLen = 0;
  83 /* Max. theoretical number of file descriptors on system. */
  84 static int fdLimit = 0;
  85 
  86 /* Overflow table, should base table not be large enough. Organized as
  87  *   an array of n slabs, each holding 64k entries.
  88  */
  89 static fdEntry_t** fdOverflowTable = NULL;
  90 /* Number of slabs in the overflow table */
  91 static int fdOverflowTableLen = 0;
  92 /* Number of entries in one slab */
  93 static const int fdOverflowTableSlabSize = 0x10000; /* 64k */
  94 pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER;
  95 
  96 /*
  97  * Null signal handler
  98  */
  99 static void sig_wakeup(int sig) {
 100 }
 101 
 102 /*
 103  * Initialization routine (executed when library is loaded)
 104  * Allocate fd tables and sets up signal handler.
 105  */
 106 static void __attribute((constructor)) init() {
 107     struct rlimit nbr_files;
 108     sigset_t sigset;
 109     struct sigaction sa;
 110     int i = 0;
 111 
 112     /* Determine the maximum number of possible file descriptors. */
 113     if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) {
 114         fprintf(stderr, "library initialization failed - "
 115                 "unable to get max # of allocated fds\n");
 116         abort();
 117     }
 118     if (nbr_files.rlim_max != RLIM_INFINITY) {
 119         fdLimit = nbr_files.rlim_max;
 120     } else {
 121         /* We just do not know. */
 122         fdLimit = INT_MAX;
 123     }
 124 
 125     /* Allocate table for low value file descriptors. */
 126     fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize;
 127     fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t));
 128     if (fdTable == NULL) {
 129         fprintf(stderr, "library initialization failed - "
 130                 "unable to allocate file descriptor table - out of memory");
 131         abort();
 132     } else {
 133         for (i = 0; i < fdTableLen; i ++) {
 134             pthread_mutex_init(&fdTable[i].lock, NULL);
 135         }
 136     }
 137 
 138     /* Allocate overflow table, if needed */
 139     if (fdLimit > fdTableMaxSize) {
 140         fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1;
 141         fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*));
 142         if (fdOverflowTable == NULL) {
 143             fprintf(stderr, "library initialization failed - "
 144                     "unable to allocate file descriptor overflow table - out of memory");
 145             abort();
 146         }
 147     }
 148 
 149     /*
 150      * Setup the signal handler
 151      */
 152     sa.sa_handler = sig_wakeup;
 153     sa.sa_flags   = 0;
 154     sigemptyset(&sa.sa_mask);
 155     sigaction(sigWakeup, &sa, NULL);
 156 
 157     sigemptyset(&sigset);
 158     sigaddset(&sigset, sigWakeup);
 159     sigprocmask(SIG_UNBLOCK, &sigset, NULL);
 160 }
 161 
 162 /*
 163  * Return the fd table for this fd.
 164  */
 165 static inline fdEntry_t *getFdEntry(int fd)
 166 {
 167     fdEntry_t* result = NULL;
 168 
 169     if (fd < 0) {
 170         return NULL;
 171     }
 172 
 173     /* This should not happen. If it does, our assumption about
 174      * max. fd value was wrong. */
 175     assert(fd < fdLimit);
 176 
 177     if (fd < fdTableMaxSize) {
 178         /* fd is in base table. */
 179         assert(fd < fdTableLen);
 180         result = &fdTable[fd];
 181     } else {
 182         /* fd is in overflow table. */
 183         const int indexInOverflowTable = fd - fdTableMaxSize;
 184         const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize;
 185         const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize;
 186         fdEntry_t* slab = NULL;
 187         assert(rootindex < fdOverflowTableLen);
 188         assert(slabindex < fdOverflowTableSlabSize);
 189         pthread_mutex_lock(&fdOverflowTableLock);
 190         /* Allocate new slab in overflow table if needed */
 191         if (fdOverflowTable[rootindex] == NULL) {
 192             fdEntry_t* const newSlab =
 193                 (fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t));
 194             if (newSlab == NULL) {
 195                 fprintf(stderr, "Unable to allocate file descriptor overflow"
 196                         " table slab - out of memory");
 197                 pthread_mutex_unlock(&fdOverflowTableLock);
 198                 abort();
 199             } else {
 200                 int i;
 201                 for (i = 0; i < fdOverflowTableSlabSize; i ++) {
 202                     pthread_mutex_init(&newSlab[i].lock, NULL);
 203                 }
 204                 fdOverflowTable[rootindex] = newSlab;
 205             }
 206         }
 207         pthread_mutex_unlock(&fdOverflowTableLock);
 208         slab = fdOverflowTable[rootindex];
 209         result = &slab[slabindex];
 210     }
 211 
 212     return result;
 213 
 214 }
 215 
 216 
 217 /*
 218  * Start a blocking operation :-
 219  *    Insert thread onto thread list for the fd.
 220  */
 221 static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
 222 {
 223     self->thr = pthread_self();
 224     self->intr = 0;
 225 
 226     pthread_mutex_lock(&(fdEntry->lock));
 227     {
 228         self->next = fdEntry->threads;
 229         fdEntry->threads = self;
 230     }
 231     pthread_mutex_unlock(&(fdEntry->lock));
 232 }
 233 
 234 /*
 235  * End a blocking operation :-
 236  *     Remove thread from thread list for the fd
 237  *     If fd has been interrupted then set errno to EBADF
 238  */
 239 static inline void endOp
 240     (fdEntry_t *fdEntry, threadEntry_t *self)
 241 {
 242     int orig_errno = errno;
 243     pthread_mutex_lock(&(fdEntry->lock));
 244     {
 245         threadEntry_t *curr, *prev=NULL;
 246         curr = fdEntry->threads;
 247         while (curr != NULL) {
 248             if (curr == self) {
 249                 if (curr->intr) {
 250                     orig_errno = EBADF;
 251                 }
 252                 if (prev == NULL) {
 253                     fdEntry->threads = curr->next;
 254                 } else {
 255                     prev->next = curr->next;
 256                 }
 257                 break;
 258             }
 259             prev = curr;
 260             curr = curr->next;
 261         }
 262     }
 263     pthread_mutex_unlock(&(fdEntry->lock));
 264     errno = orig_errno;
 265 }
 266 
 267 /*
 268  * Close or dup2 a file descriptor ensuring that all threads blocked on
 269  * the file descriptor are notified via a wakeup signal.
 270  *
 271  *      fd1 < 0    => close(fd2)
 272  *      fd1 >= 0   => dup2(fd1, fd2)
 273  *
 274  * Returns -1 with errno set if operation fails.
 275  */
 276 static int closefd(int fd1, int fd2) {
 277     int rv, orig_errno;
 278     fdEntry_t *fdEntry = getFdEntry(fd2);
 279     if (fdEntry == NULL) {
 280         errno = EBADF;
 281         return -1;
 282     }
 283 
 284     /*
 285      * Lock the fd to hold-off additional I/O on this fd.
 286      */
 287     pthread_mutex_lock(&(fdEntry->lock));
 288 
 289     {
 290         /*
 291          * Send a wakeup signal to all threads blocked on this
 292          * file descriptor.
 293          */
 294         threadEntry_t *curr = fdEntry->threads;
 295         while (curr != NULL) {
 296             curr->intr = 1;
 297             pthread_kill( curr->thr, sigWakeup );
 298             curr = curr->next;
 299         }
 300 
 301         /*
 302          * And close/dup the file descriptor
 303          * (restart if interrupted by signal)
 304          */
 305         do {
 306             if (fd1 < 0) {
 307                 rv = close(fd2);
 308             } else {
 309                 rv = dup2(fd1, fd2);
 310             }
 311         } while (rv == -1 && errno == EINTR);
 312 
 313     }
 314 
 315     /*
 316      * Unlock without destroying errno
 317      */
 318     orig_errno = errno;
 319     pthread_mutex_unlock(&(fdEntry->lock));
 320     errno = orig_errno;
 321 
 322     return rv;
 323 }
 324 
 325 /*
 326  * Wrapper for dup2 - same semantics as dup2 system call except
 327  * that any threads blocked in an I/O system call on fd2 will be
 328  * preempted and return -1/EBADF;
 329  */
 330 int NET_Dup2(int fd, int fd2) {
 331     if (fd < 0) {
 332         errno = EBADF;
 333         return -1;
 334     }
 335     return closefd(fd, fd2);
 336 }
 337 
 338 /*
 339  * Wrapper for close - same semantics as close system call
 340  * except that any threads blocked in an I/O on fd will be
 341  * preempted and the I/O system call will return -1/EBADF.
 342  */
 343 int NET_SocketClose(int fd) {
 344     return closefd(-1, fd);
 345 }
 346 
 347 /************** Basic I/O operations here ***************/
 348 
 349 /*
 350  * Macro to perform a blocking IO operation. Restarts
 351  * automatically if interrupted by signal (other than
 352  * our wakeup signal)
 353  */
 354 #define BLOCKING_IO_RETURN_INT(FD, FUNC) {      \
 355     int ret;                                    \
 356     threadEntry_t self;                         \
 357     fdEntry_t *fdEntry = getFdEntry(FD);        \
 358     if (fdEntry == NULL) {                      \
 359         errno = EBADF;                          \
 360         return -1;                              \
 361     }                                           \
 362     do {                                        \
 363         startOp(fdEntry, &self);                \
 364         ret = FUNC;                             \
 365         endOp(fdEntry, &self);                  \
 366     } while (ret == -1 && errno == EINTR);      \
 367     return ret;                                 \
 368 }
 369 
 370 int NET_Read(int s, void* buf, size_t len) {
 371     BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
 372 }
 373 
 374 int NET_ReadV(int s, const struct iovec * vector, int count) {
 375     BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
 376 }
 377 
 378 int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
 379        struct sockaddr *from, socklen_t *fromlen) {
 380     BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) );
 381 }
 382 
 383 int NET_Send(int s, void *msg, int len, unsigned int flags) {
 384     BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
 385 }
 386 
 387 int NET_WriteV(int s, const struct iovec * vector, int count) {
 388     BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) );
 389 }
 390 
 391 int NET_SendTo(int s, const void *msg, int len,  unsigned  int
 392        flags, const struct sockaddr *to, int tolen) {
 393     BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
 394 }
 395 
 396 int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) {
 397     BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) );
 398 }
 399 
 400 int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
 401     BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) );
 402 }
 403 
 404 int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
 405     BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
 406 }
 407 
 408 /*
 409  * Wrapper for select(s, timeout). We are using select() on Mac OS due to Bug 7131399.
 410  * Auto restarts with adjusted timeout if interrupted by
 411  * signal other than our wakeup signal.
 412  */
 413 int NET_Timeout(int s, long timeout) {
 414     long prevtime = 0, newtime;
 415     struct timeval t, *tp = &t;
 416     fd_set fds;
 417     fd_set* fdsp = NULL;
 418     int allocated = 0;
 419     threadEntry_t self;
 420     fdEntry_t *fdEntry = getFdEntry(s);
 421 
 422     /*
 423      * Check that fd hasn't been closed.
 424      */
 425     if (fdEntry == NULL) {
 426         errno = EBADF;
 427         return -1;
 428     }
 429 
 430     /*
 431      * Pick up current time as may need to adjust timeout
 432      */
 433     if (timeout > 0) {
 434         /* Timed */
 435         struct timeval now;
 436         gettimeofday(&now, NULL);
 437         prevtime = now.tv_sec * 1000  +  now.tv_usec / 1000;
 438         t.tv_sec = timeout / 1000;
 439         t.tv_usec = (timeout % 1000) * 1000;
 440     } else if (timeout < 0) {
 441         /* Blocking */
 442         tp = 0;
 443     } else {
 444         /* Poll */
 445         t.tv_sec = 0;
 446         t.tv_usec = 0;
 447     }
 448 
 449     if (s < FD_SETSIZE) {
 450         fdsp = &fds;
 451         FD_ZERO(fdsp);
 452     } else {
 453         int length = (howmany(s+1, NFDBITS)) * sizeof(int);
 454         fdsp = (fd_set *) calloc(1, length);
 455         if (fdsp == NULL) {
 456             return -1;   // errno will be set to ENOMEM
 457         }
 458         allocated = 1;
 459     }
 460     FD_SET(s, fdsp);
 461 
 462     for(;;) {
 463         int rv;
 464 
 465         /*
 466          * call select on the fd. If interrupted by our wakeup signal
 467          * errno will be set to EBADF.
 468          */
 469 
 470         startOp(fdEntry, &self);
 471         rv = select(s+1, fdsp, 0, 0, tp);
 472         endOp(fdEntry, &self);
 473 
 474         /*
 475          * If interrupted then adjust timeout. If timeout
 476          * has expired return 0 (indicating timeout expired).
 477          */
 478         if (rv < 0 && errno == EINTR) {
 479             if (timeout > 0) {
 480                 struct timeval now;
 481                 gettimeofday(&now, NULL);
 482                 newtime = now.tv_sec * 1000  +  now.tv_usec / 1000;
 483                 timeout -= newtime - prevtime;
 484                 if (timeout <= 0) {
 485                     if (allocated != 0)
 486                         free(fdsp);
 487                     return 0;
 488                 }
 489                 prevtime = newtime;
 490                 t.tv_sec = timeout / 1000;
 491                 t.tv_usec = (timeout % 1000) * 1000;
 492             }
 493         } else {
 494             if (allocated != 0)
 495                 free(fdsp);
 496             return rv;
 497         }
 498 
 499     }
 500 }