1 /*
   2  * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #include <assert.h>
  27 #include <limits.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <sys/param.h>
  31 #include <signal.h>
  32 #include <pthread.h>
  33 #include <sys/types.h>
  34 #include <sys/socket.h>
  35 #include <sys/select.h>
  36 #include <sys/time.h>
  37 #include <sys/resource.h>
  38 #include <sys/uio.h>
  39 #include <unistd.h>
  40 #include <errno.h>
  41 #include <sys/poll.h>
  42 
/*
 * Per-operation record describing one thread that is (about to be) blocked
 * in an I/O call on a file descriptor.
 *
 * Instances are declared as local (stack) variables by the blocking
 * wrappers in this file and are linked into the owning fdEntry_t's
 * "threads" list by startOp() and unlinked again by endOp().
 */
typedef struct threadEntry {
    pthread_t thr;                      /* this thread */
    struct threadEntry *next;           /* next thread */
    int intr;                           /* interrupted */
} threadEntry_t;
  51 
/*
 * Per file descriptor bookkeeping - one entry per fd.
 *
 * Entries for low-numbered descriptors are heap allocated during
 * initialization (see init()); entries for high-numbered descriptors
 * live in overflow slabs that getFdEntry() allocates on demand.
 */
typedef struct {
    pthread_mutex_t lock;               /* fd lock */
    threadEntry_t *threads;             /* threads blocked on fd */
} fdEntry_t;
  59 
/*
 * Signal used to unblock a thread that is blocked in an I/O call.
 * init() installs a no-op handler for it and closefd() delivers it
 * with pthread_kill() to every thread registered on the descriptor.
 */
static int sigWakeup = SIGIO;
  64 
/*
 * fdTable holds one entry per file descriptor, up to a certain
 * maximum (fdTableMaxSize).
 * Theoretically, the number of possible file descriptors can get
 * large, though usually it does not. To save memory, we keep file
 * descriptors with large numerical values in an overflow table. That
 * table is organized as a two-dimensional sparse array whose slabs are
 * allocated on demand.
 */

/* Directly indexed table for descriptors 0 .. fdTableLen-1. */
static fdEntry_t* fdTable;
/* Max. number of file descriptors in fdTable. */
static const int fdTableMaxSize = 0x1000; /* 4K */
/* Max. theoretical number of file descriptors on the system,
 * derived from RLIMIT_NOFILE in init(). */
static int fdLimit;
/* Length of fdTable, in number of entries:
 * the smaller of fdLimit and fdTableMaxSize. */
static int fdTableLen;
  82 
/* Overflow table: organized as an array of n slabs, each holding
 *   64k entries. It covers descriptors >= fdTableMaxSize. The root
 *   array is allocated in init(); a slab pointer stays NULL until
 *   getFdEntry() first needs an entry inside that slab.
 */
static fdEntry_t** fdOverflowTable;
/* Number of slabs in the overflow table */
static int fdOverflowTableLen;
/* Number of entries in one slab */
static const int fdOverflowTableSlabSize = 0x10000; /* 64k */
/* Serializes the on-demand slab allocation done in getFdEntry(). */
pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER;
  92 
  93 /*
  94  * Null signal handler
  95  */
  96 static void sig_wakeup(int sig) {
  97 }
  98 
  99 /*
 100  * Initialization routine (executed when library is loaded)
 101  * Allocate fd tables and sets up signal handler.
 102  */
 103 static void __attribute((constructor)) init() {
 104     struct rlimit nbr_files;
 105     sigset_t sigset;
 106     struct sigaction sa;
 107     int i = 0;
 108 
 109     /* Determine the maximum number of possible file descriptors. */
 110     getrlimit(RLIMIT_NOFILE, &nbr_files);
 111     if (nbr_files.rlim_max != RLIM_INFINITY) {
 112         fdLimit = nbr_files.rlim_max;
 113     } else {
 114         /* We just do not know. */
 115         fdLimit = INT_MAX;
 116     }
 117 
 118     /* Allocate table for low value file descriptors. */
 119     fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize;
 120     fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t));
 121     if (fdTable == NULL) {
 122         fprintf(stderr, "library initialization failed - "
 123                 "unable to allocate file descriptor table - out of memory");
 124         abort();
 125     } else {
 126         for (i = 0; i < fdTableLen; i ++) {
 127             pthread_mutex_init(&fdTable[i].lock, NULL);
 128         }
 129     }
 130 
 131     /* Allocate overflow table, if needed */
 132     if (fdLimit > fdTableMaxSize) {
 133         fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1;
 134         fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*));
 135         if (fdOverflowTable == NULL) {
 136             fprintf(stderr, "library initialization failed - "
 137                     "unable to allocate file descriptor overflow table - out of memory");
 138             abort();
 139         }
 140     }
 141 
 142     /*
 143      * Setup the signal handler
 144      */
 145     sa.sa_handler = sig_wakeup;
 146     sa.sa_flags   = 0;
 147     sigemptyset(&sa.sa_mask);
 148     sigaction(sigWakeup, &sa, NULL);
 149 
 150     sigemptyset(&sigset);
 151     sigaddset(&sigset, sigWakeup);
 152     sigprocmask(SIG_UNBLOCK, &sigset, NULL);
 153 }
 154 
 155 /*
 156  * Return the fd table for this fd.
 157  */
 158 static inline fdEntry_t *getFdEntry(int fd)
 159 {
 160     fdEntry_t* result = NULL;
 161 
 162     if (fd < 0) {
 163         return NULL;
 164     }
 165 
 166     /* This should not happen. If it does, our assumption about
 167      * max. fd value was wrong. */
 168     assert(fd < fdLimit);
 169 
 170     if (fd < fdTableMaxSize) {
 171         assert(fd < fdTableLen);
 172         result = fdTable + fd;
 173     } else {
 174         const int indexInOverflowTable = fd - fdTableMaxSize;
 175         const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize;
 176         const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize;
 177         assert(rootindex < fdOverflowTableLen);
 178         assert(slabindex < fdOverflowTableSlabSize);
 179         pthread_mutex_lock(&fdOverflowTableLock);
 180         if (fdOverflowTable[rootindex] == NULL) {
 181             fdEntry_t* const newSlab =
 182                 (fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t));
 183             if (newSlab == NULL) {
 184                 fprintf(stderr, "Unable to allocate file descriptor table - out of memory");
 185                 pthread_mutex_unlock(&fdOverflowTableLock);
 186                 abort();
 187             } else {
 188                 int i;
 189                 for (i = 0; i < fdOverflowTableSlabSize; i ++) {
 190                     pthread_mutex_init(&newSlab[i].lock, NULL);
 191                 }
 192                 fdOverflowTable[rootindex] = newSlab;
 193             }
 194         }
 195         pthread_mutex_unlock(&fdOverflowTableLock);
 196         result = fdOverflowTable[rootindex] + slabindex;
 197     }
 198 
 199     return result;
 200 
 201 }
 202 
 203 /*
 204  * Start a blocking operation :-
 205  *    Insert thread onto thread list for the fd.
 206  */
 207 static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
 208 {
 209     self->thr = pthread_self();
 210     self->intr = 0;
 211 
 212     pthread_mutex_lock(&(fdEntry->lock));
 213     {
 214         self->next = fdEntry->threads;
 215         fdEntry->threads = self;
 216     }
 217     pthread_mutex_unlock(&(fdEntry->lock));
 218 }
 219 
 220 /*
 221  * End a blocking operation :-
 222  *     Remove thread from thread list for the fd
 223  *     If fd has been interrupted then set errno to EBADF
 224  */
 225 static inline void endOp
 226     (fdEntry_t *fdEntry, threadEntry_t *self)
 227 {
 228     int orig_errno = errno;
 229     pthread_mutex_lock(&(fdEntry->lock));
 230     {
 231         threadEntry_t *curr, *prev=NULL;
 232         curr = fdEntry->threads;
 233         while (curr != NULL) {
 234             if (curr == self) {
 235                 if (curr->intr) {
 236                     orig_errno = EBADF;
 237                 }
 238                 if (prev == NULL) {
 239                     fdEntry->threads = curr->next;
 240                 } else {
 241                     prev->next = curr->next;
 242                 }
 243                 break;
 244             }
 245             prev = curr;
 246             curr = curr->next;
 247         }
 248     }
 249     pthread_mutex_unlock(&(fdEntry->lock));
 250     errno = orig_errno;
 251 }
 252 
 253 /*
 254  * Close or dup2 a file descriptor ensuring that all threads blocked on
 255  * the file descriptor are notified via a wakeup signal.
 256  *
 257  *      fd1 < 0    => close(fd2)
 258  *      fd1 >= 0   => dup2(fd1, fd2)
 259  *
 260  * Returns -1 with errno set if operation fails.
 261  */
 262 static int closefd(int fd1, int fd2) {
 263     int rv, orig_errno;
 264     fdEntry_t *fdEntry = getFdEntry(fd2);
 265     if (fdEntry == NULL) {
 266         errno = EBADF;
 267         return -1;
 268     }
 269 
 270     /*
 271      * Lock the fd to hold-off additional I/O on this fd.
 272      */
 273     pthread_mutex_lock(&(fdEntry->lock));
 274 
 275     {
 276         /*
 277          * Send a wakeup signal to all threads blocked on this
 278          * file descriptor.
 279          */
 280         threadEntry_t *curr = fdEntry->threads;
 281         while (curr != NULL) {
 282             curr->intr = 1;
 283             pthread_kill( curr->thr, sigWakeup );
 284             curr = curr->next;
 285         }
 286 
 287         /*
 288          * And close/dup the file descriptor
 289          * (restart if interrupted by signal)
 290          */
 291         do {
 292             if (fd1 < 0) {
 293                 rv = close(fd2);
 294             } else {
 295                 rv = dup2(fd1, fd2);
 296             }
 297         } while (rv == -1 && errno == EINTR);
 298 
 299     }
 300 
 301     /*
 302      * Unlock without destroying errno
 303      */
 304     orig_errno = errno;
 305     pthread_mutex_unlock(&(fdEntry->lock));
 306     errno = orig_errno;
 307 
 308     return rv;
 309 }
 310 
 311 /*
 312  * Wrapper for dup2 - same semantics as dup2 system call except
 313  * that any threads blocked in an I/O system call on fd2 will be
 314  * preempted and return -1/EBADF;
 315  */
 316 int NET_Dup2(int fd, int fd2) {
 317     if (fd < 0) {
 318         errno = EBADF;
 319         return -1;
 320     }
 321     return closefd(fd, fd2);
 322 }
 323 
 324 /*
 325  * Wrapper for close - same semantics as close system call
 326  * except that any threads blocked in an I/O on fd will be
 327  * preempted and the I/O system call will return -1/EBADF.
 328  */
 329 int NET_SocketClose(int fd) {
 330     return closefd(-1, fd);
 331 }
 332 
 333 /************** Basic I/O operations here ***************/
 334 
/*
 * Macro forming the complete body of a blocking I/O wrapper.
 *
 *   FD    the file descriptor the calling thread is registered on
 *   FUNC  the expression performing the blocking call; its value is
 *         stored in an int and returned from the enclosing function
 *
 * If no table entry exists for FD the enclosing function returns -1 with
 * errno set to EBADF. Otherwise FUNC is evaluated between startOp() and
 * endOp() and is restarted automatically while it yields -1 with errno
 * EINTR, i.e. when interrupted by a signal other than our wakeup signal
 * (for the wakeup signal endOp() replaces errno with EBADF, which ends
 * the loop).
 *
 * The macro declares the locals "ret", "self" and "fdEntry" and contains
 * return statements, so it must be the only thing in the function body.
 */
#define BLOCKING_IO_RETURN_INT(FD, FUNC) {      \
    int ret;                                    \
    threadEntry_t self;                         \
    fdEntry_t *fdEntry = getFdEntry(FD);        \
    if (fdEntry == NULL) {                      \
        errno = EBADF;                          \
        return -1;                              \
    }                                           \
    do {                                        \
        startOp(fdEntry, &self);                \
        ret = FUNC;                             \
        endOp(fdEntry, &self);                  \
    } while (ret == -1 && errno == EINTR);      \
    return ret;                                 \
}
 355 
/*
 * Interruptible receive: calls recv(s, buf, len, 0) through
 * BLOCKING_IO_RETURN_INT, so it is restarted on EINTR and fails with
 * -1/EBADF when no table entry exists for s. The result of recv() is
 * returned as an int.
 */
int NET_Read(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
}
 359 
/*
 * Interruptible scatter read: calls readv(s, vector, count) through
 * BLOCKING_IO_RETURN_INT (restart on EINTR, -1/EBADF when no table entry
 * exists for s).
 */
int NET_ReadV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
}
 363 
/*
 * Interruptible recvfrom: forwards all arguments unchanged to recvfrom()
 * through BLOCKING_IO_RETURN_INT (restart on EINTR, -1/EBADF when no
 * table entry exists for s).
 */
int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
       struct sockaddr *from, socklen_t *fromlen) {
    BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) );
}
 368 
/*
 * Interruptible send: calls send(s, msg, len, flags) through
 * BLOCKING_IO_RETURN_INT (restart on EINTR, -1/EBADF when no table entry
 * exists for s).
 */
int NET_Send(int s, void *msg, int len, unsigned int flags) {
    BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
}
 372 
/*
 * Interruptible gather write: calls writev(s, vector, count) through
 * BLOCKING_IO_RETURN_INT (restart on EINTR, -1/EBADF when no table entry
 * exists for s).
 */
int NET_WriteV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) );
}
 376 
/*
 * Interruptible sendto: forwards all arguments unchanged to sendto()
 * through BLOCKING_IO_RETURN_INT (restart on EINTR, -1/EBADF when no
 * table entry exists for s).
 */
int NET_SendTo(int s, const void *msg, int len,  unsigned  int
       flags, const struct sockaddr *to, int tolen) {
    BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
}
 381 
/*
 * Interruptible accept: calls accept(s, addr, addrlen) through
 * BLOCKING_IO_RETURN_INT (restart on EINTR, -1/EBADF when no table entry
 * exists for s). The calling thread is registered on the listening
 * descriptor s.
 */
int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) {
    BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) );
}
 385 
/*
 * Interruptible connect: calls connect(s, addr, addrlen) through
 * BLOCKING_IO_RETURN_INT (-1/EBADF when no table entry exists for s).
 *
 * NOTE(review): like every other wrapper, connect() is simply re-issued
 * after EINTR - confirm that restarting an interrupted connect() this
 * way behaves as intended on the target platforms.
 */
int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
    BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) );
}
 389 
/*
 * Interruptible poll: calls poll(ufds, nfds, timeout) through
 * BLOCKING_IO_RETURN_INT (restart on EINTR with the original timeout).
 *
 * The calling thread is registered only on the first descriptor,
 * ufds[0].fd; closing any of the other descriptors in the set does not
 * wake it up through this mechanism.
 *
 * NOTE(review): ufds[0] is read regardless of nfds - presumably callers
 * always pass at least one entry; verify against callers.
 */
int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
    BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
}
 393 
 394 /*
 395  * Wrapper for select(s, timeout). We are using select() on Mac OS due to Bug 7131399.
 396  * Auto restarts with adjusted timeout if interrupted by
 397  * signal other than our wakeup signal.
 398  */
 399 int NET_Timeout(int s, long timeout) {
 400     long prevtime = 0, newtime;
 401     struct timeval t, *tp = &t;
 402     fd_set fds;
 403     fd_set* fdsp = NULL;
 404     int allocated = 0;
 405     threadEntry_t self;
 406     fdEntry_t *fdEntry = getFdEntry(s);
 407 
 408     /*
 409      * Check that fd hasn't been closed.
 410      */
 411     if (fdEntry == NULL) {
 412         errno = EBADF;
 413         return -1;
 414     }
 415 
 416     /*
 417      * Pick up current time as may need to adjust timeout
 418      */
 419     if (timeout > 0) {
 420         /* Timed */
 421         struct timeval now;
 422         gettimeofday(&now, NULL);
 423         prevtime = now.tv_sec * 1000  +  now.tv_usec / 1000;
 424         t.tv_sec = timeout / 1000;
 425         t.tv_usec = (timeout % 1000) * 1000;
 426     } else if (timeout < 0) {
 427         /* Blocking */
 428         tp = 0;
 429     } else {
 430         /* Poll */
 431         t.tv_sec = 0;
 432         t.tv_usec = 0;
 433     }
 434 
 435     if (s < FD_SETSIZE) {
 436         fdsp = &fds;
 437         FD_ZERO(fdsp);
 438     } else {
 439         int length = (howmany(s+1, NFDBITS)) * sizeof(int);
 440         fdsp = (fd_set *) calloc(1, length);
 441         if (fdsp == NULL) {
 442             return -1;   // errno will be set to ENOMEM
 443         }
 444         allocated = 1;
 445     }
 446     FD_SET(s, fdsp);
 447 
 448     for(;;) {
 449         int rv;
 450 
 451         /*
 452          * call select on the fd. If interrupted by our wakeup signal
 453          * errno will be set to EBADF.
 454          */
 455 
 456         startOp(fdEntry, &self);
 457         rv = select(s+1, fdsp, 0, 0, tp);
 458         endOp(fdEntry, &self);
 459 
 460         /*
 461          * If interrupted then adjust timeout. If timeout
 462          * has expired return 0 (indicating timeout expired).
 463          */
 464         if (rv < 0 && errno == EINTR) {
 465             if (timeout > 0) {
 466                 struct timeval now;
 467                 gettimeofday(&now, NULL);
 468                 newtime = now.tv_sec * 1000  +  now.tv_usec / 1000;
 469                 timeout -= newtime - prevtime;
 470                 if (timeout <= 0) {
 471                     if (allocated != 0)
 472                         free(fdsp);
 473                     return 0;
 474                 }
 475                 prevtime = newtime;
 476                 t.tv_sec = timeout / 1000;
 477                 t.tv_usec = (timeout % 1000) * 1000;
 478             }
 479         } else {
 480             if (allocated != 0)
 481                 free(fdsp);
 482             return rv;
 483         }
 484 
 485     }
 486 }