/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * This file contains implementations of the NET_... functions. The NET_...
 * functions are wrappers for common file and socket functions plus provisions
 * for non-blocking IO.
 *
 * (Basically, the layer remembers, per file descriptor, all threads currently
 *  blocked on that fd; all of these threads can be woken up by sending them a
 *  signal; this is done e.g. when the fd is closed.)
 *
 * This was originally copied from the linux_close.c implementation.
 *
 * Side note: this code needs initialization. Under Linux this is done
 * automatically via __attribute((constructor)); on AIX it is done manually
 * (see aix_close_init).
 */
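
/*
 * Illustrative usage sketch (not part of this file; 'sock' is a placeholder
 * for any connected socket descriptor): a thread blocked in NET_Read() can be
 * released by another thread closing the same fd via NET_SocketClose(); the
 * blocked call then returns -1 with errno set to EBADF.
 *
 *   // Thread A
 *   char buf[128];
 *   int n = NET_Read(sock, buf, sizeof(buf));   // blocks in recv()
 *   if (n == -1 && errno == EBADF) {
 *       // the fd was closed (or dup2'ed over) concurrently
 *   }
 *
 *   // Thread B
 *   NET_SocketClose(sock);   // marks blocked threads, signals them, closes sock
 */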

/*
   AIX needs a workaround for I/O cancellation, see:
   http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.basetechref/doc/basetrf1/close.htm
   ...
   The close subroutine is blocked until all subroutines which use the file
   descriptor return to usr space. For example, when a thread is calling close
   and another thread is calling select with the same file descriptor, the
   close subroutine does not return until the select call returns.
   ...
*/

#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/uio.h>
#include <unistd.h>
#include <errno.h>

#include <sys/poll.h>

/*
 * Stack allocated by thread when doing blocking operation
 */
typedef struct threadEntry {
    pthread_t thr;                      /* this thread */
    struct threadEntry *next;           /* next thread */
    int intr;                           /* interrupted */
} threadEntry_t;

/*
 * Heap allocated during initialization - one entry per fd
 */
typedef struct {
    pthread_mutex_t lock;               /* fd lock */
    threadEntry_t *threads;             /* threads blocked on fd */
} fdEntry_t;

/*
 * Signal to unblock thread
 */
static int sigWakeup = (SIGRTMAX - 1);

/*
 * The fd table and the number of file descriptors
 */
static fdEntry_t *fdTable = NULL;
static int fdCount = 0;

/*
 * Null signal handler
 */
static void sig_wakeup(int sig) {
}

/*
 * Initialization routine (executed when library is loaded).
 * Allocates the fd table and sets up the signal handler.
 *
 * On AIX we don't have __attribute((constructor)), so we need to initialize
 * manually (from JNI_OnLoad() in 'src/share/native/java/net/net_util.c').
 */
void aix_close_init() {
    struct rlimit nbr_files;
    sigset_t sigset;
    struct sigaction sa;

    /* Check if already initialized */
    if (fdCount > 0 && fdTable != NULL) {
        return;
    }

    /*
     * Allocate table based on the maximum number of
     * file descriptors.
     */
    if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) {
        fprintf(stderr, "library initialization failed - "
                "unable to get max # of allocated fds\n");
        abort();
    }
    fdCount = nbr_files.rlim_max;
    /*
     * We have a conceptual problem here when the number of files is
     * unlimited. As a kind of workaround, we ensure the table is big
     * enough to handle even a large number of files. Since SAP itself
     * recommends a limit of 32000 files, we just use 64000 as 'infinity'.
     */
    if (nbr_files.rlim_max == RLIM_INFINITY) {
        fdCount = 64000;
    }
    fdTable = (fdEntry_t *)calloc(fdCount, sizeof(fdEntry_t));
    if (fdTable == NULL) {
        fprintf(stderr, "library initialization failed - "
                "unable to allocate file descriptor table - out of memory\n");
        abort();
    }

    {
        int i;
        for (i = 0; i < fdCount; i++) {
            pthread_mutex_init(&fdTable[i].lock, NULL);
        }
    }

    /*
     * Set up the signal handler
     */
    sa.sa_handler = sig_wakeup;
    sa.sa_flags   = 0;
    sigemptyset(&sa.sa_mask);
    sigaction(sigWakeup, &sa, NULL);

    sigemptyset(&sigset);
    sigaddset(&sigset, sigWakeup);
    sigprocmask(SIG_UNBLOCK, &sigset, NULL);
}
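
/*
 * Illustrative sketch of the manual initialization mentioned above (the real
 * call site lives in JNI_OnLoad() in src/share/native/java/net/net_util.c and
 * may differ in detail); shown here only to clarify the intended wiring:
 *
 *   JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved) {
 *   #ifdef _AIX
 *       aix_close_init();   // allocate fd table, install sigWakeup handler
 *   #endif
 *       // ... remainder of the usual JNI_OnLoad work ...
 *       return JNI_VERSION_1_2;
 *   }
 */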

/*
 * Return the fd table entry for this fd, or NULL if fd is
 * out of range.
 */
static inline fdEntry_t *getFdEntry(int fd)
{
    if (fd < 0 || fd >= fdCount) {
        return NULL;
    }
    return &fdTable[fd];
}

/*
 * Start a blocking operation :-
 *    Insert thread onto thread list for the fd.
 */
static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    self->thr = pthread_self();
    self->intr = 0;

    pthread_mutex_lock(&(fdEntry->lock));
    {
        self->next = fdEntry->threads;
        fdEntry->threads = self;
    }
    pthread_mutex_unlock(&(fdEntry->lock));
}

/*
 * End a blocking operation :-
 *     Remove thread from thread list for the fd
 *     If fd has been interrupted then set errno to EBADF
 */
static inline void endOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    int orig_errno = errno;
    pthread_mutex_lock(&(fdEntry->lock));
    {
        threadEntry_t *curr, *prev = NULL;
        curr = fdEntry->threads;
        while (curr != NULL) {
            if (curr == self) {
                if (curr->intr) {
                    orig_errno = EBADF;
                }
                if (prev == NULL) {
                    fdEntry->threads = curr->next;
                } else {
                    prev->next = curr->next;
                }
                break;
            }
            prev = curr;
            curr = curr->next;
        }
    }
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;
}

/*
 * Close or dup2 a file descriptor ensuring that all threads blocked on
 * the file descriptor are notified via a wakeup signal.
 *
 *      fd1 < 0    => close(fd2)
 *      fd1 >= 0   => dup2(fd1, fd2)
 *
 * Returns -1 with errno set if operation fails.
 */
static int closefd(int fd1, int fd2) {
    int rv, orig_errno;
    fdEntry_t *fdEntry = getFdEntry(fd2);
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Lock the fd to hold off additional I/O on this fd.
     */
    pthread_mutex_lock(&(fdEntry->lock));

    {
        /* On fast machines we see that we enter dup2 before the
         * accepting thread has had a chance to get and process the signal.
         * So in case we woke a thread up, give it some time to cope.
         * Also see https://bugs.openjdk.java.net/browse/JDK-8006395 */
        int num_woken = 0;

        /*
         * Send a wakeup signal to all threads blocked on this
         * file descriptor.
         */
        threadEntry_t *curr = fdEntry->threads;
        while (curr != NULL) {
            curr->intr = 1;
            pthread_kill(curr->thr, sigWakeup);
            num_woken++;
            curr = curr->next;
        }

        if (num_woken > 0) {
            usleep(num_woken * 50);
        }

        /*
         * And close/dup the file descriptor
         * (restart if interrupted by signal)
         */
        do {
            if (fd1 < 0) {
                rv = close(fd2);
            } else {
                rv = dup2(fd1, fd2);
            }
        } while (rv == -1 && errno == EINTR);
    }

    /*
     * Unlock without destroying errno
     */
    orig_errno = errno;
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;

    return rv;
}

/*
 * Wrapper for dup2 - same semantics as the dup2 system call except
 * that any threads blocked in an I/O system call on fd2 will be
 * preempted and return -1/EBADF.
 */
int NET_Dup2(int fd, int fd2) {
    if (fd < 0) {
        errno = EBADF;
        return -1;
    }
    return closefd(fd, fd2);
}

/*
 * Wrapper for close - same semantics as the close system call
 * except that any threads blocked in an I/O on fd will be
 * preempted and the I/O system call will return -1/EBADF.
 */
int NET_SocketClose(int fd) {
    return closefd(-1, fd);
}

/************** Basic I/O operations here ***************/

/*
 * Macro to perform a blocking IO operation. Restarts
 * automatically if interrupted by a signal (other than
 * our wakeup signal).
 */
#define BLOCKING_IO_RETURN_INT(FD, FUNC) {      \
    int ret;                                    \
    threadEntry_t self;                         \
    fdEntry_t *fdEntry = getFdEntry(FD);        \
    if (fdEntry == NULL) {                      \
        errno = EBADF;                          \
        return -1;                              \
    }                                           \
    do {                                        \
        startOp(fdEntry, &self);                \
        ret = FUNC;                             \
        endOp(fdEntry, &self);                  \
    } while (ret == -1 && errno == EINTR);      \
    return ret;                                 \
}
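
/*
 * For clarity, a sketch of what a wrapper built on this macro effectively
 * expands to (illustration only, using NET_Read as the example):
 *
 *   int NET_Read(int s, void* buf, size_t len) {
 *       int ret;
 *       threadEntry_t self;
 *       fdEntry_t *fdEntry = getFdEntry(s);
 *       if (fdEntry == NULL) {
 *           errno = EBADF;
 *           return -1;
 *       }
 *       do {
 *           startOp(fdEntry, &self);       // register as blocked on s
 *           ret = recv(s, buf, len, 0);    // the actual (blocking) call
 *           endOp(fdEntry, &self);         // deregister; sets EBADF if interrupted
 *       } while (ret == -1 && errno == EINTR);
 *       return ret;
 *   }
 */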

int NET_Read(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
}

int NET_ReadV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
}

int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
       struct sockaddr *from, int *fromlen) {
    socklen_t socklen = *fromlen;
    BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, &socklen) );
    *fromlen = socklen;
}

int NET_Send(int s, void *msg, int len, unsigned int flags) {
    BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
}

int NET_WriteV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) );
}

int NET_SendTo(int s, const void *msg, int len, unsigned int flags,
       const struct sockaddr *to, int tolen) {
    BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
}

int NET_Accept(int s, struct sockaddr *addr, int *addrlen) {
    socklen_t socklen = *addrlen;
    BLOCKING_IO_RETURN_INT( s, accept(s, addr, &socklen) );
    *addrlen = socklen;
}

int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
    BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) );
}

#ifndef USE_SELECT
int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
    BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
}
#else
int NET_Select(int s, fd_set *readfds, fd_set *writefds,
               fd_set *exceptfds, struct timeval *timeout) {
    BLOCKING_IO_RETURN_INT( s-1,
                            select(s, readfds, writefds, exceptfds, timeout) );
}
#endif

/*
 * Wrapper for poll(s, timeout).
 * Automatically restarts with an adjusted timeout if interrupted
 * by a signal other than our wakeup signal.
 */
int NET_Timeout(int s, long timeout) {
    long prevtime = 0, newtime;
    struct timeval t;
    fdEntry_t *fdEntry = getFdEntry(s);

    /*
     * Check that fd hasn't been closed.
     */
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Pick up current time as we may need to adjust the timeout.
     */
    if (timeout > 0) {
        gettimeofday(&t, NULL);
        prevtime = t.tv_sec * 1000  +  t.tv_usec / 1000;
    }

    for (;;) {
        struct pollfd pfd;
        int rv;
        threadEntry_t self;

        /*
         * Poll the fd. If interrupted by our wakeup signal
         * errno will be set to EBADF.
         */
        pfd.fd = s;
        pfd.events = POLLIN | POLLERR;

        startOp(fdEntry, &self);
        rv = poll(&pfd, 1, timeout);
        endOp(fdEntry, &self);

        /*
         * If interrupted then adjust timeout. If timeout
         * has expired return 0 (indicating timeout expired).
         */
        if (rv < 0 && errno == EINTR) {
            if (timeout > 0) {
                gettimeofday(&t, NULL);
                newtime = t.tv_sec * 1000  +  t.tv_usec / 1000;
                timeout -= newtime - prevtime;
                if (timeout <= 0) {
                    return 0;
                }
                prevtime = newtime;
            }
        } else {
            return rv;
        }
    }
}
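
/*
 * Typical usage sketch (illustration only; 'sock' and 'timeout_ms' are
 * placeholders): wait up to a timeout for readable data, then read it.
 *
 *   int rv = NET_Timeout(sock, timeout_ms);
 *   if (rv == 0) {
 *       // timed out, no data available
 *   } else if (rv == -1) {
 *       // error; errno is EBADF if the fd was closed concurrently
 *   } else {
 *       char buf[256];
 *       int n = NET_Read(sock, buf, sizeof(buf));
 *       // handle n bytes (or n == -1)
 *   }
 */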