/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * This file contains implementations of the NET_... functions. The NET_...
 * functions are wrappers for common file and socket functions plus provisions
 * for non-blocking IO.
 *
 * (Basically, the layer remembers, per file descriptor, all threads that are
 *  currently blocked on that fd; all of these threads can be woken up by
 *  sending them a signal, which is done e.g. when the fd is closed.)
 *
 * This was originally copied from the linux_close.c implementation.
 *
 * Side note: this code needs initialization. Under Linux this is done
 * automatically via __attribute((constructor)); on AIX it is done manually
 * (see aix_close_init).
 */

/*
   AIX needs a workaround for I/O cancellation, see:
   http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.basetechref/doc/basetrf1/close.htm
   ...
   The close subroutine is blocked until all subroutines which use the file
   descriptor return to usr space. For example, when a thread is calling close
   and another thread is calling select with the same file descriptor, the
   close subroutine does not return until the select call returns.
   ...
*/

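/*
 * Illustrative sketch of the intended interaction (the two helper functions
 * below are hypothetical callers, only NET_Read and NET_SocketClose are
 * defined in this file):
 *
 *   // Thread A: blocks in recv() via the NET_ wrapper
 *   void *reader(void *arg) {
 *       char buf[128];
 *       int n = NET_Read(*(int*)arg, buf, sizeof(buf));
 *       // If thread B closes the fd while we are blocked, the wakeup signal
 *       // interrupts recv() and NET_Read returns -1 with errno == EBADF.
 *       return NULL;
 *   }
 *
 *   // Thread B: closing the socket wakes up all threads blocked on it
 *   void closer(int fd) {
 *       NET_SocketClose(fd);
 *   }
 */
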
#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/uio.h>
#include <unistd.h>
#include <errno.h>
#include <sys/poll.h>

/*
 * Stack allocated by thread when doing blocking operation
 */
typedef struct threadEntry {
    pthread_t thr;                      /* this thread */
    struct threadEntry *next;           /* next thread */
    int intr;                           /* interrupted */
} threadEntry_t;

/*
 * Heap allocated during initialization - one entry per fd
 */
typedef struct {
    pthread_mutex_t lock;               /* fd lock */
    threadEntry_t *threads;             /* threads blocked on fd */
} fdEntry_t;

/*
 * Signal to unblock thread
 */
static int sigWakeup = (SIGRTMAX - 1);

/*
 * The fd table and the number of file descriptors
 */
static fdEntry_t *fdTable = NULL;
static int fdCount = 0;

/*
 * Null signal handler - the wakeup signal only serves to interrupt
 * blocking system calls, which then fail with EINTR
 */
static void sig_wakeup(int sig) {
}

/*
 * Initialization routine (executed when library is loaded)
 * Allocates the fd table and sets up the signal handler.
 *
 * On AIX we don't have __attribute((constructor)) so we need to initialize
 * manually (from JNI_OnLoad() in 'src/share/native/java/net/net_util.c')
 */
void aix_close_init() {
    struct rlimit nbr_files;
    sigset_t sigset;
    struct sigaction sa;

    /* Check already initialized */
    if (fdCount > 0 && fdTable != NULL) {
        return;
    }

    /*
     * Allocate table based on the maximum number of
     * file descriptors.
     */
    if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) {
        fprintf(stderr, "library initialization failed - "
                "unable to get max # of allocated fds\n");
        abort();
    }
    fdCount = nbr_files.rlim_max;
    /*
     * We have a conceptual problem here when the number of files is
     * unlimited. As a kind of workaround, we ensure the table is big
     * enough to handle even a large number of files. Since SAP itself
     * recommends a limit of 32000 files, we just use 64000 as 'infinity'.
     */
    if (nbr_files.rlim_max == RLIM_INFINITY) {
        fdCount = 64000;
    }
    fdTable = (fdEntry_t *)calloc(fdCount, sizeof(fdEntry_t));
    if (fdTable == NULL) {
        fprintf(stderr, "library initialization failed - "
                "unable to allocate file descriptor table - out of memory\n");
        abort();
    }

    {
        int i;
        for (i=0; i < fdCount; i++) {
            pthread_mutex_init(&fdTable[i].lock, NULL);
        }
    }

    /*
     * Setup the signal handler
     */
    sa.sa_handler = sig_wakeup;
    sa.sa_flags   = 0;
    sigemptyset(&sa.sa_mask);
    sigaction(sigWakeup, &sa, NULL);

    sigemptyset(&sigset);
    sigaddset(&sigset, sigWakeup);
    sigprocmask(SIG_UNBLOCK, &sigset, NULL);
}

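/*
 * For reference, a sketch of how the initialization is expected to be
 * triggered from JNI_OnLoad() in src/share/native/java/net/net_util.c
 * (the guard and surrounding code there may differ; this is an assumption,
 * not a copy of that file):
 *
 *   JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved) {
 *   #ifdef _AIX
 *       aix_close_init();   // allocate fd table, install wakeup signal handler
 *   #endif
 *       ...
 *       return JNI_VERSION_1_2;
 *   }
 */
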
/*
 * Return the fd table entry for this fd, or NULL if the fd is out
 * of range.
 */
static inline fdEntry_t *getFdEntry(int fd)
{
    if (fd < 0 || fd >= fdCount) {
        return NULL;
    }
    return &fdTable[fd];
}

/*
 * Start a blocking operation :-
 *    Insert thread onto thread list for the fd.
 */
static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    self->thr = pthread_self();
    self->intr = 0;

    pthread_mutex_lock(&(fdEntry->lock));
    {
        self->next = fdEntry->threads;
        fdEntry->threads = self;
    }
    pthread_mutex_unlock(&(fdEntry->lock));
}

/*
 * End a blocking operation :-
 *     Remove thread from thread list for the fd
 *     If the thread was interrupted (the fd was closed) then set errno to EBADF
 */
static inline void endOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    int orig_errno = errno;
    pthread_mutex_lock(&(fdEntry->lock));
    {
        threadEntry_t *curr, *prev = NULL;
        curr = fdEntry->threads;
        while (curr != NULL) {
            if (curr == self) {
                if (curr->intr) {
                    orig_errno = EBADF;
                }
                if (prev == NULL) {
                    fdEntry->threads = curr->next;
                } else {
                    prev->next = curr->next;
                }
                break;
            }
            prev = curr;
            curr = curr->next;
        }
    }
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;
}

/*
 * Close or dup2 a file descriptor ensuring that all threads blocked on
 * the file descriptor are notified via a wakeup signal.
 *
 *      fd1 < 0    => close(fd2)
 *      fd1 >= 0   => dup2(fd1, fd2)
 *
 * Returns -1 with errno set if operation fails.
 */
static int closefd(int fd1, int fd2) {
    int rv, orig_errno;
    fdEntry_t *fdEntry = getFdEntry(fd2);
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Lock the fd to hold off additional I/O on this fd.
     */
    pthread_mutex_lock(&(fdEntry->lock));

    {
        /* On fast machines we see that we enter dup2 before the
         * accepting thread had a chance to get and process the signal.
         * So in case we woke a thread up, give it some time to cope.
         * Also see https://bugs.openjdk.java.net/browse/JDK-8006395 */
        int num_woken = 0;

        /*
         * Send a wakeup signal to all threads blocked on this
         * file descriptor.
         */
        threadEntry_t *curr = fdEntry->threads;
        while (curr != NULL) {
            curr->intr = 1;
            pthread_kill(curr->thr, sigWakeup);
            num_woken++;
            curr = curr->next;
        }

        if (num_woken > 0) {
            usleep(num_woken * 50);
        }

        /*
         * And close/dup the file descriptor
         * (restart if interrupted by signal)
         */
        do {
            if (fd1 < 0) {
                rv = close(fd2);
            } else {
                rv = dup2(fd1, fd2);
            }
        } while (rv == -1 && errno == EINTR);
    }

    /*
     * Unlock without destroying errno
     */
    orig_errno = errno;
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;

    return rv;
}

/*
 * Wrapper for dup2 - same semantics as dup2 system call except
 * that any threads blocked in an I/O system call on fd2 will be
 * preempted and return -1/EBADF.
 */
int NET_Dup2(int fd, int fd2) {
    if (fd < 0) {
        errno = EBADF;
        return -1;
    }
    return closefd(fd, fd2);
}

/*
 * Wrapper for close - same semantics as close system call
 * except that any threads blocked in an I/O on fd will be
 * preempted and the I/O system call will return -1/EBADF.
 */
int NET_SocketClose(int fd) {
    return closefd(-1, fd);
}

/************** Basic I/O operations here ***************/

/*
 * Macro to perform a blocking IO operation. Restarts
 * automatically if interrupted by signal (other than
 * our wakeup signal)
 */
#define BLOCKING_IO_RETURN_INT(FD, FUNC) {      \
    int ret;                                    \
    threadEntry_t self;                         \
    fdEntry_t *fdEntry = getFdEntry(FD);        \
    if (fdEntry == NULL) {                      \
        errno = EBADF;                          \
        return -1;                              \
    }                                           \
    do {                                        \
        startOp(fdEntry, &self);                \
        ret = FUNC;                             \
        endOp(fdEntry, &self);                  \
    } while (ret == -1 && errno == EINTR);      \
    return ret;                                 \
}

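/*
 * For illustration, a call such as NET_Read(s, buf, len) below expands roughly
 * to the following (a sketch of the macro expansion above, not additional code):
 *
 *   int ret;
 *   threadEntry_t self;
 *   fdEntry_t *fdEntry = getFdEntry(s);
 *   if (fdEntry == NULL) { errno = EBADF; return -1; }
 *   do {
 *       startOp(fdEntry, &self);              // register this thread on the fd
 *       ret = recv(s, buf, len, 0);           // may block; wakeup signal -> EINTR
 *       endOp(fdEntry, &self);                // deregister; errno = EBADF if fd was closed
 *   } while (ret == -1 && errno == EINTR);    // retry unless the fd was closed
 *   return ret;
 */
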
int NET_Read(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
}

int NET_ReadV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
}

int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
       struct sockaddr *from, int *fromlen) {
    socklen_t socklen = *fromlen;
    BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, &socklen) );
    *fromlen = socklen;
}

int NET_Send(int s, void *msg, int len, unsigned int flags) {
    BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
}

int NET_WriteV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) );
}

int NET_SendTo(int s, const void *msg, int len, unsigned int
       flags, const struct sockaddr *to, int tolen) {
    BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
}

int NET_Accept(int s, struct sockaddr *addr, int *addrlen) {
    socklen_t socklen = *addrlen;
    BLOCKING_IO_RETURN_INT( s, accept(s, addr, &socklen) );
    *addrlen = socklen;
}

int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
    int crc = -1, prc = -1;
    threadEntry_t self;
    fdEntry_t* fdEntry = getFdEntry(s);

    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /* On AIX, when the system call connect() is interrupted, the connection
     * is not aborted and it will be established asynchronously by the kernel.
     * Hence, there is no need to restart connect() when EINTR is received.
     */
    startOp(fdEntry, &self);
    crc = connect(s, addr, addrlen);
    endOp(fdEntry, &self);

    if (crc == -1 && errno == EINTR) {
        struct pollfd s_pollfd;
        int sockopt_arg = 0;
        socklen_t len;

        s_pollfd.fd = s;
        s_pollfd.events = POLLOUT | POLLERR;

        /* poll the file descriptor */
        do {
            startOp(fdEntry, &self);
            prc = poll(&s_pollfd, 1, -1);
            endOp(fdEntry, &self);
        } while (prc == -1 && errno == EINTR);

        if (prc < 0)
            return prc;

        len = sizeof(sockopt_arg);

        /* Check whether the connection has been established */
        if (getsockopt(s, SOL_SOCKET, SO_ERROR, &sockopt_arg, &len) == -1)
            return -1;

        if (sockopt_arg != 0) {
            errno = sockopt_arg;
            return -1;
        }
    } else {
        return crc;
    }

    /* At this point, fd is connected. Set successful return code */
    return 0;
}

int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
    BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
}

/*
 * Wrapper for poll(s, timeout).
 * Auto restarts with adjusted timeout if interrupted by
 * signal other than our wakeup signal.
 */
int NET_Timeout(int s, long timeout) {
    long prevtime = 0, newtime;
    struct timeval t;
    fdEntry_t *fdEntry = getFdEntry(s);

    /*
     * Check that fd hasn't been closed.
     */
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Pick up current time as we may need to adjust the timeout
     */
    if (timeout > 0) {
        gettimeofday(&t, NULL);
        prevtime = t.tv_sec * 1000  +  t.tv_usec / 1000;
    }

    for (;;) {
        struct pollfd pfd;
        int rv;
        threadEntry_t self;

        /*
         * Poll the fd. If interrupted by our wakeup signal
         * errno will be set to EBADF.
         */
        pfd.fd = s;
        pfd.events = POLLIN | POLLERR;

        startOp(fdEntry, &self);
        rv = poll(&pfd, 1, timeout);
        endOp(fdEntry, &self);

        /*
         * If interrupted then adjust timeout. If timeout
         * has expired return 0 (indicating timeout expired).
         */
        if (rv < 0 && errno == EINTR) {
            if (timeout > 0) {
                gettimeofday(&t, NULL);
                newtime = t.tv_sec * 1000  +  t.tv_usec / 1000;
                timeout -= newtime - prevtime;
                if (timeout <= 0) {
                    return 0;
                }
                prevtime = newtime;
            }
        } else {
            return rv;
        }
    }
}
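
/*
 * Illustrative usage sketch (not part of this file): a caller implementing a
 * read with a timeout would typically combine NET_Timeout and NET_Read along
 * these lines. The function name and parameters below are hypothetical.
 *
 *   int read_with_timeout(int s, void *buf, size_t len, long millis) {
 *       int ret = NET_Timeout(s, millis);
 *       if (ret <= 0) {
 *           // 0 -> timeout expired; -1 -> error (EBADF if the fd was closed)
 *           return ret;
 *       }
 *       return NET_Read(s, buf, len);   // data should be available now
 *   }
 */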