/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * This file contains implementations of the NET_... functions. The NET_...
 * functions are wrappers for common file and socket functions plus provisions
 * for non-blocking IO.
 *
 * (Basically, the layer remembers all threads waiting on a particular fd;
 * all threads waiting on a certain fd can be woken up by sending them a
 * signal. This is done e.g. when the fd is closed; see the illustrative
 * sketch following NET_SocketClose below.)
 *
 * This was originally copied from the linux_close.c implementation.
 *
 * Side note: this code needs initialization. Under Linux this is done
 * automatically via __attribute((constructor)); on AIX this is done manually
 * (see aix_close_init).
 */

/*
  AIX needs a workaround for I/O cancellation, see:
  http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.basetechref/doc/basetrf1/close.htm
  ...
  The close subroutine is blocked until all subroutines which use the file
  descriptor return to usr space. For example, when a thread is calling close
  and another thread is calling select with the same file descriptor, the
  close subroutine does not return until the select call returns.
  ...
 */

#include <stdio.h>
#include <stdlib.h>
#include <signal.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/uio.h>
#include <unistd.h>
#include <errno.h>

#include <sys/poll.h>

/*
 * Stack allocated by thread when doing blocking operation
 */
typedef struct threadEntry {
    pthread_t thr;                      /* this thread */
    struct threadEntry *next;           /* next thread */
    int intr;                           /* interrupted */
} threadEntry_t;

/*
 * Heap allocated during initialization - one entry per fd
 */
typedef struct {
    pthread_mutex_t lock;               /* fd lock */
    threadEntry_t *threads;             /* threads blocked on fd */
} fdEntry_t;

/*
 * Signal to unblock thread
 */
static int sigWakeup = (SIGRTMAX - 1);

/*
 * The fd table and the number of file descriptors
 */
static fdEntry_t *fdTable = NULL;
static int fdCount = 0;

/*
 * Null signal handler
 */
static void sig_wakeup(int sig) {
}

/*
 * Initialization routine (executed when library is loaded).
 * Allocates the fd table and sets up the signal handler.
 *
 * On AIX we don't have __attribute((constructor)), so we need to initialize
 * manually (from JNI_OnLoad() in 'src/share/native/java/net/net_util.c';
 * see the sketch after getFdEntry below).
 */
void aix_close_init() {
    struct rlimit nbr_files;
    sigset_t sigset;
    struct sigaction sa;

    /* Check already initialized */
    if (fdCount > 0 && fdTable != NULL) {
        return;
    }

    /*
     * Allocate table based on the maximum number of
     * file descriptors.
     */
    if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) {
        fprintf(stderr, "library initialization failed - "
                "unable to get max # of allocated fds\n");
        abort();
    }
    fdCount = nbr_files.rlim_max;
    /*
     * We have a conceptual problem here when the number of files is
     * unlimited. As a kind of workaround, we ensure the table is big
     * enough to handle even a large number of files. Since SAP itself
     * recommends a limit of 32000 files, we just use 64000 as 'infinity'.
     */
    if (nbr_files.rlim_max == RLIM_INFINITY) {
        fdCount = 64000;
    }
    fdTable = (fdEntry_t *)calloc(fdCount, sizeof(fdEntry_t));
    if (fdTable == NULL) {
        fprintf(stderr, "library initialization failed - "
                "unable to allocate file descriptor table - out of memory\n");
        abort();
    }

    {
        int i;
        for (i = 0; i < fdCount; i++) {
            pthread_mutex_init(&fdTable[i].lock, NULL);
        }
    }

    /*
     * Setup the signal handler
     */
    sa.sa_handler = sig_wakeup;
    sa.sa_flags = 0;
    sigemptyset(&sa.sa_mask);
    sigaction(sigWakeup, &sa, NULL);

    sigemptyset(&sigset);
    sigaddset(&sigset, sigWakeup);
    sigprocmask(SIG_UNBLOCK, &sigset, NULL);
}

/*
 * Return the fd table entry for this fd, or NULL if fd is out
 * of range.
 */
static inline fdEntry_t *getFdEntry(int fd)
{
    if (fd < 0 || fd >= fdCount) {
        return NULL;
    }
    return &fdTable[fd];
}
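
/*
 * Illustrative sketch (not part of the original file): how the manual
 * initialization described above is expected to be wired up. The real call
 * site is JNI_OnLoad() in 'src/share/native/java/net/net_util.c'; the guard
 * macro AIX_CLOSE_INIT_EXAMPLE and the exact surrounding code shown here are
 * assumptions for illustration only.
 */
#ifdef AIX_CLOSE_INIT_EXAMPLE
#include <jni.h>

JNIEXPORT jint JNICALL JNI_OnLoad(JavaVM *vm, void *reserved) {
    /* Set up the fd table and the wakeup signal handler before any
     * NET_... wrapper can be used. */
#if defined(_AIX)
    aix_close_init();
#endif
    return JNI_VERSION_1_2;
}
#endif /* AIX_CLOSE_INIT_EXAMPLE */
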
/*
 * Start a blocking operation:
 * Insert thread onto thread list for the fd.
 */
static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    self->thr = pthread_self();
    self->intr = 0;

    pthread_mutex_lock(&(fdEntry->lock));
    {
        self->next = fdEntry->threads;
        fdEntry->threads = self;
    }
    pthread_mutex_unlock(&(fdEntry->lock));
}

/*
 * End a blocking operation:
 * Remove thread from thread list for the fd.
 * If the thread has been interrupted then set errno to EBADF.
 */
static inline void endOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    int orig_errno = errno;
    pthread_mutex_lock(&(fdEntry->lock));
    {
        threadEntry_t *curr, *prev = NULL;
        curr = fdEntry->threads;
        while (curr != NULL) {
            if (curr == self) {
                if (curr->intr) {
                    orig_errno = EBADF;
                }
                if (prev == NULL) {
                    fdEntry->threads = curr->next;
                } else {
                    prev->next = curr->next;
                }
                break;
            }
            prev = curr;
            curr = curr->next;
        }
    }
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;
}

/*
 * Close or dup2 a file descriptor, ensuring that all threads blocked on
 * the file descriptor are notified via a wakeup signal.
 *
 *      fd1 < 0    => close(fd2)
 *      fd1 >= 0   => dup2(fd1, fd2)
 *
 * Returns -1 with errno set if operation fails.
 */
static int closefd(int fd1, int fd2) {
    int rv, orig_errno;
    fdEntry_t *fdEntry = getFdEntry(fd2);
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Lock the fd to hold off additional I/O on this fd.
     */
    pthread_mutex_lock(&(fdEntry->lock));

    {
        /* On fast machines we see that we enter dup2 before the
         * accepting thread has had a chance to get and process the signal.
         * So in case we woke a thread up, give it some time to cope.
         * Also see https://bugs.openjdk.java.net/browse/JDK-8006395 */
        int num_woken = 0;

        /*
         * Send a wakeup signal to all threads blocked on this
         * file descriptor.
         */
        threadEntry_t *curr = fdEntry->threads;
        while (curr != NULL) {
            curr->intr = 1;
            pthread_kill(curr->thr, sigWakeup);
            num_woken++;
            curr = curr->next;
        }

        if (num_woken > 0) {
            usleep(num_woken * 50);
        }

        /*
         * And close/dup the file descriptor
         * (restart if interrupted by signal)
         */
        do {
            if (fd1 < 0) {
                rv = close(fd2);
            } else {
                rv = dup2(fd1, fd2);
            }
        } while (rv == -1 && errno == EINTR);
    }

    /*
     * Unlock without destroying errno
     */
    orig_errno = errno;
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;

    return rv;
}

/*
 * Wrapper for dup2 - same semantics as the dup2 system call except
 * that any threads blocked in an I/O system call on fd2 will be
 * preempted and return -1/EBADF.
 */
int NET_Dup2(int fd, int fd2) {
    if (fd < 0) {
        errno = EBADF;
        return -1;
    }
    return closefd(fd, fd2);
}

/*
 * Wrapper for close - same semantics as the close system call
 * except that any threads blocked in an I/O on fd will be
 * preempted and the I/O system call will return -1/EBADF.
 */
int NET_SocketClose(int fd) {
    return closefd(-1, fd);
}
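
/*
 * Illustrative sketch (not part of the original file): the wakeup behaviour
 * described in the comments above. A thread blocked in NET_Read() on a socket
 * is registered in the fd table; NET_SocketClose() called from another thread
 * marks it interrupted and signals it, so the read returns -1 with
 * errno == EBADF. The guard macro AIX_CLOSE_WAKEUP_EXAMPLE and the example
 * functions below are assumptions for illustration only.
 */
#ifdef AIX_CLOSE_WAKEUP_EXAMPLE
int NET_Read(int s, void* buf, size_t len);     /* defined below in this file */

static void *example_reader(void *arg) {
    int fd = *(int *)arg;
    char buf[128];
    /* Blocks until data arrives or the fd is closed by another thread. */
    int n = NET_Read(fd, buf, sizeof(buf));
    if (n == -1 && errno == EBADF) {
        /* We were preempted by NET_SocketClose()/NET_Dup2() on this fd. */
        fprintf(stderr, "reader woken up: fd was closed\n");
    }
    return NULL;
}

static void example_close_while_reading(int fd) {
    pthread_t reader;
    pthread_create(&reader, NULL, example_reader, &fd);
    /* ... later, e.g. during socket shutdown ... */
    NET_SocketClose(fd);        /* signals the blocked reader, then closes */
    pthread_join(reader, NULL);
}
#endif /* AIX_CLOSE_WAKEUP_EXAMPLE */
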
/************** Basic I/O operations here ***************/

/*
 * Macro to perform a blocking IO operation. Restarts
 * automatically if interrupted by a signal (other than
 * our wakeup signal).
 */
#define BLOCKING_IO_RETURN_INT(FD, FUNC) {      \
    int ret;                                    \
    threadEntry_t self;                         \
    fdEntry_t *fdEntry = getFdEntry(FD);        \
    if (fdEntry == NULL) {                      \
        errno = EBADF;                          \
        return -1;                              \
    }                                           \
    do {                                        \
        startOp(fdEntry, &self);                \
        ret = FUNC;                             \
        endOp(fdEntry, &self);                  \
    } while (ret == -1 && errno == EINTR);      \
    return ret;                                 \
}

int NET_Read(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
}

int NET_ReadV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
}

/*
 * NET_RecvFrom and NET_Accept cannot use BLOCKING_IO_RETURN_INT because the
 * macro returns before the length out-parameter could be written back, so
 * the retry loop is spelled out here.
 */
int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
                 struct sockaddr *from, int *fromlen) {
    int ret;
    threadEntry_t self;
    socklen_t socklen = *fromlen;
    fdEntry_t *fdEntry = getFdEntry(s);
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }
    do {
        startOp(fdEntry, &self);
        ret = recvfrom(s, buf, len, flags, from, &socklen);
        endOp(fdEntry, &self);
    } while (ret == -1 && errno == EINTR);
    *fromlen = socklen;
    return ret;
}

int NET_Send(int s, void *msg, int len, unsigned int flags) {
    BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
}

int NET_WriteV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) );
}

int NET_SendTo(int s, const void *msg, int len, unsigned int flags,
               const struct sockaddr *to, int tolen) {
    BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
}

int NET_Accept(int s, struct sockaddr *addr, int *addrlen) {
    int ret;
    threadEntry_t self;
    socklen_t socklen = *addrlen;
    fdEntry_t *fdEntry = getFdEntry(s);
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }
    do {
        startOp(fdEntry, &self);
        ret = accept(s, addr, &socklen);
        endOp(fdEntry, &self);
    } while (ret == -1 && errno == EINTR);
    *addrlen = socklen;
    return ret;
}

int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
    BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) );
}

#ifndef USE_SELECT
int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
    BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
}
#else
int NET_Select(int s, fd_set *readfds, fd_set *writefds,
               fd_set *exceptfds, struct timeval *timeout) {
    BLOCKING_IO_RETURN_INT( s-1,
                            select(s, readfds, writefds, exceptfds, timeout) );
}
#endif

/*
 * Wrapper for poll(s, timeout).
 * Auto restarts with adjusted timeout if interrupted by a
 * signal other than our wakeup signal.
 */
int NET_Timeout(int s, long timeout) {
    long prevtime = 0, newtime;
    struct timeval t;
    fdEntry_t *fdEntry = getFdEntry(s);

    /*
     * Check that fd hasn't been closed.
     */
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Pick up current time as we may need to adjust timeout.
     */
    if (timeout > 0) {
        gettimeofday(&t, NULL);
        prevtime = t.tv_sec * 1000 + t.tv_usec / 1000;
    }

    for (;;) {
        struct pollfd pfd;
        int rv;
        threadEntry_t self;

        /*
         * Poll the fd. If interrupted by our wakeup signal
         * errno will be set to EBADF.
         */
        pfd.fd = s;
        pfd.events = POLLIN | POLLERR;

        startOp(fdEntry, &self);
        rv = poll(&pfd, 1, timeout);
        endOp(fdEntry, &self);

        /*
         * If interrupted then adjust timeout. If timeout
         * has expired return 0 (indicating timeout expired).
         */
        if (rv < 0 && errno == EINTR) {
            if (timeout > 0) {
                gettimeofday(&t, NULL);
                newtime = t.tv_sec * 1000 + t.tv_usec / 1000;
                timeout -= newtime - prevtime;
                if (timeout <= 0) {
                    return 0;
                }
                prevtime = newtime;
            }
        } else {
            return rv;
        }
    }
}
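
/*
 * Illustrative sketch (not part of the original file): how NET_Timeout() is
 * typically combined with a blocking wrapper to implement a read with a
 * timeout, in the spirit of the socket implementation's SO_TIMEOUT handling.
 * The guard macro AIX_CLOSE_TIMEOUT_EXAMPLE and the helper below are
 * assumptions for illustration only.
 */
#ifdef AIX_CLOSE_TIMEOUT_EXAMPLE
/*
 * Returns the number of bytes read, 0 on orderly shutdown, -1 with errno set
 * on error, or -1 with errno == EAGAIN if nothing arrived within 'millis'.
 */
static int example_timed_read(int fd, void *buf, size_t len, long millis) {
    int rv = NET_Timeout(fd, millis);   /* wait until readable; EBADF if closed */
    if (rv == 0) {
        errno = EAGAIN;                 /* timeout expired, nothing to read */
        return -1;
    }
    if (rv < 0) {
        return -1;                      /* EBADF if fd was closed, or other error */
    }
    return NET_Read(fd, buf, len);      /* data (or EOF/error) is available now */
}
#endif /* AIX_CLOSE_TIMEOUT_EXAMPLE */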