/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * This file contains implementations of NET_... functions. The NET_.. functions are
 * wrappers for common file- and socket functions plus provisions for non-blocking IO.
 *
 * (basically, the layer remembers all threads waiting on a particular fd;
 *  all threads waiting on a certain fd can be woken up by sending them a signal; this
 *  is done e.g. when the fd is closed.)
 *
 * This was originally copied from the linux_close.c implementation.
 *
 * Side Note: This code needs initialization. Under Linux this is done
 * automatically via __attribute((constructor)), on AIX this is done manually
 * (see aix_close_init).
 *
 */

/*
   AIX needs a workaround for I/O cancellation, see:
   http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.basetechref/doc/basetrf1/close.htm
   ...
   The close subroutine is blocked until all subroutines which use the file
   descriptor return to usr space. For example, when a thread is calling close
   and another thread is calling select with the same file descriptor, the
   close subroutine does not return until the select call returns.
   ...
*/

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <signal.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/uio.h>
#include <unistd.h>
#include <errno.h>
#include <sys/poll.h>

/*
 * Stack allocated by thread when doing blocking operation
 */
typedef struct threadEntry {
    pthread_t thr;                      /* this thread */
    struct threadEntry *next;           /* next thread */
    int intr;                           /* interrupted */
} threadEntry_t;

/*
 * Heap allocated during initialization - one entry per fd
 */
typedef struct {
    pthread_mutex_t lock;               /* fd lock */
    threadEntry_t *threads;             /* threads blocked on fd */
} fdEntry_t;

/*
 * Size used for the fd table when the hard RLIMIT_NOFILE limit is unlimited
 * (or too large to be represented as an int).
 */
enum { FD_TABLE_SIZE_WHEN_UNLIMITED = 64000 };

/*
 * Signal to unblock thread.
 *
 * Assigned (SIGRTMAX - 1) in aix_close_init() rather than in a static
 * initializer: POSIX does not require SIGRTMAX to be a constant expression.
 * The value is only ever used for threads registered in the fd table, and
 * no thread can be registered before aix_close_init() has allocated it.
 */
static int sigWakeup = 0;

/*
 * The fd table and the number of file descriptors
 */
static fdEntry_t *fdTable = NULL;
static int fdCount = 0;

/*
 * Null signal handler. Its only purpose is to be installed for sigWakeup so
 * that delivery of the signal interrupts a blocking system call instead of
 * triggering the signal's default action.
 */
static void sig_wakeup(int sig) {
    (void)sig;
}

/*
 * Initialization routine (executed when library is loaded)
 * Allocates the fd table and sets up the signal handler.
 *
 * On AIX we don't have __attribute((constructor)) so we need to initialize
 * manually (from JNI_OnLoad() in 'src/share/native/java/net/net_util.c')
 *
 * Calling it again after a successful initialization is a no-op.
 * Aborts the process if the fd limit cannot be queried or the table cannot
 * be allocated.
 */
void aix_close_init(void) {
    struct rlimit nbr_files;
    sigset_t sigset;
    struct sigaction sa;
    int i;

    /* Check already initialized */
    if (fdCount > 0 && fdTable != NULL) {
        return;
    }

    sigWakeup = SIGRTMAX - 1;

    /*
     * Allocate table based on the maximum number of
     * file descriptors.
     */
    if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) {
        fprintf(stderr, "library initialization failed - "
                "unable to get max # of allocated fds\n");
        abort();
    }

    /*
     * We have a conceptual problem here, when the number of files is
     * unlimited. As a kind of workaround, we ensure the table is big
     * enough to handle even a large number of files. Since SAP itself
     * recommends a limit of 32000 files, we just use 64000 as 'infinity'.
     *
     * A limit that does not fit into an int is treated the same way, so the
     * narrowing conversion below can never produce a bogus (e.g. negative)
     * table size.
     */
    if (nbr_files.rlim_max == RLIM_INFINITY ||
        nbr_files.rlim_max > (rlim_t)INT_MAX) {
        fdCount = FD_TABLE_SIZE_WHEN_UNLIMITED;
    } else {
        fdCount = (int)nbr_files.rlim_max;
    }

    fdTable = calloc((size_t)fdCount, sizeof *fdTable);
    if (fdTable == NULL) {
        fprintf(stderr, "library initialization failed - "
                "unable to allocate file descriptor table - out of memory\n");
        abort();
    }

    for (i = 0; i < fdCount; i++) {
        pthread_mutex_init(&fdTable[i].lock, NULL);
    }

    /*
     * Setup the signal handler
     */
    sa.sa_handler = sig_wakeup;
    sa.sa_flags   = 0;          /* no SA_RESTART: blocked calls must see EINTR */
    sigemptyset(&sa.sa_mask);
    sigaction(sigWakeup, &sa, NULL);

    sigemptyset(&sigset);
    sigaddset(&sigset, sigWakeup);
    sigprocmask(SIG_UNBLOCK, &sigset, NULL);
}

/*
 * Return the fd table entry for this fd or NULL if fd is out
 * of range (which includes every fd before aix_close_init() has run).
 */
static inline fdEntry_t *getFdEntry(int fd)
{
    if (fd < 0 || fd >= fdCount) {
        return NULL;
    }
    return &fdTable[fd];
}

/*
 * Start a blocking operation :-
 *    Insert thread onto thread list for the fd.
 *
 * 'self' is caller-provided storage that must stay valid until the matching
 * endOp() call.
 */
static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    self->thr = pthread_self();
    self->intr = 0;

    pthread_mutex_lock(&(fdEntry->lock));
    {
        self->next = fdEntry->threads;
        fdEntry->threads = self;
    }
    pthread_mutex_unlock(&(fdEntry->lock));
}

/*
 * End a blocking operation :-
 *     Remove thread from thread list for the fd
 *     If fd has been interrupted then set errno to EBADF
 *
 * Otherwise errno is left exactly as the blocking call set it.
 */
static inline void endOp
    (fdEntry_t *fdEntry, threadEntry_t *self)
{
    int orig_errno = errno;
    pthread_mutex_lock(&(fdEntry->lock));
    {
        threadEntry_t *curr, *prev = NULL;
        curr = fdEntry->threads;
        while (curr != NULL) {
            if (curr == self) {
                if (curr->intr) {
                    orig_errno = EBADF;
                }
                if (prev == NULL) {
                    fdEntry->threads = curr->next;
                } else {
                    prev->next = curr->next;
                }
                break;
            }
            prev = curr;
            curr = curr->next;
        }
    }
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;
}

/*
 * Close or dup2 a file descriptor ensuring that all threads blocked on
 * the file descriptor are notified via a wakeup signal.
 *
 *      fd1 < 0    => close(fd2)
 *      fd1 >= 0   => dup2(fd1, fd2)
 *
 * Returns -1 with errno set if operation fails.
 */
static int closefd(int fd1, int fd2) {
    int rv, orig_errno;
    fdEntry_t *fdEntry = getFdEntry(fd2);
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Lock the fd to hold-off additional I/O on this fd.
     */
    pthread_mutex_lock(&(fdEntry->lock));

    {
        /* On fast machines we see that we enter dup2 before the
         * accepting thread had a chance to get and process the signal.
         * So in case we woke a thread up, give it some time to cope.
         * Also see https://bugs.openjdk.java.net/browse/JDK-8006395 */
        int num_woken = 0;

        /*
         * Send a wakeup signal to all threads blocked on this
         * file descriptor.
         */
        threadEntry_t *curr = fdEntry->threads;
        while (curr != NULL) {
            curr->intr = 1;
            pthread_kill( curr->thr, sigWakeup );
            num_woken ++;
            curr = curr->next;
        }

        if (num_woken > 0) {
            usleep(num_woken * 50);
        }

        /*
         * And close/dup the file descriptor
         * (restart if interrupted by signal)
         */
        do {
            if (fd1 < 0) {
                rv = close(fd2);
            } else {
                rv = dup2(fd1, fd2);
            }
        } while (rv == -1 && errno == EINTR);
    }

    /*
     * Unlock without destroying errno
     */
    orig_errno = errno;
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;

    return rv;
}

/*
 * Wrapper for dup2 - same semantics as dup2 system call except
 * that any threads blocked in an I/O system call on fd2 will be
 * preempted and return -1/EBADF;
 */
int NET_Dup2(int fd, int fd2) {
    if (fd < 0) {
        errno = EBADF;
        return -1;
    }
    return closefd(fd, fd2);
}

/*
 * Wrapper for close - same semantics as close system call
 * except that any threads blocked in an I/O on fd will be
 * preempted and the I/O system call will return -1/EBADF.
 */
int NET_SocketClose(int fd) {
    return closefd(-1, fd);
}

/************** Basic I/O operations here ***************/

/*
 * Macro to perform a blocking IO operation and store its result in RET.
 * Restarts automatically if interrupted by signal (other than
 * our wakeup signal). If FD is outside the fd table, RET is set to -1
 * and errno to EBADF without evaluating FUNC.
 *
 * Unlike BLOCKING_IO_RETURN_INT this does not return from the enclosing
 * function, so the caller can post-process out-parameters.
 */
#define BLOCKING_IO_INT(RET, FD, FUNC) do {         \
    threadEntry_t self;                             \
    fdEntry_t *fdEntry = getFdEntry(FD);            \
    if (fdEntry == NULL) {                          \
        errno = EBADF;                              \
        (RET) = -1;                                 \
        break;                                      \
    }                                               \
    do {                                            \
        startOp(fdEntry, &self);                    \
        (RET) = FUNC;                               \
        endOp(fdEntry, &self);                      \
    } while ((RET) == -1 && errno == EINTR);        \
} while (0)

/*
 * Macro to perform a blocking IO operation and return its result from
 * the enclosing function. Restarts automatically if interrupted by signal
 * (other than our wakeup signal).
 */
#define BLOCKING_IO_RETURN_INT(FD, FUNC) {          \
    int ret;                                        \
    BLOCKING_IO_INT(ret, FD, FUNC);                 \
    return ret;                                     \
}

/* Interruptible recv(s, buf, len, 0). */
int NET_Read(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
}

/* Interruptible readv(). */
int NET_ReadV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
}

/*
 * Interruptible recvfrom().
 *
 * fromlen is an in/out parameter and must not be NULL: on entry it holds the
 * size of the buffer behind 'from', on return the length reported by
 * recvfrom(). (Previously the write-back was placed after a macro that
 * already returned, so *fromlen was never updated.)
 */
int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
       struct sockaddr *from, int *fromlen) {
    int ret;
    socklen_t socklen = *fromlen;
    BLOCKING_IO_INT( ret, s, recvfrom(s, buf, len, flags, from, &socklen) );
    *fromlen = socklen;     /* plain store, leaves errno untouched */
    return ret;
}

/* Interruptible send(). */
int NET_Send(int s, void *msg, int len, unsigned int flags) {
    BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
}

/* Interruptible writev(). */
int NET_WriteV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) );
}

/* Interruptible sendto(). */
int NET_SendTo(int s, const void *msg, int len,  unsigned  int
       flags, const struct sockaddr *to, int tolen) {
    BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
}

/*
 * Interruptible accept().
 *
 * addrlen is an in/out parameter and must not be NULL: on entry it holds the
 * size of the buffer behind 'addr', on return the length reported by
 * accept(). (Previously the write-back was unreachable, see NET_RecvFrom.)
 */
int NET_Accept(int s, struct sockaddr *addr, int *addrlen) {
    int ret;
    socklen_t socklen = *addrlen;
    BLOCKING_IO_INT( ret, s, accept(s, addr, &socklen) );
    *addrlen = socklen;     /* plain store, leaves errno untouched */
    return ret;
}

/*
 * Interruptible connect().
 *
 * Returns 0 once the socket is connected, -1 with errno set otherwise.
 */
int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
    int crc = -1, prc = -1;
    threadEntry_t self;
    fdEntry_t* fdEntry = getFdEntry(s);

    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /* On AIX, when the system call connect() is interrupted, the connection
     * is not aborted and it will be established asynchronously by the kernel.
     * Hence, no need to restart connect() when EINTR is received
     */
    startOp(fdEntry, &self);
    crc = connect(s, addr, addrlen);
    endOp(fdEntry, &self);

    if (crc == -1 && errno == EINTR) {
        struct pollfd s_pollfd;
        int sockopt_arg = 0;
        socklen_t len;

        s_pollfd.fd = s;
        s_pollfd.events = POLLOUT | POLLERR;

        /* poll the file descriptor */
        do {
            startOp(fdEntry, &self);
            prc = poll(&s_pollfd, 1, -1);
            endOp(fdEntry, &self);
        } while (prc == -1 && errno == EINTR);

        if (prc < 0) {
            return prc;
        }

        len = sizeof(sockopt_arg);

        /* Check whether the connection has been established */
        if (getsockopt(s, SOL_SOCKET, SO_ERROR, &sockopt_arg, &len) == -1) {
            return -1;
        }

        if (sockopt_arg != 0 ) {
            errno = sockopt_arg;
            return -1;
        }
    } else {
        return crc;
    }

    /* At this point, fd is connected. Set successful return code */
    return 0;
}

/*
 * Interruptible poll(). The calling thread is registered only on the first
 * descriptor, ufds[0].fd; ufds[0] is read unconditionally, so callers must
 * pass at least one entry.
 */
int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
    BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
}

/*
 * Wrapper for poll(s, timeout).
 * Auto restarts with adjusted timeout if interrupted by
 * signal other than our wakeup signal.
 *
 * timeout is in milliseconds. Returns the poll() result for a single
 * POLLIN|POLLERR descriptor, 0 if the timeout expired, or -1 with errno set
 * (EBADF if s is out of range or the wait was interrupted by a close/dup2).
 */
int NET_Timeout(int s, long timeout) {
    long prevtime = 0, newtime;
    struct timeval t;
    fdEntry_t *fdEntry = getFdEntry(s);

    /*
     * Check that fd hasn't been closed.
     */
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Pick up current time as may need to adjust timeout
     */
    if (timeout > 0) {
        gettimeofday(&t, NULL);
        prevtime = t.tv_sec * 1000  +  t.tv_usec / 1000;
    }

    for(;;) {
        struct pollfd pfd;
        int rv;
        threadEntry_t self;

        /*
         * Poll the fd. If interrupted by our wakeup signal
         * errno will be set to EBADF.
         */
        pfd.fd = s;
        pfd.events = POLLIN | POLLERR;

        startOp(fdEntry, &self);
        rv = poll(&pfd, 1, timeout);
        endOp(fdEntry, &self);

        /*
         * If interrupted then adjust timeout. If timeout
         * has expired return 0 (indicating timeout expired).
         */
        if (rv < 0 && errno == EINTR) {
            if (timeout > 0) {
                gettimeofday(&t, NULL);
                newtime = t.tv_sec * 1000  +  t.tv_usec / 1000;
                timeout -= newtime - prevtime;
                if (timeout <= 0) {
                    return 0;
                }
                prevtime = newtime;
            }
        } else {
            return rv;
        }

    }
}