1 /* 2 * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
 */

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/param.h>
#include <signal.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/uio.h>
#include <unistd.h>
#include <errno.h>
#include <sys/poll.h>

/*
 * Stack allocated by a thread while it performs a blocking operation;
 * linked into the per-fd thread list so closefd() can find and signal
 * every thread currently blocked on the fd.
 */
typedef struct threadEntry {
    pthread_t thr;                      /* this thread */
    struct threadEntry *next;           /* next thread blocked on same fd */
    int intr;                           /* set to 1 by closefd() when the fd is closed under us */
} threadEntry_t;

/*
 * Heap allocated during initialization - one entry per fd
 */
typedef struct {
    pthread_mutex_t lock;               /* fd lock */
    threadEntry_t *threads;             /* list of threads blocked on this fd */
} fdEntry_t;

/*
 * Signal used to unblock a thread stuck in a blocking syscall.
 * Delivered via pthread_kill(); the no-op handler (installed without
 * SA_RESTART) makes the syscall fail with EINTR.
 */
static int sigWakeup = SIGIO;

/*
 * fdTable holds one entry per file descriptor, up to a certain
 * maximum.
 * Theoretically, the number of possible file descriptors can get
 * large, though usually it does not. To save memory, we keep file
 * descriptors with large numerical values in an overflow table. That
 * table is organized as a two-dimensional sparse array, allocated
 * on demand.
 */

static fdEntry_t* fdTable;
/* Max. number of file descriptors in fdTable. */
static const int fdTableMaxSize = 0x1000; /* 4K */
/* Max. theoretical number of file descriptor on system. */
static int fdLimit;
/* Length of fdTable, in number of entries. */
static int fdTableLen;

/* Overflow table: organized as array of n slabs, each holding
 * 64k entries.
 */
static fdEntry_t** fdOverflowTable;
/* Number of slabs in the overflow table */
static int fdOverflowTableLen;
/* Number of entries in one slab */
static const int fdOverflowTableSlabSize = 0x10000; /* 64k */
/* Guards lazy allocation of slabs in fdOverflowTable */
pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Null signal handler: exists only so delivery of sigWakeup interrupts
 * blocking syscalls with EINTR instead of taking the default action.
 */
static void sig_wakeup(int sig) {
}

/*
 * Initialization routine (executed when library is loaded)
 * Allocate fd tables and sets up signal handler.
 */
static void __attribute((constructor)) init() {
    struct rlimit nbr_files;
    sigset_t sigset;
    struct sigaction sa;
    int i = 0;

    /* Determine the maximum number of possible file descriptors. */
    getrlimit(RLIMIT_NOFILE, &nbr_files);
    if (nbr_files.rlim_max != RLIM_INFINITY) {
        fdLimit = nbr_files.rlim_max;
    } else {
        /* We just do not know. */
        fdLimit = INT_MAX;
    }

    /* Allocate table for low value file descriptors. */
    fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize;
    fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t));
    if (fdTable == NULL) {
        /* Cannot operate without the table: fail hard at load time. */
        fprintf(stderr, "library initialization failed - "
                "unable to allocate file descriptor table - out of memory");
        abort();
    } else {
        for (i = 0; i < fdTableLen; i ++) {
            pthread_mutex_init(&fdTable[i].lock, NULL);
        }
    }

    /* Allocate overflow table, if needed. Only the slab pointer array
     * is allocated here; the slabs themselves are allocated on demand
     * in getFdEntry(). */
    if (fdLimit > fdTableMaxSize) {
        fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1;
        fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*));
        if (fdOverflowTable == NULL) {
            fprintf(stderr, "library initialization failed - "
                    "unable to allocate file descriptor overflow table - out of memory");
            abort();
        }
    }

    /*
     * Setup the signal handler. sa_flags deliberately omits SA_RESTART
     * so that sigWakeup interrupts blocking syscalls.
     */
    sa.sa_handler = sig_wakeup;
    sa.sa_flags = 0;
    sigemptyset(&sa.sa_mask);
    sigaction(sigWakeup, &sa, NULL);

    /* Make sure the wakeup signal is not blocked in this thread. */
    sigemptyset(&sigset);
    sigaddset(&sigset, sigWakeup);
    sigprocmask(SIG_UNBLOCK, &sigset, NULL);
}

/*
 * Return the fd table entry for this fd, lazily allocating an overflow
 * slab for large fd values. Returns NULL for negative fds.
 */
static inline fdEntry_t *getFdEntry(int fd)
{
    fdEntry_t* result = NULL;

    if (fd < 0) {
        return NULL;
    }

    /* This should not happen. If it does, our assumption about
     * max. fd value was wrong. */
    assert(fd < fdLimit);

    if (fd < fdTableMaxSize) {
        /* Small fd: direct index into the flat table. */
        assert(fd < fdTableLen);
        result = fdTable + fd;
    } else {
        /* Large fd: locate (and allocate on demand) the overflow slab. */
        const int indexInOverflowTable = fd - fdTableMaxSize;
        const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize;
        const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize;
        assert(rootindex < fdOverflowTableLen);
        assert(slabindex < fdOverflowTableSlabSize);
        pthread_mutex_lock(&fdOverflowTableLock);
        if (fdOverflowTable[rootindex] == NULL) {
            fdEntry_t* const newSlab =
                (fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t));
            if (newSlab == NULL) {
                fprintf(stderr, "Unable to allocate file descriptor table - out of memory");
                pthread_mutex_unlock(&fdOverflowTableLock);
                abort();
            } else {
                int i;
                for (i = 0; i < fdOverflowTableSlabSize; i ++) {
                    pthread_mutex_init(&newSlab[i].lock, NULL);
                }
                fdOverflowTable[rootindex] = newSlab;
            }
        }
        pthread_mutex_unlock(&fdOverflowTableLock);
        /* Safe to read outside the lock: a slab pointer, once published,
         * is never changed or freed. */
        result = fdOverflowTable[rootindex] + slabindex;
    }

    return result;

}

/*
 * Start a blocking operation :-
 * Insert thread onto thread list for the fd.
 */
static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    self->thr = pthread_self();
    self->intr = 0;

    pthread_mutex_lock(&(fdEntry->lock));
    {
        /* Push ourselves onto the head of the fd's blocked-thread list. */
        self->next = fdEntry->threads;
        fdEntry->threads = self;
    }
    pthread_mutex_unlock(&(fdEntry->lock));
}

/*
 * End a blocking operation :-
 * Remove thread from thread list for the fd
 * If fd has been interrupted then set errno to EBADF
 */
static inline void endOp
    (fdEntry_t *fdEntry, threadEntry_t *self)
{
    /* Preserve errno from the I/O call across the unlink below. */
    int orig_errno = errno;
    pthread_mutex_lock(&(fdEntry->lock));
    {
        threadEntry_t *curr, *prev=NULL;
        curr = fdEntry->threads;
        while (curr != NULL) {
            if (curr == self) {
                if (curr->intr) {
                    /* closefd() marked us: report the fd as closed. */
                    orig_errno = EBADF;
                }
                /* Unlink self from the singly-linked list. */
                if (prev == NULL) {
                    fdEntry->threads = curr->next;
                } else {
                    prev->next = curr->next;
                }
                break;
            }
            prev = curr;
            curr = curr->next;
        }
    }
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;
}

/*
 * Close or dup2 a file descriptor ensuring that all threads blocked on
 * the file descriptor are notified via a wakeup signal.
 *
 *      fd1 < 0  => close(fd2)
 *      fd1 >= 0 => dup2(fd1, fd2)
 *
 * Returns -1 with errno set if operation fails.
 */
static int closefd(int fd1, int fd2) {
    int rv, orig_errno;
    fdEntry_t *fdEntry = getFdEntry(fd2);
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Lock the fd to hold-off additional I/O on this fd.
     */
    pthread_mutex_lock(&(fdEntry->lock));

    {
        /*
         * Send a wakeup signal to all threads blocked on this
         * file descriptor. Each gets intr set first, so it will
         * report EBADF from endOp() after being interrupted.
         */
        threadEntry_t *curr = fdEntry->threads;
        while (curr != NULL) {
            curr->intr = 1;
            pthread_kill( curr->thr, sigWakeup );
            curr = curr->next;
        }

        /*
         * And close/dup the file descriptor
         * (restart if interrupted by signal)
         * NOTE(review): retrying close() on EINTR is the historical
         * behavior here; whether the fd is already closed at that
         * point is platform-specific — confirm against macOS close(2).
         */
        do {
            if (fd1 < 0) {
                rv = close(fd2);
            } else {
                rv = dup2(fd1, fd2);
            }
        } while (rv == -1 && errno == EINTR);

    }

    /*
     * Unlock without destroying errno
     */
    orig_errno = errno;
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;

    return rv;
}

/*
 * Wrapper for dup2 - same semantics as dup2 system call except
 * that any threads blocked in an I/O system call on fd2 will be
 * preempted and return -1/EBADF;
 */
int NET_Dup2(int fd, int fd2) {
    if (fd < 0) {
        errno = EBADF;
        return -1;
    }
    return closefd(fd, fd2);
}

/*
 * Wrapper for close - same semantics as close system call
 * except that any threads blocked in an I/O on fd will be
 * preempted and the I/O system call will return -1/EBADF.
 */
int NET_SocketClose(int fd) {
    return closefd(-1, fd);
}

/************** Basic I/O operations here ***************/

/*
 * Macro to perform a blocking IO operation.
Restarts 337 * automatically if interrupted by signal (other than 338 * our wakeup signal) 339 */ 340 #define BLOCKING_IO_RETURN_INT(FD, FUNC) { \ 341 int ret; \ 342 threadEntry_t self; \ 343 fdEntry_t *fdEntry = getFdEntry(FD); \ 344 if (fdEntry == NULL) { \ 345 errno = EBADF; \ 346 return -1; \ 347 } \ 348 do { \ 349 startOp(fdEntry, &self); \ 350 ret = FUNC; \ 351 endOp(fdEntry, &self); \ 352 } while (ret == -1 && errno == EINTR); \ 353 return ret; \ 354 } 355 356 int NET_Read(int s, void* buf, size_t len) { 357 BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) ); 358 } 359 360 int NET_ReadV(int s, const struct iovec * vector, int count) { 361 BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) ); 362 } 363 364 int NET_RecvFrom(int s, void *buf, int len, unsigned int flags, 365 struct sockaddr *from, socklen_t *fromlen) { 366 BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) ); 367 } 368 369 int NET_Send(int s, void *msg, int len, unsigned int flags) { 370 BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) ); 371 } 372 373 int NET_WriteV(int s, const struct iovec * vector, int count) { 374 BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) ); 375 } 376 377 int NET_SendTo(int s, const void *msg, int len, unsigned int 378 flags, const struct sockaddr *to, int tolen) { 379 BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) ); 380 } 381 382 int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) { 383 BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) ); 384 } 385 386 int NET_Connect(int s, struct sockaddr *addr, int addrlen) { 387 BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) ); 388 } 389 390 int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) { 391 BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) ); 392 } 393 394 /* 395 * Wrapper for select(s, timeout). We are using select() on Mac OS due to Bug 7131399. 
 * Auto restarts with adjusted timeout if interrupted by
 * signal other than our wakeup signal.
 *
 * Returns >0 if fd is readable, 0 on timeout, -1 on error
 * (EBADF if the fd was closed/interrupted by closefd()).
 */
int NET_Timeout(int s, long timeout) {
    long prevtime = 0, newtime;
    struct timeval t, *tp = &t;
    fd_set fds;
    fd_set* fdsp = NULL;
    int allocated = 0;      /* 1 if fdsp was heap-allocated (s >= FD_SETSIZE) */
    threadEntry_t self;
    fdEntry_t *fdEntry = getFdEntry(s);

    /*
     * Check that fd hasn't been closed.
     */
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Pick up current time as may need to adjust timeout
     */
    if (timeout > 0) {
        /* Timed: remember the start time (in ms) so the remaining
         * timeout can be recomputed after a spurious EINTR. */
        struct timeval now;
        gettimeofday(&now, NULL);
        prevtime = now.tv_sec * 1000  +  now.tv_usec / 1000;
        t.tv_sec = timeout / 1000;
        t.tv_usec = (timeout % 1000) * 1000;
    } else if (timeout < 0) {
        /* Blocking: NULL timeout pointer means wait indefinitely. */
        tp = 0;
    } else {
        /* Poll: zero timeout returns immediately. */
        t.tv_sec = 0;
        t.tv_usec = 0;
    }

    /* An fd >= FD_SETSIZE does not fit in a stack fd_set; allocate a
     * zeroed buffer large enough to hold bit s instead. */
    if (s < FD_SETSIZE) {
        fdsp = &fds;
        FD_ZERO(fdsp);
    } else {
        /* NOTE(review): length is computed in sizeof(int) units; this
         * matches NFDBITS granularity on this platform — confirm on
         * systems where fd_mask is wider than int. */
        int length = (howmany(s+1, NFDBITS)) * sizeof(int);
        fdsp = (fd_set *) calloc(1, length);
        if (fdsp == NULL) {
            return -1; // errno will be set to ENOMEM
        }
        allocated = 1;
    }
    FD_SET(s, fdsp);

    for(;;) {
        int rv;

        /*
         * call select on the fd. If interrupted by our wakeup signal
         * errno will be set to EBADF (endOp converts the EINTR).
         */

        startOp(fdEntry, &self);
        rv = select(s+1, fdsp, 0, 0, tp);
        endOp(fdEntry, &self);

        /*
         * If interrupted then adjust timeout. If timeout
         * has expired return 0 (indicating timeout expired).
         */
        if (rv < 0 && errno == EINTR) {
            if (timeout > 0) {
                /* Some other signal interrupted us: subtract the time
                 * already waited and retry with the remainder. */
                struct timeval now;
                gettimeofday(&now, NULL);
                newtime = now.tv_sec * 1000  +  now.tv_usec / 1000;
                timeout -= newtime - prevtime;
                if (timeout <= 0) {
                    if (allocated != 0)
                        free(fdsp);
                    return 0;
                }
                prevtime = newtime;
                t.tv_sec = timeout / 1000;
                t.tv_usec = (timeout % 1000) * 1000;
            }
            /* timeout <= 0 (blocking or poll): just retry unchanged. */
        } else {
            /* Success, timeout, or a real error (incl. EBADF). */
            if (allocated != 0)
                free(fdsp);
            return rv;
        }

    }
}