1 /* 2 * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 #include <assert.h> 27 #include <limits.h> 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <sys/param.h> 31 #include <signal.h> 32 #include <pthread.h> 33 #include <sys/types.h> 34 #include <sys/socket.h> 35 #include <sys/select.h> 36 #include <sys/time.h> 37 #include <sys/resource.h> 38 #include <sys/uio.h> 39 #include <unistd.h> 40 #include <errno.h> 41 #include <sys/poll.h> 42 43 /* 44 * Stack allocated by thread when doing blocking operation 45 */ 46 typedef struct threadEntry { 47 pthread_t thr; /* this thread */ 48 struct threadEntry *next; /* next thread */ 49 int intr; /* interrupted */ 50 } threadEntry_t; 51 52 /* 53 * Heap allocated during initialized - one entry per fd 54 */ 55 typedef struct { 56 pthread_mutex_t lock; /* fd lock */ 57 threadEntry_t *threads; /* threads blocked on fd */ 58 } fdEntry_t; 59 60 /* 61 * Signal to unblock thread 62 */ 63 static int sigWakeup = SIGIO; 64 65 /* 66 * fdTable holds one entry per file descriptor, up to a certain 67 * maximum. 68 * Theoretically, the number of possible file descriptors can get 69 * large, though usually it does not. Entries for small value file 70 * descriptors are kept in a simple table, which covers most scenarios. 71 * Entries for large value file descriptors are kept in an overflow 72 * table, which is organized as a sparse two dimensional array whose 73 * slabs are allocated on demand. This covers all corner cases while 74 * keeping memory consumption reasonable. 75 */ 76 77 /* Base table for low value file descriptors */ 78 static fdEntry_t* fdTable = NULL; 79 /* Maximum size of base table (in number of entries). */ 80 static const int fdTableMaxSize = 0x1000; /* 4K */ 81 /* Actual size of base table (in number of entries) */ 82 static int fdTableLen = 0; 83 /* Max. theoretical number of file descriptors on system. */ 84 static int fdLimit = 0; 85 86 /* Overflow table, should base table not be large enough. Organized as 87 * an array of n slabs, each holding 64k entries. 88 */ 89 static fdEntry_t** fdOverflowTable = NULL; 90 /* Number of slabs in the overflow table */ 91 static int fdOverflowTableLen = 0; 92 /* Number of entries in one slab */ 93 static const int fdOverflowTableSlabSize = 0x10000; /* 64k */ 94 pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER; 95 96 /* 97 * Null signal handler 98 */ 99 static void sig_wakeup(int sig) { 100 } 101 102 /* 103 * Initialization routine (executed when library is loaded) 104 * Allocate fd tables and sets up signal handler. 105 */ 106 static void __attribute((constructor)) init() { 107 struct rlimit nbr_files; 108 sigset_t sigset; 109 struct sigaction sa; 110 int i = 0; 111 112 /* Determine the maximum number of possible file descriptors. */ 113 if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) { 114 fprintf(stderr, "library initialization failed - " 115 "unable to get max # of allocated fds\n"); 116 abort(); 117 } 118 if (nbr_files.rlim_max != RLIM_INFINITY) { 119 fdLimit = nbr_files.rlim_max; 120 } else { 121 /* We just do not know. */ 122 fdLimit = INT_MAX; 123 } 124 125 /* Allocate table for low value file descriptors. */ 126 fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize; 127 fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t)); 128 if (fdTable == NULL) { 129 fprintf(stderr, "library initialization failed - " 130 "unable to allocate file descriptor table - out of memory"); 131 abort(); 132 } else { 133 for (i = 0; i < fdTableLen; i ++) { 134 pthread_mutex_init(&fdTable[i].lock, NULL); 135 } 136 } 137 138 /* Allocate overflow table, if needed */ 139 if (fdLimit > fdTableMaxSize) { 140 fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1; 141 fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*)); 142 if (fdOverflowTable == NULL) { 143 fprintf(stderr, "library initialization failed - " 144 "unable to allocate file descriptor overflow table - out of memory"); 145 abort(); 146 } 147 } 148 149 /* 150 * Setup the signal handler 151 */ 152 sa.sa_handler = sig_wakeup; 153 sa.sa_flags = 0; 154 sigemptyset(&sa.sa_mask); 155 sigaction(sigWakeup, &sa, NULL); 156 157 sigemptyset(&sigset); 158 sigaddset(&sigset, sigWakeup); 159 sigprocmask(SIG_UNBLOCK, &sigset, NULL); 160 } 161 162 /* 163 * Return the fd table for this fd. 164 */ 165 static inline fdEntry_t *getFdEntry(int fd) 166 { 167 fdEntry_t* result = NULL; 168 169 if (fd < 0) { 170 return NULL; 171 } 172 173 /* This should not happen. If it does, our assumption about 174 * max. fd value was wrong. */ 175 assert(fd < fdLimit); 176 177 if (fd < fdTableMaxSize) { 178 /* fd is in base table. */ 179 assert(fd < fdTableLen); 180 result = &fdTable[fd]; 181 } else { 182 /* fd is in overflow table. */ 183 const int indexInOverflowTable = fd - fdTableMaxSize; 184 const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize; 185 const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize; 186 fdEntry_t* slab = NULL; 187 assert(rootindex < fdOverflowTableLen); 188 assert(slabindex < fdOverflowTableSlabSize); 189 pthread_mutex_lock(&fdOverflowTableLock); 190 /* Allocate new slab in overflow table if needed */ 191 if (fdOverflowTable[rootindex] == NULL) { 192 fdEntry_t* const newSlab = 193 (fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t)); 194 if (newSlab == NULL) { 195 fprintf(stderr, "Unable to allocate file descriptor overflow" 196 " table slab - out of memory"); 197 pthread_mutex_unlock(&fdOverflowTableLock); 198 abort(); 199 } else { 200 int i; 201 for (i = 0; i < fdOverflowTableSlabSize; i ++) { 202 pthread_mutex_init(&newSlab[i].lock, NULL); 203 } 204 fdOverflowTable[rootindex] = newSlab; 205 } 206 } 207 pthread_mutex_unlock(&fdOverflowTableLock); 208 slab = fdOverflowTable[rootindex]; 209 result = &slab[slabindex]; 210 } 211 212 return result; 213 214 } 215 216 217 /* 218 * Start a blocking operation :- 219 * Insert thread onto thread list for the fd. 220 */ 221 static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self) 222 { 223 self->thr = pthread_self(); 224 self->intr = 0; 225 226 pthread_mutex_lock(&(fdEntry->lock)); 227 { 228 self->next = fdEntry->threads; 229 fdEntry->threads = self; 230 } 231 pthread_mutex_unlock(&(fdEntry->lock)); 232 } 233 234 /* 235 * End a blocking operation :- 236 * Remove thread from thread list for the fd 237 * If fd has been interrupted then set errno to EBADF 238 */ 239 static inline void endOp 240 (fdEntry_t *fdEntry, threadEntry_t *self) 241 { 242 int orig_errno = errno; 243 pthread_mutex_lock(&(fdEntry->lock)); 244 { 245 threadEntry_t *curr, *prev=NULL; 246 curr = fdEntry->threads; 247 while (curr != NULL) { 248 if (curr == self) { 249 if (curr->intr) { 250 orig_errno = EBADF; 251 } 252 if (prev == NULL) { 253 fdEntry->threads = curr->next; 254 } else { 255 prev->next = curr->next; 256 } 257 break; 258 } 259 prev = curr; 260 curr = curr->next; 261 } 262 } 263 pthread_mutex_unlock(&(fdEntry->lock)); 264 errno = orig_errno; 265 } 266 267 /* 268 * Close or dup2 a file descriptor ensuring that all threads blocked on 269 * the file descriptor are notified via a wakeup signal. 270 * 271 * fd1 < 0 => close(fd2) 272 * fd1 >= 0 => dup2(fd1, fd2) 273 * 274 * Returns -1 with errno set if operation fails. 275 */ 276 static int closefd(int fd1, int fd2) { 277 int rv, orig_errno; 278 fdEntry_t *fdEntry = getFdEntry(fd2); 279 if (fdEntry == NULL) { 280 errno = EBADF; 281 return -1; 282 } 283 284 /* 285 * Lock the fd to hold-off additional I/O on this fd. 286 */ 287 pthread_mutex_lock(&(fdEntry->lock)); 288 289 { 290 /* 291 * Send a wakeup signal to all threads blocked on this 292 * file descriptor. 293 */ 294 threadEntry_t *curr = fdEntry->threads; 295 while (curr != NULL) { 296 curr->intr = 1; 297 pthread_kill( curr->thr, sigWakeup ); 298 curr = curr->next; 299 } 300 301 /* 302 * And close/dup the file descriptor 303 * (restart if interrupted by signal) 304 */ 305 do { 306 if (fd1 < 0) { 307 rv = close(fd2); 308 } else { 309 rv = dup2(fd1, fd2); 310 } 311 } while (rv == -1 && errno == EINTR); 312 313 } 314 315 /* 316 * Unlock without destroying errno 317 */ 318 orig_errno = errno; 319 pthread_mutex_unlock(&(fdEntry->lock)); 320 errno = orig_errno; 321 322 return rv; 323 } 324 325 /* 326 * Wrapper for dup2 - same semantics as dup2 system call except 327 * that any threads blocked in an I/O system call on fd2 will be 328 * preempted and return -1/EBADF; 329 */ 330 int NET_Dup2(int fd, int fd2) { 331 if (fd < 0) { 332 errno = EBADF; 333 return -1; 334 } 335 return closefd(fd, fd2); 336 } 337 338 /* 339 * Wrapper for close - same semantics as close system call 340 * except that any threads blocked in an I/O on fd will be 341 * preempted and the I/O system call will return -1/EBADF. 342 */ 343 int NET_SocketClose(int fd) { 344 return closefd(-1, fd); 345 } 346 347 /************** Basic I/O operations here ***************/ 348 349 /* 350 * Macro to perform a blocking IO operation. Restarts 351 * automatically if interrupted by signal (other than 352 * our wakeup signal) 353 */ 354 #define BLOCKING_IO_RETURN_INT(FD, FUNC) { \ 355 int ret; \ 356 threadEntry_t self; \ 357 fdEntry_t *fdEntry = getFdEntry(FD); \ 358 if (fdEntry == NULL) { \ 359 errno = EBADF; \ 360 return -1; \ 361 } \ 362 do { \ 363 startOp(fdEntry, &self); \ 364 ret = FUNC; \ 365 endOp(fdEntry, &self); \ 366 } while (ret == -1 && errno == EINTR); \ 367 return ret; \ 368 } 369 370 int NET_Read(int s, void* buf, size_t len) { 371 BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) ); 372 } 373 374 int NET_ReadV(int s, const struct iovec * vector, int count) { 375 BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) ); 376 } 377 378 int NET_RecvFrom(int s, void *buf, int len, unsigned int flags, 379 struct sockaddr *from, socklen_t *fromlen) { 380 BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) ); 381 } 382 383 int NET_Send(int s, void *msg, int len, unsigned int flags) { 384 BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) ); 385 } 386 387 int NET_WriteV(int s, const struct iovec * vector, int count) { 388 BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) ); 389 } 390 391 int NET_SendTo(int s, const void *msg, int len, unsigned int 392 flags, const struct sockaddr *to, int tolen) { 393 BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) ); 394 } 395 396 int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) { 397 BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) ); 398 } 399 400 int NET_Connect(int s, struct sockaddr *addr, int addrlen) { 401 BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) ); 402 } 403 404 int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) { 405 BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) ); 406 } 407 408 /* 409 * Wrapper for select(s, timeout). We are using select() on Mac OS due to Bug 7131399. 410 * Auto restarts with adjusted timeout if interrupted by 411 * signal other than our wakeup signal. 412 */ 413 int NET_Timeout(int s, long timeout) { 414 long prevtime = 0, newtime; 415 struct timeval t, *tp = &t; 416 fd_set fds; 417 fd_set* fdsp = NULL; 418 int allocated = 0; 419 threadEntry_t self; 420 fdEntry_t *fdEntry = getFdEntry(s); 421 422 /* 423 * Check that fd hasn't been closed. 424 */ 425 if (fdEntry == NULL) { 426 errno = EBADF; 427 return -1; 428 } 429 430 /* 431 * Pick up current time as may need to adjust timeout 432 */ 433 if (timeout > 0) { 434 /* Timed */ 435 struct timeval now; 436 gettimeofday(&now, NULL); 437 prevtime = now.tv_sec * 1000 + now.tv_usec / 1000; 438 t.tv_sec = timeout / 1000; 439 t.tv_usec = (timeout % 1000) * 1000; 440 } else if (timeout < 0) { 441 /* Blocking */ 442 tp = 0; 443 } else { 444 /* Poll */ 445 t.tv_sec = 0; 446 t.tv_usec = 0; 447 } 448 449 if (s < FD_SETSIZE) { 450 fdsp = &fds; 451 FD_ZERO(fdsp); 452 } else { 453 int length = (howmany(s+1, NFDBITS)) * sizeof(int); 454 fdsp = (fd_set *) calloc(1, length); 455 if (fdsp == NULL) { 456 return -1; // errno will be set to ENOMEM 457 } 458 allocated = 1; 459 } 460 FD_SET(s, fdsp); 461 462 for(;;) { 463 int rv; 464 465 /* 466 * call select on the fd. If interrupted by our wakeup signal 467 * errno will be set to EBADF. 468 */ 469 470 startOp(fdEntry, &self); 471 rv = select(s+1, fdsp, 0, 0, tp); 472 endOp(fdEntry, &self); 473 474 /* 475 * If interrupted then adjust timeout. If timeout 476 * has expired return 0 (indicating timeout expired). 477 */ 478 if (rv < 0 && errno == EINTR) { 479 if (timeout > 0) { 480 struct timeval now; 481 gettimeofday(&now, NULL); 482 newtime = now.tv_sec * 1000 + now.tv_usec / 1000; 483 timeout -= newtime - prevtime; 484 if (timeout <= 0) { 485 if (allocated != 0) 486 free(fdsp); 487 return 0; 488 } 489 prevtime = newtime; 490 t.tv_sec = timeout / 1000; 491 t.tv_usec = (timeout % 1000) * 1000; 492 } 493 } else { 494 if (allocated != 0) 495 free(fdsp); 496 return rv; 497 } 498 499 } 500 }