/*
 * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/param.h>
#include <signal.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/uio.h>
#include <unistd.h>
#include <errno.h>
#include <sys/poll.h>

/*
 * Stack allocated by thread when doing blocking operation
 */
typedef struct threadEntry {
    pthread_t thr;                      /* this thread */
    struct threadEntry *next;           /* next thread */
    int intr;                           /* interrupted */
} threadEntry_t;

/*
 * Heap allocated during initialization - one entry per fd
 */
typedef struct {
    pthread_mutex_t lock;               /* fd lock */
    threadEntry_t *threads;             /* threads blocked on fd */
} fdEntry_t;

/*
 * Signal to unblock thread
 */
static int sigWakeup = SIGIO;

/*
 * fdTable holds one entry per file descriptor, up to a certain
 * maximum.
 * Theoretically, the number of possible file descriptors can get
 * large, though usually it does not. Entries for small value file
 * descriptors are kept in a simple table, which covers most scenarios.
 * Entries for large value file descriptors are kept in an overflow
 * table, which is organized as a sparse two dimensional array whose
 * slabs are allocated on demand. This covers all corner cases while
 * keeping memory consumption reasonable.
 */

/* Base table for low value file descriptors */
static fdEntry_t* fdTable = NULL;
/* Maximum size of base table (in number of entries). */
static const int fdTableMaxSize = 0x1000; /* 4K */
/* Actual size of base table (in number of entries) */
static int fdTableLen = 0;
/* Max. theoretical number of file descriptors on system. */
static int fdLimit = 0;

/* Overflow table, should base table not be large enough. Organized as
 *   an array of n slabs, each holding 64k entries.
 */
static fdEntry_t** fdOverflowTable = NULL;
/* Number of slabs in the overflow table */
static int fdOverflowTableLen = 0;
/* Number of entries in one slab */
static const int fdOverflowTableSlabSize = 0x10000; /* 64k */
/*
 * Serializes on-demand slab allocation in getFdEntry().
 * Left with external linkage, as before, so the set of exported
 * symbols does not change.
 */
pthread_mutex_t fdOverflowTableLock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Null signal handler.
 * It does nothing itself; installing it (without SA_RESTART) is what makes
 * a blocking system call return EINTR when sigWakeup is delivered.
 */
static void sig_wakeup(int sig) {
    (void) sig;
}

/*
 * Initialization routine (executed when library is loaded)
 * Allocate fd tables and sets up signal handler.
 *
 * Aborts the process if the fd limit cannot be queried or a table
 * cannot be allocated.
 */
static void __attribute((constructor)) init(void) {
    struct rlimit nbr_files;
    sigset_t sigset;
    struct sigaction sa;
    int i = 0;

    /* Determine the maximum number of possible file descriptors. */
    if (-1 == getrlimit(RLIMIT_NOFILE, &nbr_files)) {
        fprintf(stderr, "library initialization failed - "
                "unable to get max # of allocated fds\n");
        abort();
    }
    /*
     * rlim_t is wider than int on common platforms. Clamp instead of
     * letting a large (but finite) hard limit be truncated into a bogus,
     * possibly zero or negative, table size.
     */
    if (nbr_files.rlim_max != RLIM_INFINITY &&
        nbr_files.rlim_max <= (rlim_t) INT_MAX) {
        fdLimit = (int) nbr_files.rlim_max;
    } else {
        /* Unlimited, or larger than any int-valued fd can be. */
        fdLimit = INT_MAX;
    }

    /* Allocate table for low value file descriptors. */
    fdTableLen = fdLimit < fdTableMaxSize ? fdLimit : fdTableMaxSize;
    fdTable = (fdEntry_t*) calloc(fdTableLen, sizeof(fdEntry_t));
    if (fdTable == NULL) {
        fprintf(stderr, "library initialization failed - "
                "unable to allocate file descriptor table - out of memory");
        abort();
    } else {
        for (i = 0; i < fdTableLen; i ++) {
            pthread_mutex_init(&fdTable[i].lock, NULL);
        }
    }

    /* Allocate overflow table, if needed */
    if (fdLimit > fdTableMaxSize) {
        /* Only the array of slab pointers is allocated here; slabs come later. */
        fdOverflowTableLen = ((fdLimit - fdTableMaxSize) / fdOverflowTableSlabSize) + 1;
        fdOverflowTable = (fdEntry_t**) calloc(fdOverflowTableLen, sizeof(fdEntry_t*));
        if (fdOverflowTable == NULL) {
            fprintf(stderr, "library initialization failed - "
                    "unable to allocate file descriptor overflow table - out of memory");
            abort();
        }
    }

    /*
     * Setup the signal handler
     */
    sa.sa_handler = sig_wakeup;
    sa.sa_flags   = 0;
    sigemptyset(&sa.sa_mask);
    sigaction(sigWakeup, &sa, NULL);

    /* Make sure the wakeup signal is not blocked in the calling thread. */
    sigemptyset(&sigset);
    sigaddset(&sigset, sigWakeup);
    sigprocmask(SIG_UNBLOCK, &sigset, NULL);
}

/*
 * Return the fd table entry for this fd.
 *
 * Returns NULL if fd is negative or not below fdLimit; callers report
 * that as EBADF. For an fd that lands in the overflow table, the slab
 * holding it is allocated on first use (the process is aborted if that
 * allocation fails).
 */
static inline fdEntry_t *getFdEntry(int fd)
{
    fdEntry_t* result = NULL;

    if (fd < 0) {
        return NULL;
    }

    /* This should not happen. If it does, our assumption about
     * max. fd value was wrong. */
    assert(fd < fdLimit);
    if (fd >= fdLimit) {
        /* With NDEBUG the assert above vanishes; never index past the tables. */
        return NULL;
    }

    if (fd < fdTableMaxSize) {
        /* fd is in base table. */
        assert(fd < fdTableLen);
        result = &fdTable[fd];
    } else {
        /* fd is in overflow table. */
        const int indexInOverflowTable = fd - fdTableMaxSize;
        const int rootindex = indexInOverflowTable / fdOverflowTableSlabSize;
        const int slabindex = indexInOverflowTable % fdOverflowTableSlabSize;
        fdEntry_t* slab = NULL;
        assert(rootindex < fdOverflowTableLen);
        assert(slabindex < fdOverflowTableSlabSize);
        pthread_mutex_lock(&fdOverflowTableLock);
        /* Allocate new slab in overflow table if needed */
        if (fdOverflowTable[rootindex] == NULL) {
            fdEntry_t* const newSlab =
                (fdEntry_t*)calloc(fdOverflowTableSlabSize, sizeof(fdEntry_t));
            if (newSlab == NULL) {
                fprintf(stderr, "Unable to allocate file descriptor overflow"
                        " table slab - out of memory");
                pthread_mutex_unlock(&fdOverflowTableLock);
                abort();
            } else {
                int i;
                for (i = 0; i < fdOverflowTableSlabSize; i ++) {
                    pthread_mutex_init(&newSlab[i].lock, NULL);
                }
                fdOverflowTable[rootindex] = newSlab;
            }
        }
        /* Read the slab pointer while still holding the lock that publishes it. */
        slab = fdOverflowTable[rootindex];
        pthread_mutex_unlock(&fdOverflowTableLock);
        result = &slab[slabindex];
    }

    return result;

}


/*
 * Start a blocking operation :-
 *    Insert thread onto thread list for the fd.
 *
 * 'self' is caller-provided storage (the callers in this file keep it on
 * their stack); it must stay valid until the matching endOp().
 */
static inline void startOp(fdEntry_t *fdEntry, threadEntry_t *self)
{
    self->thr = pthread_self();
    self->intr = 0;

    pthread_mutex_lock(&(fdEntry->lock));
    {
        /* Push onto the head of the list. */
        self->next = fdEntry->threads;
        fdEntry->threads = self;
    }
    pthread_mutex_unlock(&(fdEntry->lock));
}

/*
 * End a blocking operation :-
 *     Remove thread from thread list for the fd
 *     If fd has been interrupted then set errno to EBADF
 *
 * Otherwise errno is left exactly as the I/O call set it.
 */
static inline void endOp
    (fdEntry_t *fdEntry, threadEntry_t *self)
{
    /* Remember errno from the I/O call; the mutex calls below must not disturb it. */
    int orig_errno = errno;
    pthread_mutex_lock(&(fdEntry->lock));
    {
        threadEntry_t *curr, *prev=NULL;
        curr = fdEntry->threads;
        while (curr != NULL) {
            if (curr == self) {
                if (curr->intr) {
                    /* closefd() flagged this thread while it was blocked. */
                    orig_errno = EBADF;
                }
                if (prev == NULL) {
                    fdEntry->threads = curr->next;
                } else {
                    prev->next = curr->next;
                }
                break;
            }
            prev = curr;
            curr = curr->next;
        }
    }
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;
}

/*
 * Close or dup2 a file descriptor ensuring that all threads blocked on
 * the file descriptor are notified via a wakeup signal.
 *
 *      fd1 < 0    => close(fd2)
 *      fd1 >= 0   => dup2(fd1, fd2)
 *
 * Returns -1 with errno set if operation fails.
 */
static int closefd(int fd1, int fd2) {
    int rv, orig_errno;
    fdEntry_t *fdEntry = getFdEntry(fd2);
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Lock the fd to hold-off additional I/O on this fd.
     */
    pthread_mutex_lock(&(fdEntry->lock));

    {
        /*
         * Send a wakeup signal to all threads blocked on this
         * file descriptor.
         */
        threadEntry_t *curr = fdEntry->threads;
        while (curr != NULL) {
            curr->intr = 1;
            pthread_kill( curr->thr, sigWakeup );
            curr = curr->next;
        }

        /*
         * And close/dup the file descriptor.
         *
         * Only dup2() is restarted when interrupted by a signal. close()
         * is issued exactly once: after an EINTR from close() the state
         * of the descriptor is unspecified, and on systems that release
         * it anyway a second close() could hit a descriptor that another
         * thread has just been handed.
         */
        if (fd1 < 0) {
            rv = close(fd2);
        } else {
            do {
                rv = dup2(fd1, fd2);
            } while (rv == -1 && errno == EINTR);
        }

    }

    /*
     * Unlock without destroying errno
     */
    orig_errno = errno;
    pthread_mutex_unlock(&(fdEntry->lock));
    errno = orig_errno;

    return rv;
}

/*
 * Wrapper for dup2 - same semantics as dup2 system call except
 * that any threads blocked in an I/O system call on fd2 will be
 * preempted and return -1/EBADF;
 */
int NET_Dup2(int fd, int fd2) {
    if (fd < 0) {
        /* A negative source would otherwise select the close() path in closefd(). */
        errno = EBADF;
        return -1;
    }
    return closefd(fd, fd2);
}

/*
 * Wrapper for close - same semantics as close system call
 * except that any threads blocked in an I/O on fd will be
 * preempted and the I/O system call will return -1/EBADF.
 */
int NET_SocketClose(int fd) {
    return closefd(-1, fd);
}

/************** Basic I/O operations here ***************/

/*
 * Macro to perform a blocking IO operation. Restarts
 * automatically if interrupted by signal (other than
 * our wakeup signal)
 *
 * The expansion is a complete function body: it returns -1/EBADF when FD
 * has no table entry, otherwise the (int) result of FUNC. FUNC is
 * re-evaluated on every restart.
 */
#define BLOCKING_IO_RETURN_INT(FD, FUNC) {      \
    int ret;                                    \
    threadEntry_t self;                         \
    fdEntry_t *fdEntry = getFdEntry(FD);        \
    if (fdEntry == NULL) {                      \
        errno = EBADF;                          \
        return -1;                              \
    }                                           \
    do {                                        \
        startOp(fdEntry, &self);                \
        ret = FUNC;                             \
        endOp(fdEntry, &self);                  \
    } while (ret == -1 && errno == EINTR);      \
    return ret;                                 \
}

/* Interruptible recv(s, buf, len, 0). */
int NET_Read(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, 0) );
}

/* Interruptible recv() with MSG_DONTWAIT. */
int NET_NonBlockingRead(int s, void* buf, size_t len) {
    BLOCKING_IO_RETURN_INT( s, recv(s, buf, len, MSG_DONTWAIT));
}

/* Interruptible readv(). */
int NET_ReadV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, readv(s, vector, count) );
}

/* Interruptible recvfrom(). */
int NET_RecvFrom(int s, void *buf, int len, unsigned int flags,
       struct sockaddr *from, socklen_t *fromlen) {
    BLOCKING_IO_RETURN_INT( s, recvfrom(s, buf, len, flags, from, fromlen) );
}

/* Interruptible send(). */
int NET_Send(int s, void *msg, int len, unsigned int flags) {
    BLOCKING_IO_RETURN_INT( s, send(s, msg, len, flags) );
}

/* Interruptible writev(). */
int NET_WriteV(int s, const struct iovec * vector, int count) {
    BLOCKING_IO_RETURN_INT( s, writev(s, vector, count) );
}

/* Interruptible sendto(). */
int NET_SendTo(int s, const void *msg, int len,  unsigned  int
       flags, const struct sockaddr *to, int tolen) {
    BLOCKING_IO_RETURN_INT( s, sendto(s, msg, len, flags, to, tolen) );
}

/* Interruptible accept(). */
int NET_Accept(int s, struct sockaddr *addr, socklen_t *addrlen) {
    BLOCKING_IO_RETURN_INT( s, accept(s, addr, addrlen) );
}

/* Interruptible connect(). */
int NET_Connect(int s, struct sockaddr *addr, int addrlen) {
    BLOCKING_IO_RETURN_INT( s, connect(s, addr, addrlen) );
}

/*
 * Interruptible poll(). The calling thread is registered only against
 * ufds[0].fd, so ufds must point to at least one entry.
 */
int NET_Poll(struct pollfd *ufds, unsigned int nfds, int timeout) {
    BLOCKING_IO_RETURN_INT( ufds[0].fd, poll(ufds, nfds, timeout) );
}

/*
 * Wrapper for select(s, timeout). We are using select() on Mac OS due to Bug 7131399.
 * Auto restarts with adjusted timeout if interrupted by
 * signal other than our wakeup signal.
 *
 * timeout is in milliseconds: > 0 waits at most that long, 0 polls,
 * < 0 blocks indefinitely. Returns the select() result, 0 once the
 * timeout has been used up across restarts, or -1 with errno set.
 */
int NET_Timeout(int s, long timeout) {
    long prevtime = 0, newtime;
    struct timeval t, *tp = &t;
    fd_set fds;
    fd_set* fdsp = NULL;
    int allocated = 0;
    threadEntry_t self;
    fdEntry_t *fdEntry = getFdEntry(s);

    /*
     * Check that fd hasn't been closed.
     */
    if (fdEntry == NULL) {
        errno = EBADF;
        return -1;
    }

    /*
     * Pick up current time as may need to adjust timeout
     */
    if (timeout > 0) {
        /* Timed */
        struct timeval now;
        gettimeofday(&now, NULL);
        prevtime = now.tv_sec * 1000  +  now.tv_usec / 1000;
        t.tv_sec = timeout / 1000;
        t.tv_usec = (timeout % 1000) * 1000;
    } else if (timeout < 0) {
        /* Blocking */
        tp = NULL;
    } else {
        /* Poll */
        t.tv_sec = 0;
        t.tv_usec = 0;
    }

    if (s < FD_SETSIZE) {
        fdsp = &fds;
        FD_ZERO(fdsp);
    } else {
        /*
         * s does not fit in a regular fd_set: build one on the heap.
         * NFDBITS is the number of bits in one fd_set word, so
         * NFDBITS / CHAR_BIT is that word's size in bytes.
         */
        size_t length = (size_t) howmany(s + 1, NFDBITS) * (NFDBITS / CHAR_BIT);
        fdsp = (fd_set *) calloc(1, length);
        if (fdsp == NULL) {
            return -1;   // errno will be set to ENOMEM
        }
        allocated = 1;
    }
    FD_SET(s, fdsp);

    for(;;) {
        int rv;

        /*
         * call select on the fd. If interrupted by our wakeup signal
         * errno will be set to EBADF.
         */

        startOp(fdEntry, &self);
        rv = select(s+1, fdsp, NULL, NULL, tp);
        endOp(fdEntry, &self);

        /*
         * If interrupted then adjust timeout. If timeout
         * has expired return 0 (indicating timeout expired).
         */
        if (rv < 0 && errno == EINTR) {
            if (timeout > 0) {
                struct timeval now;
                gettimeofday(&now, NULL);
                newtime = now.tv_sec * 1000  +  now.tv_usec / 1000;
                timeout -= newtime - prevtime;
                if (timeout <= 0) {
                    /* fdsp may point at the stack set, so free only our own allocation. */
                    if (allocated != 0) {
                        free(fdsp);
                    }
                    return 0;
                }
                prevtime = newtime;
                t.tv_sec = timeout / 1000;
                t.tv_usec = (timeout % 1000) * 1000;
            }
        } else {
            if (allocated != 0) {
                free(fdsp);
            }
            return rv;
        }

    }
}