1 /*
   2  * Copyright (c) 1995, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #undef  _LARGEFILE64_SOURCE
  27 #define _LARGEFILE64_SOURCE 1
  28 
  29 #include "jni.h"
  30 #include "jvm.h"
  31 #include "jvm_md.h"
  32 #include "jni_util.h"
  33 #include "io_util.h"
  34 
  35 /*
  36  * Platform-specific support for java.lang.Process
  37  */
  38 #include <assert.h>
  39 #include <stddef.h>
  40 #include <stdlib.h>
  41 #include <sys/types.h>
  42 #include <ctype.h>
  43 #include <sys/wait.h>
  44 #include <signal.h>
  45 #include <string.h>
  46 #include <errno.h>
  47 #include <dirent.h>
  48 #include <unistd.h>
  49 #include <fcntl.h>
  50 #include <limits.h>
  51 
  52 #ifdef __APPLE__
  53 #include <crt_externs.h>
  54 #define environ (*_NSGetEnviron())
  55 #endif
  56 
  57 /*
  58  * There are 3 possible strategies we might use to "fork":
  59  *
  60  * - fork(2).  Very portable and reliable but subject to
  61  *   failure due to overcommit (see the documentation on
  62  *   /proc/sys/vm/overcommit_memory in Linux proc(5)).
  63  *   This is the ancient problem of spurious failure whenever a large
  64  *   process starts a small subprocess.
  65  *
  66  * - vfork().  Using this is scary because all relevant man pages
  67  *   contain dire warnings, e.g. Linux vfork(2).  But at least it's
  68  *   documented in the glibc docs and is standardized by XPG4.
  69  *   http://www.opengroup.org/onlinepubs/000095399/functions/vfork.html
  70  *   On Linux, one might think that vfork() would be implemented using
  71  *   the clone system call with flag CLONE_VFORK, but in fact vfork is
  72  *   a separate system call (which is a good sign, suggesting that
  73  *   vfork will continue to be supported at least on Linux).
  74  *   Another good sign is that glibc implements posix_spawn using
  75  *   vfork whenever possible.  Note that we cannot use posix_spawn
  76  *   ourselves because there's no reliable way to close all inherited
  77  *   file descriptors.
  78  *
  79  * - clone() with flags CLONE_VM but not CLONE_THREAD.  clone() is
  80  *   Linux-specific, but this ought to work - at least the glibc
  81  *   sources contain code to handle different combinations of CLONE_VM
  82  *   and CLONE_THREAD.  However, when this was implemented, it
  83  *   appeared to fail on 32-bit i386 (but not 64-bit x86_64) Linux with
  84  *   the simple program
  85  *     Runtime.getRuntime().exec("/bin/true").waitFor();
  86  *   with:
  87  *     #  Internal Error (os_linux_x86.cpp:683), pid=19940, tid=2934639536
  88  *     #  Error: pthread_getattr_np failed with errno = 3 (ESRCH)
  89  *   We believe this is a glibc bug, reported here:
  90  *     http://sources.redhat.com/bugzilla/show_bug.cgi?id=10311
  91  *   but the glibc maintainers closed it as WONTFIX.
  92  *
  93  * Based on the above analysis, we are currently using vfork() on
  94  * Linux and fork() on other Unix systems, but the code to use clone()
  95  * remains.
  96  */
  97 
/*
 * Compile-time selection of the primitive used to create the child.
 * START_CHILD_USE_CLONE is unconditionally defined to 0 on the next line,
 * so the #ifndef fallback that follows it is inactive in this file as
 * written; it only takes effect if the unconditional definition is removed.
 */
#define START_CHILD_USE_CLONE 0  /* clone() currently disabled; see above. */

#ifndef START_CHILD_USE_CLONE
  #ifdef __linux__
    #define START_CHILD_USE_CLONE 1
  #else
    #define START_CHILD_USE_CLONE 0
  #endif
#endif

/* By default, use vfork() on Linux. */
#ifndef START_CHILD_USE_VFORK
  #ifdef __linux__
    #define START_CHILD_USE_VFORK 1
  #else
    #define START_CHILD_USE_VFORK 0
  #endif
#endif

/*
 * START_CHILD_SYSTEM_CALL is the name of the selected primitive as a string
 * literal; it is concatenated with " failed" to build the exception detail
 * when child creation fails.  clone takes precedence over vfork, which
 * takes precedence over fork.
 */
#if START_CHILD_USE_CLONE
#include <sched.h>
#define START_CHILD_SYSTEM_CALL "clone"
#elif START_CHILD_USE_VFORK
#define START_CHILD_SYSTEM_CALL "vfork"
#else
#define START_CHILD_SYSTEM_CALL "fork"
#endif
 125 
/* Fallback definitions of the standard descriptor numbers, used only when
 * the system headers did not already define them. */
#ifndef STDIN_FILENO
#define STDIN_FILENO 0
#endif

#ifndef STDOUT_FILENO
#define STDOUT_FILENO 1
#endif

#ifndef STDERR_FILENO
#define STDERR_FILENO 2
#endif

/* If a sigaction flag is not defined by the system headers, define it as 0
 * so that OR-ing it into sa_flags has no effect. */
#ifndef SA_NOCLDSTOP
#define SA_NOCLDSTOP 0
#endif

#ifndef SA_RESTART
#define SA_RESTART 0
#endif

/* Descriptor number that the child gives to the write end of the "fail"
 * pipe, over which it reports an errno value to the parent
 * (see childProcess). */
#define FAIL_FILENO (STDERR_FILENO + 1)

/* TODO: Refactor. */
/* RESTARTABLE(_cmd, _result): evaluate _cmd and store its value in _result,
 * repeating for as long as the value is -1 and errno is EINTR.  _cmd is
 * re-evaluated on every iteration. */
#define RESTARTABLE(_cmd, _result) do { \
  do { \
    _result = _cmd; \
  } while((_result == -1) && (errno == EINTR)); \
} while(0)
 154 
 155 /* This is one of the rare times it's more portable to declare an
 156  * external symbol explicitly, rather than via a system header.
 157  * The declaration is standardized as part of UNIX98, but there is
 158  * no standard (not even de-facto) header file where the
 159  * declaration is to be found.  See:
 160  * http://www.opengroup.org/onlinepubs/009695399/functions/environ.html
 161  * http://www.opengroup.org/onlinepubs/009695399/functions/xsh_chap02_02.html
 162  *
 163  * "All identifiers in this volume of IEEE Std 1003.1-2001, except
 164  * environ, are defined in at least one of the headers" (!)
 165  */
 166 extern char **environ;
 167 
 168 
 169 static void
 170 setSIGCHLDHandler(JNIEnv *env)
 171 {
 172     /* There is a subtle difference between having the signal handler
 173      * for SIGCHLD be SIG_DFL and SIG_IGN.  We cannot obtain process
 174      * termination information for child processes if the signal
 175      * handler is SIG_IGN.  It must be SIG_DFL.
 176      *
 177      * We used to set the SIGCHLD handler only on Linux, but it's
 178      * safest to set it unconditionally.
 179      *
 180      * Consider what happens if java's parent process sets the SIGCHLD
 181      * handler to SIG_IGN.  Normally signal handlers are inherited by
 182      * children, but SIGCHLD is a controversial case.  Solaris appears
 183      * to always reset it to SIG_DFL, but this behavior may be
 184      * non-standard-compliant, and we shouldn't rely on it.
 185      *
 186      * References:
 187      * http://www.opengroup.org/onlinepubs/7908799/xsh/exec.html
 188      * http://www.pasc.org/interps/unofficial/db/p1003.1/pasc-1003.1-132.html
 189      */
 190     struct sigaction sa;
 191     sa.sa_handler = SIG_DFL;
 192     sigemptyset(&sa.sa_mask);
 193     sa.sa_flags = SA_NOCLDSTOP | SA_RESTART;
 194     if (sigaction(SIGCHLD, &sa, NULL) < 0)
 195         JNU_ThrowInternalError(env, "Can't set SIGCHLD handler");
 196 }
 197 
 198 static void*
 199 xmalloc(JNIEnv *env, size_t size)
 200 {
 201     void *p = malloc(size);
 202     if (p == NULL)
 203         JNU_ThrowOutOfMemoryError(env, NULL);
 204     return p;
 205 }
 206 
 207 #define NEW(type, n) ((type *) xmalloc(env, (n) * sizeof(type)))
 208 
 209 /**
 210  * If PATH is not defined, the OS provides some default value.
 211  * Unfortunately, there's no portable way to get this value.
 212  * Fortunately, it's only needed if the child has PATH while we do not.
 213  */
 214 static const char*
 215 defaultPath(void)
 216 {
 217 #ifdef __solaris__
 218     /* These really are the Solaris defaults! */
 219     return (geteuid() == 0 || getuid() == 0) ?
 220         "/usr/xpg4/bin:/usr/ccs/bin:/usr/bin:/opt/SUNWspro/bin:/usr/sbin" :
 221         "/usr/xpg4/bin:/usr/ccs/bin:/usr/bin:/opt/SUNWspro/bin:";
 222 #else
 223     return ":/bin:/usr/bin";    /* glibc */
 224 #endif
 225 }
 226 
 227 static const char*
 228 effectivePath(void)
 229 {
 230     const char *s = getenv("PATH");
 231     return (s != NULL) ? s : defaultPath();
 232 }
 233 
 234 static int
 235 countOccurrences(const char *s, char c)
 236 {
 237     int count;
 238     for (count = 0; *s != '\0'; s++)
 239         count += (*s == c);
 240     return count;
 241 }
 242 
 243 static const char * const *
 244 splitPath(JNIEnv *env, const char *path)
 245 {
 246     const char *p, *q;
 247     char **pathv;
 248     const char *cwd = "./";
 249     size_t i;
 250     size_t count = countOccurrences(path, ':') + 1;
 251 
 252     pathv = NEW(char*, count+1);
 253     if (pathv == NULL)
 254         return NULL;
 255 
 256     pathv[count] = NULL;
 257     for (p = path, i = 0; i < count; i++, p = q + 1) {
 258         for (q = p; (*q != ':') && (*q != '\0'); q++)
 259             ;
 260 
 261         if (q == p) {             /* empty PATH component => "." */
 262             pathv[i] = NEW(char,sizeof(cwd));
 263 
 264             if (pathv[i] == NULL) {
 265 
 266                 while(i)
 267                     free(pathv[--i]); 
 268 
 269                 free(pathv);
 270                 return NULL;
 271             }
 272 
 273             memcpy(pathv[i],cwd,sizeof(cwd));
 274         }
 275         else {
 276             int addSlash = ((*(q - 1)) != '/');
 277             pathv[i] = NEW(char, q - p + addSlash + 1);
 278 
 279             if (pathv[i] == NULL) {
 280 
 281                 while(i)
 282                     free(pathv[--i]); 
 283 
 284                 free(pathv);
 285                 return NULL; 
 286             }
 287 
 288             memcpy(pathv[i], p, q - p);
 289             if (addSlash)
 290                 pathv[i][q - p] = '/';
 291             pathv[i][q - p + addSlash] = '\0';
 292         }
 293     }
 294     return (const char * const *) pathv;
 295 }
 296 
/**
 * Cached value of JVM's effective PATH, assigned once in
 * Java_java_lang_UNIXProcess_initIDs from effectivePath().
 * (We don't support putenv("PATH=...") in native code)
 */
static const char *parentPath;

/**
 * Split, canonicalized version of parentPath: the NULL-terminated vector
 * produced by splitPath().  JDK_execvpe walks this vector when it has to
 * search for an executable.
 */
static const char * const *parentPathv;

/* Field ID of the int field "exitcode" of the class passed to
 * Java_java_lang_UNIXProcess_initIDs. */
static jfieldID field_exitcode;
 309 
 310 JNIEXPORT void JNICALL
 311 Java_java_lang_UNIXProcess_initIDs(JNIEnv *env, jclass clazz)
 312 {
 313     field_exitcode = (*env)->GetFieldID(env, clazz, "exitcode", "I");
 314 
 315     parentPath  = effectivePath();
 316     parentPathv = splitPath(env, parentPath);
 317 
 318     setSIGCHLDHandler(env);
 319 }
 320 
 321 
/*
 * Fallback decoders for the status word filled in by waitpid(), used only
 * when the system headers did not already define them.
 */

/* True when the low 8 bits of status are all zero. */
#ifndef WIFEXITED
#define WIFEXITED(status) (((status)&0xFF) == 0)
#endif

/* Bits 8..15 of status. */
#ifndef WEXITSTATUS
#define WEXITSTATUS(status) (((status)>>8)&0xFF)
#endif

/* True when the low 8 bits are non-zero and bits 8..15 are all zero. */
#ifndef WIFSIGNALED
#define WIFSIGNALED(status) (((status)&0xFF) > 0 && ((status)&0xFF00) == 0)
#endif

/* The low 7 bits of status. */
#ifndef WTERMSIG
#define WTERMSIG(status) ((status)&0x7F)
#endif
 337 
 338 /* Block until a child process exits and return its exit code.
 339    Note, can only be called once for any given pid. */
 340 JNIEXPORT jint JNICALL
 341 Java_java_lang_UNIXProcess_waitForProcessExit(JNIEnv* env,
 342                                               jobject junk,
 343                                               jint pid)
 344 {
 345     /* We used to use waitid() on Solaris, waitpid() on Linux, but
 346      * waitpid() is more standard, so use it on all POSIX platforms. */
 347     int status;
 348     /* Wait for the child process to exit.  This returns immediately if
 349        the child has already exited. */
 350     while (waitpid(pid, &status, 0) < 0) {
 351         switch (errno) {
 352         case ECHILD: return 0;
 353         case EINTR: break;
 354         default: return -1;
 355         }
 356     }
 357 
 358     if (WIFEXITED(status)) {
 359         /*
 360          * The child exited normally; get its exit code.
 361          */
 362         return WEXITSTATUS(status);
 363     } else if (WIFSIGNALED(status)) {
 364         /* The child exited because of a signal.
 365          * The best value to return is 0x80 + signal number,
 366          * because that is what all Unix shells do, and because
 367          * it allows callers to distinguish between process exit and
 368          * process death by signal.
 369          * Unfortunately, the historical behavior on Solaris is to return
 370          * the signal number, and we preserve this for compatibility. */
 371 #ifdef __solaris__
 372         return WTERMSIG(status);
 373 #else
 374         return 0x80 + WTERMSIG(status);
 375 #endif
 376     } else {
 377         /*
 378          * Unknown exit code; pass it through.
 379          */
 380         return status;
 381     }
 382 }
 383 
 384 static ssize_t
 385 restartableWrite(int fd, const void *buf, size_t count)
 386 {
 387     ssize_t result;
 388     RESTARTABLE(write(fd, buf, count), result);
 389     return result;
 390 }
 391 
 392 static int
 393 restartableDup2(int fd_from, int fd_to)
 394 {
 395     int err;
 396     RESTARTABLE(dup2(fd_from, fd_to), err);
 397     return err;
 398 }
 399 
 400 static int
 401 restartableClose(int fd)
 402 {
 403     int err;
 404     RESTARTABLE(close(fd), err);
 405     return err;
 406 }
 407 
 408 static int
 409 closeSafely(int fd)
 410 {
 411     return (fd == -1) ? 0 : restartableClose(fd);
 412 }
 413 
 414 static int
 415 isAsciiDigit(char c)
 416 {
 417   return c >= '0' && c <= '9';
 418 }
 419 
 420 #ifdef _ALLBSD_SOURCE
 421 #define FD_DIR "/dev/fd"
 422 #define dirent64 dirent
 423 #define readdir64 readdir
 424 #else
 425 #define FD_DIR "/proc/self/fd"
 426 #endif
 427 
 428 static int
 429 closeDescriptors(void)
 430 {
 431     DIR *dp;
 432     struct dirent64 *dirp;
 433     int from_fd = FAIL_FILENO + 1;
 434 
 435     /* We're trying to close all file descriptors, but opendir() might
 436      * itself be implemented using a file descriptor, and we certainly
 437      * don't want to close that while it's in use.  We assume that if
 438      * opendir() is implemented using a file descriptor, then it uses
 439      * the lowest numbered file descriptor, just like open().  So we
 440      * close a couple explicitly.  */
 441 
 442     restartableClose(from_fd);          /* for possible use by opendir() */
 443     restartableClose(from_fd + 1);      /* another one for good luck */
 444 
 445     if ((dp = opendir(FD_DIR)) == NULL)
 446         return 0;
 447 
 448     /* We use readdir64 instead of readdir to work around Solaris bug
 449      * 6395699: /proc/self/fd fails to report file descriptors >= 1024 on Solaris 9
 450      */
 451     while ((dirp = readdir64(dp)) != NULL) {
 452         int fd;
 453         if (isAsciiDigit(dirp->d_name[0]) &&
 454             (fd = strtol(dirp->d_name, NULL, 10)) >= from_fd + 2)
 455             restartableClose(fd);
 456     }
 457 
 458     closedir(dp);
 459 
 460     return 1;
 461 }
 462 
 463 static int
 464 moveDescriptor(int fd_from, int fd_to)
 465 {
 466     if (fd_from != fd_to) {
 467         if ((restartableDup2(fd_from, fd_to) == -1) ||
 468             (restartableClose(fd_from) == -1))
 469             return -1;
 470     }
 471     return 0;
 472 }
 473 
 474 static const char *
 475 getBytes(JNIEnv *env, jbyteArray arr)
 476 {
 477     return arr == NULL ? NULL :
 478         (const char*) (*env)->GetByteArrayElements(env, arr, NULL);
 479 }
 480 
 481 static void
 482 releaseBytes(JNIEnv *env, jbyteArray arr, const char* parr)
 483 {
 484     if (parr != NULL)
 485         (*env)->ReleaseByteArrayElements(env, arr, (jbyte*) parr, JNI_ABORT);
 486 }
 487 
 488 static void
 489 initVectorFromBlock(const char**vector, const char* block, int count)
 490 {
 491     int i;
 492     const char *p;
 493     for (i = 0, p = block; i < count; i++) {
 494         /* Invariant: p always points to the start of a C string. */
 495         vector[i] = p;
 496         while (*(p++));
 497     }
 498     vector[count] = NULL;
 499 }
 500 
 501 static void
 502 throwIOException(JNIEnv *env, int errnum, const char *defaultDetail)
 503 {
 504     static const char * const format = "error=%d, %s";
 505     const char *detail = defaultDetail;
 506     char *errmsg;
 507     jstring s;
 508 
 509     if (errnum != 0) {
 510         const char *s = strerror(errnum);
 511         if (strcmp(s, "Unknown error") != 0)
 512             detail = s;
 513     }
 514     /* ASCII Decimal representation uses 2.4 times as many bits as binary. */
 515     errmsg = NEW(char, strlen(format) + strlen(detail) + 3 * sizeof(errnum));
 516     if (errmsg == NULL)
 517         return;
 518 
 519     sprintf(errmsg, format, errnum, detail);
 520     s = JNU_NewStringPlatform(env, errmsg);
 521     if (s != NULL) {
 522         jobject x = JNU_NewObjectByName(env, "java/io/IOException",
 523                                         "(Ljava/lang/String;)V", s);
 524         if (x != NULL)
 525             (*env)->Throw(env, x);
 526     }
 527     free(errmsg);
 528 }
 529 
 530 #ifdef DEBUG_PROCESS
 531 /* Debugging process code is difficult; where to write debug output? */
 532 static void
 533 debugPrint(char *format, ...)
 534 {
 535     FILE *tty = fopen("/dev/tty", "w");
 536     va_list ap;
 537     va_start(ap, format);
 538     vfprintf(tty, format, ap);
 539     va_end(ap);
 540     fclose(tty);
 541 }
 542 #endif /* DEBUG_PROCESS */
 543 
 544 /**
 545  * Exec FILE as a traditional Bourne shell script (i.e. one without #!).
 546  * If we could do it over again, we would probably not support such an ancient
 547  * misfeature, but compatibility wins over sanity.  The original support for
 548  * this was imported accidentally from execvp().
 549  */
 550 static void
 551 execve_as_traditional_shell_script(const char *file,
 552                                    const char *argv[],
 553                                    const char *const envp[])
 554 {
 555     /* Use the extra word of space provided for us in argv by caller. */
 556     const char *argv0 = argv[0];
 557     const char *const *end = argv;
 558     while (*end != NULL)
 559         ++end;
 560     memmove(argv+2, argv+1, (end-argv) * sizeof (*end));
 561     argv[0] = "/bin/sh";
 562     argv[1] = file;
 563     execve(argv[0], (char **) argv, (char **) envp);
 564     /* Can't even exec /bin/sh?  Big trouble, but let's soldier on... */
 565     memmove(argv+1, argv+2, (end-argv) * sizeof (*end));
 566     argv[0] = argv0;
 567 }
 568 
 569 /**
 570  * Like execve(2), except that in case of ENOEXEC, FILE is assumed to
 571  * be a shell script and the system default shell is invoked to run it.
 572  */
 573 static void
 574 execve_with_shell_fallback(const char *file,
 575                            const char *argv[],
 576                            const char *const envp[])
 577 {
 578 #if START_CHILD_USE_CLONE || START_CHILD_USE_VFORK
 579     /* shared address space; be very careful. */
 580     execve(file, (char **) argv, (char **) envp);
 581     if (errno == ENOEXEC)
 582         execve_as_traditional_shell_script(file, argv, envp);
 583 #else
 584     /* unshared address space; we can mutate environ. */
 585     environ = (char **) envp;
 586     execvp(file, (char **) argv);
 587 #endif
 588 }
 589 
/**
 * 'execvpe' should have been included in the Unix standards,
 * and is a GNU extension in glibc 2.10.
 *
 * JDK_execvpe is identical to execvp, except that the child environment is
 * specified via the 3rd argument instead of being inherited from environ.
 *
 * When FILE contains no '/', it is looked up in the parent's search path
 * (the pre-split vector parentPathv), not in any PATH found in envp.
 *
 * This function returns only on failure, with errno describing the error.
 */
static void
JDK_execvpe(const char *file,
            const char *argv[],
            const char *const envp[])
{
    /* No separate environment requested: plain execvp does the job. */
    if (envp == NULL || (char **) envp == environ) {
        execvp(file, (char **) argv);
        return;
    }

    /* An empty file name can never be executed. */
    if (*file == '\0') {
        errno = ENOENT;
        return;
    }

    if (strchr(file, '/') != NULL) {
        /* FILE names a path; no search is performed. */
        execve_with_shell_fallback(file, argv, envp);
    } else {
        /* We must search PATH (parent's, not child's) */
        char expanded_file[PATH_MAX];
        int filelen = strlen(file);
        int sticky_errno = 0;   /* EACCES seen earlier in the search, if any */
        const char * const * dirs;
        for (dirs = parentPathv; *dirs; dirs++) {
            const char * dir = *dirs;
            int dirlen = strlen(dir);
            /* Skip directories for which the combined name would not fit
             * in expanded_file. */
            if (filelen + dirlen + 1 >= PATH_MAX) {
                errno = ENAMETOOLONG;
                continue;
            }
            /* Entries produced by splitPath already end in '/', so the
             * file name is appended directly. */
            memcpy(expanded_file, dir, dirlen);
            memcpy(expanded_file + dirlen, file, filelen);
            expanded_file[dirlen + filelen] = '\0';
            execve_with_shell_fallback(expanded_file, argv, envp);
            /* There are 3 responses to various classes of errno:
             * return immediately, continue (especially for ENOENT),
             * or continue with "sticky" errno.
             *
             * From exec(3):
             *
             * If permission is denied for a file (the attempted
             * execve returned EACCES), these functions will continue
             * searching the rest of the search path.  If no other
             * file is found, however, they will return with the
             * global variable errno set to EACCES.
             */
            switch (errno) {
            case EACCES:
                sticky_errno = errno;
                /* FALLTHRU */
            case ENOENT:
            case ENOTDIR:
#ifdef ELOOP
            case ELOOP:
#endif
#ifdef ESTALE
            case ESTALE:
#endif
#ifdef ENODEV
            case ENODEV:
#endif
#ifdef ETIMEDOUT
            case ETIMEDOUT:
#endif
                break; /* Try other directories in PATH */
            default:
                return;
            }
        }
        /* Search exhausted: report a remembered EACCES in preference to
         * the errno left by the last directory tried. */
        if (sticky_errno != 0)
            errno = sticky_errno;
    }
}
 670 
 671 /*
 672  * Reads nbyte bytes from file descriptor fd into buf,
 673  * The read operation is retried in case of EINTR or partial reads.
 674  *
 675  * Returns number of bytes read (normally nbyte, but may be less in
 676  * case of EOF).  In case of read errors, returns -1 and sets errno.
 677  */
 678 static ssize_t
 679 readFully(int fd, void *buf, size_t nbyte)
 680 {
 681     ssize_t remaining = nbyte;
 682     for (;;) {
 683         ssize_t n = read(fd, buf, remaining);
 684         if (n == 0) {
 685             return nbyte - remaining;
 686         } else if (n > 0) {
 687             remaining -= n;
 688             if (remaining <= 0)
 689                 return nbyte;
 690             /* We were interrupted in the middle of reading the bytes.
 691              * Unlikely, but possible. */
 692             buf = (void *) (((char *)buf) + n);
 693         } else if (errno == EINTR) {
 694             /* Strange signals like SIGJVM1 are possible at any time.
 695              * See http://www.dreamsongs.com/WorseIsBetter.html */
 696         } else {
 697             return -1;
 698         }
 699     }
 700 }
 701 
/*
 * Everything the child needs after fork/vfork/clone, gathered in one
 * object so that it can be passed as the single argument of childProcess.
 * Filled in by Java_java_lang_UNIXProcess_forkAndExec.
 */
typedef struct _ChildStuff
{
    /* Copies of the descriptor pairs created with pipe() in the parent.
     * A pair that was not created holds -1 in both slots. */
    int in[2];      /* child's stdin:  child keeps [0], parent keeps [1] */
    int out[2];     /* child's stdout: child keeps [1], parent keeps [0] */
    int err[2];     /* child's stderr: child keeps [1], parent keeps [0] */
    int fail[2];    /* errno report:   child keeps [1], parent keeps [0] */
    /* Caller-supplied descriptors for stdin/stdout/stderr; the child uses
     * fds[i] when the corresponding pipe above was not created. */
    int fds[3];
    const char **argv;      /* argument vector; argv[0] is the program */
    const char **envv;      /* environment vector, or NULL if none given */
    const char *pdir;       /* directory to chdir() to, or NULL */
    jboolean redirectErrorStream;   /* if set, stderr is a dup of stdout */
#if START_CHILD_USE_CLONE
    void *clone_stack;      /* malloc'ed stack for clone(); freed by parent */
#endif
} ChildStuff;
 717 
 718 static void
 719 copyPipe(int from[2], int to[2])
 720 {
 721     to[0] = from[0];
 722     to[1] = from[1];
 723 }
 724 
/**
 * Child process after a successful fork() or clone().
 * This function must not return, and must be prepared for either all
 * of its address space to be shared with its parent, or to be a copy.
 * It must not modify global variables such as "environ".
 *
 * arg points to the ChildStuff prepared by the parent.  The child wires
 * up stdin/stdout/stderr, moves the write end of the fail pipe to
 * FAIL_FILENO, closes every other descriptor, changes directory if
 * requested, and finally execs the program.  If any step fails, the errno
 * value is written to FAIL_FILENO and the child calls _exit(-1).
 */
static int
childProcess(void *arg)
{
    const ChildStuff* p = (const ChildStuff*) arg;

    /* Close the parent sides of the pipes.
       Closing pipe fds here is redundant, since closeDescriptors()
       would do it anyway, but a little paranoia is a good thing. */
    if ((closeSafely(p->in[1])   == -1) ||
        (closeSafely(p->out[0])  == -1) ||
        (closeSafely(p->err[0])  == -1) ||
        (closeSafely(p->fail[0]) == -1))
        goto WhyCantJohnnyExec;

    /* Give the child sides of the pipes the right fileno's. */
    /* Note: it is possible for in[0] == 0 */
    /* For each stream, use our end of the pipe if one was created,
     * otherwise the descriptor supplied by the caller in fds[]. */
    if ((moveDescriptor(p->in[0] != -1 ?  p->in[0] : p->fds[0],
                        STDIN_FILENO) == -1) ||
        (moveDescriptor(p->out[1]!= -1 ? p->out[1] : p->fds[1],
                        STDOUT_FILENO) == -1))
        goto WhyCantJohnnyExec;

    if (p->redirectErrorStream) {
        /* stderr becomes a duplicate of stdout; the stderr pipe, if any,
         * is not needed. */
        if ((closeSafely(p->err[1]) == -1) ||
            (restartableDup2(STDOUT_FILENO, STDERR_FILENO) == -1))
            goto WhyCantJohnnyExec;
    } else {
        if (moveDescriptor(p->err[1] != -1 ? p->err[1] : p->fds[2],
                           STDERR_FILENO) == -1)
            goto WhyCantJohnnyExec;
    }

    /* NOTE(review): every failure above jumps to WhyCantJohnnyExec before
     * the fail pipe has been moved to FAIL_FILENO, so the errno report
     * below may then be written to some other descriptor -- confirm
     * whether this matters in practice. */
    if (moveDescriptor(p->fail[1], FAIL_FILENO) == -1)
        goto WhyCantJohnnyExec;

    /* close everything */
    if (closeDescriptors() == 0) { /* failed,  close the old way */
        int max_fd = (int)sysconf(_SC_OPEN_MAX);
        int fd;
        for (fd = FAIL_FILENO + 1; fd < max_fd; fd++)
            if (restartableClose(fd) == -1 && errno != EBADF)
                goto WhyCantJohnnyExec;
    }

    /* change to the new working directory */
    if (p->pdir != NULL && chdir(p->pdir) < 0)
        goto WhyCantJohnnyExec;

    /* Have a successful exec close the fail pipe automatically; the
     * parent then reads EOF instead of an errno value. */
    if (fcntl(FAIL_FILENO, F_SETFD, FD_CLOEXEC) == -1)
        goto WhyCantJohnnyExec;

    /* Returns only if the exec failed. */
    JDK_execvpe(p->argv[0], p->argv, p->envv);

 WhyCantJohnnyExec:
    /* We used to go to an awful lot of trouble to predict whether the
     * child would fail, but there is no reliable way to predict the
     * success of an operation without *trying* it, and there's no way
     * to try a chdir or exec in the parent.  Instead, all we need is a
     * way to communicate any failure back to the parent.  Easy; we just
     * send the errno back to the parent over a pipe in case of failure.
     * The tricky thing is, how do we communicate the *success* of exec?
     * We use FD_CLOEXEC together with the fact that a read() on a pipe
     * yields EOF when the write ends (we have two of them!) are closed.
     */
    {
        /* Capture errno first, before any further call can change it. */
        int errnum = errno;
        restartableWrite(FAIL_FILENO, &errnum, sizeof(errnum));
    }
    restartableClose(FAIL_FILENO);
    /* _exit, not exit: terminate the child immediately. */
    _exit(-1);
    return 0;  /* Suppress warning "no return value from function" */
}
 803 
/**
 * Start a child process running function childProcess.
 * This function only returns in the parent.
 * We are unusually paranoid; use of clone/vfork is
 * especially likely to tickle gcc/glibc bugs.
 *
 * Returns the value produced by clone(), vfork() or fork() in the parent:
 * the child's pid on success, or -1 on failure.  In the clone variant, -1
 * is also returned when the child's stack cannot be allocated.
 */
#ifdef __attribute_noinline__  /* See: sys/cdefs.h */
__attribute_noinline__
#endif
static pid_t
startChild(ChildStuff *c) {
#if START_CHILD_USE_CLONE
#define START_CHILD_CLONE_STACK_SIZE (64 * 1024)
    /*
     * See clone(2).
     * Instead of worrying about which direction the stack grows, just
     * allocate twice as much and start the stack in the middle.
     */
    /* The stack is kept in c->clone_stack so that the caller can free it
     * later. */
    if ((c->clone_stack = malloc(2 * START_CHILD_CLONE_STACK_SIZE)) == NULL)
        /* errno will be set to ENOMEM */
        return -1;
    return clone(childProcess,
                 c->clone_stack + START_CHILD_CLONE_STACK_SIZE,
                 CLONE_VFORK | CLONE_VM | SIGCHLD, c);
#else
  #if START_CHILD_USE_VFORK
    /*
     * We separate the call to vfork into a separate function to make
     * very sure to keep stack of child from corrupting stack of parent,
     * as suggested by the scary gcc warning:
     *  warning: variable 'foo' might be clobbered by 'longjmp' or 'vfork'
     */
    volatile pid_t resultPid = vfork();
  #else
    /*
     * From Solaris fork(2): In Solaris 10, a call to fork() is
     * identical to a call to fork1(); only the calling thread is
     * replicated in the child process. This is the POSIX-specified
     * behavior for fork().
     */
    pid_t resultPid = fork();
  #endif
    /* In the child the result is 0: run childProcess, which execs or
     * exits and so never comes back here. */
    if (resultPid == 0)
        childProcess(c);
    assert(resultPid != 0);  /* childProcess never returns */
    return resultPid;
#endif /* ! START_CHILD_USE_CLONE */
}
 852 
/*
 * Creates a child process and execs the requested program in it.
 *
 * prog                 bytes of the program name; becomes argv[0]
 * argBlock, argc       argc strings stored back to back, each
 *                      NUL-terminated; they become argv[1..argc]
 * envBlock, envc       same layout for the environment; envBlock may be
 *                      NULL, in which case no environment vector is built
 * dir                  working directory for the child, or NULL
 * std_fds              int array with one entry each for stdin, stdout
 *                      and stderr.  On entry, -1 requests a new pipe for
 *                      that stream and any other value is a descriptor
 *                      for the child to use.  On success each entry is
 *                      overwritten with the parent's end of the pipe
 *                      created for that stream, or -1 if none was created.
 * redirectErrorStream  if true, the child's stderr is a dup of its stdout
 *
 * Returns the child's pid.  On failure a Java exception is left pending;
 * the return value is then -1 if no child was created, or the pid of the
 * child if a child was created but a later step (exec, or reading its
 * status) failed.
 */
JNIEXPORT jint JNICALL
Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env,
                                       jobject process,
                                       jbyteArray prog,
                                       jbyteArray argBlock, jint argc,
                                       jbyteArray envBlock, jint envc,
                                       jbyteArray dir,
                                       jintArray std_fds,
                                       jboolean redirectErrorStream)
{
    int errnum;
    int resultPid = -1;
    int in[2], out[2], err[2], fail[2];
    jint *fds = NULL;
    const char *pprog = NULL;
    const char *pargBlock = NULL;
    const char *penvBlock = NULL;
    ChildStuff *c;

    /* -1 marks "not open" so the cleanup code can run unconditionally. */
    in[0] = in[1] = out[0] = out[1] = err[0] = err[1] = fail[0] = fail[1] = -1;

    if ((c = NEW(ChildStuff, 1)) == NULL) return -1;
    c->argv = NULL;
    c->envv = NULL;
    c->pdir = NULL;
#if START_CHILD_USE_CLONE
    c->clone_stack = NULL;
#endif

    /* Convert prog + argBlock into a char ** argv.
     * Add one word room for expansion of argv for use by
     * execve_as_traditional_shell_script.
     */
    assert(prog != NULL && argBlock != NULL);
    if ((pprog     = getBytes(env, prog))       == NULL) goto Catch;
    if ((pargBlock = getBytes(env, argBlock))   == NULL) goto Catch;
    /* argc + 3 slots: argv[0], argc arguments, NULL, and the spare word. */
    if ((c->argv = NEW(const char *, argc + 3)) == NULL) goto Catch;
    c->argv[0] = pprog;
    initVectorFromBlock(c->argv+1, pargBlock, argc);

    if (envBlock != NULL) {
        /* Convert envBlock into a char ** envv */
        if ((penvBlock = getBytes(env, envBlock))   == NULL) goto Catch;
        if ((c->envv = NEW(const char *, envc + 1)) == NULL) goto Catch;
        initVectorFromBlock(c->envv, penvBlock, envc);
    }

    if (dir != NULL) {
        if ((c->pdir = getBytes(env, dir)) == NULL) goto Catch;
    }

    assert(std_fds != NULL);
    fds = (*env)->GetIntArrayElements(env, std_fds, NULL);
    if (fds == NULL) goto Catch;

    /* Create a pipe for each standard stream the caller did not supply,
     * plus the pipe over which the child reports an exec failure. */
    if ((fds[0] == -1 && pipe(in)  < 0) ||
        (fds[1] == -1 && pipe(out) < 0) ||
        (fds[2] == -1 && pipe(err) < 0) ||
        (pipe(fail) < 0)) {
        throwIOException(env, errno, "Bad file descriptor");
        goto Catch;
    }
    c->fds[0] = fds[0];
    c->fds[1] = fds[1];
    c->fds[2] = fds[2];

    copyPipe(in,   c->in);
    copyPipe(out,  c->out);
    copyPipe(err,  c->err);
    copyPipe(fail, c->fail);

    c->redirectErrorStream = redirectErrorStream;

    resultPid = startChild(c);
    assert(resultPid != 0);

    if (resultPid < 0) {
        throwIOException(env, errno, START_CHILD_SYSTEM_CALL " failed");
        goto Catch;
    }

    /* Close our copy of the write end so that the read below sees EOF
     * once the child's copy is closed. */
    restartableClose(fail[1]); fail[1] = -1; /* See: WhyCantJohnnyExec */

    /* The child either sends exactly one int (its errno) or nothing. */
    switch (readFully(fail[0], &errnum, sizeof(errnum))) {
    case 0: break; /* Exec succeeded */
    case sizeof(errnum):
        /* The child failed before or during exec; reap it. */
        waitpid(resultPid, NULL, 0);
        throwIOException(env, errnum, "Exec failed");
        goto Catch;
    default:
        throwIOException(env, errno, "Read failed");
        goto Catch;
    }

    /* Success: hand the parent's ends of the pipes back to the caller. */
    fds[0] = (in [1] != -1) ? in [1] : -1;
    fds[1] = (out[0] != -1) ? out[0] : -1;
    fds[2] = (err[0] != -1) ? err[0] : -1;

 Finally:
#if START_CHILD_USE_CLONE
    free(c->clone_stack);
#endif

    /* Always clean up the child's side of the pipes */
    closeSafely(in [0]);
    closeSafely(out[1]);
    closeSafely(err[1]);

    /* Always clean up fail descriptors */
    closeSafely(fail[0]);
    closeSafely(fail[1]);

    releaseBytes(env, prog,     pprog);
    releaseBytes(env, argBlock, pargBlock);
    releaseBytes(env, envBlock, penvBlock);
    releaseBytes(env, dir,      c->pdir);

    free(c->argv);
    free(c->envv);
    free(c);

    /* Mode 0: release the buffer, committing any changes made to fds. */
    if (fds != NULL)
        (*env)->ReleaseIntArrayElements(env, std_fds, fds, 0);

    return resultPid;

 Catch:
    /* Clean up the parent's side of the pipes in case of failure only */
    closeSafely(in [1]);
    closeSafely(out[0]);
    closeSafely(err[0]);
    goto Finally;
}
 986 
 987 JNIEXPORT void JNICALL
 988 Java_java_lang_UNIXProcess_destroyProcess(JNIEnv *env,
 989                                           jobject junk,
 990                                           jint pid,
 991                                           jboolean force)
 992 {
 993     int sig = (force == JNI_TRUE) ? SIGKILL : SIGTERM;
 994     kill(pid, sig);
 995 }