1 /*
   2  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.  Oracle designates this
   8  * particular file as subject to the "Classpath" exception as provided
   9  * by Oracle in the LICENSE file that accompanied this code.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/stat.h>
  28 #include <fcntl.h>
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32 #include "jli_util.h"
  33 
  34 #include <zlib.h>
  35 #include "manifest_info.h"
  36 
  37 static char     *manifest;
  38 
  39 static const char       *manifest_name = "META-INF/MANIFEST.MF";
  40 
  41 /*
  42  * Inflate the manifest file (or any file for that matter).
  43  *
  44  *   fd:        File descriptor of the jar file.
  45  *   entry:     Contains the information necessary to perform the inflation
  46  *              (the compressed and uncompressed sizes and the offset in
  47  *              the file where the compressed data is located).
  48  *   size_out:  Returns the size of the inflated file.
  49  *
  50  * Upon success, it returns a pointer to a NUL-terminated malloc'd buffer
  51  * containing the inflated manifest file.  When the caller is done with it,
  52  * this buffer should be released by a call to free().  Upon failure,
  53  * returns NULL.
  54  */
  55 static char *
  56 inflate_file(int fd, zentry *entry, int *size_out)
  57 {
  58     char        *in;
  59     char        *out;
  60     z_stream    zs;
  61 
  62     if (entry->csize == (size_t) -1 || entry->isize == (size_t) -1 )
  63         return (NULL);
  64     if (lseek(fd, entry->offset, SEEK_SET) < (off_t)0)
  65         return (NULL);
  66     if ((in = malloc(entry->csize + 1)) == NULL)
  67         return (NULL);
  68     if ((size_t)(read(fd, in, (unsigned int)entry->csize)) != entry->csize) {
  69         free(in);
  70         return (NULL);
  71     }
  72     if (entry->how == STORED) {
  73         *(char *)((size_t)in + entry->csize) = '\0';
  74         if (size_out) {
  75             *size_out = (int)entry->csize;
  76         }
  77         return (in);
  78     } else if (entry->how == DEFLATED) {
  79         zs.zalloc = (alloc_func)Z_NULL;
  80         zs.zfree = (free_func)Z_NULL;
  81         zs.opaque = (voidpf)Z_NULL;
  82         zs.next_in = (Byte*)in;
  83         zs.avail_in = (uInt)entry->csize;
  84         if (inflateInit2(&zs, -MAX_WBITS) < 0) {
  85             free(in);
  86             return (NULL);
  87         }
  88         if ((out = malloc(entry->isize + 1)) == NULL) {
  89             free(in);
  90             return (NULL);
  91         }
  92         zs.next_out = (Byte*)out;
  93         zs.avail_out = (uInt)entry->isize;
  94         if (inflate(&zs, Z_PARTIAL_FLUSH) < 0) {
  95             free(in);
  96             free(out);
  97             return (NULL);
  98         }
  99         *(char *)((size_t)out + entry->isize) = '\0';
 100         free(in);
 101         if (inflateEnd(&zs) < 0) {
 102             free(out);
 103             return (NULL);
 104         }
 105         if (size_out) {
 106             *size_out = (int)entry->isize;
 107         }
 108         return (out);
 109     } else
 110         return (NULL);
 111 }
 112 
 113 /*
 114  * A very little used routine to handle the case that zip file has
 115  * a comment at the end. Believe it or not, the only way to find the
 116  * END record is to walk backwards, byte by bloody byte looking for
 117  * the END record signature.
 118  *
 119  *      fd:     File descriptor of the jar file.
 120  *      eb:     Pointer to a buffer to receive a copy of the END header.
 121  *
 122  * Returns the offset of the END record in the file on success,
 123  * -1 on failure.
 124  */
 125 static off_t
 126 find_end(int fd, Byte *eb)
 127 {
 128     off_t   len;
 129     off_t   pos;
 130     off_t   flen;
 131     int     bytes;
 132     Byte    *cp;
 133     Byte    *endpos;
 134     Byte    *buffer;
 135 
 136     /*
 137      * 99.44% (or more) of the time, there will be no comment at the
 138      * end of the zip file.  Try reading just enough to read the END
 139      * record from the end of the file.
 140      */
 141     if ((pos = lseek(fd, -ENDHDR, SEEK_END)) < (off_t)0)
 142         return (-1);
 143     if ((bytes = read(fd, eb, ENDHDR)) < 0)
 144         return (-1);
 145     if (GETSIG(eb) == ENDSIG)
 146         return (pos);
 147 
 148     /*
 149      * Shucky-Darn,... There is a comment at the end of the zip file.
 150      *
 151      * Allocate and fill a buffer with enough of the zip file
 152      * to meet the specification for a maximal comment length.
 153      */
 154     if ((flen = lseek(fd, 0, SEEK_END)) < (off_t)0)
 155         return (-1);
 156     len = (flen < END_MAXLEN) ? flen : END_MAXLEN;
 157     if (lseek(fd, -len, SEEK_END) < (off_t)0)
 158         return (-1);
 159     if ((buffer = malloc(END_MAXLEN)) == NULL)
 160         return (-1);
 161     if ((bytes = read(fd, buffer, len)) < 0) {
 162         free(buffer);
 163         return (-1);
 164     }
 165 
 166     /*
 167      * Search backwards from the end of file stopping when the END header
 168      * signature is found. (The first condition of the "if" is just a
 169      * fast fail, because the GETSIG macro isn't always cheap.  The
 170      * final condition protects against false positives.)
 171      */
 172     endpos = &buffer[bytes];
 173     for (cp = &buffer[bytes - ENDHDR]; cp >= &buffer[0]; cp--)
 174         if ((*cp == (ENDSIG & 0xFF)) && (GETSIG(cp) == ENDSIG) &&
 175           (cp + ENDHDR + ENDCOM(cp) == endpos)) {
 176             (void) memcpy(eb, cp, ENDHDR);
 177             free(buffer);
 178             return (flen - (endpos - cp));
 179         }
 180     free(buffer);
 181     return (-1);
 182 }
 183 
 184 /*
 185  * Locate the manifest file with the zip/jar file.
 186  *
 187  *      fd:     File descriptor of the jar file.
 188  *      entry:  To be populated with the information necessary to perform
 189  *              the inflation (the compressed and uncompressed sizes and
 190  *              the offset in the file where the compressed data is located).
 191  *
 192  * Returns zero upon success. Returns a negative value upon failure.
 193  *
 194  * The buffer for reading the Central Directory if the zip/jar file needs
 195  * to be large enough to accommodate the largest possible single record
 196  * and the signature of the next record which is:
 197  *
 198  *      3*2**16 + CENHDR + SIGSIZ
 199  *
 200  * Each of the three variable sized fields (name, comment and extension)
 201  * has a maximum possible size of 64k.
 202  *
 203  * Typically, only a small bit of this buffer is used with bytes shuffled
 204  * down to the beginning of the buffer.  It is one thing to allocate such
 205  * a large buffer and another thing to actually start faulting it in.
 206  *
 207  * In most cases, all that needs to be read are the first two entries in
 208  * a typical jar file (META-INF and META-INF/MANIFEST.MF). Keep this factoid
 209  * in mind when optimizing this code.
 210  */
 211 #define BUFSIZE (3 * 65536 + CENHDR + SIGSIZ)
 212 #define MINREAD 1024
 213 
 214 static int
 215 find_file(int fd, zentry *entry, const char *file_name)
 216 {
 217     int     bytes;
 218     int     res;
 219     int     entry_size;
 220     int     read_size;
 221     int     base_offset;
 222     Byte    *p;
 223     Byte    *bp;
 224     Byte    *buffer;
 225     Byte    locbuf[LOCHDR];
 226 
 227     if ((buffer = (Byte*)malloc(BUFSIZE)) == NULL) {
 228         return(-1);
 229     }
 230 
 231     p = buffer;
 232     bp = buffer;
 233 
 234     /*
 235      * Read the END Header, which is the starting point for ZIP files.
 236      * (Clearly designed to make writing a zip file easier than reading
 237      * one. Now isn't that precious...)
 238      */
 239     if ((base_offset = find_end(fd, bp)) == -1) {
 240         free(buffer);
 241         return (-1);
 242     }
 243 
 244     /*
 245      * There is a historical, but undocumented, ability to allow for
 246      * additional "stuff" to be prepended to the zip/jar file. It seems
 247      * that this has been used to prepend an actual java launcher
 248      * executable to the jar on Windows.  Although this is just another
 249      * form of statically linking a small piece of the JVM to the
 250      * application, we choose to continue to support it.  Note that no
 251      * guarantees have been made (or should be made) to the customer that
 252      * this will continue to work.
 253      *
 254      * Therefore, calculate the base offset of the zip file (within the
 255      * expanded file) by assuming that the central directory is followed
 256      * immediately by the end record.
 257      */
 258     base_offset = base_offset - ENDSIZ(p) - ENDOFF(p);
 259 
 260     /*
 261      * The END Header indicates the start of the Central Directory
 262      * Headers. Remember that the desired Central Directory Header (CEN)
 263      * will almost always be the second one and the first one is a small
 264      * directory entry ("META-INF/"). Keep the code optimized for
 265      * that case.
 266      *
 267      * Begin by seeking to the beginning of the Central Directory and
 268      * reading in the first buffer full of bits.
 269      */
 270     if (lseek(fd, base_offset + ENDOFF(p), SEEK_SET) < (off_t)0) {
 271         free(buffer);
 272         return (-1);
 273     }
 274     if ((bytes = read(fd, bp, MINREAD)) < 0) {
 275         free(buffer);
 276         return (-1);
 277     }
 278 
 279     /*
 280      * Loop through the Central Directory Headers. Note that a valid zip/jar
 281      * must have an ENDHDR (with ENDSIG) after the Central Directory.
 282      */
 283     while (GETSIG(p) == CENSIG) {
 284 
 285         /*
 286          * If a complete header isn't in the buffer, shift the contents
 287          * of the buffer down and refill the buffer.  Note that the check
 288          * for "bytes < CENHDR" must be made before the test for the entire
 289          * size of the header, because if bytes is less than CENHDR, the
 290          * actual size of the header can't be determined. The addition of
 291          * SIGSIZ guarantees that the next signature is also in the buffer
 292          * for proper loop termination.
 293          */
 294         if (bytes < CENHDR) {
 295             p = memmove(bp, p, bytes);
 296             if ((res = read(fd, bp + bytes, MINREAD)) <= 0) {
 297                 free(buffer);
 298                 return (-1);
 299             }
 300             bytes += res;
 301         }
 302         entry_size = CENHDR + CENNAM(p) + CENEXT(p) + CENCOM(p);
 303         if (bytes < entry_size + SIGSIZ) {
 304             if (p != bp)
 305                 p = memmove(bp, p, bytes);
 306             read_size = entry_size - bytes + SIGSIZ;
 307             read_size = (read_size < MINREAD) ? MINREAD : read_size;
 308             if ((res = read(fd, bp + bytes,  read_size)) <= 0) {
 309                 free(buffer);
 310                 return (-1);
 311             }
 312             bytes += res;
 313         }
 314 
 315         /*
 316          * Check if the name is the droid we are looking for; the jar file
 317          * manifest.  If so, build the entry record from the data found in
 318          * the header located and return success.
 319          */
 320         if ((size_t)CENNAM(p) == JLI_StrLen(file_name) &&
 321           memcmp((p + CENHDR), file_name, JLI_StrLen(file_name)) == 0) {
 322             if (lseek(fd, base_offset + CENOFF(p), SEEK_SET) < (off_t)0) {
 323                 free(buffer);
 324                 return (-1);
 325             }
 326             if (read(fd, locbuf, LOCHDR) < 0) {
 327                 free(buffer);
 328                 return (-1);
 329             }
 330             if (GETSIG(locbuf) != LOCSIG) {
 331                 free(buffer);
 332                 return (-1);
 333             }
 334             entry->isize = CENLEN(p);
 335             entry->csize = CENSIZ(p);
 336             entry->offset = base_offset + CENOFF(p) + LOCHDR +
 337                 LOCNAM(locbuf) + LOCEXT(locbuf);
 338             entry->how = CENHOW(p);
 339             free(buffer);
 340             return (0);
 341         }
 342 
 343         /*
 344          * Point to the next entry and decrement the count of valid remaining
 345          * bytes.
 346          */
 347         bytes -= entry_size;
 348         p += entry_size;
 349     }
 350     free(buffer);
 351     return (-1);        /* Fell off the end the loop without a Manifest */
 352 }
 353 
 354 /*
 355  * Parse a Manifest file header entry into a distinct "name" and "value".
 356  * Continuation lines are joined into a single "value". The documented
 357  * syntax for a header entry is:
 358  *
 359  *      header: name ":" value
 360  *
 361  *      name: alphanum *headerchar
 362  *
 363  *      value: SPACE *otherchar newline *continuation
 364  *
 365  *      continuation: SPACE *otherchar newline
 366  *
 367  *      newline: CR LF | LF | CR (not followed by LF)
 368  *
 369  *      alphanum: {"A"-"Z"} | {"a"-"z"} | {"0"-"9"}
 370  *
 371  *      headerchar: alphanum | "-" | "_"
 372  *
 373  *      otherchar: any UTF-8 character except NUL, CR and LF
 374  *
 375  * Note that a manifest file may be composed of multiple sections,
 376  * each of which may contain multiple headers.
 377  *
 378  *      section: *header +newline
 379  *
 380  *      nonempty-section: +header +newline
 381  *
 382  * (Note that the point of "nonempty-section" is unclear, because it isn't
 383  * referenced elsewhere in the full specification for the Manifest file.)
 384  *
 385  * Arguments:
 386  *      lp      pointer to a character pointer which points to the start
 387  *              of a valid header.
 388  *      name    pointer to a character pointer which will be set to point
 389  *              to the name portion of the header (nul terminated).
 390  *      value   pointer to a character pointer which will be set to point
 391  *              to the value portion of the header (nul terminated).
 392  *
 393  * Returns:
 394  *    1 Successful parsing of an NV pair.  lp is updated to point to the
 395  *      next character after the terminating newline in the string
 396  *      representing the Manifest file. name and value are updated to
 397  *      point to the strings parsed.
 398  *    0 A valid end of section indicator was encountered.  lp, name, and
 399  *      value are not modified.
 400  *   -1 lp does not point to a valid header. Upon return, the values of
 401  *      lp, name, and value are undefined.
 402  */
 403 static int
 404 parse_nv_pair(char **lp, char **name, char **value)
 405 {
 406     char    *nl;
 407     char    *cp;
 408 
 409     /*
 410      * End of the section - return 0. The end of section condition is
 411      * indicated by either encountering a blank line or the end of the
 412      * Manifest "string" (EOF).
 413      */
 414     if (**lp == '\0' || **lp == '\n' || **lp == '\r')
 415         return (0);
 416 
 417     /*
 418      * Getting to here, indicates that *lp points to an "otherchar".
 419      * Turn the "header" into a string on its own.
 420      */
 421     nl = JLI_StrPBrk(*lp, "\n\r");
 422     if (nl == NULL) {
 423         nl = JLI_StrChr(*lp, (int)'\0');
 424     } else {
 425         cp = nl;                        /* For merging continuation lines */
 426         if (*nl == '\r' && *(nl+1) == '\n')
 427             *nl++ = '\0';
 428         *nl++ = '\0';
 429 
 430         /*
 431          * Process any "continuation" line(s), by making them part of the
 432          * "header" line. Yes, I know that we are "undoing" the NULs we
 433          * just placed here, but continuation lines are the fairly rare
 434          * case, so we shouldn't unnecessarily complicate the code above.
 435          *
 436          * Note that an entire continuation line is processed each iteration
 437          * through the outer while loop.
 438          */
 439         while (*nl == ' ') {
 440             nl++;                       /* First character to be moved */
 441             while (*nl != '\n' && *nl != '\r' && *nl != '\0')
 442                 *cp++ = *nl++;          /* Shift string */
 443             if (*nl == '\0')
 444                 return (-1);            /* Error: newline required */
 445             *cp = '\0';
 446             if (*nl == '\r' && *(nl+1) == '\n')
 447                 *nl++ = '\0';
 448             *nl++ = '\0';
 449         }
 450     }
 451 
 452     /*
 453      * Separate the name from the value;
 454      */
 455     cp = JLI_StrChr(*lp, (int)':');
 456     if (cp == NULL)
 457         return (-1);
 458     *cp++ = '\0';               /* The colon terminates the name */
 459     if (*cp != ' ')
 460         return (-1);
 461     *cp++ = '\0';               /* Eat the required space */
 462     *name = *lp;
 463     *value = cp;
 464     *lp = nl;
 465     return (1);
 466 }
 467 
 468 /*
 469  * Read the manifest from the specified jar file and fill in the manifest_info
 470  * structure with the information found within.
 471  *
 472  * Error returns are as follows:
 473  *    0 Success
 474  *   -1 Unable to open jarfile
 475  *   -2 Error accessing the manifest from within the jarfile (most likely
 476  *      a manifest is not present, or this isn't a valid zip/jar file).
 477  */
 478 int
 479 JLI_ParseManifest(char *jarfile, manifest_info *info)
 480 {
 481     int     fd;
 482     zentry  entry;
 483     char    *lp;
 484     char    *name;
 485     char    *value;
 486     int     rc;
 487     char    *splashscreen_name = NULL;
 488 
 489     if ((fd = open(jarfile, O_RDONLY
 490 #ifdef O_BINARY
 491         | O_BINARY /* use binary mode on windows */
 492 #endif
 493         )) == -1) {
 494         return (-1);
 495     }
 496     info->manifest_version = NULL;
 497     info->main_class = NULL;
 498     info->jre_version = NULL;
 499     info->jre_restrict_search = 0;
 500     info->splashscreen_image_file_name = NULL;
 501     if (rc = find_file(fd, &entry, manifest_name) != 0) {
 502         close(fd);
 503         return (-2);
 504     }
 505     manifest = inflate_file(fd, &entry, NULL);
 506     if (manifest == NULL) {
 507         close(fd);
 508         return (-2);
 509     }
 510     lp = manifest;
 511     while ((rc = parse_nv_pair(&lp, &name, &value)) > 0) {
 512         if (JLI_StrCaseCmp(name, "Manifest-Version") == 0)
 513             info->manifest_version = value;
 514         else if (JLI_StrCaseCmp(name, "Main-Class") == 0)
 515             info->main_class = value;
 516         else if (JLI_StrCaseCmp(name, "JRE-Version") == 0)
 517             info->jre_version = value;
 518         else if (JLI_StrCaseCmp(name, "JRE-Restrict-Search") == 0) {
 519             if (JLI_StrCaseCmp(value, "true") == 0)
 520                 info->jre_restrict_search = 1;
 521         } else if (JLI_StrCaseCmp(name, "Splashscreen-Image") == 0) {
 522             info->splashscreen_image_file_name = value;
 523         }
 524     }
 525     close(fd);
 526     if (rc == 0)
 527         return (0);
 528     else
 529         return (-2);
 530 }
 531 
 532 /*
 533  * Opens the jar file and unpacks the specified file from its contents.
 534  * Returns NULL on failure.
 535  */
 536 void *
 537 JLI_JarUnpackFile(const char *jarfile, const char *filename, int *size) {
 538     int     fd;
 539     zentry  entry;
 540     void    *data = NULL;
 541 
 542     if ((fd = open(jarfile, O_RDONLY
 543 #ifdef O_BINARY
 544         | O_BINARY /* use binary mode on windows */
 545 #endif
 546         )) == -1) {
 547         return NULL;
 548     }
 549     if (find_file(fd, &entry, filename) == 0) {
 550         data = inflate_file(fd, &entry, size);
 551     }
 552     close(fd);
 553     return (data);
 554 }
 555 
 556 /*
 557  * Specialized "free" function.
 558  */
 559 void
 560 JLI_FreeManifest()
 561 {
 562     if (manifest)
 563         free(manifest);
 564 }
 565 
 566 /*
 567  * Iterate over the manifest of the specified jar file and invoke the provided
 568  * closure function for each attribute encountered.
 569  *
 570  * Error returns are as follows:
 571  *    0 Success
 572  *   -1 Unable to open jarfile
 573  *   -2 Error accessing the manifest from within the jarfile (most likely
 574  *      this means a manifest is not present, or it isn't a valid zip/jar file).
 575  */
 576 int
 577 JLI_ManifestIterate(const char *jarfile, attribute_closure ac, void *user_data)
 578 {
 579     int     fd;
 580     zentry  entry;
 581     char    *mp;        /* manifest pointer */
 582     char    *lp;        /* pointer into manifest, updated during iteration */
 583     char    *name;
 584     char    *value;
 585     int     rc;
 586 
 587     if ((fd = open(jarfile, O_RDONLY
 588 #ifdef O_BINARY
 589         | O_BINARY /* use binary mode on windows */
 590 #endif
 591         )) == -1) {
 592         return (-1);
 593     }
 594 
 595     if (rc = find_file(fd, &entry, manifest_name) != 0) {
 596         close(fd);
 597         return (-2);
 598     }
 599 
 600     mp = inflate_file(fd, &entry, NULL);
 601     if (mp == NULL) {
 602         close(fd);
 603         return (-2);
 604     }
 605 
 606     lp = mp;
 607     while ((rc = parse_nv_pair(&lp, &name, &value)) > 0) {
 608         (*ac)(name, value, user_data);
 609     }
 610     free(mp);
 611     close(fd);
 612     return (rc == 0) ? 0 : -2;
 613 }