1 /*
   2  * Copyright (c) 2015, 2016 Oracle and/or its affiliates. All rights reserved.
   3  *
   4  * Redistribution and use in source and binary forms, with or without
   5  * modification, are permitted provided that the following conditions
   6  * are met:
   7  *
   8  *   - Redistributions of source code must retain the above copyright
   9  *     notice, this list of conditions and the following disclaimer.
  10  *
  11  *   - Redistributions in binary form must reproduce the above copyright
  12  *     notice, this list of conditions and the following disclaimer in the
  13  *     documentation and/or other materials provided with the distribution.
  14  *
  15  *   - Neither the name of Oracle nor the names of its
  16  *     contributors may be used to endorse or promote products derived
  17  *     from this software without specific prior written permission.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  20  * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  21  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  23  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  24  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  25  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  26  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  27  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  28  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30  */
  31 
  32 #include "jni.h"
  33 #include "imageDecompressor.hpp"
  34 #include "endian.hpp"
  35 #ifdef WIN32
  36 #include <windows.h>
  37 #else
  38 #include <dlfcn.h>
  39 #endif
  40 
// Signature of the "ZIP_InflateFully" entry point that is resolved at runtime
// from the zip shared library (see findEntry). It takes an input buffer and
// its length, an output buffer and its length, and a char** out-parameter.
// NOTE(review): pmsg presumably receives an error message on failure; confirm
// against the zip library.
typedef jboolean (JNICALL *ZipInflateFully_t)(void *inBuf, jlong inLen,
                                              void *outBuf, jlong outLen, char **pmsg);
// Resolved entry point; stays NULL until
// ImageDecompressor::image_decompressor_init() has looked it up.
static ZipInflateFully_t ZipInflateFully        = NULL;
  44 
// Pieces of the shared library file name passed to dlopen() in findEntry().
// They are only defined for non-Windows builds; the Windows code path looks
// the module up by the literal name "zip.dll" instead.
#ifndef WIN32
    #define JNI_LIB_PREFIX "lib"
    // Apple platforms use the ".dylib" extension, everything else ".so".
    #ifdef __APPLE__
        #define JNI_LIB_SUFFIX ".dylib"
    #else
        #define JNI_LIB_SUFFIX ".so"
    #endif
#endif
  53 
  54 /**
  55  * Return the address of the entry point named in the zip shared library.
  56  * @param name - the name of the entry point
  57  * @return the address of the entry point or NULL
  58  */
  59 static void* findEntry(const char* name) {
  60     void *addr = NULL;
  61 #ifdef WIN32
  62     HMODULE handle = GetModuleHandle("zip.dll");
  63     if (handle == NULL) {
  64         return NULL;
  65     }
  66     addr = (void*) GetProcAddress(handle, name);
  67     return addr;
  68 #else
  69     addr = dlopen(JNI_LIB_PREFIX "zip" JNI_LIB_SUFFIX, RTLD_GLOBAL|RTLD_LAZY);
  70     if (addr == NULL) {
  71         return NULL;
  72     }
  73     addr = dlsym(addr, name);
  74     return addr;
  75 #endif
  76 }
  77 
  78 /*
  79  * Initialize the array of decompressors.
  80  */
  81 int ImageDecompressor::_decompressors_num = 0;
  82 ImageDecompressor** ImageDecompressor::_decompressors = NULL;
  83 void ImageDecompressor::image_decompressor_init() {
  84     if (_decompressors == NULL) {
  85         ZipInflateFully = (ZipInflateFully_t) findEntry("ZIP_InflateFully");
  86      assert(ZipInflateFully != NULL && "ZIP decompressor not found.");
  87         _decompressors_num = 2;
  88         _decompressors = new ImageDecompressor*[_decompressors_num];
  89         _decompressors[0] = new ZipDecompressor("zip");
  90         _decompressors[1] = new SharedStringDecompressor("compact-cp");
  91     }
  92 }
  93 
  94 void ImageDecompressor::image_decompressor_close() {
  95     delete[] _decompressors;
  96 }
  97 
  98 /*
  99  * Locate decompressor.
 100  */
 101 ImageDecompressor* ImageDecompressor::get_decompressor(const char * decompressor_name) {
 102     image_decompressor_init();
 103     for (int i = 0; i < _decompressors_num; i++) {
 104         ImageDecompressor* decompressor = _decompressors[i];
 105         assert(decompressor != NULL && "Decompressors not initialized.");
 106         if (strcmp(decompressor->get_name(), decompressor_name) == 0) {
 107             return decompressor;
 108         }
 109     }
 110     assert(false && "No decompressor found.");
 111     return NULL;
 112 }
 113 
 114 // Sparc to read unaligned content
 115 // u8 l = (*(u8*) ptr);
 116 // If ptr is not aligned, sparc will fail.
 117 u8 ImageDecompressor::getU8(u1* ptr, Endian *endian) {
 118     u8 ret;
 119     if (endian->is_big_endian()) {
 120         ret = (u8)ptr[0] << 56 | (u8)ptr[1] << 48 | (u8)ptr[2]<<40 | (u8)ptr[3]<<32 |
 121                 ptr[4]<<24 | ptr[5]<<16 | ptr[6]<<8 | ptr[7];
 122     } else {
 123         ret = ptr[0] | ptr[1]<<8 | ptr[2]<<16 | ptr[3]<<24 | (u8)ptr[4]<<32 |
 124                 (u8)ptr[5]<<40 | (u8)ptr[6]<<48 | (u8)ptr[7]<<56;
 125     }
 126     return ret;
 127 }
 128 
 129 u4 ImageDecompressor::getU4(u1* ptr, Endian *endian) {
 130     u4 ret;
 131     if (endian->is_big_endian()) {
 132         ret = ptr[0] << 24 | ptr[1]<<16 | (ptr[2]<<8) | ptr[3];
 133     } else {
 134         ret = ptr[0] | ptr[1]<<8 | (ptr[2]<<16) | ptr[3]<<24;
 135     }
 136     return ret;
 137 }
 138 
 139 /*
 140  * Decompression entry point. Called from ImageFileReader::get_resource.
 141  */
 142 void ImageDecompressor::decompress_resource(u1* compressed, u1* uncompressed,
 143                 u8 uncompressed_size, const ImageStrings* strings, Endian *endian) {
 144     bool has_header = false;
 145     u1* decompressed_resource = compressed;
 146     u1* compressed_resource = compressed;
 147     // Resource could have been transformed by a stack of decompressors.
 148     // Iterate and decompress resources until there is no more header.
 149     do {
 150         ResourceHeader _header;
 151         u1* compressed_resource_base = compressed_resource;
 152         _header._magic = getU4(compressed_resource, endian);
 153         compressed_resource += 4;
 154         _header._size = getU8(compressed_resource, endian);
 155         compressed_resource += 8;
 156         _header._uncompressed_size = getU8(compressed_resource, endian);
 157         compressed_resource += 8;
 158         _header._decompressor_name_offset = getU4(compressed_resource, endian);
 159         compressed_resource += 4;
 160         _header._decompressor_config_offset = getU4(compressed_resource, endian);
 161         compressed_resource += 4;
 162         _header._is_terminal = *compressed_resource;
 163         compressed_resource += 1;
 164         has_header = _header._magic == ResourceHeader::resource_header_magic;
 165         if (has_header) {
 166             // decompressed_resource array contains the result of decompression
 167             decompressed_resource = new u1[(size_t) _header._uncompressed_size];
 168             // Retrieve the decompressor name
 169             const char* decompressor_name = strings->get(_header._decompressor_name_offset);
 170             assert(decompressor_name && "image decompressor not found");
 171             // Retrieve the decompressor instance
 172             ImageDecompressor* decompressor = get_decompressor(decompressor_name);
 173             assert(decompressor && "image decompressor not found");
 174             // Ask the decompressor to decompress the compressed content
 175             decompressor->decompress_resource(compressed_resource, decompressed_resource,
 176                 &_header, strings);
 177             if (compressed_resource_base != compressed) {
 178                 delete[] compressed_resource_base;
 179             }
 180             compressed_resource = decompressed_resource;
 181         }
 182     } while (has_header);
 183     memcpy(uncompressed, decompressed_resource, (size_t) uncompressed_size);
 184     delete[] decompressed_resource;
 185 }
 186 
 187 // Zip decompressor
 188 
 189 void ZipDecompressor::decompress_resource(u1* data, u1* uncompressed,
 190                 ResourceHeader* header, const ImageStrings* strings) {
 191     char* msg = NULL;
 192     jboolean res = ZipDecompressor::decompress(data, header->_size, uncompressed,
 193                     header->_uncompressed_size, &msg);
 194     assert(res && "decompression failed");
 195 }
 196 
 197 jboolean ZipDecompressor::decompress(void *in, u8 inSize, void *out, u8 outSize, char **pmsg) {
 198     return (*ZipInflateFully)(in, inSize, out, outSize, pmsg);
 199 }
 200 
 201 // END Zip Decompressor
 202 
 203 // Shared String decompressor
 204 
// Payload sizes indexed by constant pool tag: sizes[tag] is the number of
// bytes that follow the tag byte of an entry.
// e.g. sizes[5] = 8 means that a long takes 8 bytes.
// The default case of SharedStringDecompressor::decompress_resource copies
// exactly sizes[tag] bytes for each entry, so a 0 means that no payload
// bytes are copied for that tag there.
// The table has 19 entries (tags 0 to 18).
const u1 SharedStringDecompressor::sizes[] = {
    0, 0, 0, 4, 4, 8, 8, 2, 2, 4, 4, 4, 4, 0, 0, 3, 2, 0, 4
};
 210 /**
 211  * Recreate the class by reconstructing the constant pool.
 212  */
 213 void SharedStringDecompressor::decompress_resource(u1* data,
 214                 u1* uncompressed_resource,
 215                 ResourceHeader* header, const ImageStrings* strings) {
 216     u1* uncompressed_base = uncompressed_resource;
 217     u1* data_base = data;
 218     int header_size = 8; // magic + major + minor
 219     memcpy(uncompressed_resource, data, header_size + 2); //+ cp count
 220     uncompressed_resource += header_size + 2;
 221     data += header_size;
 222     u2 cp_count = Endian::get_java(data);
 223     data += 2;
 224     for (int i = 1; i < cp_count; i++) {
 225         u1 tag = *data;
 226         data += 1;
 227         switch (tag) {
 228 
 229             case externalized_string:
 230             { // String in Strings table
 231                 *uncompressed_resource = 1;
 232                 uncompressed_resource += 1;
 233                 int k = decompress_int(data);
 234                 const char * string = strings->get(k);
 235                 int str_length = (int) strlen(string);
 236                 Endian::set_java(uncompressed_resource, str_length);
 237                 uncompressed_resource += 2;
 238                 memcpy(uncompressed_resource, string, str_length);
 239                 uncompressed_resource += str_length;
 240                 break;
 241             }
 242             // Descriptor String has been split and types added to Strings table
 243             case externalized_string_descriptor:
 244             {
 245                 *uncompressed_resource = 1;
 246                 uncompressed_resource += 1;
 247                 int descriptor_index = decompress_int(data);
 248                 int indexes_length = decompress_int(data);
 249                 u1* length_address = uncompressed_resource;
 250                 uncompressed_resource += 2;
 251                 int desc_length = 0;
 252                 const char * desc_string = strings->get(descriptor_index);
 253                 if (indexes_length > 0) {
 254                     u1* indexes_base = data;
 255                     data += indexes_length;
 256                     char c = *desc_string;
 257                     do {
 258                         *uncompressed_resource = c;
 259                         uncompressed_resource++;
 260                         desc_length += 1;
 261                         /*
 262                          * Every L character is the marker we are looking at in order
 263                          * to reconstruct the descriptor. Each time an L is found, then
 264                          * we retrieve the couple token/token at the current index and
 265                          * add it to the descriptor.
 266                          * "(L;I)V" and "java/lang","String" couple of tokens,
 267                          * this becomes "(Ljava/lang/String;I)V"
 268                          */
 269                         if (c == 'L') {
 270                             int index = decompress_int(indexes_base);
 271                             const char * pkg = strings->get(index);
 272                             int str_length = (int) strlen(pkg);
 273                             // the case where we have a package.
 274                             // reconstruct the type full name
 275                             if (str_length > 0) {
 276                                 int len = str_length + 1;
 277                                 char* fullpkg = new char[len];
 278                                 char* pkg_base = fullpkg;
 279                                 memcpy(fullpkg, pkg, str_length);
 280                                 fullpkg += str_length;
 281                                 *fullpkg = '/';
 282                                 memcpy(uncompressed_resource, pkg_base, len);
 283                                 uncompressed_resource += len;
 284                                 delete[] pkg_base;
 285                                 desc_length += len;
 286                             } else { // Empty package
 287                                 // Nothing to do.
 288                             }
 289                             int classIndex = decompress_int(indexes_base);
 290                             const char * clazz = strings->get(classIndex);
 291                             int clazz_length = (int) strlen(clazz);
 292                             memcpy(uncompressed_resource, clazz, clazz_length);
 293                             uncompressed_resource += clazz_length;
 294                             desc_length += clazz_length;
 295                         }
 296                         desc_string += 1;
 297                         c = *desc_string;
 298                     } while (c != '\0');
 299                 } else {
 300                         desc_length = (int) strlen(desc_string);
 301                         memcpy(uncompressed_resource, desc_string, desc_length);
 302                         uncompressed_resource += desc_length;
 303                 }
 304                 Endian::set_java(length_address, desc_length);
 305                 break;
 306             }
 307 
 308             case constant_utf8:
 309             { // UTF-8
 310                 *uncompressed_resource = tag;
 311                 uncompressed_resource += 1;
 312                 u2 str_length = Endian::get_java(data);
 313                 int len = str_length + 2;
 314                 memcpy(uncompressed_resource, data, len);
 315                 uncompressed_resource += len;
 316                 data += len;
 317                 break;
 318             }
 319 
 320             case constant_long:
 321             case constant_double:
 322             {
 323                 i++;
 324             }
 325             default:
 326             {
 327                 *uncompressed_resource = tag;
 328                 uncompressed_resource += 1;
 329                 int size = sizes[tag];
 330                 memcpy(uncompressed_resource, data, size);
 331                 uncompressed_resource += size;
 332                 data += size;
 333             }
 334         }
 335     }
 336     u8 remain = header->_size - (int)(data - data_base);
 337     u8 computed = (u8)(uncompressed_resource - uncompressed_base) + remain;
 338     if (header->_uncompressed_size != computed)
 339         printf("Failure, expecting %llu but getting %llu\n", header->_uncompressed_size,
 340                 computed);
 341     assert(header->_uncompressed_size == computed &&
 342                 "Constant Pool reconstruction failed");
 343     memcpy(uncompressed_resource, data, (size_t) remain);
 344 }
 345 
 346 /*
 347  * Decompress integers. Compressed integers are negative.
 348  * If positive, the integer is not decompressed.
 349  * If negative, length extracted from the first byte, then reconstruct the integer
 350  * from the following bytes.
 351  * Example of compression: 1 is compressed on 1 byte: 10100001
 352  */
 353 int SharedStringDecompressor::decompress_int(unsigned char*& value) {
 354     int len = 4;
 355     int res = 0;
 356     char b1 = *value;
 357     if (is_compressed((signed char)b1)) { // compressed
 358         len = get_compressed_length(b1);
 359         char clearedValue = b1 &= 0x1F;
 360         if (len == 1) {
 361             res = clearedValue;
 362         } else {
 363             res = (clearedValue & 0xFF) << 8 * (len - 1);
 364             for (int i = 1; i < len; i++) {
 365                 res |= (value[i]&0xFF) << 8 * (len - i - 1);
 366             }
 367         }
 368     } else {
 369         res = (value[0] & 0xFF) << 24 | (value[1]&0xFF) << 16 |
 370                     (value[2]&0xFF) << 8 | (value[3]&0xFF);
 371     }
 372     value += len;
 373     return res;
 374 }
 375 // END Shared String decompressor