1 /*
   2  * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef SHARE_VM_CLASSFILE_IMAGEFILE_HPP
  26 #define SHARE_VM_CLASSFILE_IMAGEFILE_HPP
  27 
  28 #include "classfile/classLoader.hpp"
  29 #include "memory/allocation.hpp"
  30 #include "memory/allocation.inline.hpp"
  31 #include "utilities/globalDefinitions.hpp"
  32 
// Image files are an alternate file format for storing classes and resources. The
// goal is to supply file access which is faster and smaller than the jar format.
// It should be noted that, unlike jars, information stored in an image is in native
// endian format. This allows the image to be memory mapped without endian
// translation.  This also means that images are platform dependent.
  38 //
  39 // Image files are structured as three sections;
  40 //
  41 //         +-----------+
  42 //         |  Header   |
  43 //         +-----------+
  44 //         |           |
  45 //         | Directory |
  46 //         |           |
  47 //         +-----------+
  48 //         |           |
  49 //         |           |
  50 //         | Resources |
  51 //         |           |
  52 //         |           |
  53 //         +-----------+
  54 //
  55 // The header contains information related to identification and description of
  56 // contents.
  57 //
  58 //         +-------------------------+
  59 //         |   Magic (0xCAFEDADA)    |
  60 //         +------------+------------+
  61 //         | Major Vers | Minor Vers |
  62 //         +------------+------------+
  63 //         |      Location Count     |
  64 //         +-------------------------+
  65 //         |      Attributes Size    |
  66 //         +-------------------------+
  67 //         |       Strings Size      |
  68 //         +-------------------------+
  69 //
  70 // Magic - means of identifying validity of the file.  This avoids requiring a
  71 //         special file extension.
  72 // Major vers, minor vers - differences in version numbers indicate structural
  73 //                          changes in the image.
  74 // Location count - number of locations/resources in the file.  This count is also
  75 //                  the length of lookup tables used in the directory.
  76 // Attributes size - number of bytes in the region used to store location attribute
  77 //                   streams.
  78 // Strings size - the size of the region used to store strings used by the
  79 //                directory and meta data.
  80 //
  81 // The directory contains information related to resource lookup. The algorithm
  82 // used for lookup is "A Practical Minimal Perfect Hashing Method"
  83 // (http://homepages.dcc.ufmg.br/~nivio/papers/wea05.pdf). Given a path string
  84 // in the form <package>/<base>.<extension>  return the resource location
  85 // information;
  86 //
  87 //     redirectIndex = hash(path, DEFAULT_SEED) % count;
  88 //     redirect = redirectTable[redirectIndex];
  89 //     if (redirect == 0) return not found;
  90 //     locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % count;
  91 //     location = locationTable[locationIndex];
  92 //     if (!verify(location, path)) return not found;
  93 //     return location;
  94 //
  95 // Note: The hash function takes an initial seed value.  A different seed value
  96 // usually returns a different result for strings that would otherwise collide with
  97 // other seeds. The verify function guarantees the found resource location is
  98 // indeed the resource we are looking for.
  99 //
 100 // The following is the format of the directory;
 101 //
 102 //         +-------------------+
 103 //         |   Redirect Table  |
 104 //         +-------------------+
 105 //         | Attribute Offsets |
 106 //         +-------------------+
 107 //         |   Attribute Data  |
 108 //         +-------------------+
 109 //         |      Strings      |
 110 //         +-------------------+
 111 //
 112 // Redirect Table - Array of 32-bit signed values representing actions that
 113 //                  should take place for hashed strings that map to that
 114 //                  value.  Negative values indicate no hash collision and can be
 115 //                  quickly converted to indices into attribute offsets.  Positive
 116 //                  values represent a new seed for hashing an index into attribute
 117 //                  offsets.  Zero indicates not found.
 118 // Attribute Offsets - Array of 32-bit unsigned values representing offsets into
 119 //                     attribute data.  Attribute offsets can be iterated to do a
 120 //                     full survey of resources in the image.
 121 // Attribute Data - Bytes representing compact attribute data for locations. (See
 122 //                  comments in ImageLocation.)
 123 // Strings - Collection of zero terminated UTF-8 strings used by the directory and
 124 //           image meta data.  Each string is accessed by offset.  Each string is
 125 //           unique.  Offset zero is reserved for the empty string.
 126 //
 127 // Note that the memory mapped directory assumes 32 bit alignment of the image
 128 // header, the redirect table and the attribute offsets.
 129 //
 130 
 131 
 132 // Manage image file string table.
 133 class ImageStrings {
 134 private:
 135   // Data bytes for strings.
 136   u1* _data;
 137   // Number of bytes in the string table.
 138   u4 _size;
 139 
 140 public:
 141   // Prime used to generate hash for Perfect Hashing.
 142   static const u4 HASH_MULTIPLIER = 0x01000193;
 143 
 144   ImageStrings(u1* data, u4 size) : _data(data), _size(size) {}
 145 
 146   // Return the UTF-8 string beginning at offset.
 147   inline const char* get(u4 offset) const {
 148     assert(offset < _size, "offset exceeds string table size");
 149     return (const char*)(_data + offset);
 150   }
 151 
 152   // Compute the Perfect Hashing hash code for the supplied string.
 153   inline static u4 hash_code(const char* string) {
 154     return hash_code(string, HASH_MULTIPLIER);
 155   }
 156 
 157   // Compute the Perfect Hashing hash code for the supplied string, starting at seed.
 158   static u4 hash_code(const char* string, u4 seed);
 159 
 160   // Test to see if string begins with start.  If so returns remaining portion
 161   // of string.  Otherwise, NULL.  Used to test sections of a path without
 162   // copying.
 163   static const char* starts_with(const char* string, const char* start);
 164 
 165 };
 166 
 167 // Manage image file location attribute streams.  Within an image, a location's
 168 // attributes are compressed into a stream of bytes.  An attribute stream is
 169 // composed of individual attribute sequences.  Each attribute sequence begins with
 170 // a header byte containing the attribute 'kind' (upper 5 bits of header) and the
 171 // 'length' less 1 (lower 3 bits of header) of bytes that follow containing the
 172 // attribute value.  Attribute values present as most significant byte first.
 173 //
 174 // Ex. Container offset (ATTRIBUTE_OFFSET) 0x33562 would be represented as 0x22
 175 // (kind = 4, length = 3), 0x03, 0x35, 0x62.
 176 //
 177 // An attribute stream is terminated with a header kind of ATTRIBUTE_END (header
 178 // byte of zero.)
 179 //
// ImageLocation inflates the stream into individual values stored in the long
// array _attributes. This allows an attribute value to be quickly accessed by
// direct indexing. Unspecified values default to zero.
 183 //
 184 // Notes:
 185 //  - Even though ATTRIBUTE_END is used to mark the end of the attribute stream,
 186 //    streams will contain zero byte values to represent lesser significant bits.
 187 //    Thus, detecting a zero byte is not sufficient to detect the end of an attribute
 188 //    stream.
 189 //  - ATTRIBUTE_OFFSET represents the number of bytes from the beginning of the region
 190 //    storing the resources.  Thus, in an image this represents the number of bytes
 191 //    after the directory.
 192 //  - Currently, compressed resources are represented by having a non-zero
 193 //    ATTRIBUTE_COMPRESSED value.  This represents the number of bytes stored in the
 194 //    image, and the value of ATTRIBUTE_UNCOMPRESSED represents number of bytes of the
 195 //    inflated resource in memory. If the ATTRIBUTE_COMPRESSED is zero then the value
 196 //    of ATTRIBUTE_UNCOMPRESSED represents both the number of bytes in the image and
 197 //    in memory.  In the future, additional compression techniques will be used and
 198 //    represented differently.
 199 //  - Package strings include trailing slash and extensions include prefix period.
 200 //
 201 class ImageLocation {
 202 public:
 203   // Attribute kind enumeration.
 204   static const u1 ATTRIBUTE_END = 0; // End of attribute stream marker
 205   static const u1 ATTRIBUTE_BASE = 1; // String table offset of resource path base
 206   static const u1 ATTRIBUTE_PARENT = 2; // String table offset of resource path parent
 207   static const u1 ATTRIBUTE_EXTENSION = 3; // String table offset of resource path extension
 208   static const u1 ATTRIBUTE_OFFSET = 4; // Container byte offset of resource
 209   static const u1 ATTRIBUTE_COMPRESSED = 5; // In image byte size of the compressed resource
 210   static const u1 ATTRIBUTE_UNCOMPRESSED = 6; // In memory byte size of the uncompressed resource
 211   static const u1 ATTRIBUTE_COUNT = 7; // Number of attribute kinds
 212 
 213 private:
 214   // Values of inflated attributes.
 215   u8 _attributes[ATTRIBUTE_COUNT];
 216 
 217   // Return the attribute value number of bytes.
 218   inline static u1 attribute_length(u1 data) {
 219     return (data & 0x7) + 1;
 220   }
 221 
 222   // Return the attribute kind.
 223   inline static u1 attribute_kind(u1 data) {
 224     u1 kind = data >> 3;
 225     assert(kind < ATTRIBUTE_COUNT, "invalid attribute kind");
 226     return kind;
 227   }
 228 
 229   // Return the attribute length.
 230   inline static u8 attribute_value(u1* data, u1 n) {
 231     assert(0 < n && n <= 8, "invalid attribute value length");
 232     u8 value = 0;
 233 
 234     // Most significant bytes first.
 235     for (u1 i = 0; i < n; i++) {
 236       value <<= 8;
 237       value |= data[i];
 238     }
 239 
 240     return value;
 241   }
 242 
 243 public:
 244   ImageLocation(u1* data);
 245 
 246   // Retrieve an attribute value from the inflated array.
 247   inline u8 get_attribute(u1 kind) const {
 248     assert(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT, "invalid attribute kind");
 249     return _attributes[kind];
 250   }
 251 
 252   // Retrieve an attribute string value from the inflated array.
 253   inline const char* get_attribute(u4 kind, const ImageStrings& strings) const {
 254     return strings.get((u4)get_attribute(kind));
 255   }
 256 };
 257 
 258 // Manage the image file.
 259 class ImageFile: public CHeapObj<mtClass> {
 260 private:
 261   // Image file marker.
 262   static const u4 IMAGE_MAGIC = 0xCAFEDADA;
 263   // Image file major version number.
 264   static const u2 MAJOR_VERSION = 0;
 265   // Image file minor version number.
 266   static const u2 MINOR_VERSION = 1;
 267 
 268   struct ImageHeader {
 269     u4 _magic;          // Image file marker
 270     u2 _major_version;  // Image file major version number
 271     u2 _minor_version;  // Image file minor version number
 272     u4 _location_count; // Number of locations managed in index.
 273     u4 _locations_size; // Number of bytes in attribute table.
 274     u4 _strings_size;   // Number of bytes in string table.
 275   };
 276 
 277   char* _name;          // Name of image
 278   int _fd;              // File descriptor
 279   bool _memory_mapped;  // Is file memory mapped
 280   ImageHeader _header;  // Image header
 281   u8 _index_size;       // Total size of index
 282   u1* _index_data;      // Raw index data
 283   s4* _redirect_table;  // Perfect hash redirect table
 284   u4* _offsets_table;   // Location offset table
 285   u1* _location_bytes;  // Location attributes
 286   u1* _string_bytes;    // String table
 287 
 288   // Compute number of bytes in image file index.
 289   inline u8 index_size() {
 290     return sizeof(ImageHeader) +
 291     _header._location_count * sizeof(u4) * 2 +
 292     _header._locations_size +
 293     _header._strings_size;
 294   }
 295 
 296 public:
 297   ImageFile(const char* name);
 298   ~ImageFile();
 299 
 300   // Open image file for access.
 301   bool open();
 302   // Close image file.
 303   void close();
 304 
 305   // Retrieve name of image file.
 306   inline const char* name() const {
 307     return _name;
 308   }
 309 
 310   // Return a string table accessor.
 311   inline const ImageStrings get_strings() const {
 312     return ImageStrings(_string_bytes, _header._strings_size);
 313   }
 314 
 315   // Return number of locations in image file index.
 316   inline u4 get_location_count() const {
 317     return _header._location_count;
 318   }
 319 
 320   // Return location attribute stream for location i.
 321   inline u1* get_location_data(u4 i) const {
 322     u4 offset = _offsets_table[i];
 323 
 324     return offset != 0 ? _location_bytes + offset : NULL;
 325   }
 326 
 327   // Return the attribute stream for a named resourced.
 328   u1* find_location_data(const char* path) const;
 329 
 330   // Verify that a found location matches the supplied path.
 331   bool verify_location(ImageLocation& location, const char* path) const;
 332 
 333   // Return the resource for the supplied location info.
 334   u1* get_resource(ImageLocation& location) const;
 335 
 336   // Return the resource associated with the path else NULL if not found.
 337   void get_resource(const char* path, u1*& buffer, u8& size) const;
 338 
 339   // Return an array of packages for a given module
 340   GrowableArray<const char*>* packages(const char* name);
 341 };
 342 
 343 #endif // SHARE_VM_CLASSFILE_IMAGEFILE_HPP