1 /* 2 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef SHARE_VM_CLASSFILE_IMAGEFILE_HPP 26 #define SHARE_VM_CLASSFILE_IMAGEFILE_HPP 27 28 #include "classfile/classLoader.hpp" 29 #include "memory/allocation.hpp" 30 #include "memory/allocation.inline.hpp" 31 #include "utilities/globalDefinitions.hpp" 32 33 // Image files are an alternate file format for storing classes and resources. The 34 // goal is to supply file access which is faster and smaller that the jar format. 35 // It should be noted that unlike jars information stored in an image is in native 36 // endian format. This allows the image to be memory mapped into memory without 37 // endian translation. This also means that images are platform dependent. 38 // 39 // Image files are structured as three sections; 40 // 41 // +-----------+ 42 // | Header | 43 // +-----------+ 44 // | | 45 // | Directory | 46 // | | 47 // +-----------+ 48 // | | 49 // | | 50 // | Resources | 51 // | | 52 // | | 53 // +-----------+ 54 // 55 // The header contains information related to identification and description of 56 // contents. 57 // 58 // +-------------------------+ 59 // | Magic (0xCAFEDADA) | 60 // +------------+------------+ 61 // | Major Vers | Minor Vers | 62 // +------------+------------+ 63 // | Location Count | 64 // +-------------------------+ 65 // | Attributes Size | 66 // +-------------------------+ 67 // | Strings Size | 68 // +-------------------------+ 69 // 70 // Magic - means of identifying validity of the file. This avoids requiring a 71 // special file extension. 72 // Major vers, minor vers - differences in version numbers indicate structural 73 // changes in the image. 74 // Location count - number of locations/resources in the file. This count is also 75 // the length of lookup tables used in the directory. 76 // Attributes size - number of bytes in the region used to store location attribute 77 // streams. 78 // Strings size - the size of the region used to store strings used by the 79 // directory and meta data. 80 // 81 // The directory contains information related to resource lookup. The algorithm 82 // used for lookup is "A Practical Minimal Perfect Hashing Method" 83 // (http://homepages.dcc.ufmg.br/~nivio/papers/wea05.pdf). Given a path string 84 // in the form <package>/<base>.<extension> return the resource location 85 // information; 86 // 87 // redirectIndex = hash(path, DEFAULT_SEED) % count; 88 // redirect = redirectTable[redirectIndex]; 89 // if (redirect == 0) return not found; 90 // locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % count; 91 // location = locationTable[locationIndex]; 92 // if (!verify(location, path)) return not found; 93 // return location; 94 // 95 // Note: The hash function takes an initial seed value. A different seed value 96 // usually returns a different result for strings that would otherwise collide with 97 // other seeds. The verify function guarantees the found resource location is 98 // indeed the resource we are looking for. 99 // 100 // The following is the format of the directory; 101 // 102 // +-------------------+ 103 // | Redirect Table | 104 // +-------------------+ 105 // | Attribute Offsets | 106 // +-------------------+ 107 // | Attribute Data | 108 // +-------------------+ 109 // | Strings | 110 // +-------------------+ 111 // 112 // Redirect Table - Array of 32-bit signed values representing actions that 113 // should take place for hashed strings that map to that 114 // value. Negative values indicate no hash collision and can be 115 // quickly converted to indices into attribute offsets. Positive 116 // values represent a new seed for hashing an index into attribute 117 // offsets. Zero indicates not found. 118 // Attribute Offsets - Array of 32-bit unsigned values representing offsets into 119 // attribute data. Attribute offsets can be iterated to do a 120 // full survey of resources in the image. 121 // Attribute Data - Bytes representing compact attribute data for locations. (See 122 // comments in ImageLocation.) 123 // Strings - Collection of zero terminated UTF-8 strings used by the directory and 124 // image meta data. Each string is accessed by offset. Each string is 125 // unique. Offset zero is reserved for the empty string. 126 // 127 // Note that the memory mapped directory assumes 32 bit alignment of the image 128 // header, the redirect table and the attribute offsets. 129 // 130 131 132 // Manage image file string table. 133 class ImageStrings { 134 private: 135 // Data bytes for strings. 136 u1* _data; 137 // Number of bytes in the string table. 138 u4 _size; 139 140 public: 141 // Prime used to generate hash for Perfect Hashing. 142 static const u4 HASH_MULTIPLIER = 0x01000193; 143 144 ImageStrings(u1* data, u4 size) : _data(data), _size(size) {} 145 146 // Return the UTF-8 string beginning at offset. 147 inline const char* get(u4 offset) const { 148 assert(offset < _size, "offset exceeds string table size"); 149 return (const char*)(_data + offset); 150 } 151 152 // Compute the Perfect Hashing hash code for the supplied string. 153 inline static u4 hash_code(const char* string) { 154 return hash_code(string, HASH_MULTIPLIER); 155 } 156 157 // Compute the Perfect Hashing hash code for the supplied string, starting at seed. 158 static u4 hash_code(const char* string, u4 seed); 159 160 // Test to see if string begins with start. If so returns remaining portion 161 // of string. Otherwise, NULL. Used to test sections of a path without 162 // copying. 163 static const char* starts_with(const char* string, const char* start); 164 165 }; 166 167 // Manage image file location attribute streams. Within an image, a location's 168 // attributes are compressed into a stream of bytes. An attribute stream is 169 // composed of individual attribute sequences. Each attribute sequence begins with 170 // a header byte containing the attribute 'kind' (upper 5 bits of header) and the 171 // 'length' less 1 (lower 3 bits of header) of bytes that follow containing the 172 // attribute value. Attribute values present as most significant byte first. 173 // 174 // Ex. Container offset (ATTRIBUTE_OFFSET) 0x33562 would be represented as 0x22 175 // (kind = 4, length = 3), 0x03, 0x35, 0x62. 176 // 177 // An attribute stream is terminated with a header kind of ATTRIBUTE_END (header 178 // byte of zero.) 179 // 180 // ImageLocation inflates the stream into individual values stored in the long 181 // array _attributes. This allows an attribute value can be quickly accessed by 182 // direct indexing. Unspecified values default to zero. 183 // 184 // Notes: 185 // - Even though ATTRIBUTE_END is used to mark the end of the attribute stream, 186 // streams will contain zero byte values to represent lesser significant bits. 187 // Thus, detecting a zero byte is not sufficient to detect the end of an attribute 188 // stream. 189 // - ATTRIBUTE_OFFSET represents the number of bytes from the beginning of the region 190 // storing the resources. Thus, in an image this represents the number of bytes 191 // after the directory. 192 // - Currently, compressed resources are represented by having a non-zero 193 // ATTRIBUTE_COMPRESSED value. This represents the number of bytes stored in the 194 // image, and the value of ATTRIBUTE_UNCOMPRESSED represents number of bytes of the 195 // inflated resource in memory. If the ATTRIBUTE_COMPRESSED is zero then the value 196 // of ATTRIBUTE_UNCOMPRESSED represents both the number of bytes in the image and 197 // in memory. In the future, additional compression techniques will be used and 198 // represented differently. 199 // - Package strings include trailing slash and extensions include prefix period. 200 // 201 class ImageLocation { 202 public: 203 // Attribute kind enumeration. 204 static const u1 ATTRIBUTE_END = 0; // End of attribute stream marker 205 static const u1 ATTRIBUTE_BASE = 1; // String table offset of resource path base 206 static const u1 ATTRIBUTE_PARENT = 2; // String table offset of resource path parent 207 static const u1 ATTRIBUTE_EXTENSION = 3; // String table offset of resource path extension 208 static const u1 ATTRIBUTE_OFFSET = 4; // Container byte offset of resource 209 static const u1 ATTRIBUTE_COMPRESSED = 5; // In image byte size of the compressed resource 210 static const u1 ATTRIBUTE_UNCOMPRESSED = 6; // In memory byte size of the uncompressed resource 211 static const u1 ATTRIBUTE_COUNT = 7; // Number of attribute kinds 212 213 private: 214 // Values of inflated attributes. 215 u8 _attributes[ATTRIBUTE_COUNT]; 216 217 // Return the attribute value number of bytes. 218 inline static u1 attribute_length(u1 data) { 219 return (data & 0x7) + 1; 220 } 221 222 // Return the attribute kind. 223 inline static u1 attribute_kind(u1 data) { 224 u1 kind = data >> 3; 225 assert(kind < ATTRIBUTE_COUNT, "invalid attribute kind"); 226 return kind; 227 } 228 229 // Return the attribute length. 230 inline static u8 attribute_value(u1* data, u1 n) { 231 assert(0 < n && n <= 8, "invalid attribute value length"); 232 u8 value = 0; 233 234 // Most significant bytes first. 235 for (u1 i = 0; i < n; i++) { 236 value <<= 8; 237 value |= data[i]; 238 } 239 240 return value; 241 } 242 243 public: 244 ImageLocation(u1* data); 245 246 // Retrieve an attribute value from the inflated array. 247 inline u8 get_attribute(u1 kind) const { 248 assert(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT, "invalid attribute kind"); 249 return _attributes[kind]; 250 } 251 252 // Retrieve an attribute string value from the inflated array. 253 inline const char* get_attribute(u4 kind, const ImageStrings& strings) const { 254 return strings.get((u4)get_attribute(kind)); 255 } 256 }; 257 258 // Manage the image file. 259 class ImageFile: public CHeapObj<mtClass> { 260 private: 261 // Image file marker. 262 static const u4 IMAGE_MAGIC = 0xCAFEDADA; 263 // Image file major version number. 264 static const u2 MAJOR_VERSION = 0; 265 // Image file minor version number. 266 static const u2 MINOR_VERSION = 1; 267 268 struct ImageHeader { 269 u4 _magic; // Image file marker 270 u2 _major_version; // Image file major version number 271 u2 _minor_version; // Image file minor version number 272 u4 _location_count; // Number of locations managed in index. 273 u4 _locations_size; // Number of bytes in attribute table. 274 u4 _strings_size; // Number of bytes in string table. 275 }; 276 277 char* _name; // Name of image 278 int _fd; // File descriptor 279 bool _memory_mapped; // Is file memory mapped 280 ImageHeader _header; // Image header 281 u8 _index_size; // Total size of index 282 u1* _index_data; // Raw index data 283 s4* _redirect_table; // Perfect hash redirect table 284 u4* _offsets_table; // Location offset table 285 u1* _location_bytes; // Location attributes 286 u1* _string_bytes; // String table 287 288 // Compute number of bytes in image file index. 289 inline u8 index_size() { 290 return sizeof(ImageHeader) + 291 _header._location_count * sizeof(u4) * 2 + 292 _header._locations_size + 293 _header._strings_size; 294 } 295 296 public: 297 ImageFile(const char* name); 298 ~ImageFile(); 299 300 // Open image file for access. 301 bool open(); 302 // Close image file. 303 void close(); 304 305 // Retrieve name of image file. 306 inline const char* name() const { 307 return _name; 308 } 309 310 // Return a string table accessor. 311 inline const ImageStrings get_strings() const { 312 return ImageStrings(_string_bytes, _header._strings_size); 313 } 314 315 // Return number of locations in image file index. 316 inline u4 get_location_count() const { 317 return _header._location_count; 318 } 319 320 // Return location attribute stream for location i. 321 inline u1* get_location_data(u4 i) const { 322 u4 offset = _offsets_table[i]; 323 324 return offset != 0 ? _location_bytes + offset : NULL; 325 } 326 327 // Return the attribute stream for a named resourced. 328 u1* find_location_data(const char* path) const; 329 330 // Verify that a found location matches the supplied path. 331 bool verify_location(ImageLocation& location, const char* path) const; 332 333 // Return the resource for the supplied location info. 334 u1* get_resource(ImageLocation& location) const; 335 336 // Return the resource associated with the path else NULL if not found. 337 void get_resource(const char* path, u1*& buffer, u8& size) const; 338 339 // Return an array of packages for a given module 340 GrowableArray<const char*>* packages(const char* name); 341 }; 342 343 #endif // SHARE_VM_CLASSFILE_IMAGEFILE_HPP