1 /* 2 * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef SHARE_VM_CLASSFILE_IMAGEFILE_HPP 26 #define SHARE_VM_CLASSFILE_IMAGEFILE_HPP 27 28 #include "classfile/classLoader.hpp" 29 #include "memory/allocation.hpp" 30 #include "memory/allocation.inline.hpp" 31 #include "utilities/endian.hpp" 32 #include "utilities/globalDefinitions.hpp" 33 #include "utilities/growableArray.hpp" 34 35 // Image files are an alternate file format for storing classes and resources. The 36 // goal is to supply file access which is faster and smaller than the jar format. 37 // It should be noted that unlike jars, information stored in an image is in native 38 // endian format. This allows the image to be mapped into memory without endian 39 // translation. This also means that images are platform dependent. 40 // 41 // Image files are structured as three sections; 42 // 43 // +-----------+ 44 // | Header | 45 // +-----------+ 46 // | | 47 // | Index | 48 // | | 49 // +-----------+ 50 // | | 51 // | | 52 // | Resources | 53 // | | 54 // | | 55 // +-----------+ 56 // 57 // The header contains information related to identification and description of 58 // contents. 59 // 60 // +-------------------------+ 61 // | Magic (0xCAFEDADA) | 62 // +------------+------------+ 63 // | Major Vers | Minor Vers | 64 // +------------+------------+ 65 // | Flags | 66 // +-------------------------+ 67 // | Resource Count | 68 // +-------------------------+ 69 // | Table Length | 70 // +-------------------------+ 71 // | Attributes Size | 72 // +-------------------------+ 73 // | Strings Size | 74 // +-------------------------+ 75 // 76 // Magic - means of identifying validity of the file. This avoids requiring a 77 // special file extension. 78 // Major vers, minor vers - differences in version numbers indicate structural 79 // changes in the image. 80 // Flags - various image wide flags (future). 81 // Resource count - number of resources in the file. 82 // Table length - the length of lookup tables used in the index. 83 // Attributes size - number of bytes in the region used to store location attribute 84 // streams. 85 // Strings size - the size of the region used to store strings used by the 86 // index and meta data. 87 // 88 // The index contains information related to resource lookup. The algorithm 89 // used for lookup is "A Practical Minimal Perfect Hashing Method" 90 // (http://homepages.dcc.ufmg.br/~nivio/papers/wea05.pdf). Given a path string 91 // in the form /<module>/<package>/<base>.<extension> return the resource location 92 // information; 93 // 94 // redirectIndex = hash(path, DEFAULT_SEED) % table_length; 95 // redirect = redirectTable[redirectIndex]; 96 // if (redirect == 0) return not found; 97 // locationIndex = redirect < 0 ? -1 - redirect : hash(path, redirect) % table_length; 98 // location = locationTable[locationIndex]; 99 // if (!verify(location, path)) return not found; 100 // return location; 101 // 102 // Note: The hash function takes an initial seed value. A different seed value 103 // usually returns a different result for strings that would otherwise collide with 104 // other seeds. The verify function guarantees the found resource location is 105 // indeed the resource we are looking for. 106 // 107 // The following is the format of the index; 108 // 109 // +-------------------+ 110 // | Redirect Table | 111 // +-------------------+ 112 // | Attribute Offsets | 113 // +-------------------+ 114 // | Attribute Data | 115 // +-------------------+ 116 // | Strings | 117 // +-------------------+ 118 // 119 // Redirect Table - Array of 32-bit signed values representing actions that 120 // should take place for hashed strings that map to that 121 // value. Negative values indicate no hash collision and can be 122 // quickly converted to indices into attribute offsets. Positive 123 // values represent a new seed for hashing an index into attribute 124 // offsets. Zero indicates not found. 125 // Attribute Offsets - Array of 32-bit unsigned values representing offsets into 126 // attribute data. Attribute offsets can be iterated to do a 127 // full survey of resources in the image. Offset of zero 128 // indicates no attributes. 129 // Attribute Data - Bytes representing compact attribute data for locations. (See 130 // comments in ImageLocation.) 131 // Strings - Collection of zero terminated UTF-8 strings used by the index and 132 // image meta data. Each string is accessed by offset. Each string is 133 // unique. Offset zero is reserved for the empty string. 134 // 135 // Note that the memory mapped index assumes 32 bit alignment of each component 136 // in the index. 137 // 138 // Endianness of an image. 139 // An image booted by hotspot is always in native endian. However, it is possible 140 // to read (by the JDK) in alternate endian format. Primarily, this is during 141 // cross platform scenarios. Ex, where javac needs to read an embedded image 142 // to access classes for crossing compilation. 143 // 144 145 class ImageFileReader; // forward declaration 146 147 // Manage image file string table. 148 class ImageStrings VALUE_OBJ_CLASS_SPEC { 149 private: 150 u1* _data; // Data bytes for strings. 151 u4 _size; // Number of bytes in the string table. 152 public: 153 enum { 154 // Not found result from find routine. 155 NOT_FOUND = -1, 156 // Prime used to generate hash for Perfect Hashing. 157 HASH_MULTIPLIER = 0x01000193 158 }; 159 160 ImageStrings(u1* data, u4 size) : _data(data), _size(size) {} 161 162 // Return the UTF-8 string beginning at offset. 163 inline const char* get(u4 offset) const { 164 guarantee(offset < _size, "offset exceeds string table size"); 165 return (const char*)(_data + offset); 166 } 167 168 // Compute the Perfect Hashing hash code for the supplied UTF-8 string. 169 inline static u4 hash_code(const char* string) { 170 return hash_code(string, HASH_MULTIPLIER); 171 } 172 173 // Compute the Perfect Hashing hash code for the supplied string, starting at seed. 174 static s4 hash_code(const char* string, s4 seed); 175 176 // Match up a string in a perfect hash table. Result still needs validation 177 // for precise match. 178 static s4 find(Endian* endian, const char* name, s4* redirect, u4 length); 179 180 // Test to see if UTF-8 string begins with the start UTF-8 string. If so, 181 // return non-NULL address of remaining portion of string. Otherwise, return 182 // NULL. Used to test sections of a path without copying from image string 183 // table. 184 static const char* starts_with(const char* string, const char* start); 185 186 // Test to see if UTF-8 string begins with start char. If so, return non-NULL 187 // address of remaining portion of string. Otherwise, return NULL. Used 188 // to test a character of a path without copying. 189 inline static const char* starts_with(const char* string, const char ch) { 190 return *string == ch ? string + 1 : NULL; 191 } 192 }; 193 194 // Manage image file location attribute data. Within an image, a location's 195 // attributes are compressed into a stream of bytes. An attribute stream is 196 // composed of individual attribute sequences. Each attribute sequence begins with 197 // a header byte containing the attribute 'kind' (upper 5 bits of header) and the 198 // 'length' less 1 (lower 3 bits of header) of bytes that follow containing the 199 // attribute value. Attribute values present as most significant byte first. 200 // 201 // Ex. Container offset (ATTRIBUTE_OFFSET) 0x33562 would be represented as 0x22 202 // (kind = 4, length = 3), 0x03, 0x35, 0x62. 203 // 204 // An attribute stream is terminated with a header kind of ATTRIBUTE_END (header 205 // byte of zero.) 206 // 207 // ImageLocation inflates the stream into individual values stored in the long 208 // array _attributes. This allows an attribute value can be quickly accessed by 209 // direct indexing. Unspecified values default to zero. 210 // 211 // Notes: 212 // - Even though ATTRIBUTE_END is used to mark the end of the attribute stream, 213 // streams will contain zero byte values to represent lesser significant bits. 214 // Thus, detecting a zero byte is not sufficient to detect the end of an attribute 215 // stream. 216 // - ATTRIBUTE_OFFSET represents the number of bytes from the beginning of the region 217 // storing the resources. Thus, in an image this represents the number of bytes 218 // after the index. 219 // - Currently, compressed resources are represented by having a non-zero 220 // ATTRIBUTE_COMPRESSED value. This represents the number of bytes stored in the 221 // image, and the value of ATTRIBUTE_UNCOMPRESSED represents number of bytes of the 222 // inflated resource in memory. If the ATTRIBUTE_COMPRESSED is zero then the value 223 // of ATTRIBUTE_UNCOMPRESSED represents both the number of bytes in the image and 224 // in memory. In the future, additional compression techniques will be used and 225 // represented differently. 226 // - Package strings include trailing slash and extensions include prefix period. 227 // 228 class ImageLocation VALUE_OBJ_CLASS_SPEC { 229 public: 230 enum { 231 ATTRIBUTE_END, // End of attribute stream marker 232 ATTRIBUTE_MODULE, // String table offset of module name 233 ATTRIBUTE_PARENT, // String table offset of resource path parent 234 ATTRIBUTE_BASE, // String table offset of resource path base 235 ATTRIBUTE_EXTENSION, // String table offset of resource path extension 236 ATTRIBUTE_OFFSET, // Container byte offset of resource 237 ATTRIBUTE_COMPRESSED, // In image byte size of the compressed resource 238 ATTRIBUTE_UNCOMPRESSED, // In memory byte size of the uncompressed resource 239 ATTRIBUTE_COUNT // Number of attribute kinds 240 }; 241 242 private: 243 // Values of inflated attributes. 244 u8 _attributes[ATTRIBUTE_COUNT]; 245 246 // Return the attribute value number of bytes. 247 inline static u1 attribute_length(u1 data) { 248 return (data & 0x7) + 1; 249 } 250 251 // Return the attribute kind. 252 inline static u1 attribute_kind(u1 data) { 253 u1 kind = data >> 3; 254 guarantee(kind < ATTRIBUTE_COUNT, "invalid attribute kind"); 255 return kind; 256 } 257 258 // Return the attribute length. 259 inline static u8 attribute_value(u1* data, u1 n) { 260 guarantee(0 < n && n <= 8, "invalid attribute value length"); 261 u8 value = 0; 262 // Most significant bytes first. 263 for (u1 i = 0; i < n; i++) { 264 value <<= 8; 265 value |= data[i]; 266 } 267 return value; 268 } 269 270 public: 271 ImageLocation() { 272 clear_data(); 273 } 274 275 ImageLocation(u1* data) { 276 clear_data(); 277 set_data(data); 278 } 279 280 // Inflates the attribute stream into individual values stored in the long 281 // array _attributes. This allows an attribute value to be quickly accessed by 282 // direct indexing. Unspecified values default to zero. 283 void set_data(u1* data); 284 285 // Zero all attribute values. 286 void clear_data(); 287 288 // Retrieve an attribute value from the inflated array. 289 inline u8 get_attribute(u1 kind) const { 290 guarantee(ATTRIBUTE_END < kind && kind < ATTRIBUTE_COUNT, "invalid attribute kind"); 291 return _attributes[kind]; 292 } 293 294 // Retrieve an attribute string value from the inflated array. 295 inline const char* get_attribute(u4 kind, const ImageStrings& strings) const { 296 return strings.get((u4)get_attribute(kind)); 297 } 298 }; 299 300 // 301 // NOTE: needs revision. 302 // Each loader requires set of module meta data to identify which modules and 303 // packages are managed by that loader. Currently, there is one image file per 304 // builtin loader, so only one module meta data resource per file. 305 // 306 // Each element in the module meta data is a native endian 4 byte integer. Note 307 // that entries with zero offsets for string table entries should be ignored ( 308 // padding for hash table lookup.) 309 // 310 // Format: 311 // Count of package to module entries 312 // Count of module to package entries 313 // Perfect Hash redirect table[Count of package to module entries] 314 // Package to module entries[Count of package to module entries] 315 // Offset to package name in string table 316 // Offset to module name in string table 317 // Perfect Hash redirect table[Count of module to package entries] 318 // Module to package entries[Count of module to package entries] 319 // Offset to module name in string table 320 // Count of packages in module 321 // Offset to first package in packages table 322 // Packages[] 323 // Offset to package name in string table 324 // 325 // Manage the image module meta data. 326 class ImageModuleData : public CHeapObj<mtClass> { 327 class Header VALUE_OBJ_CLASS_SPEC { 328 private: 329 u4 _ptm_count; // Count of package to module entries 330 u4 _mtp_count; // Count of module to package entries 331 public: 332 inline u4 ptm_count(Endian* endian) const { return endian->get(_ptm_count); } 333 inline u4 mtp_count(Endian* endian) const { return endian->get(_mtp_count); } 334 }; 335 336 // Hashtable entry 337 class HashData VALUE_OBJ_CLASS_SPEC { 338 private: 339 u4 _name_offset; // Name offset in string table 340 public: 341 inline s4 name_offset(Endian* endian) const { return endian->get(_name_offset); } 342 }; 343 344 // Package to module hashtable entry 345 class PTMData : public HashData { 346 private: 347 u4 _module_name_offset; // Module name offset in string table 348 public: 349 inline s4 module_name_offset(Endian* endian) const { return endian->get(_module_name_offset); } 350 }; 351 352 // Module to package hashtable entry 353 class MTPData : public HashData { 354 private: 355 u4 _package_count; // Number of packages in module 356 u4 _package_offset; // Offset in package list 357 public: 358 inline u4 package_count(Endian* endian) const { return endian->get(_package_count); } 359 inline u4 package_offset(Endian* endian) const { return endian->get(_package_offset); } 360 }; 361 362 const ImageFileReader* _image_file; // Source image file 363 Endian* _endian; // Endian handler 364 ImageStrings _strings; // Image file strings 365 u1* _data; // Module data resource data 366 u8 _data_size; // Size of resource data 367 Header* _header; // Module data header 368 s4* _ptm_redirect; // Package to module hashtable redirect 369 PTMData* _ptm_data; // Package to module data 370 s4* _mtp_redirect; // Module to packages hashtable redirect 371 MTPData* _mtp_data; // Module to packages data 372 s4* _mtp_packages; // Package data (name offsets) 373 374 // Return a string from the string table. 375 inline const char* get_string(u4 offset) { 376 return _strings.get(offset); 377 } 378 379 inline u4 mtp_package(u4 index) { 380 return _endian->get(_mtp_packages[index]); 381 } 382 383 public: 384 ImageModuleData(const ImageFileReader* image_file, const char* module_data_name); 385 ~ImageModuleData(); 386 387 // Return the name of the module data resource. 388 static void module_data_name(char* buffer, const char* image_file_name); 389 390 // Return the module in which a package resides. Returns NULL if not found. 391 const char* package_to_module(const char* package_name); 392 393 // Returns all the package names in a module. Returns NULL if module not found. 394 GrowableArray<const char*>* module_to_packages(const char* module_name); 395 }; 396 397 // Image file header, starting at offset 0. 398 class ImageHeader VALUE_OBJ_CLASS_SPEC { 399 private: 400 u4 _magic; // Image file marker 401 u4 _version; // Image file major version number 402 u4 _flags; // Image file flags 403 u4 _resource_count; // Number of resources in file 404 u4 _table_length; // Number of slots in index tables 405 u4 _locations_size; // Number of bytes in attribute table 406 u4 _strings_size; // Number of bytes in string table 407 408 public: 409 u4 magic() const { return _magic; } 410 u4 magic(Endian* endian) const { return endian->get(_magic); } 411 void set_magic(Endian* endian, u4 magic) { return endian->set(_magic, magic); } 412 413 u4 major_version(Endian* endian) const { return endian->get(_version) >> 16; } 414 u4 minor_version(Endian* endian) const { return endian->get(_version) & 0xFFFF; } 415 void set_version(Endian* endian, u4 major_version, u4 minor_version) { 416 return endian->set(_version, major_version << 16 | minor_version); 417 } 418 419 u4 flags(Endian* endian) const { return endian->get(_flags); } 420 void set_flags(Endian* endian, u4 value) { return endian->set(_flags, value); } 421 422 u4 resource_count(Endian* endian) const { return endian->get(_resource_count); } 423 void set_resource_count(Endian* endian, u4 count) { return endian->set(_resource_count, count); } 424 425 u4 table_length(Endian* endian) const { return endian->get(_table_length); } 426 void set_table_length(Endian* endian, u4 count) { return endian->set(_table_length, count); } 427 428 u4 locations_size(Endian* endian) const { return endian->get(_locations_size); } 429 void set_locations_size(Endian* endian, u4 size) { return endian->set(_locations_size, size); } 430 431 u4 strings_size(Endian* endian) const { return endian->get(_strings_size); } 432 void set_strings_size(Endian* endian, u4 size) { return endian->set(_strings_size, size); } 433 }; 434 435 // Max path length limit independent of platform. Windows max path is 1024, 436 // other platforms use 4096. The JCK fails several tests when 1024 is used. 437 #define IMAGE_MAX_PATH 4096 438 439 // Manage the image file. 440 // ImageFileReader manages the content of an image file. 441 // Initially, the header of the image file is read for validation. If valid, 442 // values in the header are used calculate the size of the image index. The 443 // index is then memory mapped to allow load on demand and sharing. The 444 // -XX:+MemoryMapImage flag determines if the entire file is loaded (server use.) 445 // An image can be used by Hotspot and multiple reference points in the JDK, thus 446 // it is desirable to share a reader. To accomodate sharing, a share table is 447 // defined (see ImageFileReaderTable in imageFile.cpp) To track the number of 448 // uses, ImageFileReader keeps a use count (_use). Use is incremented when 449 // 'opened' by reference point and decremented when 'closed'. Use of zero 450 // leads the ImageFileReader to be actually closed and discarded. 451 class ImageFileReader : public CHeapObj<mtClass> { 452 private: 453 // Manage a number of image files such that an image can be shared across 454 // multiple uses (ex. loader.) 455 static GrowableArray<ImageFileReader*>* _reader_table; 456 457 char* _name; // Name of image 458 s4 _use; // Use count 459 int _fd; // File descriptor 460 Endian* _endian; // Endian handler 461 u8 _file_size; // File size in bytes 462 ImageHeader _header; // Image header 463 size_t _index_size; // Total size of index 464 u1* _index_data; // Raw index data 465 s4* _redirect_table; // Perfect hash redirect table 466 u4* _offsets_table; // Location offset table 467 u1* _location_bytes; // Location attributes 468 u1* _string_bytes; // String table 469 470 ImageFileReader(const char* name, bool big_endian); 471 ~ImageFileReader(); 472 473 // Compute number of bytes in image file index. 474 inline u8 index_size() { 475 return sizeof(ImageHeader) + 476 table_length() * sizeof(u4) * 2 + locations_size() + strings_size(); 477 } 478 479 public: 480 enum { 481 // Image file marker. 482 IMAGE_MAGIC = 0xCAFEDADA, 483 // Endian inverted Image file marker. 484 IMAGE_MAGIC_INVERT = 0xDADAFECA, 485 // Image file major version number. 486 MAJOR_VERSION = 1, 487 // Image file minor version number. 488 MINOR_VERSION = 0 489 }; 490 491 // Open an image file, reuse structure if file already open. 492 static ImageFileReader* open(const char* name, bool big_endian = Endian::is_big_endian()); 493 494 // Close an image file if the file is not in use elsewhere. 495 static void close(ImageFileReader *reader); 496 497 // Return an id for the specifed ImageFileReader. 498 static u8 readerToID(ImageFileReader *reader); 499 500 // Validate the image id. 501 static bool idCheck(u8 id); 502 503 // Return an id for the specifed ImageFileReader. 504 static ImageFileReader* idToReader(u8 id); 505 506 // Open image file for read access. 507 bool open(); 508 509 // Close image file. 510 void close(); 511 512 // Read directly from the file. 513 bool read_at(u1* data, u8 size, u8 offset) const; 514 515 inline Endian* endian() const { return _endian; } 516 517 // Retrieve name of image file. 518 inline const char* name() const { 519 return _name; 520 } 521 522 // Retrieve size of image file. 523 inline u8 file_size() const { 524 return _file_size; 525 } 526 527 // Return first address of index data. 528 inline u1* get_index_address() const { 529 return _index_data; 530 } 531 532 // Return first address of resource data. 533 inline u1* get_data_address() const { 534 return _index_data + _index_size; 535 } 536 537 // Get the size of the index data. 538 size_t get_index_size() const { 539 return _index_size; 540 } 541 542 inline u4 table_length() const { 543 return _header.table_length(_endian); 544 } 545 546 inline u4 locations_size() const { 547 return _header.locations_size(_endian); 548 } 549 550 inline u4 strings_size()const { 551 return _header.strings_size(_endian); 552 } 553 554 inline u4* offsets_table() const { 555 return _offsets_table; 556 } 557 558 // Increment use count. 559 inline void inc_use() { 560 _use++; 561 } 562 563 // Decrement use count. 564 inline bool dec_use() { 565 return --_use == 0; 566 } 567 568 // Return a string table accessor. 569 inline const ImageStrings get_strings() const { 570 return ImageStrings(_string_bytes, _header.strings_size(_endian)); 571 } 572 573 // Return location attribute stream at offset. 574 inline u1* get_location_offset_data(u4 offset) const { 575 guarantee((u4)offset < _header.locations_size(_endian), 576 "offset exceeds location attributes size"); 577 return offset != 0 ? _location_bytes + offset : NULL; 578 } 579 580 // Return location attribute stream for location i. 581 inline u1* get_location_data(u4 index) const { 582 guarantee((u4)index < _header.table_length(_endian), 583 "index exceeds location count"); 584 u4 offset = _endian->get(_offsets_table[index]); 585 586 return get_location_offset_data(offset); 587 } 588 589 // Find the location attributes associated with the path. Returns true if 590 // the location is found, false otherwise. 591 bool find_location(const char* path, ImageLocation& location) const; 592 593 // Assemble the location path. 594 void location_path(ImageLocation& location, char* path, size_t max) const; 595 596 // Verify that a found location matches the supplied path. 597 bool verify_location(ImageLocation& location, const char* path) const; 598 599 // Return the resource for the supplied path. 600 void get_resource(ImageLocation& location, u1* uncompressed_data) const; 601 }; 602 #endif // SHARE_VM_CLASSFILE_IMAGEFILE_HPP