1 /* 2 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP 26 #define SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP 27 28 #include "oops/array.hpp" 29 #include "oops/symbol.hpp" 30 #include "utilities/growableArray.hpp" 31 32 33 template < 34 typename K, 35 typename V, 36 V (*DECODE)(address base_address, u4 offset), 37 bool (*EQUALS)(V value, K key, int len) 38 > 39 class CompactHashtable; 40 class NumberSeq; 41 class SimpleCompactHashtable; 42 class SerializeClosure; 43 44 // Stats for symbol tables in the CDS archive 45 class CompactHashtableStats { 46 public: 47 int hashentry_count; 48 int hashentry_bytes; 49 int bucket_count; 50 int bucket_bytes; 51 }; 52 53 #if INCLUDE_CDS 54 ///////////////////////////////////////////////////////////////////////// 55 // 56 // The compact hash table writer. Used at dump time for writing out 57 // the compact table to the shared archive. 58 // 59 // At dump time, the CompactHashtableWriter obtains all entries from the 60 // symbol/string table and adds them to a new temporary hash table. The hash 61 // table size (number of buckets) is calculated using 62 // '(num_entries + bucket_size - 1) / bucket_size'. The default bucket 63 // size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option. 64 // 4 is chosen because it produces smaller sized bucket on average for 65 // faster lookup. It also has relatively small number of empty buckets and 66 // good distribution of the entries. 67 // 68 // We use a simple hash function (hash % num_bucket) for the table. 69 // The new table is compacted when written out. Please see comments 70 // above the CompactHashtable class for the table layout detail. The bucket 71 // offsets are written to the archive as part of the compact table. The 72 // bucket offset is encoded in the low 30-bit (0-29) and the bucket type 73 // (regular or compact) are encoded in bit[31, 30]. For buckets with more 74 // than one entry, both hash and entry offset are written to the 75 // table. For buckets with only one entry, only the entry offset is written 76 // to the table and the buckets are tagged as compact in their type bits. 77 // Buckets without entry are skipped from the table. Their offsets are 78 // still written out for faster lookup. 79 // 80 class CompactHashtableWriter: public StackObj { 81 public: 82 class Entry { 83 unsigned int _hash; 84 u4 _value; 85 86 public: 87 Entry() {} 88 Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {} 89 90 u4 value() { 91 return _value; 92 } 93 unsigned int hash() { 94 return _hash; 95 } 96 97 bool operator==(const CompactHashtableWriter::Entry& other) { 98 return (_value == other._value && _hash == other._hash); 99 } 100 }; // class CompactHashtableWriter::Entry 101 102 private: 103 int _num_entries; 104 int _num_buckets; 105 int _num_empty_buckets; 106 int _num_value_only_buckets; 107 int _num_other_buckets; 108 GrowableArray<Entry>** _buckets; 109 CompactHashtableStats* _stats; 110 Array<u4>* _compact_buckets; 111 Array<u4>* _compact_entries; 112 113 public: 114 // This is called at dump-time only 115 CompactHashtableWriter(int num_buckets, CompactHashtableStats* stats); 116 ~CompactHashtableWriter(); 117 118 void add(unsigned int hash, u4 value); 119 120 private: 121 void allocate_table(); 122 void dump_table(NumberSeq* summary); 123 124 public: 125 void dump(SimpleCompactHashtable *cht, const char* table_name); 126 127 static int default_num_buckets(size_t num_entries) { 128 return default_num_buckets((int)num_entries); 129 } 130 static int default_num_buckets(int num_entries) { 131 int num_buckets = num_entries / SharedSymbolTableBucketSize; 132 // calculation of num_buckets can result in zero buckets, we need at least one 133 return (num_buckets < 1) ? 1 : num_buckets; 134 } 135 }; 136 #endif // INCLUDE_CDS 137 138 #define REGULAR_BUCKET_TYPE 0 139 #define VALUE_ONLY_BUCKET_TYPE 1 140 #define TABLEEND_BUCKET_TYPE 3 141 #define BUCKET_OFFSET_MASK 0x3FFFFFFF 142 #define BUCKET_OFFSET(info) ((info) & BUCKET_OFFSET_MASK) 143 #define BUCKET_TYPE_SHIFT 30 144 #define BUCKET_TYPE(info) (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT) 145 #define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK)) 146 147 ///////////////////////////////////////////////////////////////////////////// 148 // 149 // CompactHashtable is used to store the CDS archive's symbol/string tables. 150 // 151 // Because these tables are read-only (no entries can be added/deleted) at run-time 152 // and tend to have large number of entries, we try to minimize the footprint 153 // cost per entry. 154 // 155 // The CompactHashtable is split into two arrays 156 // 157 // u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset 158 // u4 entries[<variable size>] 159 // 160 // The size of buckets[] is 'num_buckets + 1'. Each entry of 161 // buckets[] is a 32-bit encoding of the bucket type and bucket offset, 162 // with the type in the left-most 2-bit and offset in the remaining 30-bit. 163 // The last entry is a special type. It contains the end of the last 164 // bucket. 165 // 166 // There are two types of buckets, regular buckets and value_only buckets. The 167 // value_only buckets have '01' in their highest 2-bit, and regular buckets have 168 // '00' in their highest 2-bit. 169 // 170 // For normal buckets, each entry is 8 bytes in the entries[]: 171 // u4 hash; /* symbol/string hash */ 172 // union { 173 // u4 offset; /* Symbol* sym = (Symbol*)(base_address + offset) */ 174 // narrowOop str; /* String narrowOop encoding */ 175 // } 176 // 177 // 178 // For value_only buckets, each entry has only the 4-byte 'offset' in the entries[]. 179 // 180 // Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code 181 // is skipped. 182 // buckets[0, 4, 5, ....] 183 // | | | 184 // | | +---+ 185 // | | | 186 // | +----+ | 187 // v v v 188 // entries[H,O,H,O,O,H,O,H,O.....] 189 // 190 // See CompactHashtable::lookup() for how the table is searched at runtime. 191 // See CompactHashtableWriter::dump() for how the table is written at CDS 192 // dump time. 193 // 194 class SimpleCompactHashtable { 195 protected: 196 address _base_address; 197 u4 _bucket_count; 198 u4 _entry_count; 199 u4* _buckets; 200 u4* _entries; 201 202 public: 203 SimpleCompactHashtable() { 204 _entry_count = 0; 205 _bucket_count = 0; 206 _buckets = 0; 207 _entries = 0; 208 } 209 210 void reset() { 211 _bucket_count = 0; 212 _entry_count = 0; 213 _buckets = 0; 214 _entries = 0; 215 } 216 217 void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) { 218 _base_address = base_address; 219 _bucket_count = bucket_count; 220 _entry_count = entry_count; 221 _buckets = buckets; 222 _entries = entries; 223 } 224 225 // Read/Write the table's header from/to the CDS archive 226 void serialize_header(SerializeClosure* soc) NOT_CDS_RETURN; 227 228 inline bool empty() { 229 return (_entry_count == 0); 230 } 231 }; 232 233 template < 234 typename K, 235 typename V, 236 V (*DECODE)(address base_address, u4 offset), 237 bool (*EQUALS)(V value, K key, int len) 238 > 239 class CompactHashtable : public SimpleCompactHashtable { 240 friend class VMStructs; 241 242 V decode(u4 offset) const { 243 return DECODE(_base_address, offset); 244 } 245 246 public: 247 // Lookup a value V from the compact table using key K 248 inline V lookup(K key, unsigned int hash, int len) const { 249 if (_entry_count > 0) { 250 int index = hash % _bucket_count; 251 u4 bucket_info = _buckets[index]; 252 u4 bucket_offset = BUCKET_OFFSET(bucket_info); 253 int bucket_type = BUCKET_TYPE(bucket_info); 254 u4* entry = _entries + bucket_offset; 255 256 if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { 257 V value = decode(entry[0]); 258 if (EQUALS(value, key, len)) { 259 return value; 260 } 261 } else { 262 // This is a regular bucket, which has more than one 263 // entries. Each entry is a pair of entry (hash, offset). 264 // Seek until the end of the bucket. 265 u4* entry_max = _entries + BUCKET_OFFSET(_buckets[index + 1]); 266 while (entry < entry_max) { 267 unsigned int h = (unsigned int)(entry[0]); 268 if (h == hash) { 269 V value = decode(entry[1]); 270 if (EQUALS(value, key, len)) { 271 return value; 272 } 273 } 274 entry += 2; 275 } 276 } 277 } 278 return NULL; 279 } 280 281 template <class ITER> 282 inline void iterate(ITER* iter) const { 283 for (u4 i = 0; i < _bucket_count; i++) { 284 u4 bucket_info = _buckets[i]; 285 u4 bucket_offset = BUCKET_OFFSET(bucket_info); 286 int bucket_type = BUCKET_TYPE(bucket_info); 287 u4* entry = _entries + bucket_offset; 288 289 if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { 290 iter->do_value(decode(entry[0])); 291 } else { 292 u4*entry_max = _entries + BUCKET_OFFSET(_buckets[i + 1]); 293 while (entry < entry_max) { 294 iter->do_value(decode(entry[1])); 295 entry += 2; 296 } 297 } 298 } 299 } 300 301 void print_table_statistics(outputStream* st, const char* name) { 302 st->print_cr("%s statistics:", name); 303 int total_entries = 0; 304 int max_bucket = 0; 305 for (u4 i = 0; i < _bucket_count; i++) { 306 u4 bucket_info = _buckets[i]; 307 int bucket_type = BUCKET_TYPE(bucket_info); 308 int bucket_size; 309 310 if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { 311 bucket_size = 1; 312 } else { 313 bucket_size = (BUCKET_OFFSET(_buckets[i + 1]) - BUCKET_OFFSET(bucket_info)) / 2; 314 } 315 total_entries += bucket_size; 316 if (max_bucket < bucket_size) { 317 max_bucket = bucket_size; 318 } 319 } 320 st->print_cr("Number of buckets : %9d", _bucket_count); 321 st->print_cr("Number of entries : %9d", total_entries); 322 st->print_cr("Maximum bucket size : %9d", max_bucket); 323 } 324 }; 325 326 //////////////////////////////////////////////////////////////////////// 327 // 328 // OffsetCompactHashtable -- This is used to store many types of objects 329 // in the CDS archive. On 64-bit platforms, we save space by using a 32-bit 330 // offset from the CDS base address. 331 332 template <typename V> 333 inline V read_value_from_compact_hashtable(address base_address, u4 offset) { 334 return (V)(base_address + offset); 335 } 336 337 template < 338 typename K, 339 typename V, 340 bool (*EQUALS)(V value, K key, int len) 341 > 342 class OffsetCompactHashtable : public CompactHashtable< 343 K, V, read_value_from_compact_hashtable<V>, EQUALS> { 344 }; 345 346 347 //////////////////////////////////////////////////////////////////////// 348 // 349 // Read/Write the contents of a hashtable textual dump (created by 350 // SymbolTable::dump and StringTable::dump). 351 // Because the dump file may be big (hundred of MB in extreme cases), 352 // we use mmap for fast access when reading it. 353 // 354 class HashtableTextDump { 355 int _fd; 356 const char* _base; 357 const char* _p; 358 const char* _end; 359 const char* _filename; 360 size_t _size; 361 int _prefix_type; 362 int _line_no; 363 public: 364 HashtableTextDump(const char* filename); 365 ~HashtableTextDump(); 366 367 enum { 368 SymbolPrefix = 1 << 0, 369 StringPrefix = 1 << 1, 370 Unknown = 1 << 2 371 }; 372 373 void quit(const char* err, const char* msg); 374 375 inline int remain() { 376 return (int)(_end - _p); 377 } 378 379 void corrupted(const char *p, const char *msg); 380 381 inline void corrupted_if(bool cond, const char *msg) { 382 if (cond) { 383 corrupted(_p, msg); 384 } 385 } 386 387 bool skip_newline(); 388 int skip(char must_be_char); 389 void skip_past(char c); 390 void check_version(const char* ver); 391 392 inline void get_num(char delim, int *num) { 393 const char* p = _p; 394 const char* end = _end; 395 u8 n = 0; 396 397 while (p < end) { 398 char c = *p++; 399 if ('0' <= c && c <= '9') { 400 n = n * 10 + (c - '0'); 401 if (n > (u8)INT_MAX) { 402 corrupted(_p, "Num overflow"); 403 } 404 } else if (c == delim) { 405 _p = p; 406 *num = (int)n; 407 return; 408 } else { 409 // Not [0-9], not 'delim' 410 corrupted(_p, "Unrecognized format");; 411 } 412 } 413 414 corrupted(_end, "Incorrect format"); 415 ShouldNotReachHere(); 416 } 417 418 void scan_prefix_type(); 419 int scan_prefix(int* utf8_length); 420 int scan_string_prefix(); 421 int scan_symbol_prefix(); 422 423 jchar unescape(const char* from, const char* end, int count); 424 void get_utf8(char* utf8_buffer, int utf8_length); 425 static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length); 426 }; 427 428 #endif // SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP