1 /* 2 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP 26 #define SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP 27 28 #include "oops/array.hpp" 29 #include "oops/symbol.hpp" 30 #include "utilities/hashtable.hpp" 31 32 33 template < 34 typename K, 35 typename V, 36 V (*DECODE)(address base_address, u4 offset), 37 bool (*EQUALS)(V value, K key, int len) 38 > 39 class CompactHashtable; 40 class NumberSeq; 41 class SimpleCompactHashtable; 42 class SerializeClosure; 43 44 // Stats for symbol tables in the CDS archive 45 class CompactHashtableStats { 46 public: 47 int hashentry_count; 48 int hashentry_bytes; 49 int bucket_count; 50 int bucket_bytes; 51 }; 52 53 ///////////////////////////////////////////////////////////////////////// 54 // 55 // The compact hash table writer. Used at dump time for writing out 56 // the compact table to the shared archive. 57 // 58 // At dump time, the CompactHashtableWriter obtains all entries from the 59 // symbol/string table and adds them to a new temporary hash table. The hash 60 // table size (number of buckets) is calculated using 61 // '(num_entries + bucket_size - 1) / bucket_size'. The default bucket 62 // size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option. 63 // 4 is chosen because it produces smaller sized bucket on average for 64 // faster lookup. It also has relatively small number of empty buckets and 65 // good distribution of the entries. 66 // 67 // We use a simple hash function (hash % num_bucket) for the table. 68 // The new table is compacted when written out. Please see comments 69 // above the CompactHashtable class for the table layout detail. The bucket 70 // offsets are written to the archive as part of the compact table. The 71 // bucket offset is encoded in the low 30-bit (0-29) and the bucket type 72 // (regular or compact) are encoded in bit[31, 30]. For buckets with more 73 // than one entry, both hash and entry offset are written to the 74 // table. For buckets with only one entry, only the entry offset is written 75 // to the table and the buckets are tagged as compact in their type bits. 76 // Buckets without entry are skipped from the table. Their offsets are 77 // still written out for faster lookup. 78 // 79 class CompactHashtableWriter: public StackObj { 80 public: 81 class Entry { 82 unsigned int _hash; 83 u4 _value; 84 85 public: 86 Entry() {} 87 Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {} 88 89 u4 value() { 90 return _value; 91 } 92 unsigned int hash() { 93 return _hash; 94 } 95 96 bool operator==(const CompactHashtableWriter::Entry& other) { 97 return (_value == other._value && _hash == other._hash); 98 } 99 }; // class CompactHashtableWriter::Entry 100 101 private: 102 int _num_entries; 103 int _num_buckets; 104 int _num_empty_buckets; 105 int _num_value_only_buckets; 106 int _num_other_buckets; 107 GrowableArray<Entry>** _buckets; 108 CompactHashtableStats* _stats; 109 Array<u4>* _compact_buckets; 110 Array<u4>* _compact_entries; 111 112 public: 113 // This is called at dump-time only 114 CompactHashtableWriter(int num_buckets, CompactHashtableStats* stats); 115 ~CompactHashtableWriter(); 116 117 void add(unsigned int hash, u4 value); 118 119 private: 120 void allocate_table(); 121 void dump_table(NumberSeq* summary); 122 123 public: 124 void dump(SimpleCompactHashtable *cht, const char* table_name); 125 }; 126 127 #define REGULAR_BUCKET_TYPE 0 128 #define VALUE_ONLY_BUCKET_TYPE 1 129 #define TABLEEND_BUCKET_TYPE 3 130 #define BUCKET_OFFSET_MASK 0x3FFFFFFF 131 #define BUCKET_OFFSET(info) ((info) & BUCKET_OFFSET_MASK) 132 #define BUCKET_TYPE_SHIFT 30 133 #define BUCKET_TYPE(info) (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT) 134 #define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK)) 135 136 ///////////////////////////////////////////////////////////////////////////// 137 // 138 // CompactHashtable is used to store the CDS archive's symbol/string tables. 139 // 140 // Because these tables are read-only (no entries can be added/deleted) at run-time 141 // and tend to have large number of entries, we try to minimize the footprint 142 // cost per entry. 143 // 144 // The CompactHashtable is split into two arrays 145 // 146 // u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset 147 // u4 entries[<variable size>] 148 // 149 // The size of buckets[] is 'num_buckets + 1'. Each entry of 150 // buckets[] is a 32-bit encoding of the bucket type and bucket offset, 151 // with the type in the left-most 2-bit and offset in the remaining 30-bit. 152 // The last entry is a special type. It contains the end of the last 153 // bucket. 154 // 155 // There are two types of buckets, regular buckets and value_only buckets. The 156 // value_only buckets have '01' in their highest 2-bit, and regular buckets have 157 // '00' in their highest 2-bit. 158 // 159 // For normal buckets, each entry is 8 bytes in the entries[]: 160 // u4 hash; /* symbol/string hash */ 161 // union { 162 // u4 offset; /* Symbol* sym = (Symbol*)(base_address + offset) */ 163 // narrowOop str; /* String narrowOop encoding */ 164 // } 165 // 166 // 167 // For value_only buckets, each entry has only the 4-byte 'offset' in the entries[]. 168 // 169 // Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code 170 // is skipped. 171 // buckets[0, 4, 5, ....] 172 // | | | 173 // | | +---+ 174 // | | | 175 // | +----+ | 176 // v v v 177 // entries[H,O,H,O,O,H,O,H,O.....] 178 // 179 // See CompactHashtable::lookup() for how the table is searched at runtime. 180 // See CompactHashtableWriter::dump() for how the table is written at CDS 181 // dump time. 182 // 183 class SimpleCompactHashtable { 184 protected: 185 address _base_address; 186 u4 _bucket_count; 187 u4 _entry_count; 188 u4* _buckets; 189 u4* _entries; 190 191 public: 192 SimpleCompactHashtable() { 193 _entry_count = 0; 194 _bucket_count = 0; 195 _buckets = 0; 196 _entries = 0; 197 } 198 199 void reset() { 200 _bucket_count = 0; 201 _entry_count = 0; 202 _buckets = 0; 203 _entries = 0; 204 } 205 206 void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) { 207 _base_address = base_address; 208 _bucket_count = bucket_count; 209 _entry_count = entry_count; 210 _buckets = buckets; 211 _entries = entries; 212 } 213 214 // For reading from/writing to the CDS archive 215 void serialize(SerializeClosure* soc); 216 217 inline bool empty() { 218 return (_entry_count == 0); 219 } 220 }; 221 222 template < 223 typename K, 224 typename V, 225 V (*DECODE)(address base_address, u4 offset), 226 bool (*EQUALS)(V value, K key, int len) 227 > 228 class CompactHashtable : public SimpleCompactHashtable { 229 friend class VMStructs; 230 231 V decode(u4 offset) const { 232 return DECODE(_base_address, offset); 233 } 234 235 public: 236 CompactHashtable() : SimpleCompactHashtable() {} 237 238 // Lookup a value V from the compact table using key K 239 inline V lookup(K key, unsigned int hash, int len) const { 240 if (_entry_count > 0) { 241 int index = hash % _bucket_count; 242 u4 bucket_info = _buckets[index]; 243 u4 bucket_offset = BUCKET_OFFSET(bucket_info); 244 int bucket_type = BUCKET_TYPE(bucket_info); 245 u4* entry = _entries + bucket_offset; 246 247 if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { 248 V value = decode(entry[0]); 249 if (EQUALS(value, key, len)) { 250 return value; 251 } 252 } else { 253 // This is a regular bucket, which has more than one 254 // entries. Each entry is a pair of entry (hash, offset). 255 // Seek until the end of the bucket. 256 u4* entry_max = _entries + BUCKET_OFFSET(_buckets[index + 1]); 257 while (entry < entry_max) { 258 unsigned int h = (unsigned int)(entry[0]); 259 if (h == hash) { 260 V value = decode(entry[1]); 261 if (EQUALS(value, key, len)) { 262 return value; 263 } 264 } 265 entry += 2; 266 } 267 } 268 } 269 return NULL; 270 } 271 272 template <class ITER> 273 inline void iterate(ITER* iter) const { 274 for (u4 i = 0; i < _bucket_count; i++) { 275 u4 bucket_info = _buckets[i]; 276 u4 bucket_offset = BUCKET_OFFSET(bucket_info); 277 int bucket_type = BUCKET_TYPE(bucket_info); 278 u4* entry = _entries + bucket_offset; 279 280 if (bucket_type == VALUE_ONLY_BUCKET_TYPE) { 281 iter->do_value(decode(entry[0])); 282 } else { 283 u4*entry_max = _entries + BUCKET_OFFSET(_buckets[i + 1]); 284 while (entry < entry_max) { 285 iter->do_value(decode(entry[1])); 286 entry += 2; 287 } 288 } 289 } 290 } 291 }; 292 293 //////////////////////////////////////////////////////////////////////// 294 // 295 // Read/Write the contents of a hashtable textual dump (created by 296 // SymbolTable::dump and StringTable::dump). 297 // Because the dump file may be big (hundred of MB in extreme cases), 298 // we use mmap for fast access when reading it. 299 // 300 class HashtableTextDump { 301 int _fd; 302 const char* _base; 303 const char* _p; 304 const char* _end; 305 const char* _filename; 306 size_t _size; 307 int _prefix_type; 308 int _line_no; 309 public: 310 HashtableTextDump(const char* filename); 311 ~HashtableTextDump(); 312 313 enum { 314 SymbolPrefix = 1 << 0, 315 StringPrefix = 1 << 1, 316 Unknown = 1 << 2 317 }; 318 319 void quit(const char* err, const char* msg); 320 321 inline int remain() { 322 return (int)(_end - _p); 323 } 324 325 void corrupted(const char *p, const char *msg); 326 327 inline void corrupted_if(bool cond, const char *msg) { 328 if (cond) { 329 corrupted(_p, msg); 330 } 331 } 332 333 bool skip_newline(); 334 int skip(char must_be_char); 335 void skip_past(char c); 336 void check_version(const char* ver); 337 338 inline void get_num(char delim, int *num) { 339 const char* p = _p; 340 const char* end = _end; 341 u8 n = 0; 342 343 while (p < end) { 344 char c = *p++; 345 if ('0' <= c && c <= '9') { 346 n = n * 10 + (c - '0'); 347 if (n > (u8)INT_MAX) { 348 corrupted(_p, "Num overflow"); 349 } 350 } else if (c == delim) { 351 _p = p; 352 *num = (int)n; 353 return; 354 } else { 355 // Not [0-9], not 'delim' 356 corrupted(_p, "Unrecognized format");; 357 } 358 } 359 360 corrupted(_end, "Incorrect format"); 361 ShouldNotReachHere(); 362 } 363 364 void scan_prefix_type(); 365 int scan_prefix(int* utf8_length); 366 int scan_string_prefix(); 367 int scan_symbol_prefix(); 368 369 jchar unescape(const char* from, const char* end, int count); 370 void get_utf8(char* utf8_buffer, int utf8_length); 371 static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length); 372 }; 373 374 #endif // SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP