1 /*
   2  * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
  26 #define SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
  27 
  28 #include "oops/array.hpp"
  29 #include "oops/symbol.hpp"
  30 #include "utilities/growableArray.hpp"
  31 
  32 
  33 template <
  34   typename K,
  35   typename V,
  36   V (*DECODE)(address base_address, u4 offset),
  37   bool (*EQUALS)(V value, K key, int len)
  38   >
  39 class CompactHashtable;
  40 class NumberSeq;
  41 class SimpleCompactHashtable;
  42 class SerializeClosure;
  43 
  44 // Stats for symbol tables in the CDS archive
  45 class CompactHashtableStats {
  46 public:
  47   int hashentry_count;
  48   int hashentry_bytes;
  49   int bucket_count;
  50   int bucket_bytes;
  51 };
  52 
  53 #if INCLUDE_CDS
  54 /////////////////////////////////////////////////////////////////////////
  55 //
  56 // The compact hash table writer. Used at dump time for writing out
  57 // the compact table to the shared archive.
  58 //
  59 // At dump time, the CompactHashtableWriter obtains all entries from the
  60 // symbol/string table and adds them to a new temporary hash table. The hash
  61 // table size (number of buckets) is calculated using
  62 // '(num_entries + bucket_size - 1) / bucket_size'. The default bucket
  63 // size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option.
  64 // 4 is chosen because it produces smaller sized bucket on average for
  65 // faster lookup. It also has relatively small number of empty buckets and
  66 // good distribution of the entries.
  67 //
  68 // We use a simple hash function (hash % num_bucket) for the table.
  69 // The new table is compacted when written out. Please see comments
  70 // above the CompactHashtable class for the table layout detail. The bucket
  71 // offsets are written to the archive as part of the compact table. The
  72 // bucket offset is encoded in the low 30-bit (0-29) and the bucket type
  73 // (regular or compact) are encoded in bit[31, 30]. For buckets with more
  74 // than one entry, both hash and entry offset are written to the
  75 // table. For buckets with only one entry, only the entry offset is written
  76 // to the table and the buckets are tagged as compact in their type bits.
  77 // Buckets without entry are skipped from the table. Their offsets are
  78 // still written out for faster lookup.
  79 //
  80 class CompactHashtableWriter: public StackObj {
  81 public:
  82   class Entry {
  83     unsigned int _hash;
  84     u4 _value;
  85 
  86   public:
  87     Entry() {}
  88     Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {}
  89 
  90     u4 value() {
  91       return _value;
  92     }
  93     unsigned int hash() {
  94       return _hash;
  95     }
  96 
  97     bool operator==(const CompactHashtableWriter::Entry& other) {
  98       return (_value == other._value && _hash == other._hash);
  99     }
 100   }; // class CompactHashtableWriter::Entry
 101 
 102 private:
 103   int _num_entries;
 104   int _num_buckets;
 105   int _num_empty_buckets;
 106   int _num_value_only_buckets;
 107   int _num_other_buckets;
 108   GrowableArray<Entry>** _buckets;
 109   CompactHashtableStats* _stats;
 110   Array<u4>* _compact_buckets;
 111   Array<u4>* _compact_entries;
 112 
 113 public:
 114   // This is called at dump-time only
 115   CompactHashtableWriter(int num_buckets, CompactHashtableStats* stats);
 116   ~CompactHashtableWriter();
 117 
 118   void add(unsigned int hash, u4 value);
 119 
 120 private:
 121   void allocate_table();
 122   void dump_table(NumberSeq* summary);
 123 
 124 public:
 125   void dump(SimpleCompactHashtable *cht, const char* table_name);
 126 
 127   static int default_num_buckets(size_t num_entries) {
 128     return default_num_buckets((int)num_entries);
 129   }
 130   static int default_num_buckets(int num_entries) {
 131     int num_buckets = num_entries / SharedSymbolTableBucketSize;
 132     // calculation of num_buckets can result in zero buckets, we need at least one
 133     return (num_buckets < 1) ? 1 : num_buckets;
 134   }
 135 };
 136 #endif // INCLUDE_CDS
 137 
 138 #define REGULAR_BUCKET_TYPE       0
 139 #define VALUE_ONLY_BUCKET_TYPE    1
 140 #define TABLEEND_BUCKET_TYPE      3
 141 #define BUCKET_OFFSET_MASK        0x3FFFFFFF
 142 #define BUCKET_OFFSET(info)       ((info) & BUCKET_OFFSET_MASK)
 143 #define BUCKET_TYPE_SHIFT         30
 144 #define BUCKET_TYPE(info)         (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT)
 145 #define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK))
 146 
 147 /////////////////////////////////////////////////////////////////////////////
 148 //
 149 // CompactHashtable is used to store the CDS archive's symbol/string tables.
 150 //
 151 // Because these tables are read-only (no entries can be added/deleted) at run-time
 152 // and tend to have large number of entries, we try to minimize the footprint
 153 // cost per entry.
 154 //
 155 // The CompactHashtable is split into two arrays
 156 //
 157 //   u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset
 158 //   u4 entries[<variable size>]
 159 //
 160 // The size of buckets[] is 'num_buckets + 1'. Each entry of
 161 // buckets[] is a 32-bit encoding of the bucket type and bucket offset,
 162 // with the type in the left-most 2-bit and offset in the remaining 30-bit.
 163 // The last entry is a special type. It contains the end of the last
 164 // bucket.
 165 //
 166 // There are two types of buckets, regular buckets and value_only buckets. The
 167 // value_only buckets have '01' in their highest 2-bit, and regular buckets have
 168 // '00' in their highest 2-bit.
 169 //
 170 // For normal buckets, each entry is 8 bytes in the entries[]:
 171 //   u4 hash;    /* symbol/string hash */
 172 //   union {
 173 //     u4 offset;  /* Symbol* sym = (Symbol*)(base_address + offset) */
 174 //     narrowOop str; /* String narrowOop encoding */
 175 //   }
 176 //
 177 //
 178 // For value_only buckets, each entry has only the 4-byte 'offset' in the entries[].
 179 //
 180 // Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code
 181 //            is skipped.
 182 // buckets[0, 4, 5, ....]
 183 //         |  |  |
 184 //         |  |  +---+
 185 //         |  |      |
 186 //         |  +----+ |
 187 //         v       v v
 188 // entries[H,O,H,O,O,H,O,H,O.....]
 189 //
 190 // See CompactHashtable::lookup() for how the table is searched at runtime.
 191 // See CompactHashtableWriter::dump() for how the table is written at CDS
 192 // dump time.
 193 //
 194 class SimpleCompactHashtable {
 195 protected:
 196   address  _base_address;
 197   u4  _bucket_count;
 198   u4  _entry_count;
 199   u4* _buckets;
 200   u4* _entries;
 201 
 202 public:
 203   SimpleCompactHashtable() {
 204     _entry_count = 0;
 205     _bucket_count = 0;
 206     _buckets = 0;
 207     _entries = 0;
 208   }
 209 
 210   void reset() {
 211     _bucket_count = 0;
 212     _entry_count = 0;
 213     _buckets = 0;
 214     _entries = 0;
 215   }
 216 
 217   void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) {
 218     _base_address = base_address;
 219     _bucket_count = bucket_count;
 220     _entry_count = entry_count;
 221     _buckets = buckets;
 222     _entries = entries;
 223   }
 224 
 225   // Read/Write the table's header from/to the CDS archive
 226   void serialize_header(SerializeClosure* soc) NOT_CDS_RETURN;
 227 
 228   inline bool empty() {
 229     return (_entry_count == 0);
 230   }
 231 };
 232 
 233 template <
 234   typename K,
 235   typename V,
 236   V (*DECODE)(address base_address, u4 offset),
 237   bool (*EQUALS)(V value, K key, int len)
 238   >
 239 class CompactHashtable : public SimpleCompactHashtable {
 240   friend class VMStructs;
 241 
 242   V decode(u4 offset) const {
 243     return DECODE(_base_address, offset);
 244   }
 245 
 246 public:
 247   // Lookup a value V from the compact table using key K
 248   inline V lookup(K key, unsigned int hash, int len) const {
 249     if (_entry_count > 0) {
 250       int index = hash % _bucket_count;
 251       u4 bucket_info = _buckets[index];
 252       u4 bucket_offset = BUCKET_OFFSET(bucket_info);
 253       int bucket_type = BUCKET_TYPE(bucket_info);
 254       u4* entry = _entries + bucket_offset;
 255 
 256       if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
 257         V value = decode(entry[0]);
 258         if (EQUALS(value, key, len)) {
 259           return value;
 260         }
 261       } else {
 262         // This is a regular bucket, which has more than one
 263         // entries. Each entry is a pair of entry (hash, offset).
 264         // Seek until the end of the bucket.
 265         u4* entry_max = _entries + BUCKET_OFFSET(_buckets[index + 1]);
 266         while (entry < entry_max) {
 267           unsigned int h = (unsigned int)(entry[0]);
 268           if (h == hash) {
 269             V value = decode(entry[1]);
 270             if (EQUALS(value, key, len)) {
 271               return value;
 272             }
 273           }
 274           entry += 2;
 275         }
 276       }
 277     }
 278     return NULL;
 279   }
 280 
 281   template <class ITER>
 282   inline void iterate(ITER* iter) const {
 283     for (u4 i = 0; i < _bucket_count; i++) {
 284       u4 bucket_info = _buckets[i];
 285       u4 bucket_offset = BUCKET_OFFSET(bucket_info);
 286       int bucket_type = BUCKET_TYPE(bucket_info);
 287       u4* entry = _entries + bucket_offset;
 288 
 289       if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
 290         iter->do_value(decode(entry[0]));
 291       } else {
 292         u4*entry_max = _entries + BUCKET_OFFSET(_buckets[i + 1]);
 293         while (entry < entry_max) {
 294           iter->do_value(decode(entry[1]));
 295           entry += 2;
 296         }
 297       }
 298     }
 299   }
 300 
 301   void print_table_statistics(outputStream* st, const char* name) {
 302     st->print_cr("%s statistics:", name);
 303     int total_entries = 0;
 304     int max_bucket = 0;
 305     for (u4 i = 0; i < _bucket_count; i++) {
 306       u4 bucket_info = _buckets[i];
 307       int bucket_type = BUCKET_TYPE(bucket_info);
 308       int bucket_size;
 309 
 310       if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
 311         bucket_size = 1;
 312       } else {
 313         bucket_size = (BUCKET_OFFSET(_buckets[i + 1]) - BUCKET_OFFSET(bucket_info)) / 2;
 314       }
 315       total_entries += bucket_size;
 316       if (max_bucket < bucket_size) {
 317         max_bucket = bucket_size;
 318       }
 319     }
 320     st->print_cr("Number of buckets       : %9d", _bucket_count);
 321     st->print_cr("Number of entries       : %9d", total_entries);
 322     st->print_cr("Maximum bucket size     : %9d", max_bucket);
 323   }
 324 };
 325 
 326 ////////////////////////////////////////////////////////////////////////
 327 //
 328 // OffsetCompactHashtable -- This is used to store many types of objects
 329 // in the CDS archive. On 64-bit platforms, we save space by using a 32-bit
 330 // offset from the CDS base address.
 331 
 332 template <typename V>
 333 inline V read_value_from_compact_hashtable(address base_address, u4 offset) {
 334   return (V)(base_address + offset);
 335 }
 336 
 337 template <
 338   typename K,
 339   typename V,
 340   bool (*EQUALS)(V value, K key, int len)
 341   >
 342 class OffsetCompactHashtable : public CompactHashtable<
 343     K, V, read_value_from_compact_hashtable<V>, EQUALS> {
 344 };
 345 
 346 
 347 ////////////////////////////////////////////////////////////////////////
 348 //
 349 // Read/Write the contents of a hashtable textual dump (created by
 350 // SymbolTable::dump and StringTable::dump).
 351 // Because the dump file may be big (hundred of MB in extreme cases),
 352 // we use mmap for fast access when reading it.
 353 //
 354 class HashtableTextDump {
 355   int _fd;
 356   const char* _base;
 357   const char* _p;
 358   const char* _end;
 359   const char* _filename;
 360   size_t      _size;
 361   int         _prefix_type;
 362   int         _line_no;
 363 public:
 364   HashtableTextDump(const char* filename);
 365   ~HashtableTextDump();
 366 
 367   enum {
 368     SymbolPrefix = 1 << 0,
 369     StringPrefix = 1 << 1,
 370     Unknown = 1 << 2
 371   };
 372 
 373   void quit(const char* err, const char* msg);
 374 
 375   inline int remain() {
 376     return (int)(_end - _p);
 377   }
 378 
 379   void corrupted(const char *p, const char *msg);
 380 
 381   inline void corrupted_if(bool cond, const char *msg) {
 382     if (cond) {
 383       corrupted(_p, msg);
 384     }
 385   }
 386 
 387   bool skip_newline();
 388   int skip(char must_be_char);
 389   void skip_past(char c);
 390   void check_version(const char* ver);
 391 
 392   inline void get_num(char delim, int *num) {
 393     const char* p   = _p;
 394     const char* end = _end;
 395     u8 n = 0;
 396 
 397     while (p < end) {
 398       char c = *p++;
 399       if ('0' <= c && c <= '9') {
 400         n = n * 10 + (c - '0');
 401         if (n > (u8)INT_MAX) {
 402           corrupted(_p, "Num overflow");
 403         }
 404       } else if (c == delim) {
 405         _p = p;
 406         *num = (int)n;
 407         return;
 408       } else {
 409         // Not [0-9], not 'delim'
 410         corrupted(_p, "Unrecognized format");;
 411       }
 412     }
 413 
 414     corrupted(_end, "Incorrect format");
 415     ShouldNotReachHere();
 416   }
 417 
 418   void scan_prefix_type();
 419   int scan_prefix(int* utf8_length);
 420   int scan_string_prefix();
 421   int scan_symbol_prefix();
 422 
 423   jchar unescape(const char* from, const char* end, int count);
 424   void get_utf8(char* utf8_buffer, int utf8_length);
 425   static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);
 426 };
 427 
 428 #endif // SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP