1 /*
   2  * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
  26 #define SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
  27 
  28 #include "oops/array.hpp"
  29 #include "oops/symbol.hpp"
  30 #include "utilities/hashtable.hpp"
  31 
  32 template <class T, class N> class CompactHashtable;
  33 class NumberSeq;
  34 class SimpleCompactHashtable;
  35 class SerializeClosure;
  36 
  37 // Stats for symbol tables in the CDS archive
  38 class CompactHashtableStats {
  39 public:
  40   int hashentry_count;
  41   int hashentry_bytes;
  42   int bucket_count;
  43   int bucket_bytes;
  44 };
  45 
  46 /////////////////////////////////////////////////////////////////////////
  47 //
  48 // The compact hash table writer. Used at dump time for writing out
  49 // the compact table to the shared archive.
  50 //
  51 // At dump time, the CompactHashtableWriter obtains all entries from the
  52 // symbol/string table and adds them to a new temporary hash table. The hash
  53 // table size (number of buckets) is calculated using
  54 // '(num_entries + bucket_size - 1) / bucket_size'. The default bucket
  55 // size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option.
  56 // 4 is chosen because it produces smaller sized bucket on average for
  57 // faster lookup. It also has relatively small number of empty buckets and
  58 // good distribution of the entries.
  59 //
  60 // We use a simple hash function (hash % num_bucket) for the table.
  61 // The new table is compacted when written out. Please see comments
  62 // above the CompactHashtable class for the table layout detail. The bucket
  63 // offsets are written to the archive as part of the compact table. The
  64 // bucket offset is encoded in the low 30-bit (0-29) and the bucket type
  65 // (regular or compact) are encoded in bit[31, 30]. For buckets with more
  66 // than one entry, both hash and entry offset are written to the
  67 // table. For buckets with only one entry, only the entry offset is written
  68 // to the table and the buckets are tagged as compact in their type bits.
  69 // Buckets without entry are skipped from the table. Their offsets are
  70 // still written out for faster lookup.
  71 //
  72 class CompactHashtableWriter: public StackObj {
  73 public:
  74   class Entry {
  75     unsigned int _hash;
  76     u4 _value;
  77 
  78   public:
  79     Entry() {}
  80     Entry(unsigned int hash, u4 val) : _hash(hash), _value(val) {}
  81 
  82     u4 value() {
  83       return _value;
  84     }
  85     unsigned int hash() {
  86       return _hash;
  87     }
  88 
  89     bool operator==(const CompactHashtableWriter::Entry& other) {
  90       return (_value == other._value && _hash == other._hash);
  91     }
  92   }; // class CompactHashtableWriter::Entry
  93 
  94 private:
  95   int _num_entries;
  96   int _num_buckets;
  97   int _num_empty_buckets;
  98   int _num_value_only_buckets;
  99   int _num_other_buckets;
 100   GrowableArray<Entry>** _buckets;
 101   CompactHashtableStats* _stats;
 102   Array<u4>* _compact_buckets;
 103   Array<u4>* _compact_entries;
 104 
 105 public:
 106   // This is called at dump-time only
 107   CompactHashtableWriter(int num_buckets, CompactHashtableStats* stats);
 108   ~CompactHashtableWriter();
 109 
 110   void add(unsigned int hash, u4 value);
 111   void add(u4 value) {
 112     add((unsigned int)value, value);
 113   }
 114 
 115 private:
 116   void allocate_table();
 117   void dump_table(NumberSeq* summary);
 118 
 119 public:
 120   void dump(SimpleCompactHashtable *cht, const char* table_name);
 121   const char* table_name();
 122 };
 123 
 124 class CompactSymbolTableWriter: public CompactHashtableWriter {
 125 public:
 126   CompactSymbolTableWriter(int num_buckets, CompactHashtableStats* stats) :
 127     CompactHashtableWriter(num_buckets, stats) {}
 128   void add(unsigned int hash, Symbol *symbol);
 129   void dump(CompactHashtable<Symbol*, char> *cht);
 130 };
 131 
 132 class CompactStringTableWriter: public CompactHashtableWriter {
 133 public:
 134   CompactStringTableWriter(int num_entries, CompactHashtableStats* stats) :
 135     CompactHashtableWriter(num_entries, stats) {}
 136   void add(unsigned int hash, oop string);
 137   void dump(CompactHashtable<oop, char> *cht);
 138 };
 139 
 140 #define REGULAR_BUCKET_TYPE       0
 141 #define VALUE_ONLY_BUCKET_TYPE    1
 142 #define TABLEEND_BUCKET_TYPE      3
 143 #define BUCKET_OFFSET_MASK        0x3FFFFFFF
 144 #define BUCKET_OFFSET(info)       ((info) & BUCKET_OFFSET_MASK)
 145 #define BUCKET_TYPE_SHIFT         30
 146 #define BUCKET_TYPE(info)         (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT)
 147 #define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK))
 148 
 149 /////////////////////////////////////////////////////////////////////////////
 150 //
 151 // CompactHashtable is used to stored the CDS archive's symbol/string table. Used
 152 // at runtime only to access the compact table from the archive.
 153 //
 154 // Because these tables are read-only (no entries can be added/deleted) at run-time
 155 // and tend to have large number of entries, we try to minimize the footprint
 156 // cost per entry.
 157 //
 158 // The CompactHashtable is split into two arrays
 159 //
 160 //   u4 buckets[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset
 161 //   u4 entries[<variable size>]
 162 //
 163 // The size of buckets[] is 'num_buckets + 1'. Each entry of
 164 // buckets[] is a 32-bit encoding of the bucket type and bucket offset,
 165 // with the type in the left-most 2-bit and offset in the remaining 30-bit.
 166 // The last entry is a special type. It contains the end of the last
 167 // bucket.
 168 //
 169 // There are two types of buckets, regular buckets and value_only buckets. The
 170 // value_only buckets have '01' in their highest 2-bit, and regular buckets have
 171 // '00' in their highest 2-bit.
 172 //
 173 // For normal buckets, each entry is 8 bytes in the entries[]:
 174 //   u4 hash;    /* symbol/string hash */
 175 //   union {
 176 //     u4 offset;  /* Symbol* sym = (Symbol*)(base_address + offset) */
 177 //     narrowOop str; /* String narrowOop encoding */
 178 //   }
 179 //
 180 //
 181 // For value_only buckets, each entry has only the 4-byte 'offset' in the entries[].
 182 //
 183 // Example -- note that the second bucket is a VALUE_ONLY_BUCKET_TYPE so the hash code
 184 //            is skipped.
 185 // buckets[0, 4, 5, ....]
 186 //         |  |  |
 187 //         |  |  +---+
 188 //         |  |      |
 189 //         |  +----+ |
 190 //         v       v v
 191 // entries[H,O,H,O,O,H,O,H,O.....]
 192 //
 193 // See CompactHashtable::lookup() for how the table is searched at runtime.
 194 // See CompactHashtableWriter::dump() for how the table is written at CDS
 195 // dump time.
 196 //
 197 class SimpleCompactHashtable {
 198 protected:
 199   address  _base_address;
 200   u4  _bucket_count;
 201   u4  _entry_count;
 202   u4* _buckets;
 203   u4* _entries;
 204 
 205 public:
 206   SimpleCompactHashtable() {
 207     _entry_count = 0;
 208     _bucket_count = 0;
 209     _buckets = 0;
 210     _entries = 0;
 211   }
 212 
 213   void reset() {
 214     _bucket_count = 0;
 215     _entry_count = 0;
 216     _buckets = 0;
 217     _entries = 0;
 218   }
 219 
 220   void init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) {
 221     _base_address = base_address;
 222     _bucket_count = bucket_count;
 223     _entry_count = entry_count;
 224     _buckets = buckets;
 225     _entries = entries;
 226   }
 227 
 228   template <class I> inline void iterate(const I& iterator);
 229 
 230   bool exists(u4 value);
 231 
 232   // For reading from/writing to the CDS archive
 233   void serialize(SerializeClosure* soc);
 234 
 235   inline bool empty() {
 236     return (_entry_count == 0);
 237   }
 238 };
 239 
 240 template <class T, class N> class CompactHashtable : public SimpleCompactHashtable {
 241   friend class VMStructs;
 242 
 243 public:
 244   enum CompactHashtableType {
 245     _symbol_table = 0,
 246     _string_table = 1
 247   };
 248 
 249 private:
 250   u4 _type;
 251 
 252   inline Symbol* decode_entry(CompactHashtable<Symbol*, char>* const t,
 253                               u4 offset, const char* name, int len);
 254 
 255   inline oop decode_entry(CompactHashtable<oop, char>* const t,
 256                           u4 offset, const char* name, int len);
 257 public:
 258   CompactHashtable() : SimpleCompactHashtable() {}
 259 
 260   void set_type(CompactHashtableType type) {
 261     _type = (u4)type;
 262   }
 263 
 264   // Lookup an entry from the compact table
 265   inline T lookup(const N* name, unsigned int hash, int len);
 266 
 267   // iterate over symbols
 268   void symbols_do(SymbolClosure *cl);
 269 
 270   // iterate over strings
 271   void oops_do(OopClosure* f);
 272 
 273   // For reading from/writing to the CDS archive
 274   void serialize(SerializeClosure* soc);
 275 
 276   uintx base_address() {
 277     return (uintx) _base_address;
 278   }
 279 };
 280 
 281 ////////////////////////////////////////////////////////////////////////
 282 //
 283 // Read/Write the contents of a hashtable textual dump (created by
 284 // SymbolTable::dump and StringTable::dump).
 285 // Because the dump file may be big (hundred of MB in extreme cases),
 286 // we use mmap for fast access when reading it.
 287 //
 288 class HashtableTextDump {
 289   int _fd;
 290   const char* _base;
 291   const char* _p;
 292   const char* _end;
 293   const char* _filename;
 294   size_t      _size;
 295   int         _prefix_type;
 296   int         _line_no;
 297 public:
 298   HashtableTextDump(const char* filename);
 299   ~HashtableTextDump();
 300 
 301   enum {
 302     SymbolPrefix = 1 << 0,
 303     StringPrefix = 1 << 1,
 304     Unknown = 1 << 2
 305   };
 306 
 307   void quit(const char* err, const char* msg);
 308 
 309   inline int remain() {
 310     return (int)(_end - _p);
 311   }
 312 
 313   void corrupted(const char *p, const char *msg);
 314 
 315   inline void corrupted_if(bool cond, const char *msg) {
 316     if (cond) {
 317       corrupted(_p, msg);
 318     }
 319   }
 320 
 321   bool skip_newline();
 322   int skip(char must_be_char);
 323   void skip_past(char c);
 324   void check_version(const char* ver);
 325 
 326   inline void get_num(char delim, int *num) {
 327     const char* p   = _p;
 328     const char* end = _end;
 329     u8 n = 0;
 330 
 331     while (p < end) {
 332       char c = *p++;
 333       if ('0' <= c && c <= '9') {
 334         n = n * 10 + (c - '0');
 335         if (n > (u8)INT_MAX) {
 336           corrupted(_p, "Num overflow");
 337         }
 338       } else if (c == delim) {
 339         _p = p;
 340         *num = (int)n;
 341         return;
 342       } else {
 343         // Not [0-9], not 'delim'
 344         corrupted(_p, "Unrecognized format");;
 345       }
 346     }
 347 
 348     corrupted(_end, "Incorrect format");
 349     ShouldNotReachHere();
 350   }
 351 
 352   void scan_prefix_type();
 353   int scan_prefix(int* utf8_length);
 354   int scan_string_prefix();
 355   int scan_symbol_prefix();
 356 
 357   jchar unescape(const char* from, const char* end, int count);
 358   void get_utf8(char* utf8_buffer, int utf8_length);
 359   static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);
 360 };
 361 
 362 #endif // SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP