1 /*
   2  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
  26 #define SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
  27 
  28 #include "classfile/stringTable.hpp"
  29 #include "classfile/symbolTable.hpp"
  30 #include "memory/allocation.inline.hpp"
  31 #include "oops/symbol.hpp"
  32 #include "services/diagnosticCommand.hpp"
  33 #include "utilities/hashtable.hpp"
  34 
  35 class NumberSeq;
  36 
  37 // Stats for symbol tables in the CDS archive
  38 class CompactHashtableStats VALUE_OBJ_CLASS_SPEC {
  39 public:
  40   int hashentry_count;
  41   int hashentry_bytes;
  42   int bucket_count;
  43   int bucket_bytes;
  44 };
  45 
  46 /////////////////////////////////////////////////////////////////////////
  47 //
  48 // The compact hash table writer. Used at dump time for writing out
  49 // the compact table to the shared archive.
  50 //
  51 // At dump time, the CompactHashtableWriter obtains all entries from the
  52 // symbol table and adds them to a new temporary hash table. The hash
  53 // table size (number of buckets) is calculated using
  54 // '(num_entries + bucket_size - 1) / bucket_size'. The default bucket
  55 // size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option.
  56 // 4 is chosen because it produces smaller sized bucket on average for
  57 // faster lookup. It also has relatively small number of empty buckets and
  58 // good distribution of the entries.
  59 //
  60 // We use a simple hash function (symbol_hash % num_bucket) for the table.
  61 // The new table is compacted when written out. Please see comments
  62 // above the CompactHashtable class for the table layout detail. The bucket
  63 // offsets are written to the archive as part of the compact table. The
  64 // bucket offset is encoded in the low 30-bit (0-29) and the bucket type
  65 // (regular or compact) are encoded in bit[31, 30]. For buckets with more
  66 // than one entry, both symbol hash and symbol offset are written to the
  67 // table. For buckets with only one entry, only the symbol offset is written
  68 // to the table and the buckets are tagged as compact in their type bits.
  69 // Buckets without entry are skipped from the table. Their offsets are
  70 // still written out for faster lookup.
  71 //
  72 class CompactHashtableWriter: public StackObj {
  73 public:
  74   class Entry: public CHeapObj<mtSymbol> {
  75     Entry* _next;
  76     unsigned int _hash;
  77     void* _literal;
  78 
  79   public:
  80     Entry(unsigned int hash, Symbol *symbol) : _next(NULL), _hash(hash), _literal(symbol) {}
  81 
  82     void *value() {
  83       return _literal;
  84     }
  85     Symbol *symbol() {
  86       return (Symbol*)_literal;
  87     }
  88     unsigned int hash() {
  89       return _hash;
  90     }
  91     Entry *next()           {return _next;}
  92     void set_next(Entry *p) {_next = p;}
  93   }; // class CompactHashtableWriter::Entry
  94 
  95 private:
  96   static int number_of_buckets(int num_entries);
  97 
  98   const char* _table_name;
  99   int _num_entries;
 100   int _num_buckets;
 101   juint* _bucket_sizes;
 102   Entry** _buckets;
 103   int _required_bytes;
 104   CompactHashtableStats* _stats;
 105 
 106 public:
 107   // This is called at dump-time only
 108   CompactHashtableWriter(const char* table_name, int num_entries, CompactHashtableStats* stats);
 109   ~CompactHashtableWriter();
 110 
 111   int get_required_bytes() {
 112     return _required_bytes;
 113   }
 114 
 115   void add(unsigned int hash, Symbol* symbol) {
 116     add(hash, new Entry(hash, symbol));
 117   }
 118 
 119 private:
 120   void add(unsigned int hash, Entry* entry);
 121   juint* dump_table(juint* p, juint** first_bucket, NumberSeq* summary);
 122   juint* dump_buckets(juint* table, juint* p, NumberSeq* summary);
 123 
 124 public:
 125   void dump(char** top, char* end);
 126 };
 127 
 128 #define REGULAR_BUCKET_TYPE       0
 129 #define COMPACT_BUCKET_TYPE       1
 130 #define TABLEEND_BUCKET_TYPE      3
 131 #define BUCKET_OFFSET_MASK        0x3FFFFFFF
 132 #define BUCKET_OFFSET(info)       ((info) & BUCKET_OFFSET_MASK)
 133 #define BUCKET_TYPE_SHIFT         30
 134 #define BUCKET_TYPE(info)         (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT)
 135 #define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK))
 136 
 137 /////////////////////////////////////////////////////////////////////////////
 138 //
 139 // CompactHashtable is used to stored the CDS archive's symbol table. Used
 140 // at runtime only to access the compact table from the archive.
 141 //
 142 // Because these tables are read-only (no entries can be added/deleted) at run-time
 143 // and tend to have large number of entries, we try to minimize the footprint
 144 // cost per entry.
 145 //
 146 // Layout of compact symbol table in the shared archive:
 147 //
 148 //   uintx base_address;
 149 //   juint num_symbols;
 150 //   juint num_buckets;
 151 //   juint bucket_infos[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset
 152 //   juint table[]
 153 //
 154 // -----------------------------------
 155 // | base_address  | num_symbols     |
 156 // |---------------------------------|
 157 // | num_buckets   | bucket_info0    |
 158 // |---------------------------------|
 159 // | bucket_info1  | bucket_info2    |
 160 // | bucket_info3    ...             |
 161 // | ....          | table_end_info  |
 162 // |---------------------------------|
 163 // | entry0                          |
 164 // | entry1                          |
 165 // | entry2                          |
 166 // |                                 |
 167 // | ...                             |
 168 // -----------------------------------
 169 //
 170 // The size of the bucket_info table is 'num_buckets + 1'. Each entry of the
 171 // bucket_info table is a 32-bit encoding of the bucket type and bucket offset,
 172 // with the type in the left-most 2-bit and offset in the remaining 30-bit.
 173 // The last entry is a special type. It contains the offset of the last
 174 // bucket end. We use that information when traversing the compact table.
 175 //
 176 // There are two types of buckets, regular buckets and compact buckets. The
 177 // compact buckets have '01' in their highest 2-bit, and regular buckets have
 178 // '00' in their highest 2-bit.
 179 //
 180 // For normal buckets, each symbol's entry is 8 bytes in the table[]:
 181 //   juint hash;    /* symbol hash */
 182 //   juint offset;  /* Symbol* sym = (Symbol*)(base_address + offset) */
 183 //
 184 // For compact buckets, each entry has only the 4-byte 'offset' in the table[].
 185 //
 186 // See CompactHashtable::lookup() for how the table is searched at runtime.
 187 // See CompactHashtableWriter::dump() for how the table is written at CDS
 188 // dump time.
 189 //
 190 template <class T, class N> class CompactHashtable VALUE_OBJ_CLASS_SPEC {
 191   friend class VMStructs;
 192   uintx  _base_address;
 193   juint  _entry_count;
 194   juint  _bucket_count;
 195   juint  _table_end_offset;
 196   juint* _buckets;
 197 
 198   inline bool equals(T entry, const char* name, int len) {
 199     if (entry->equals(name, len)) {
 200       assert(entry->refcount() == -1, "must be shared");
 201       return true;
 202     } else {
 203       return false;
 204     }
 205   }
 206 
 207 public:
 208   CompactHashtable() {
 209     _entry_count = 0;
 210     _bucket_count = 0;
 211     _table_end_offset = 0;
 212     _buckets = 0;
 213   }
 214   const char* init(const char *buffer);
 215 
 216   // Lookup an entry from the compact table
 217   inline T lookup(const N* name, unsigned int hash, int len) {
 218     if (_entry_count > 0) {
 219       assert(!DumpSharedSpaces, "run-time only");
 220       int index = hash % _bucket_count;
 221       juint bucket_info = _buckets[index];
 222       juint bucket_offset = BUCKET_OFFSET(bucket_info);
 223       int   bucket_type = BUCKET_TYPE(bucket_info);
 224       juint* bucket = _buckets + bucket_offset;
 225       juint* bucket_end = _buckets;
 226 
 227       if (bucket_type == COMPACT_BUCKET_TYPE) {
 228         // the compact bucket has one entry with symbol offset only
 229         T entry = (T)((void*)(_base_address + bucket[0]));
 230         if (equals(entry, name, len)) {
 231           return entry;
 232         }
 233       } else {
 234         // This is a regular bucket, which has more than one
 235         // entries. Each entry is a pair of symbol (hash, offset).
 236         // Seek until the end of the bucket.
 237         bucket_end += BUCKET_OFFSET(_buckets[index + 1]);
 238         while (bucket < bucket_end) {
 239           unsigned int h = (unsigned int)(bucket[0]);
 240           if (h == hash) {
 241             juint offset = bucket[1];
 242             T entry = (T)((void*)(_base_address + offset));
 243             if (equals(entry, name, len)) {
 244               return entry;
 245             }
 246           }
 247           bucket += 2;
 248         }
 249       }
 250     }
 251     return NULL;
 252   }
 253 
 254   // iterate over symbols
 255   void symbols_do(SymbolClosure *cl);
 256 };
 257 
 258 ////////////////////////////////////////////////////////////////////////
 259 //
 260 // Read/Write the contents of a hashtable textual dump (created by
 261 // SymbolTable::dump).
 262 // Because the dump file may be big (hundred of MB in extreme cases),
 263 // we use mmap for fast access when reading it.
 264 //
 265 class HashtableTextDump VALUE_OBJ_CLASS_SPEC {
 266   int _fd;
 267   const char* _base;
 268   const char* _p;
 269   const char* _end;
 270   const char* _filename;
 271   size_t      _size;
 272 public:
 273   HashtableTextDump(const char* filename);
 274   ~HashtableTextDump();
 275 
 276   void quit(const char* err, const char* msg);
 277 
 278   inline int remain() {
 279     return (int)(_end - _p);
 280   }
 281 
 282   void corrupted(const char *p);
 283 
 284   inline void corrupted_if(bool cond) {
 285     if (cond) {
 286       corrupted(_p);
 287     }
 288   }
 289 
 290   bool skip_newline();
 291   int skip(char must_be_char);
 292   void skip_past(char c);
 293   void check_version(const char* ver);
 294 
 295   inline int get_num(char delim) {
 296     const char* p   = _p;
 297     const char* end = _end;
 298     int num = 0;
 299 
 300     while (p < end) {
 301       char c = *p ++;
 302       if ('0' <= c && c <= '9') {
 303         num = num * 10 + (c - '0');
 304       } else if (c == delim) {
 305         _p = p;
 306         return num;
 307       } else {
 308         corrupted(p-1);
 309       }
 310     }
 311     corrupted(_end);
 312     ShouldNotReachHere();
 313     return 0;
 314   }
 315 
 316   int scan_prefix();
 317   int scan_prefix2();
 318 
 319   jchar unescape(const char* from, const char* end, int count);
 320   void get_utf8(char* utf8_buffer, int utf8_length);
 321   static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);
 322 };
 323 
 324 ///////////////////////////////////////////////////////////////////////
 325 //
 326 // jcmd command support for symbol table and string table dumping:
 327 //   VM.symboltable -verbose: for dumping the symbol table
 328 //   VM.stringtable -verbose: for dumping the string table
 329 //
 330 class VM_DumpHashtable : public VM_Operation {
 331 private:
 332   outputStream* _out;
 333   int _which;
 334   bool _verbose;
 335 public:
 336   enum {
 337     DumpSymbols = 1 << 0,
 338     DumpStrings = 1 << 1,
 339     DumpSysDict = 1 << 2  // not implemented yet
 340   };
 341   VM_DumpHashtable(outputStream* out, int which, bool verbose) {
 342     _out = out;
 343     _which = which;
 344     _verbose = verbose;
 345   }
 346 
 347   virtual VMOp_Type type() const { return VMOp_DumpHashtable; }
 348 
 349   virtual void doit() {
 350     switch (_which) {
 351     case DumpSymbols:
 352       SymbolTable::dump(_out, _verbose);
 353       break;
 354     case DumpStrings:
 355       StringTable::dump(_out, _verbose);
 356       break;
 357     default:
 358       ShouldNotReachHere();
 359     }
 360   }
 361 };
 362 
 363 class SymboltableDCmd : public DCmdWithParser {
 364 protected:
 365   DCmdArgument<bool> _verbose;
 366 public:
 367   SymboltableDCmd(outputStream* output, bool heap);
 368   static const char* name() {
 369     return "VM.symboltable";
 370   }
 371   static const char* description() {
 372     return "Dump symbol table.";
 373   }
 374   static const char* impact() {
 375     return "Medium: Depends on Java content.";
 376   }
 377   static const JavaPermission permission() {
 378     JavaPermission p = {"java.lang.management.ManagementPermission",
 379                         "monitor", NULL};
 380     return p;
 381   }
 382   static int num_arguments();
 383   virtual void execute(DCmdSource source, TRAPS);
 384 };
 385 
 386 class StringtableDCmd : public DCmdWithParser {
 387 protected:
 388   DCmdArgument<bool> _verbose;
 389 public:
 390   StringtableDCmd(outputStream* output, bool heap);
 391   static const char* name() {
 392     return "VM.stringtable";
 393   }
 394   static const char* description() {
 395     return "Dump string table.";
 396   }
 397   static const char* impact() {
 398     return "Medium: Depends on Java content.";
 399   }
 400   static const JavaPermission permission() {
 401     JavaPermission p = {"java.lang.management.ManagementPermission",
 402                         "monitor", NULL};
 403     return p;
 404   }
 405   static int num_arguments();
 406   virtual void execute(DCmdSource source, TRAPS);
 407 };
 408 
 409 #endif // SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP