1 /*
   2  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
  26 #define SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
  27 
  28 #include "classfile/stringTable.hpp"
  29 #include "classfile/symbolTable.hpp"
  30 #include "oops/symbol.hpp"
  31 #include "services/diagnosticCommand.hpp"
  32 #include "utilities/hashtable.hpp"
  33 
  34 class NumberSeq;
  35 
  36 // Stats for symbol tables in the CDS archive
  37 class CompactHashtableStats VALUE_OBJ_CLASS_SPEC {
  38 public:
  39   int hashentry_count;
  40   int hashentry_bytes;
  41   int bucket_count;
  42   int bucket_bytes;
  43 };
  44 
  45 /////////////////////////////////////////////////////////////////////////
  46 //
  47 // The compact hash table writer. Used at dump time for writing out
  48 // the compact table to the shared archive.
  49 //
  50 // At dump time, the CompactHashtableWriter obtains all entries from the
  51 // symbol/string table and adds them to a new temporary hash table. The hash
  52 // table size (number of buckets) is calculated using
  53 // '(num_entries + bucket_size - 1) / bucket_size'. The default bucket
  54 // size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option.
  55 // 4 is chosen because it produces smaller sized bucket on average for
  56 // faster lookup. It also has relatively small number of empty buckets and
  57 // good distribution of the entries.
  58 //
  59 // We use a simple hash function (hash % num_bucket) for the table.
  60 // The new table is compacted when written out. Please see comments
  61 // above the CompactHashtable class for the table layout detail. The bucket
  62 // offsets are written to the archive as part of the compact table. The
// bucket offset is encoded in the low 30 bits (0-29) and the bucket type
// (regular or compact) is encoded in bit[31, 30]. For buckets with more
  65 // than one entry, both hash and entry offset are written to the
  66 // table. For buckets with only one entry, only the entry offset is written
  67 // to the table and the buckets are tagged as compact in their type bits.
  68 // Buckets without entry are skipped from the table. Their offsets are
  69 // still written out for faster lookup.
  70 //
  71 class CompactHashtableWriter: public StackObj {
  72 public:
  73   class Entry: public CHeapObj<mtSymbol> {
  74     Entry* _next;
  75     unsigned int _hash;
  76     void* _literal;
  77 
  78   public:
  79     Entry(unsigned int hash, Symbol *symbol) : _next(NULL), _hash(hash), _literal(symbol) {}
  80     Entry(unsigned int hash, oop string)     : _next(NULL), _hash(hash), _literal(string) {}
  81 
  82     void *value() {
  83       return _literal;
  84     }
  85     Symbol *symbol() {
  86       return (Symbol*)_literal;
  87     }
  88     oop string() {
  89       return (oop)_literal;
  90     }
  91     unsigned int hash() {
  92       return _hash;
  93     }
  94     Entry *next()           {return _next;}
  95     void set_next(Entry *p) {_next = p;}
  96   }; // class CompactHashtableWriter::Entry
  97 
  98 private:
  99   static int number_of_buckets(int num_entries);
 100 
 101   int _type;
 102   int _num_entries;
 103   int _num_buckets;
 104   juint* _bucket_sizes;
 105   Entry** _buckets;
 106   int _required_bytes;
 107   CompactHashtableStats* _stats;
 108 
 109 public:
 110   // This is called at dump-time only
 111   CompactHashtableWriter(int table_type, int num_entries, CompactHashtableStats* stats);
 112   ~CompactHashtableWriter();
 113 
 114   int get_required_bytes() {
 115     return _required_bytes;
 116   }
 117 
 118   inline void add(unsigned int hash, Symbol* symbol);
 119   inline void add(unsigned int hash, oop string);
 120 
 121 private:
 122   void add(unsigned int hash, Entry* entry);
 123   juint* dump_table(juint* p, juint** first_bucket, NumberSeq* summary);
 124   juint* dump_buckets(juint* table, juint* p, NumberSeq* summary);
 125 
 126 public:
 127   void dump(char** top, char* end);
 128   const char* table_name();
 129 };
 130 
 131 #define REGULAR_BUCKET_TYPE       0
 132 #define COMPACT_BUCKET_TYPE       1
 133 #define TABLEEND_BUCKET_TYPE      3
 134 #define BUCKET_OFFSET_MASK        0x3FFFFFFF
 135 #define BUCKET_OFFSET(info)       ((info) & BUCKET_OFFSET_MASK)
 136 #define BUCKET_TYPE_SHIFT         30
 137 #define BUCKET_TYPE(info)         (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT)
 138 #define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK))
 139 
 140 /////////////////////////////////////////////////////////////////////////////
 141 //
// CompactHashtable is used to store the CDS archive's symbol/string table. Used
 143 // at runtime only to access the compact table from the archive.
 144 //
 145 // Because these tables are read-only (no entries can be added/deleted) at run-time
 146 // and tend to have large number of entries, we try to minimize the footprint
 147 // cost per entry.
 148 //
 149 // Layout of compact table in the shared archive:
 150 //
 151 //   uintx base_address;
 152 //   juint num_entries;
 153 //   juint num_buckets;
 154 //   juint bucket_infos[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset
 155 //   juint table[]
 156 //
 157 // -----------------------------------
 158 // | base_address  | num_entries     |
 159 // |---------------------------------|
 160 // | num_buckets   | bucket_info0    |
 161 // |---------------------------------|
 162 // | bucket_info1  | bucket_info2    |
 163 // | bucket_info3    ...             |
 164 // | ....          | table_end_info  |
 165 // |---------------------------------|
 166 // | entry0                          |
 167 // | entry1                          |
 168 // | entry2                          |
 169 // |                                 |
 170 // | ...                             |
 171 // -----------------------------------
 172 //
 173 // The size of the bucket_info table is 'num_buckets + 1'. Each entry of the
 174 // bucket_info table is a 32-bit encoding of the bucket type and bucket offset,
 175 // with the type in the left-most 2-bit and offset in the remaining 30-bit.
 176 // The last entry is a special type. It contains the offset of the last
 177 // bucket end. We use that information when traversing the compact table.
 178 //
 179 // There are two types of buckets, regular buckets and compact buckets. The
 180 // compact buckets have '01' in their highest 2-bit, and regular buckets have
 181 // '00' in their highest 2-bit.
 182 //
// For regular buckets, each entry is 8 bytes in the table[]:
 184 //   juint hash;    /* symbol/string hash */
 185 //   union {
 186 //     juint offset;  /* Symbol* sym = (Symbol*)(base_address + offset) */
 187 //     narrowOop str; /* String narrowOop encoding */
 188 //   }
 189 //
 190 //
 191 // For compact buckets, each entry has only the 4-byte 'offset' in the table[].
 192 //
 193 // See CompactHashtable::lookup() for how the table is searched at runtime.
 194 // See CompactHashtableWriter::dump() for how the table is written at CDS
 195 // dump time.
 196 //
 197 template <class T, class N> class CompactHashtable VALUE_OBJ_CLASS_SPEC {
 198   friend class VMStructs;
 199 
 200  public:
 201   enum CompactHashtableType {
 202     _symbol_table = 0,
 203     _string_table = 1
 204   };
 205 
 206 private:
 207   CompactHashtableType _type;
 208   uintx  _base_address;
 209   juint  _entry_count;
 210   juint  _bucket_count;
 211   juint  _table_end_offset;
 212   juint* _buckets;
 213 
 214   inline Symbol* lookup_entry(CompactHashtable<Symbol*, char>* const t,
 215                               juint* addr, const char* name, int len);
 216 
 217   inline oop lookup_entry(CompactHashtable<oop, char>* const t,
 218                           juint* addr, const char* name, int len);
 219 public:
 220   CompactHashtable() {
 221     _entry_count = 0;
 222     _bucket_count = 0;
 223     _table_end_offset = 0;
 224     _buckets = 0;
 225   }
 226   const char* init(CompactHashtableType type, const char *buffer);
 227 
 228   void reset() {
 229     _entry_count = 0;
 230     _bucket_count = 0;
 231     _table_end_offset = 0;
 232     _buckets = 0;
 233   }
 234 
 235   // Lookup an entry from the compact table
 236   inline T lookup(const N* name, unsigned int hash, int len);
 237 
 238   // iterate over symbols
 239   void symbols_do(SymbolClosure *cl);
 240 
 241   // iterate over strings
 242   void oops_do(OopClosure* f);
 243 };
 244 
 245 ////////////////////////////////////////////////////////////////////////
 246 //
 247 // Read/Write the contents of a hashtable textual dump (created by
 248 // SymbolTable::dump and StringTable::dump).
// Because the dump file may be big (hundreds of MB in extreme cases),
 250 // we use mmap for fast access when reading it.
 251 //
 252 class HashtableTextDump VALUE_OBJ_CLASS_SPEC {
 253   int _fd;
 254   const char* _base;
 255   const char* _p;
 256   const char* _end;
 257   const char* _filename;
 258   size_t      _size;
 259   int         _prefix_type;
 260   int         _line_no;
 261 public:
 262   HashtableTextDump(const char* filename);
 263   ~HashtableTextDump();
 264 
 265   enum {
 266     SymbolPrefix = 1 << 0,
 267     StringPrefix = 1 << 1,
 268     Unknown = 1 << 2
 269   };
 270 
 271   void quit(const char* err, const char* msg);
 272 
 273   inline int remain() {
 274     return (int)(_end - _p);
 275   }
 276 
 277   void corrupted(const char *p, const char *msg);
 278 
 279   inline void corrupted_if(bool cond) {
 280     if (cond) {
 281       corrupted(_p, NULL);
 282     }
 283   }
 284 
 285   bool skip_newline();
 286   int skip(char must_be_char);
 287   void skip_past(char c);
 288   void check_version(const char* ver);
 289 
 290   inline bool get_num(char delim, int *utf8_length) {
 291     const char* p   = _p;
 292     const char* end = _end;
 293     int num = 0;
 294 
 295     while (p < end) {
 296       char c = *p ++;
 297       if ('0' <= c && c <= '9') {
 298         num = num * 10 + (c - '0');
 299       } else if (c == delim) {
 300         _p = p;
 301         *utf8_length = num;
 302         return true;
 303       } else {
 304         // Not [0-9], not 'delim'
 305         return false;
 306       }
 307     }
 308     corrupted(_end, "Incorrect format");
 309     ShouldNotReachHere();
 310     return false;
 311   }
 312 
 313   void scan_prefix_type();
 314   int scan_prefix(int* utf8_length);
 315   int scan_string_prefix();
 316   int scan_symbol_prefix();
 317 
 318   jchar unescape(const char* from, const char* end, int count);
 319   void get_utf8(char* utf8_buffer, int utf8_length);
 320   static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);
 321 };
 322 
 323 ///////////////////////////////////////////////////////////////////////
 324 //
 325 // jcmd command support for symbol table and string table dumping:
 326 //   VM.symboltable -verbose: for dumping the symbol table
 327 //   VM.stringtable -verbose: for dumping the string table
 328 //
 329 class VM_DumpHashtable : public VM_Operation {
 330 private:
 331   outputStream* _out;
 332   int _which;
 333   bool _verbose;
 334 public:
 335   enum {
 336     DumpSymbols = 1 << 0,
 337     DumpStrings = 1 << 1,
 338     DumpSysDict = 1 << 2  // not implemented yet
 339   };
 340   VM_DumpHashtable(outputStream* out, int which, bool verbose) {
 341     _out = out;
 342     _which = which;
 343     _verbose = verbose;
 344   }
 345 
 346   virtual VMOp_Type type() const { return VMOp_DumpHashtable; }
 347 
 348   virtual void doit() {
 349     switch (_which) {
 350     case DumpSymbols:
 351       SymbolTable::dump(_out, _verbose);
 352       break;
 353     case DumpStrings:
 354       StringTable::dump(_out, _verbose);
 355       break;
 356     default:
 357       ShouldNotReachHere();
 358     }
 359   }
 360 };
 361 
 362 class SymboltableDCmd : public DCmdWithParser {
 363 protected:
 364   DCmdArgument<bool> _verbose;
 365 public:
 366   SymboltableDCmd(outputStream* output, bool heap);
 367   static const char* name() {
 368     return "VM.symboltable";
 369   }
 370   static const char* description() {
 371     return "Dump symbol table.";
 372   }
 373   static const char* impact() {
 374     return "Medium: Depends on Java content.";
 375   }
 376   static const JavaPermission permission() {
 377     JavaPermission p = {"java.lang.management.ManagementPermission",
 378                         "monitor", NULL};
 379     return p;
 380   }
 381   static int num_arguments();
 382   virtual void execute(DCmdSource source, TRAPS);
 383 };
 384 
 385 class StringtableDCmd : public DCmdWithParser {
 386 protected:
 387   DCmdArgument<bool> _verbose;
 388 public:
 389   StringtableDCmd(outputStream* output, bool heap);
 390   static const char* name() {
 391     return "VM.stringtable";
 392   }
 393   static const char* description() {
 394     return "Dump string table.";
 395   }
 396   static const char* impact() {
 397     return "Medium: Depends on Java content.";
 398   }
 399   static const JavaPermission permission() {
 400     JavaPermission p = {"java.lang.management.ManagementPermission",
 401                         "monitor", NULL};
 402     return p;
 403   }
 404   static int num_arguments();
 405   virtual void execute(DCmdSource source, TRAPS);
 406 };
 407 
 408 #endif // SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP