1 /*
   2  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
  26 #define SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP
  27 
  28 #include "classfile/stringTable.hpp"
  29 #include "classfile/symbolTable.hpp"
  30 #include "memory/allocation.inline.hpp"
  31 #include "oops/oop.inline.hpp"
  32 #include "oops/symbol.hpp"
  33 #include "services/diagnosticCommand.hpp"
  34 #include "utilities/hashtable.hpp"
  35 
  36 class NumberSeq;
  37 
  38 // Stats for symbol tables in the CDS archive
  39 class CompactHashtableStats VALUE_OBJ_CLASS_SPEC {
  40 public:
  41   int hashentry_count;
  42   int hashentry_bytes;
  43   int bucket_count;
  44   int bucket_bytes;
  45 };
  46 
  47 /////////////////////////////////////////////////////////////////////////
  48 //
  49 // The compact hash table writer. Used at dump time for writing out
  50 // the compact table to the shared archive.
  51 //
  52 // At dump time, the CompactHashtableWriter obtains all entries from the
  53 // symbol/string table and adds them to a new temporary hash table. The hash
  54 // table size (number of buckets) is calculated using
  55 // '(num_entries + bucket_size - 1) / bucket_size'. The default bucket
  56 // size is 4 and can be changed by -XX:SharedSymbolTableBucketSize option.
  57 // 4 is chosen because it produces smaller sized bucket on average for
  58 // faster lookup. It also has relatively small number of empty buckets and
  59 // good distribution of the entries.
  60 //
  61 // We use a simple hash function (hash % num_bucket) for the table.
  62 // The new table is compacted when written out. Please see comments
  63 // above the CompactHashtable class for the table layout detail. The bucket
  64 // offsets are written to the archive as part of the compact table. The
  65 // bucket offset is encoded in the low 30-bit (0-29) and the bucket type
  66 // (regular or compact) are encoded in bit[31, 30]. For buckets with more
  67 // than one entry, both hash and entry offset are written to the
  68 // table. For buckets with only one entry, only the entry offset is written
  69 // to the table and the buckets are tagged as compact in their type bits.
  70 // Buckets without entry are skipped from the table. Their offsets are
  71 // still written out for faster lookup.
  72 //
  73 class CompactHashtableWriter: public StackObj {
  74 public:
  75   class Entry: public CHeapObj<mtSymbol> {
  76     Entry* _next;
  77     unsigned int _hash;
  78     void* _literal;
  79 
  80   public:
  81     Entry(unsigned int hash, Symbol *symbol) : _next(NULL), _hash(hash), _literal(symbol) {}
  82     Entry(unsigned int hash, oop string)     : _next(NULL), _hash(hash), _literal(string) {}
  83 
  84     void *value() {
  85       return _literal;
  86     }
  87     Symbol *symbol() {
  88       return (Symbol*)_literal;
  89     }
  90     oop string() {
  91       return (oop)_literal;
  92     }
  93     unsigned int hash() {
  94       return _hash;
  95     }
  96     Entry *next()           {return _next;}
  97     void set_next(Entry *p) {_next = p;}
  98   }; // class CompactHashtableWriter::Entry
  99 
 100 private:
 101   static int number_of_buckets(int num_entries);
 102 
 103   int _type;
 104   int _num_entries;
 105   int _num_buckets;
 106   juint* _bucket_sizes;
 107   Entry** _buckets;
 108   int _required_bytes;
 109   CompactHashtableStats* _stats;
 110 
 111 public:
 112   // This is called at dump-time only
 113   CompactHashtableWriter(int table_type, int num_entries, CompactHashtableStats* stats);
 114   ~CompactHashtableWriter();
 115 
 116   int get_required_bytes() {
 117     return _required_bytes;
 118   }
 119 
 120   void add(unsigned int hash, Symbol* symbol) {
 121     add(hash, new Entry(hash, symbol));
 122   }
 123 
 124   void add(unsigned int hash, oop string) {
 125     add(hash, new Entry(hash, string));
 126   }
 127 
 128 private:
 129   void add(unsigned int hash, Entry* entry);
 130   juint* dump_table(juint* p, juint** first_bucket, NumberSeq* summary);
 131   juint* dump_buckets(juint* table, juint* p, NumberSeq* summary);
 132 
 133 public:
 134   void dump(char** top, char* end);
 135   const char* table_name();
 136 };
 137 
 138 #define REGULAR_BUCKET_TYPE       0
 139 #define COMPACT_BUCKET_TYPE       1
 140 #define TABLEEND_BUCKET_TYPE      3
 141 #define BUCKET_OFFSET_MASK        0x3FFFFFFF
 142 #define BUCKET_OFFSET(info)       ((info) & BUCKET_OFFSET_MASK)
 143 #define BUCKET_TYPE_SHIFT         30
 144 #define BUCKET_TYPE(info)         (((info) & ~BUCKET_OFFSET_MASK) >> BUCKET_TYPE_SHIFT)
 145 #define BUCKET_INFO(offset, type) (((type) << BUCKET_TYPE_SHIFT) | ((offset) & BUCKET_OFFSET_MASK))
 146 
 147 /////////////////////////////////////////////////////////////////////////////
 148 //
 149 // CompactHashtable is used to stored the CDS archive's symbol/string table. Used
 150 // at runtime only to access the compact table from the archive.
 151 //
 152 // Because these tables are read-only (no entries can be added/deleted) at run-time
 153 // and tend to have large number of entries, we try to minimize the footprint
 154 // cost per entry.
 155 //
 156 // Layout of compact table in the shared archive:
 157 //
 158 //   uintx base_address;
 159 //   juint num_entries;
 160 //   juint num_buckets;
 161 //   juint bucket_infos[num_buckets+1]; // bit[31,30]: type; bit[29-0]: offset
 162 //   juint table[]
 163 //
 164 // -----------------------------------
 165 // | base_address  | num_entries     |
 166 // |---------------------------------|
 167 // | num_buckets   | bucket_info0    |
 168 // |---------------------------------|
 169 // | bucket_info1  | bucket_info2    |
 170 // | bucket_info3    ...             |
 171 // | ....          | table_end_info  |
 172 // |---------------------------------|
 173 // | entry0                          |
 174 // | entry1                          |
 175 // | entry2                          |
 176 // |                                 |
 177 // | ...                             |
 178 // -----------------------------------
 179 //
 180 // The size of the bucket_info table is 'num_buckets + 1'. Each entry of the
 181 // bucket_info table is a 32-bit encoding of the bucket type and bucket offset,
 182 // with the type in the left-most 2-bit and offset in the remaining 30-bit.
 183 // The last entry is a special type. It contains the offset of the last
 184 // bucket end. We use that information when traversing the compact table.
 185 //
 186 // There are two types of buckets, regular buckets and compact buckets. The
 187 // compact buckets have '01' in their highest 2-bit, and regular buckets have
 188 // '00' in their highest 2-bit.
 189 //
 190 // For normal buckets, each entry is 8 bytes in the table[]:
 191 //   juint hash;    /* symbol/string hash */
 192 //   union {
 193 //     juint offset;  /* Symbol* sym = (Symbol*)(base_address + offset) */
 194 //     narrowOop str; /* String narrowOop encoding */
 195 //   }
 196 //   
 197 //
 198 // For compact buckets, each entry has only the 4-byte 'offset' in the table[].
 199 //
 200 // See CompactHashtable::lookup() for how the table is searched at runtime.
 201 // See CompactHashtableWriter::dump() for how the table is written at CDS
 202 // dump time.
 203 //
 204 template <class T, class N> class CompactHashtable VALUE_OBJ_CLASS_SPEC {
 205   friend class VMStructs;
 206 
 207  public:
 208   enum CompactHashtableType {
 209     _symbol_table = 0,
 210     _string_table = 1
 211   };
 212 
 213 private:
 214   CompactHashtableType _type;
 215   uintx  _base_address;
 216   juint  _entry_count;
 217   juint  _bucket_count;
 218   juint  _table_end_offset;
 219   juint* _buckets;
 220 
 221   inline Symbol* lookup_entry(CompactHashtable<Symbol*, char>* const t,
 222                               juint* addr, const char* name, int len) {
 223     Symbol* sym = (Symbol*)((void*)(_base_address + *addr));
 224     if (sym->equals(name, len)) {
 225       assert(sym->refcount() == -1, "must be shared");
 226       return sym;
 227     }
 228 
 229     return NULL;
 230   }
 231 
 232   inline oop lookup_entry(CompactHashtable<oop, char>* const t,
 233                         juint* addr, const char* name, int len) {
 234     narrowOop obj = (narrowOop)(*addr);
 235     oop string = oopDesc::decode_heap_oop(obj);
 236     if (java_lang_String::equals(string, (jchar*)name, len)) {
 237       return string;
 238     }
 239 
 240     return NULL;
 241   }
 242 
 243 public:
 244   CompactHashtable() {
 245     _entry_count = 0;
 246     _bucket_count = 0;
 247     _table_end_offset = 0;
 248     _buckets = 0;
 249   }
 250   const char* init(CompactHashtableType type, const char *buffer);
 251 
 252   void reset() {
 253     _entry_count = 0;
 254     _bucket_count = 0;
 255     _table_end_offset = 0;
 256     _buckets = 0;
 257   }
 258 
 259   // Lookup an entry from the compact table
 260   inline T lookup(const N* name, unsigned int hash, int len) {
 261     if (_entry_count > 0) {
 262       assert(!DumpSharedSpaces, "run-time only");
 263       int index = hash % _bucket_count;
 264       juint bucket_info = _buckets[index];
 265       juint bucket_offset = BUCKET_OFFSET(bucket_info);
 266       int   bucket_type = BUCKET_TYPE(bucket_info);
 267       juint* bucket = _buckets + bucket_offset;
 268       juint* bucket_end = _buckets;
 269 
 270       if (bucket_type == COMPACT_BUCKET_TYPE) {
 271         // the compact bucket has one entry with entry offset only
 272         T res = lookup_entry(this, &bucket[0], name, len);
 273         if (res != NULL) {
 274           return res;
 275         }
 276       } else {
 277         // This is a regular bucket, which has more than one
 278         // entries. Each entry is a pair of entry (hash, offset).
 279         // Seek until the end of the bucket.
 280         bucket_end += BUCKET_OFFSET(_buckets[index + 1]);
 281         while (bucket < bucket_end) {
 282           unsigned int h = (unsigned int)(bucket[0]);
 283           if (h == hash) {
 284             T res = lookup_entry(this, &bucket[1], name, len);
 285             if (res != NULL) {
 286               return res;
 287             }
 288           }
 289           bucket += 2;
 290         }
 291       }
 292     }
 293     return NULL;
 294   }
 295 
 296   // iterate over symbols
 297   void symbols_do(SymbolClosure *cl);
 298 
 299   // iterate over strings
 300   void oops_do(OopClosure* f);
 301 };
 302 
 303 ////////////////////////////////////////////////////////////////////////
 304 //
 305 // Read/Write the contents of a hashtable textual dump (created by
 306 // SymbolTable::dump and StringTable::dump).
 307 // Because the dump file may be big (hundred of MB in extreme cases),
 308 // we use mmap for fast access when reading it.
 309 //
 310 class HashtableTextDump VALUE_OBJ_CLASS_SPEC {
 311   int _fd;
 312   const char* _base;
 313   const char* _p;
 314   const char* _end;
 315   const char* _filename;
 316   size_t      _size;
 317   int         _prefix_type;
 318   int         _line_no;
 319 public:
 320   HashtableTextDump(const char* filename);
 321   ~HashtableTextDump();
 322 
 323   enum {
 324     SymbolPrefix = 1 << 0,
 325     StringPrefix = 1 << 1,
 326     Unknown = 1 << 2
 327   };
 328 
 329   void quit(const char* err, const char* msg);
 330 
 331   inline int remain() {
 332     return (int)(_end - _p);
 333   }
 334 
 335   void corrupted(const char *p, const char *msg);
 336 
 337   inline void corrupted_if(bool cond) {
 338     if (cond) {
 339       corrupted(_p, NULL);
 340     }
 341   }
 342 
 343   bool skip_newline();
 344   int skip(char must_be_char);
 345   void skip_past(char c);
 346   void check_version(const char* ver);
 347 
 348   inline bool get_num(char delim, int *utf8_length) {
 349     const char* p   = _p;
 350     const char* end = _end;
 351     int num = 0;
 352 
 353     while (p < end) {
 354       char c = *p ++;
 355       if ('0' <= c && c <= '9') {
 356         num = num * 10 + (c - '0');
 357       } else if (c == delim) {
 358         _p = p;
 359         *utf8_length = num;
 360         return true;
 361       } else {
 362         // Not [0-9], not 'delim'
 363         return false;
 364       }
 365     }
 366     corrupted(_end, "Incorrect format");
 367     ShouldNotReachHere();
 368     return false;
 369   }
 370 
 371   void scan_prefix_type();
 372   int scan_prefix(int* utf8_length);
 373   int scan_string_prefix();
 374   int scan_symbol_prefix();
 375 
 376   jchar unescape(const char* from, const char* end, int count);
 377   void get_utf8(char* utf8_buffer, int utf8_length);
 378   static void put_utf8(outputStream* st, const char* utf8_string, int utf8_length);
 379 };
 380 
 381 ///////////////////////////////////////////////////////////////////////
 382 //
 383 // jcmd command support for symbol table and string table dumping:
 384 //   VM.symboltable -verbose: for dumping the symbol table
 385 //   VM.stringtable -verbose: for dumping the string table
 386 //
 387 class VM_DumpHashtable : public VM_Operation {
 388 private:
 389   outputStream* _out;
 390   int _which;
 391   bool _verbose;
 392 public:
 393   enum {
 394     DumpSymbols = 1 << 0,
 395     DumpStrings = 1 << 1,
 396     DumpSysDict = 1 << 2  // not implemented yet
 397   };
 398   VM_DumpHashtable(outputStream* out, int which, bool verbose) {
 399     _out = out;
 400     _which = which;
 401     _verbose = verbose;
 402   }
 403 
 404   virtual VMOp_Type type() const { return VMOp_DumpHashtable; }
 405 
 406   virtual void doit() {
 407     switch (_which) {
 408     case DumpSymbols:
 409       SymbolTable::dump(_out, _verbose);
 410       break;
 411     case DumpStrings:
 412       StringTable::dump(_out, _verbose);
 413       break;
 414     default:
 415       ShouldNotReachHere();
 416     }
 417   }
 418 };
 419 
 420 class SymboltableDCmd : public DCmdWithParser {
 421 protected:
 422   DCmdArgument<bool> _verbose;
 423 public:
 424   SymboltableDCmd(outputStream* output, bool heap);
 425   static const char* name() {
 426     return "VM.symboltable";
 427   }
 428   static const char* description() {
 429     return "Dump symbol table.";
 430   }
 431   static const char* impact() {
 432     return "Medium: Depends on Java content.";
 433   }
 434   static const JavaPermission permission() {
 435     JavaPermission p = {"java.lang.management.ManagementPermission",
 436                         "monitor", NULL};
 437     return p;
 438   }
 439   static int num_arguments();
 440   virtual void execute(DCmdSource source, TRAPS);
 441 };
 442 
 443 class StringtableDCmd : public DCmdWithParser {
 444 protected:
 445   DCmdArgument<bool> _verbose;
 446 public:
 447   StringtableDCmd(outputStream* output, bool heap);
 448   static const char* name() {
 449     return "VM.stringtable";
 450   }
 451   static const char* description() {
 452     return "Dump string table.";
 453   }
 454   static const char* impact() {
 455     return "Medium: Depends on Java content.";
 456   }
 457   static const JavaPermission permission() {
 458     JavaPermission p = {"java.lang.management.ManagementPermission",
 459                         "monitor", NULL};
 460     return p;
 461   }
 462   static int num_arguments();
 463   virtual void execute(DCmdSource source, TRAPS);
 464 };
 465 
 466 #endif // SHARE_VM_CLASSFILE_COMPACTHASHTABLE_HPP