1 /*
   2  * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/compactHashtable.inline.hpp"
  27 #include "classfile/javaClasses.hpp"
  28 #include "logging/logMessage.hpp"
  29 #include "memory/metadataFactory.hpp"
  30 #include "memory/metaspaceShared.hpp"
  31 #include "prims/jvm.h"
  32 #include "utilities/numberSeq.hpp"
  33 #include <sys/stat.h>
  34 
  35 /////////////////////////////////////////////////////
  36 //
  37 // The compact hash table writer implementations
  38 //
  39 CompactHashtableWriter::CompactHashtableWriter(int num_buckets,
  40                                                CompactHashtableStats* stats) {
  41   assert(DumpSharedSpaces, "dump-time only");
  42   assert(num_buckets > 0, "no buckets");
  43   _num_buckets = num_buckets;
  44   _num_entries = 0;
  45   _buckets = NEW_C_HEAP_ARRAY(GrowableArray<Entry>*, _num_buckets, mtSymbol);
  46   for (int i=0; i<_num_buckets; i++) {
  47     _buckets[i] = new (ResourceObj::C_HEAP, mtSymbol) GrowableArray<Entry>(0, true, mtSymbol);
  48   }
  49 
  50   stats->bucket_count = _num_buckets;
  51   stats->bucket_bytes = (_num_buckets + 1) * (sizeof(u4));
  52   _stats = stats;
  53   _compact_buckets = NULL;
  54   _compact_entries = NULL;
  55   _num_empty_buckets = 0;
  56   _num_value_only_buckets = 0;
  57   _num_other_buckets = 0;
  58 }
  59 
  60 CompactHashtableWriter::~CompactHashtableWriter() {
  61   for (int index = 0; index < _num_buckets; index++) {
  62     GrowableArray<Entry>* bucket = _buckets[index];
  63     delete bucket;
  64   }
  65 
  66   FREE_C_HEAP_ARRAY(GrowableArray<Entry>*, _buckets);
  67 }
  68 
  69 // Add a symbol entry to the temporary hash table
  70 void CompactHashtableWriter::add(unsigned int hash, u4 value) {
  71   int index = hash % _num_buckets;
  72   _buckets[index]->append_if_missing(Entry(hash, value));
  73   _num_entries++;
  74 }
  75 
  76 void CompactHashtableWriter::allocate_table() {
  77   int entries_space = 0;
  78   for (int index = 0; index < _num_buckets; index++) {
  79     GrowableArray<Entry>* bucket = _buckets[index];
  80     int bucket_size = bucket->length();
  81     if (bucket_size == 1) {
  82       entries_space++;
  83     } else {
  84       entries_space += 2 * bucket_size;
  85     }
  86   }
  87 
  88   if (entries_space & ~BUCKET_OFFSET_MASK) {
  89     vm_exit_during_initialization("CompactHashtableWriter::allocate_table: Overflow! "
  90                                   "Too many entries.");
  91   }
  92 
  93   Thread* THREAD = VMThread::vm_thread();
  94   ClassLoaderData* loader_data = ClassLoaderData::the_null_class_loader_data();
  95   _compact_buckets = MetadataFactory::new_array<u4>(loader_data, _num_buckets + 1, THREAD);
  96   _compact_entries = MetadataFactory::new_array<u4>(loader_data, entries_space, THREAD);
  97 
  98   _stats->hashentry_count = _num_entries;
  99   _stats->hashentry_bytes = entries_space * sizeof(u4);
 100 }
 101 
 102 // Write the compact table's buckets
 103 void CompactHashtableWriter::dump_table(NumberSeq* summary) {
 104   u4 offset = 0;
 105   for (int index = 0; index < _num_buckets; index++) {
 106     GrowableArray<Entry>* bucket = _buckets[index];
 107     int bucket_size = bucket->length();
 108     if (bucket_size == 1) {
 109       // bucket with one entry is compacted and only has the symbol offset
 110       _compact_buckets->at_put(index, BUCKET_INFO(offset, VALUE_ONLY_BUCKET_TYPE));
 111 
 112       Entry ent = bucket->at(0);
 113       _compact_entries->at_put(offset++, ent.value());
 114       _num_value_only_buckets++;
 115     } else {
 116       // regular bucket, each entry is a symbol (hash, offset) pair
 117       _compact_buckets->at_put(index, BUCKET_INFO(offset, REGULAR_BUCKET_TYPE));
 118 
 119       for (int i=0; i<bucket_size; i++) {
 120         Entry ent = bucket->at(i);
 121         _compact_entries->at_put(offset++, u4(ent.hash())); // write entry hash
 122         _compact_entries->at_put(offset++, ent.value());
 123       }
 124       if (bucket_size == 0) {
 125         _num_empty_buckets++;
 126       } else {
 127         _num_other_buckets++;
 128       }
 129     }
 130     summary->add(bucket_size);
 131   }
 132 
 133   // Mark the end of the buckets
 134   _compact_buckets->at_put(_num_buckets, BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE));
 135   assert(offset == (u4)_compact_entries->length(), "sanity");
 136 }
 137 
 138 
 139 // Write the compact table
 140 void CompactHashtableWriter::dump(SimpleCompactHashtable *cht, const char* table_name) {
 141   NumberSeq summary;
 142   allocate_table();
 143   dump_table(&summary);
 144 
 145   int table_bytes = _stats->bucket_bytes + _stats->hashentry_bytes;
 146   address base_address = address(MetaspaceShared::shared_rs()->base());
 147   cht->init(base_address,  _num_entries, _num_buckets,
 148             _compact_buckets->data(), _compact_entries->data());
 149 
 150   if (log_is_enabled(Info, cds, hashtables)) {
 151     ResourceMark rm;
 152     LogMessage(cds, hashtables) msg;
 153     stringStream info_stream;
 154 
 155     double avg_cost = 0.0;
 156     if (_num_entries > 0) {
 157       avg_cost = double(table_bytes)/double(_num_entries);
 158     }
 159     info_stream.print_cr("Shared %s table stats -------- base: " PTR_FORMAT,
 160                          table_name, (intptr_t)base_address);
 161     info_stream.print_cr("Number of entries       : %9d", _num_entries);
 162     info_stream.print_cr("Total bytes used        : %9d", table_bytes);
 163     info_stream.print_cr("Average bytes per entry : %9.3f", avg_cost);
 164     info_stream.print_cr("Average bucket size     : %9.3f", summary.avg());
 165     info_stream.print_cr("Variance of bucket size : %9.3f", summary.variance());
 166     info_stream.print_cr("Std. dev. of bucket size: %9.3f", summary.sd());
 167     info_stream.print_cr("Empty buckets           : %9d", _num_empty_buckets);
 168     info_stream.print_cr("Value_Only buckets      : %9d", _num_value_only_buckets);
 169     info_stream.print_cr("Other buckets           : %9d", _num_other_buckets);
 170     msg.info("%s", info_stream.as_string());
 171   }
 172 }
 173 
 174 /////////////////////////////////////////////////////////////
 175 //
 176 // Customization for dumping Symbol and String tables
 177 
 178 void CompactSymbolTableWriter::add(unsigned int hash, Symbol *symbol) {
 179   address base_address = address(MetaspaceShared::shared_rs()->base());
 180 
 181   uintx deltax = address(symbol) - base_address;
 182   // The symbols are in RO space, which is smaler than MAX_SHARED_DELTA.
 183   // The assert below is just to be extra cautious.
 184   assert(deltax <= MAX_SHARED_DELTA, "the delta is too large to encode");
 185   u4 delta = u4(deltax);
 186 
 187   CompactHashtableWriter::add(hash, delta);
 188 }
 189 
 190 void CompactStringTableWriter::add(unsigned int hash, oop string) {
 191   CompactHashtableWriter::add(hash, oopDesc::encode_heap_oop(string));
 192 }
 193 
 194 void CompactSymbolTableWriter::dump(CompactHashtable<Symbol*, char> *cht) {
 195   CompactHashtableWriter::dump(cht, "symbol");
 196 }
 197 
 198 void CompactStringTableWriter::dump(CompactHashtable<oop, char> *cht) {
 199   CompactHashtableWriter::dump(cht, "string");
 200 }
 201 
 202 /////////////////////////////////////////////////////////////
 203 //
 204 // The CompactHashtable implementation
 205 //
 206 
 207 void SimpleCompactHashtable::serialize(SerializeClosure* soc) {
 208   soc->do_ptr((void**)&_base_address);
 209   soc->do_u4(&_entry_count);
 210   soc->do_u4(&_bucket_count);
 211   soc->do_ptr((void**)&_buckets);
 212   soc->do_ptr((void**)&_entries);
 213 }
 214 
 215 bool SimpleCompactHashtable::exists(u4 value) {
 216   assert(!DumpSharedSpaces, "run-time only");
 217 
 218   if (_entry_count == 0) {
 219     return false;
 220   }
 221 
 222   unsigned int hash = (unsigned int)value;
 223   int index = hash % _bucket_count;
 224   u4 bucket_info = _buckets[index];
 225   u4 bucket_offset = BUCKET_OFFSET(bucket_info);
 226   int bucket_type = BUCKET_TYPE(bucket_info);
 227   u4* entry = _entries + bucket_offset;
 228 
 229   if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
 230     return (entry[0] == value);
 231   } else {
 232     u4*entry_max = _entries + BUCKET_OFFSET(_buckets[index + 1]);
 233     while (entry <entry_max) {
 234       if (entry[1] == value) {
 235         return true;
 236       }
 237       entry += 2;
 238     }
 239     return false;
 240   }
 241 }
 242 
 243 template <class I>
 244 inline void SimpleCompactHashtable::iterate(const I& iterator) {
 245   assert(!DumpSharedSpaces, "run-time only");
 246   for (u4 i = 0; i < _bucket_count; i++) {
 247     u4 bucket_info = _buckets[i];
 248     u4 bucket_offset = BUCKET_OFFSET(bucket_info);
 249     int bucket_type = BUCKET_TYPE(bucket_info);
 250     u4* entry = _entries + bucket_offset;
 251 
 252     if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
 253       iterator.do_value(_base_address, entry[0]);
 254     } else {
 255       u4*entry_max = _entries + BUCKET_OFFSET(_buckets[i + 1]);
 256       while (entry < entry_max) {
 257         iterator.do_value(_base_address, entry[1]);
 258         entry += 2;
 259       }
 260     }
 261   }
 262 }
 263 
 264 template <class T, class N> void CompactHashtable<T, N>::serialize(SerializeClosure* soc) {
 265   SimpleCompactHashtable::serialize(soc);
 266   soc->do_u4(&_type);
 267 }
 268 
 269 class CompactHashtable_SymbolIterator {
 270   SymbolClosure* const _closure;
 271 public:
 272   CompactHashtable_SymbolIterator(SymbolClosure *cl) : _closure(cl) {}
 273   inline void do_value(address base_address, u4 offset) const {
 274     Symbol* sym = (Symbol*)((void*)(base_address + offset));
 275     _closure->do_symbol(&sym);
 276   }
 277 };
 278 
 279 template <class T, class N> void CompactHashtable<T, N>::symbols_do(SymbolClosure *cl) {
 280   CompactHashtable_SymbolIterator iterator(cl);
 281   iterate(iterator);
 282 }
 283 
 284 class CompactHashtable_OopIterator {
 285   OopClosure* const _closure;
 286 public:
 287   CompactHashtable_OopIterator(OopClosure *cl) : _closure(cl) {}
 288   inline void do_value(address base_address, u4 offset) const {
 289     narrowOop o = (narrowOop)offset;
 290     _closure->do_oop(&o);
 291   }
 292 };
 293 
 294 template <class T, class N> void CompactHashtable<T, N>::oops_do(OopClosure* cl) {
 295   assert(_type == _string_table || _bucket_count == 0, "sanity");
 296   CompactHashtable_OopIterator iterator(cl);
 297   iterate(iterator);
 298 }
 299 
 300 // Explicitly instantiate these types
 301 template class CompactHashtable<Symbol*, char>;
 302 template class CompactHashtable<oop, char>;
 303 
 304 #ifndef O_BINARY       // if defined (Win32) use binary files.
 305 #define O_BINARY 0     // otherwise do nothing.
 306 #endif
 307 
 308 ////////////////////////////////////////////////////////
 309 //
 310 // HashtableTextDump
 311 //
 312 HashtableTextDump::HashtableTextDump(const char* filename) : _fd(-1) {
 313   struct stat st;
 314   if (os::stat(filename, &st) != 0) {
 315     quit("Unable to get hashtable dump file size", filename);
 316   }
 317   _size = st.st_size;
 318   _fd = open(filename, O_RDONLY | O_BINARY, 0);
 319   if (_fd < 0) {
 320     quit("Unable to open hashtable dump file", filename);
 321   }
 322   _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false);
 323   if (_base == NULL) {
 324     quit("Unable to map hashtable dump file", filename);
 325   }
 326   _p = _base;
 327   _end = _base + st.st_size;
 328   _filename = filename;
 329   _prefix_type = Unknown;
 330   _line_no = 1;
 331 }
 332 
 333 HashtableTextDump::~HashtableTextDump() {
 334   os::unmap_memory((char*)_base, _size);
 335   if (_fd >= 0) {
 336     close(_fd);
 337   }
 338 }
 339 
 340 void HashtableTextDump::quit(const char* err, const char* msg) {
 341   vm_exit_during_initialization(err, msg);
 342 }
 343 
 344 void HashtableTextDump::corrupted(const char *p, const char* msg) {
 345   char info[100];
 346   jio_snprintf(info, sizeof(info),
 347                "%s. Corrupted at line %d (file pos %d)",
 348                msg, _line_no, (int)(p - _base));
 349   quit(info, _filename);
 350 }
 351 
 352 bool HashtableTextDump::skip_newline() {
 353   if (_p[0] == '\r' && _p[1] == '\n') {
 354     _p += 2;
 355   } else if (_p[0] == '\n') {
 356     _p += 1;
 357   } else {
 358     corrupted(_p, "Unexpected character");
 359   }
 360   _line_no++;
 361   return true;
 362 }
 363 
 364 int HashtableTextDump::skip(char must_be_char) {
 365   corrupted_if(remain() < 1, "Truncated");
 366   corrupted_if(*_p++ != must_be_char, "Unexpected character");
 367   return 0;
 368 }
 369 
 370 void HashtableTextDump::skip_past(char c) {
 371   for (;;) {
 372     corrupted_if(remain() < 1, "Truncated");
 373     if (*_p++ == c) {
 374       return;
 375     }
 376   }
 377 }
 378 
 379 void HashtableTextDump::check_version(const char* ver) {
 380   int len = (int)strlen(ver);
 381   corrupted_if(remain() < len, "Truncated");
 382   if (strncmp(_p, ver, len) != 0) {
 383     quit("wrong version of hashtable dump file", _filename);
 384   }
 385   _p += len;
 386   skip_newline();
 387 }
 388 
 389 void HashtableTextDump::scan_prefix_type() {
 390   _p++;
 391   if (strncmp(_p, "SECTION: String", 15) == 0) {
 392     _p += 15;
 393     _prefix_type = StringPrefix;
 394   } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) {
 395     _p += 15;
 396     _prefix_type = SymbolPrefix;
 397   } else {
 398     _prefix_type = Unknown;
 399   }
 400   skip_newline();
 401 }
 402 
 403 int HashtableTextDump::scan_prefix(int* utf8_length) {
 404   if (*_p == '@') {
 405     scan_prefix_type();
 406   }
 407 
 408   switch (_prefix_type) {
 409   case SymbolPrefix:
 410     *utf8_length = scan_symbol_prefix(); break;
 411   case StringPrefix:
 412     *utf8_length = scan_string_prefix(); break;
 413   default:
 414     tty->print_cr("Shared input data type: Unknown.");
 415     corrupted(_p, "Unknown data type");
 416   }
 417 
 418   return _prefix_type;
 419 }
 420 
 421 int HashtableTextDump::scan_string_prefix() {
 422   // Expect /[0-9]+: /
 423   int utf8_length = 0;
 424   get_num(':', &utf8_length);
 425   if (*_p != ' ') {
 426     corrupted(_p, "Wrong prefix format for string");
 427   }
 428   _p++;
 429   return utf8_length;
 430 }
 431 
 432 int HashtableTextDump::scan_symbol_prefix() {
 433   // Expect /[0-9]+ (-|)[0-9]+: /
 434   int utf8_length = 0;
 435   get_num(' ', &utf8_length);
 436   if (*_p == '-') {
 437     _p++;
 438   }
 439   int ref_num;
 440   get_num(':', &ref_num);
 441   if (*_p != ' ') {
 442     corrupted(_p, "Wrong prefix format for symbol");
 443   }
 444   _p++;
 445   return utf8_length;
 446 }
 447 
 448 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) {
 449   jchar value = 0;
 450 
 451   corrupted_if(from + count > end, "Truncated");
 452 
 453   for (int i=0; i<count; i++) {
 454     char c = *from++;
 455     switch (c) {
 456     case '0': case '1': case '2': case '3': case '4':
 457     case '5': case '6': case '7': case '8': case '9':
 458       value = (value << 4) + c - '0';
 459       break;
 460     case 'a': case 'b': case 'c':
 461     case 'd': case 'e': case 'f':
 462       value = (value << 4) + 10 + c - 'a';
 463       break;
 464     case 'A': case 'B': case 'C':
 465     case 'D': case 'E': case 'F':
 466       value = (value << 4) + 10 + c - 'A';
 467       break;
 468     default:
 469       ShouldNotReachHere();
 470     }
 471   }
 472   return value;
 473 }
 474 
 475 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
 476   // cache in local vars
 477   const char* from = _p;
 478   const char* end = _end;
 479   char* to = utf8_buffer;
 480   int n = utf8_length;
 481 
 482   for (; n > 0 && from < end; n--) {
 483     if (*from != '\\') {
 484       *to++ = *from++;
 485     } else {
 486       corrupted_if(from + 2 > end, "Truncated");
 487       char c = from[1];
 488       from += 2;
 489       switch (c) {
 490       case 'x':
 491         {
 492           jchar value = unescape(from, end, 2);
 493           from += 2;
 494           assert(value <= 0xff, "sanity");
 495           *to++ = (char)(value & 0xff);
 496         }
 497         break;
 498       case 't':  *to++ = '\t'; break;
 499       case 'n':  *to++ = '\n'; break;
 500       case 'r':  *to++ = '\r'; break;
 501       case '\\': *to++ = '\\'; break;
 502       default:
 503         corrupted(_p, "Unsupported character");
 504       }
 505     }
 506   }
 507   corrupted_if(n > 0, "Truncated"); // expected more chars but file has ended
 508   _p = from;
 509   skip_newline();
 510 }
 511 
 512 // NOTE: the content is NOT the same as
 513 // UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
 514 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
 515 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
 516 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
 517   const char *c = utf8_string;
 518   const char *end = c + utf8_length;
 519   for (; c < end; c++) {
 520     switch (*c) {
 521     case '\t': st->print("\\t"); break;
 522     case '\r': st->print("\\r"); break;
 523     case '\n': st->print("\\n"); break;
 524     case '\\': st->print("\\\\"); break;
 525     default:
 526       if (isprint(*c)) {
 527         st->print("%c", *c);
 528       } else {
 529         st->print("\\x%02x", ((unsigned int)*c) & 0xff);
 530       }
 531     }
 532   }
 533 }