1 /*
   2  * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/compactHashtable.inline.hpp"
  27 #include "classfile/javaClasses.hpp"
  28 #include "memory/metadataFactory.hpp"
  29 #include "memory/metaspaceShared.hpp"
  30 #include "prims/jvm.h"
  31 #include "utilities/numberSeq.hpp"
  32 #include <sys/stat.h>
  33 
  34 /////////////////////////////////////////////////////
  35 //
  36 // The compact hash table writer implementations
  37 //
  38 CompactHashtableWriter::CompactHashtableWriter(int num_buckets,
  39                                                CompactHashtableStats* stats) {
  40   assert(DumpSharedSpaces, "dump-time only");
  41   assert(num_buckets > 0, "no buckets");
  42   _num_buckets = num_buckets;
  43   _num_entries = 0;
  44   _buckets = NEW_C_HEAP_ARRAY(GrowableArray<Entry>*, _num_buckets, mtSymbol);
  45   for (int i=0; i<_num_buckets; i++) {
  46     _buckets[i] = new (ResourceObj::C_HEAP, mtSymbol) GrowableArray<Entry>(0, true, mtSymbol);
  47   }
  48 
  49   stats->bucket_count = _num_buckets;
  50   stats->bucket_bytes = (_num_buckets + 1) * (sizeof(u4));
  51   _stats = stats;
  52   _compact_buckets = NULL;
  53   _compact_entries = NULL;
  54   _num_empty_buckets = 0;
  55   _num_value_only_buckets = 0;
  56   _num_other_buckets = 0;
  57 }
  58 
  59 CompactHashtableWriter::~CompactHashtableWriter() {
  60   for (int index = 0; index < _num_buckets; index++) {
  61     GrowableArray<Entry>* bucket = _buckets[index];
  62     delete bucket;
  63   }
  64 
  65   FREE_C_HEAP_ARRAY(GrowableArray<Entry>*, _buckets);
  66 }
  67 
  68 // Add a symbol entry to the temporary hash table
  69 void CompactHashtableWriter::add(unsigned int hash, u4 value) {
  70   int index = hash % _num_buckets;
  71   _buckets[index]->append_if_missing(Entry(hash, value));
  72   _num_entries++;
  73 }
  74 
  75 void CompactHashtableWriter::allocate_table() {
  76   int entries_space = 0;
  77   for (int index = 0; index < _num_buckets; index++) {
  78     GrowableArray<Entry>* bucket = _buckets[index];
  79     int bucket_size = bucket->length();
  80     if (bucket_size == 1) {
  81       entries_space++;
  82     } else {
  83       entries_space += 2 * bucket_size;
  84     }
  85   }
  86 
  87   if (entries_space & ~BUCKET_OFFSET_MASK) {
  88     vm_exit_during_initialization("CompactHashtableWriter::allocate_table: Overflow! "
  89                                   "Too many entries.");
  90   }
  91 
  92   Thread* THREAD = VMThread::vm_thread();
  93   ClassLoaderData* loader_data = ClassLoaderData::the_null_class_loader_data();
  94   _compact_buckets = MetadataFactory::new_array<u4>(loader_data, _num_buckets + 1, THREAD);
  95   _compact_entries = MetadataFactory::new_array<u4>(loader_data, entries_space, THREAD);
  96 
  97   _stats->hashentry_count = _num_entries;
  98   _stats->hashentry_bytes = entries_space * sizeof(u4);
  99 }
 100 
 101 // Write the compact table's buckets
 102 void CompactHashtableWriter::dump_table(NumberSeq* summary) {
 103   u4 offset = 0;
 104   for (int index = 0; index < _num_buckets; index++) {
 105     GrowableArray<Entry>* bucket = _buckets[index];
 106     int bucket_size = bucket->length();
 107     if (bucket_size == 1) {
 108       // bucket with one entry is compacted and only has the symbol offset
 109       _compact_buckets->at_put(index, BUCKET_INFO(offset, VALUE_ONLY_BUCKET_TYPE));
 110 
 111       Entry ent = bucket->at(0);
 112       _compact_entries->at_put(offset++, ent.value());
 113       _num_value_only_buckets++;
 114     } else {
 115       // regular bucket, each entry is a symbol (hash, offset) pair
 116       _compact_buckets->at_put(index, BUCKET_INFO(offset, REGULAR_BUCKET_TYPE));
 117 
 118       for (int i=0; i<bucket_size; i++) {
 119         Entry ent = bucket->at(i);
 120         _compact_entries->at_put(offset++, u4(ent.hash())); // write entry hash
 121         _compact_entries->at_put(offset++, ent.value());
 122       }
 123       if (bucket_size == 0) {
 124         _num_empty_buckets++;
 125       } else {
 126         _num_other_buckets++;
 127       }
 128     }
 129     summary->add(bucket_size);
 130   }
 131 
 132   // Mark the end of the buckets
 133   _compact_buckets->at_put(_num_buckets, BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE));
 134   assert(offset == (u4)_compact_entries->length(), "sanity");
 135 }
 136 
 137 
 138 // Write the compact table
 139 void CompactHashtableWriter::dump(SimpleCompactHashtable *cht, const char* table_name) {
 140   NumberSeq summary;
 141   allocate_table();
 142   dump_table(&summary);
 143 
 144   int table_bytes = _stats->bucket_bytes + _stats->hashentry_bytes;
 145   address base_address = address(MetaspaceShared::shared_rs()->base());
 146   cht->init(base_address,  _num_entries, _num_buckets,
 147             _compact_buckets->data(), _compact_entries->data());
 148 
 149   if (PrintSharedSpaces) {
 150     double avg_cost = 0.0;
 151     if (_num_entries > 0) {
 152       avg_cost = double(table_bytes)/double(_num_entries);
 153     }
 154     tty->print_cr("Shared %s table stats -------- base: " PTR_FORMAT,
 155                   table_name, (intptr_t)base_address);
 156     tty->print_cr("Number of entries       : %9d", _num_entries);
 157     tty->print_cr("Total bytes used        : %9d", table_bytes);
 158     tty->print_cr("Average bytes per entry : %9.3f", avg_cost);
 159     tty->print_cr("Average bucket size     : %9.3f", summary.avg());
 160     tty->print_cr("Variance of bucket size : %9.3f", summary.variance());
 161     tty->print_cr("Std. dev. of bucket size: %9.3f", summary.sd());
 162     tty->print_cr("Empty buckets           : %9d", _num_empty_buckets);
 163     tty->print_cr("Value_Only buckets      : %9d", _num_value_only_buckets);
 164     tty->print_cr("Other buckets           : %9d", _num_other_buckets);
 165   }
 166 }
 167 
 168 /////////////////////////////////////////////////////////////
 169 //
 170 // Customization for dumping Symbol and String tables
 171 
 172 void CompactSymbolTableWriter::add(unsigned int hash, Symbol *symbol) {
 173   address base_address = address(MetaspaceShared::shared_rs()->base());
 174 
 175   uintx deltax = address(symbol) - base_address;
 176   // The symbols are in RO space, which is smaler than MAX_SHARED_DELTA.
 177   // The assert below is just to be extra cautious.
 178   assert(deltax <= MAX_SHARED_DELTA, "the delta is too large to encode");
 179   u4 delta = u4(deltax);
 180 
 181   CompactHashtableWriter::add(hash, delta);
 182 }
 183 
 184 void CompactStringTableWriter::add(unsigned int hash, oop string) {
 185   CompactHashtableWriter::add(hash, oopDesc::encode_heap_oop(string));
 186 }
 187 
 188 void CompactSymbolTableWriter::dump(CompactHashtable<Symbol*, char> *cht) {
 189   CompactHashtableWriter::dump(cht, "symbol");
 190 }
 191 
 192 void CompactStringTableWriter::dump(CompactHashtable<oop, char> *cht) {
 193   CompactHashtableWriter::dump(cht, "string");
 194 }
 195 
 196 /////////////////////////////////////////////////////////////
 197 //
 198 // The CompactHashtable implementation
 199 //
 200 
 201 void SimpleCompactHashtable::serialize(SerializeClosure* soc) {
 202   soc->do_ptr((void**)&_base_address);
 203   soc->do_u4(&_entry_count);
 204   soc->do_u4(&_bucket_count);
 205   soc->do_ptr((void**)&_buckets);
 206   soc->do_ptr((void**)&_entries);
 207 }
 208 
 209 bool SimpleCompactHashtable::exists(u4 value) {
 210   assert(!DumpSharedSpaces, "run-time only");
 211 
 212   if (_entry_count == 0) {
 213     return false;
 214   }
 215 
 216   unsigned int hash = (unsigned int)value;
 217   int index = hash % _bucket_count;
 218   u4 bucket_info = _buckets[index];
 219   u4 bucket_offset = BUCKET_OFFSET(bucket_info);
 220   int bucket_type = BUCKET_TYPE(bucket_info);
 221   u4* entry = _entries + bucket_offset;
 222 
 223   if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
 224     return (entry[0] == value);
 225   } else {
 226     u4*entry_max = _entries + BUCKET_OFFSET(_buckets[index + 1]);
 227     while (entry <entry_max) {
 228       if (entry[1] == value) {
 229         return true;
 230       }
 231       entry += 2;
 232     }
 233     return false;
 234   }
 235 }
 236 
 237 template <class I>
 238 inline void SimpleCompactHashtable::iterate(const I& iterator) {
 239   assert(!DumpSharedSpaces, "run-time only");
 240   for (u4 i = 0; i < _bucket_count; i++) {
 241     u4 bucket_info = _buckets[i];
 242     u4 bucket_offset = BUCKET_OFFSET(bucket_info);
 243     int bucket_type = BUCKET_TYPE(bucket_info);
 244     u4* entry = _entries + bucket_offset;
 245 
 246     if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
 247       iterator.do_value(_base_address, entry[0]);
 248     } else {
 249       u4*entry_max = _entries + BUCKET_OFFSET(_buckets[i + 1]);
 250       while (entry < entry_max) {
 251         iterator.do_value(_base_address, entry[1]);
 252         entry += 2;
 253       }
 254     }
 255   }
 256 }
 257 
 258 template <class T, class N> void CompactHashtable<T, N>::serialize(SerializeClosure* soc) {
 259   SimpleCompactHashtable::serialize(soc);
 260   soc->do_u4(&_type);
 261 }
 262 
 263 class CompactHashtable_SymbolIterator {
 264   SymbolClosure* const _closure;
 265 public:
 266   CompactHashtable_SymbolIterator(SymbolClosure *cl) : _closure(cl) {}
 267   inline void do_value(address base_address, u4 offset) const {
 268     Symbol* sym = (Symbol*)((void*)(base_address + offset));
 269     _closure->do_symbol(&sym);
 270   }
 271 };
 272 
 273 template <class T, class N> void CompactHashtable<T, N>::symbols_do(SymbolClosure *cl) {
 274   CompactHashtable_SymbolIterator iterator(cl);
 275   iterate(iterator);
 276 }
 277 
 278 class CompactHashtable_OopIterator {
 279   OopClosure* const _closure;
 280 public:
 281   CompactHashtable_OopIterator(OopClosure *cl) : _closure(cl) {}
 282   inline void do_value(address base_address, u4 offset) const {
 283     narrowOop o = (narrowOop)offset;
 284     _closure->do_oop(&o);
 285   }
 286 };
 287 
 288 template <class T, class N> void CompactHashtable<T, N>::oops_do(OopClosure* cl) {
 289   assert(_type == _string_table || _bucket_count == 0, "sanity");
 290   CompactHashtable_OopIterator iterator(cl);
 291   iterate(iterator);
 292 }
 293 
 294 // Explicitly instantiate these types
 295 template class CompactHashtable<Symbol*, char>;
 296 template class CompactHashtable<oop, char>;
 297 
 298 #ifndef O_BINARY       // if defined (Win32) use binary files.
 299 #define O_BINARY 0     // otherwise do nothing.
 300 #endif
 301 
 302 ////////////////////////////////////////////////////////
 303 //
 304 // HashtableTextDump
 305 //
 306 HashtableTextDump::HashtableTextDump(const char* filename) : _fd(-1) {
 307   struct stat st;
 308   if (os::stat(filename, &st) != 0) {
 309     quit("Unable to get hashtable dump file size", filename);
 310   }
 311   _size = st.st_size;
 312   _fd = open(filename, O_RDONLY | O_BINARY, 0);
 313   if (_fd < 0) {
 314     quit("Unable to open hashtable dump file", filename);
 315   }
 316   _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false);
 317   if (_base == NULL) {
 318     quit("Unable to map hashtable dump file", filename);
 319   }
 320   _p = _base;
 321   _end = _base + st.st_size;
 322   _filename = filename;
 323   _prefix_type = Unknown;
 324   _line_no = 1;
 325 }
 326 
 327 HashtableTextDump::~HashtableTextDump() {
 328   os::unmap_memory((char*)_base, _size);
 329   if (_fd >= 0) {
 330     close(_fd);
 331   }
 332 }
 333 
 334 void HashtableTextDump::quit(const char* err, const char* msg) {
 335   vm_exit_during_initialization(err, msg);
 336 }
 337 
 338 void HashtableTextDump::corrupted(const char *p, const char* msg) {
 339   char info[100];
 340   jio_snprintf(info, sizeof(info),
 341                "%s. Corrupted at line %d (file pos %d)",
 342                msg, _line_no, (int)(p - _base));
 343   quit(info, _filename);
 344 }
 345 
 346 bool HashtableTextDump::skip_newline() {
 347   if (_p[0] == '\r' && _p[1] == '\n') {
 348     _p += 2;
 349   } else if (_p[0] == '\n') {
 350     _p += 1;
 351   } else {
 352     corrupted(_p, "Unexpected character");
 353   }
 354   _line_no++;
 355   return true;
 356 }
 357 
 358 int HashtableTextDump::skip(char must_be_char) {
 359   corrupted_if(remain() < 1, "Truncated");
 360   corrupted_if(*_p++ != must_be_char, "Unexpected character");
 361   return 0;
 362 }
 363 
 364 void HashtableTextDump::skip_past(char c) {
 365   for (;;) {
 366     corrupted_if(remain() < 1, "Truncated");
 367     if (*_p++ == c) {
 368       return;
 369     }
 370   }
 371 }
 372 
 373 void HashtableTextDump::check_version(const char* ver) {
 374   int len = (int)strlen(ver);
 375   corrupted_if(remain() < len, "Truncated");
 376   if (strncmp(_p, ver, len) != 0) {
 377     quit("wrong version of hashtable dump file", _filename);
 378   }
 379   _p += len;
 380   skip_newline();
 381 }
 382 
 383 void HashtableTextDump::scan_prefix_type() {
 384   _p++;
 385   if (strncmp(_p, "SECTION: String", 15) == 0) {
 386     _p += 15;
 387     _prefix_type = StringPrefix;
 388   } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) {
 389     _p += 15;
 390     _prefix_type = SymbolPrefix;
 391   } else {
 392     _prefix_type = Unknown;
 393   }
 394   skip_newline();
 395 }
 396 
 397 int HashtableTextDump::scan_prefix(int* utf8_length) {
 398   if (*_p == '@') {
 399     scan_prefix_type();
 400   }
 401 
 402   switch (_prefix_type) {
 403   case SymbolPrefix:
 404     *utf8_length = scan_symbol_prefix(); break;
 405   case StringPrefix:
 406     *utf8_length = scan_string_prefix(); break;
 407   default:
 408     tty->print_cr("Shared input data type: Unknown.");
 409     corrupted(_p, "Unknown data type");
 410   }
 411 
 412   return _prefix_type;
 413 }
 414 
 415 int HashtableTextDump::scan_string_prefix() {
 416   // Expect /[0-9]+: /
 417   int utf8_length = 0;
 418   get_num(':', &utf8_length);
 419   if (*_p != ' ') {
 420     corrupted(_p, "Wrong prefix format for string");
 421   }
 422   _p++;
 423   return utf8_length;
 424 }
 425 
 426 int HashtableTextDump::scan_symbol_prefix() {
 427   // Expect /[0-9]+ (-|)[0-9]+: /
 428   int utf8_length = 0;
 429   get_num(' ', &utf8_length);
 430   if (*_p == '-') {
 431     _p++;
 432   }
 433   int ref_num;
 434   get_num(':', &ref_num);
 435   if (*_p != ' ') {
 436     corrupted(_p, "Wrong prefix format for symbol");
 437   }
 438   _p++;
 439   return utf8_length;
 440 }
 441 
 442 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) {
 443   jchar value = 0;
 444 
 445   corrupted_if(from + count > end, "Truncated");
 446 
 447   for (int i=0; i<count; i++) {
 448     char c = *from++;
 449     switch (c) {
 450     case '0': case '1': case '2': case '3': case '4':
 451     case '5': case '6': case '7': case '8': case '9':
 452       value = (value << 4) + c - '0';
 453       break;
 454     case 'a': case 'b': case 'c':
 455     case 'd': case 'e': case 'f':
 456       value = (value << 4) + 10 + c - 'a';
 457       break;
 458     case 'A': case 'B': case 'C':
 459     case 'D': case 'E': case 'F':
 460       value = (value << 4) + 10 + c - 'A';
 461       break;
 462     default:
 463       ShouldNotReachHere();
 464     }
 465   }
 466   return value;
 467 }
 468 
 469 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
 470   // cache in local vars
 471   const char* from = _p;
 472   const char* end = _end;
 473   char* to = utf8_buffer;
 474   int n = utf8_length;
 475 
 476   for (; n > 0 && from < end; n--) {
 477     if (*from != '\\') {
 478       *to++ = *from++;
 479     } else {
 480       corrupted_if(from + 2 > end, "Truncated");
 481       char c = from[1];
 482       from += 2;
 483       switch (c) {
 484       case 'x':
 485         {
 486           jchar value = unescape(from, end, 2);
 487           from += 2;
 488           assert(value <= 0xff, "sanity");
 489           *to++ = (char)(value & 0xff);
 490         }
 491         break;
 492       case 't':  *to++ = '\t'; break;
 493       case 'n':  *to++ = '\n'; break;
 494       case 'r':  *to++ = '\r'; break;
 495       case '\\': *to++ = '\\'; break;
 496       default:
 497         corrupted(_p, "Unsupported character");
 498       }
 499     }
 500   }
 501   corrupted_if(n > 0, "Truncated"); // expected more chars but file has ended
 502   _p = from;
 503   skip_newline();
 504 }
 505 
 506 // NOTE: the content is NOT the same as
 507 // UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
 508 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
 509 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
 510 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
 511   const char *c = utf8_string;
 512   const char *end = c + utf8_length;
 513   for (; c < end; c++) {
 514     switch (*c) {
 515     case '\t': st->print("\\t"); break;
 516     case '\r': st->print("\\r"); break;
 517     case '\n': st->print("\\n"); break;
 518     case '\\': st->print("\\\\"); break;
 519     default:
 520       if (isprint(*c)) {
 521         st->print("%c", *c);
 522       } else {
 523         st->print("\\x%02x", ((unsigned int)*c) & 0xff);
 524       }
 525     }
 526   }
 527 }