1 /*
   2  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/javaClasses.hpp"
  27 #include "memory/metaspaceShared.hpp"
  28 #include "prims/jvm.h"
  29 #include "utilities/numberSeq.hpp"
  30 #include <sys/stat.h>
  31 
  32 /////////////////////////////////////////////////////
  33 //
  34 // The compact hash table writer implementations
  35 //
  36 CompactHashtableWriter::CompactHashtableWriter(int table_type,
  37                                                int num_entries,
  38                                                CompactHashtableStats* stats) {
  39   assert(DumpSharedSpaces, "dump-time only");
  40   _type = table_type;
  41   _num_entries = num_entries;
  42   _num_buckets = number_of_buckets(_num_entries);
  43   _buckets = NEW_C_HEAP_ARRAY(Entry*, _num_buckets, mtSymbol);
  44   memset(_buckets, 0, sizeof(Entry*) * _num_buckets);
  45 
  46   /* bucket sizes table */
  47   _bucket_sizes = NEW_C_HEAP_ARRAY(juint, _num_buckets, mtSymbol);
  48   memset(_bucket_sizes, 0, sizeof(juint) * _num_buckets);
  49 
  50   stats->hashentry_count = _num_entries;
  51   // Compact buckets' entries will have only the 4-byte offset, but
  52   // we don't know how many there will be at this point. So use a
  53   // conservative estimate here. The size is adjusted later when we
  54   // write out the buckets.
  55   stats->hashentry_bytes = _num_entries * 8;
  56   stats->bucket_count    = _num_buckets;
  57   stats->bucket_bytes    = (_num_buckets + 1) * (sizeof(juint));
  58   _stats = stats;
  59 
  60   // See compactHashtable.hpp for table layout
  61   _required_bytes = sizeof(juint) * 2; // _base_address, written as 2 juints
  62   _required_bytes+= sizeof(juint) +    // num_entries
  63                     sizeof(juint) +    // num_buckets
  64                     stats->hashentry_bytes +
  65                     stats->bucket_bytes;
  66 }
  67 
  68 CompactHashtableWriter::~CompactHashtableWriter() {
  69   for (int index = 0; index < _num_buckets; index++) {
  70     Entry* next = NULL;
  71     for (Entry* tent = _buckets[index]; tent; tent = next) {
  72       next = tent->next();
  73       delete tent;
  74     }
  75   }
  76 
  77   FREE_C_HEAP_ARRAY(juint, _bucket_sizes);
  78   FREE_C_HEAP_ARRAY(Entry*, _buckets);
  79 }
  80 
  81 // Calculate the number of buckets in the temporary hash table
  82 int CompactHashtableWriter::number_of_buckets(int num_entries) {
  83   const int buksize = (int)SharedSymbolTableBucketSize;
  84   int num_buckets = (num_entries + buksize - 1) / buksize;
  85   num_buckets = (num_buckets + 1) & (~0x01);
  86 
  87   return num_buckets;
  88 }
  89 
  90 // Add a symbol entry to the temporary hash table
  91 void CompactHashtableWriter::add(unsigned int hash, Entry* entry) {
  92   int index = hash % _num_buckets;
  93   entry->set_next(_buckets[index]);
  94   _buckets[index] = entry;
  95   _bucket_sizes[index] ++;
  96 }
  97 
  98 // Write the compact table's bucket infos
  99 juint* CompactHashtableWriter::dump_table(juint* p, juint** first_bucket,
 100                                           NumberSeq* summary) {
 101   int index;
 102   juint* compact_table = p;
 103   // Compute the start of the buckets, include the compact_bucket_infos table
 104   // and the table end offset.
 105   juint offset = _num_buckets + 1;
 106   *first_bucket = compact_table + offset;
 107 
 108   for (index = 0; index < _num_buckets; index++) {
 109     int bucket_size = _bucket_sizes[index];
 110     if (bucket_size == 1) {
 111       // bucket with one entry is compacted and only has the symbol offset
 112       compact_table[index] = BUCKET_INFO(offset, COMPACT_BUCKET_TYPE);
 113       offset += bucket_size; // each entry contains symbol offset only
 114     } else {
 115       // regular bucket, each entry is a symbol (hash, offset) pair
 116       compact_table[index] = BUCKET_INFO(offset, REGULAR_BUCKET_TYPE);
 117       offset += bucket_size * 2; // each hash entry is 2 juints
 118     }
 119     if (offset & ~BUCKET_OFFSET_MASK) {
 120       vm_exit_during_initialization("CompactHashtableWriter::dump_table: Overflow! "
 121                                     "Too many symbols.");
 122     }
 123     summary->add(bucket_size);
 124   }
 125   // Mark the end of the table
 126   compact_table[_num_buckets] = BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE);
 127 
 128   return compact_table;
 129 }
 130 
 131 // Write the compact table's entries
 132 juint* CompactHashtableWriter::dump_buckets(juint* compact_table, juint* p,
 133                                             NumberSeq* summary) {
 134   uintx base_address = 0;
 135   uintx max_delta = 0;
 136   int num_compact_buckets = 0;
 137   if (_type == CompactHashtable<Symbol*, char>::_symbol_table) {
 138     base_address = uintx(MetaspaceShared::shared_rs()->base());
 139     max_delta    = uintx(MetaspaceShared::shared_rs()->size());
 140     assert(max_delta <= 0x7fffffff, "range check");
 141   } else {
 142     assert((_type == CompactHashtable<oop, char>::_string_table), "unknown table");
 143     assert(UseCompressedOops, "UseCompressedOops is required");
 144   }
 145 
 146   assert(p != NULL, "sanity");
 147   for (int index = 0; index < _num_buckets; index++) {
 148     juint count = 0;
 149     int bucket_size = _bucket_sizes[index];
 150     int bucket_type = BUCKET_TYPE(compact_table[index]);
 151 
 152     if (bucket_size == 1) {
 153       assert(bucket_type == COMPACT_BUCKET_TYPE, "Bad bucket type");
 154       num_compact_buckets ++;
 155     }
 156     for (Entry* tent = _buckets[index]; tent;
 157          tent = tent->next()) {
 158       if (bucket_type == REGULAR_BUCKET_TYPE) {
 159         *p++ = juint(tent->hash()); // write entry hash
 160       }
 161       if (_type == CompactHashtable<Symbol*, char>::_symbol_table) {
 162         uintx deltax = uintx(tent->value()) - base_address;
 163         assert(deltax < max_delta, "range check");
 164         juint delta = juint(deltax);
 165         *p++ = delta; // write entry offset
 166       } else {
 167         *p++ = oopDesc::encode_heap_oop(tent->string());
 168       }
 169       count ++;
 170     }
 171     assert(count == _bucket_sizes[index], "sanity");
 172   }
 173 
 174   // Adjust the hashentry_bytes in CompactHashtableStats. Each compact
 175   // bucket saves 4-byte.
 176   _stats->hashentry_bytes -= num_compact_buckets * 4;
 177 
 178   return p;
 179 }
 180 
 181 // Write the compact table
 182 void CompactHashtableWriter::dump(char** top, char* end) {
 183   NumberSeq summary;
 184   char* old_top = *top;
 185   juint* p = (juint*)(*top);
 186 
 187   uintx base_address = uintx(MetaspaceShared::shared_rs()->base());
 188 
 189   // Now write the following at the beginning of the table:
 190   //      base_address (uintx)
 191   //      num_entries  (juint)
 192   //      num_buckets  (juint)
 193   *p++ = high(base_address);
 194   *p++ = low (base_address); // base address
 195   *p++ = _num_entries;  // number of entries in the table
 196   *p++ = _num_buckets;  // number of buckets in the table
 197 
 198   juint* first_bucket = NULL;
 199   juint* compact_table = dump_table(p, &first_bucket, &summary);
 200   juint* bucket_end = dump_buckets(compact_table, first_bucket, &summary);
 201 
 202   assert(bucket_end <= (juint*)end, "cannot write past end");
 203   *top = (char*)bucket_end;
 204 
 205   if (PrintSharedSpaces) {
 206     double avg_cost = 0.0;
 207     if (_num_entries > 0) {
 208       avg_cost = double(_required_bytes)/double(_num_entries);
 209     }
 210     tty->print_cr("Shared %s table stats -------- base: " PTR_FORMAT,
 211                   table_name(), (intptr_t)base_address);
 212     tty->print_cr("Number of entries       : %9d", _num_entries);
 213     tty->print_cr("Total bytes used        : %9d", (int)((*top) - old_top));
 214     tty->print_cr("Average bytes per entry : %9.3f", avg_cost);
 215     tty->print_cr("Average bucket size     : %9.3f", summary.avg());
 216     tty->print_cr("Variance of bucket size : %9.3f", summary.variance());
 217     tty->print_cr("Std. dev. of bucket size: %9.3f", summary.sd());
 218     tty->print_cr("Maximum bucket size     : %9d", (int)summary.maximum());
 219   }
 220 }
 221 
 222 const char* CompactHashtableWriter::table_name() {
 223   switch (_type) {
 224   case CompactHashtable<Symbol*, char>::_symbol_table: return "symbol";
 225   case CompactHashtable<oop, char>::_string_table: return "string";
 226   default:
 227     ;
 228   }
 229   return "unknown";
 230 }
 231 
 232 /////////////////////////////////////////////////////////////
 233 //
 234 // The CompactHashtable implementation
 235 //
 236 template <class T, class N> const char* CompactHashtable<T, N>::init(
 237                            CompactHashtableType type, const char* buffer) {
 238   assert(!DumpSharedSpaces, "run-time only");
 239   _type = type;
 240   juint*p = (juint*)buffer;
 241   juint upper = *p++;
 242   juint lower = *p++;
 243   _base_address = uintx(jlong_from(upper, lower));
 244   _entry_count = *p++;
 245   _bucket_count = *p++;
 246   _buckets = p;
 247   _table_end_offset = BUCKET_OFFSET(p[_bucket_count]); // located at the end of the bucket_info table
 248 
 249   juint *end = _buckets + _table_end_offset;
 250   return (const char*)end;
 251 }
 252 
 253 template <class T, class N> void CompactHashtable<T, N>::symbols_do(SymbolClosure *cl) {
 254   assert(!DumpSharedSpaces, "run-time only");
 255   for (juint i = 0; i < _bucket_count; i ++) {
 256     juint bucket_info = _buckets[i];
 257     juint bucket_offset = BUCKET_OFFSET(bucket_info);
 258     int   bucket_type = BUCKET_TYPE(bucket_info);
 259     juint* bucket = _buckets + bucket_offset;
 260     juint* bucket_end = _buckets;
 261 
 262     Symbol* sym;
 263     if (bucket_type == COMPACT_BUCKET_TYPE) {
 264       sym = (Symbol*)((void*)(_base_address + bucket[0]));
 265       cl->do_symbol(&sym);
 266     } else {
 267       bucket_end += BUCKET_OFFSET(_buckets[i + 1]);
 268       while (bucket < bucket_end) {
 269         sym = (Symbol*)((void*)(_base_address + bucket[1]));
 270         cl->do_symbol(&sym);
 271         bucket += 2;
 272       }
 273     }
 274   }
 275 }
 276 
 277 template <class T, class N> void CompactHashtable<T, N>::oops_do(OopClosure* f) {
 278   assert(!DumpSharedSpaces, "run-time only");
 279   assert(_type == _string_table || _bucket_count == 0, "sanity");
 280   for (juint i = 0; i < _bucket_count; i ++) {
 281     juint bucket_info = _buckets[i];
 282     juint bucket_offset = BUCKET_OFFSET(bucket_info);
 283     int   bucket_type = BUCKET_TYPE(bucket_info);
 284     juint* bucket = _buckets + bucket_offset;
 285     juint* bucket_end = _buckets;
 286 
 287     narrowOop o;
 288     if (bucket_type == COMPACT_BUCKET_TYPE) {
 289       o = (narrowOop)bucket[0];
 290       f->do_oop(&o);
 291     } else {
 292       bucket_end += BUCKET_OFFSET(_buckets[i + 1]);
 293       while (bucket < bucket_end) {
 294         o = (narrowOop)bucket[1];
 295         f->do_oop(&o);
 296         bucket += 2;
 297       }
 298     }
 299   }
 300 }
 301 
 302 // Explicitly instantiate these types
 303 template class CompactHashtable<Symbol*, char>;
 304 template class CompactHashtable<oop, char>;
 305 
 306 #ifndef O_BINARY       // if defined (Win32) use binary files.
 307 #define O_BINARY 0     // otherwise do nothing.
 308 #endif
 309 
 310 ////////////////////////////////////////////////////////
 311 //
 312 // HashtableTextDump
 313 //
 314 HashtableTextDump::HashtableTextDump(const char* filename) : _fd(-1) {
 315   struct stat st;
 316   if (os::stat(filename, &st) != 0) {
 317     quit("Unable to get hashtable dump file size", filename);
 318   }
 319   _size = st.st_size;
 320   _fd = open(filename, O_RDONLY | O_BINARY, 0);
 321   if (_fd < 0) {
 322     quit("Unable to open hashtable dump file", filename);
 323   }
 324   _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false);
 325   if (_base == NULL) {
 326     quit("Unable to map hashtable dump file", filename);
 327   }
 328   _p = _base;
 329   _end = _base + st.st_size;
 330   _filename = filename;
 331   _prefix_type = Unknown;
 332   _line_no = 1;
 333 }
 334 
 335 HashtableTextDump::~HashtableTextDump() {
 336   os::unmap_memory((char*)_base, _size);
 337   if (_fd >= 0) {
 338     close(_fd);
 339   }
 340 }
 341 
 342 void HashtableTextDump::quit(const char* err, const char* msg) {
 343   vm_exit_during_initialization(err, msg);
 344 }
 345 
 346 void HashtableTextDump::corrupted(const char *p, const char* msg) {
 347   char info[100];
 348   jio_snprintf(info, sizeof(info),
 349                "%s. Corrupted at line %d (file pos %d)",
 350                msg, _line_no, (int)(p - _base));
 351   quit(info, _filename);
 352 }
 353 
 354 bool HashtableTextDump::skip_newline() {
 355   if (_p[0] == '\r' && _p[1] == '\n') {
 356     _p += 2;
 357   } else if (_p[0] == '\n') {
 358     _p += 1;
 359   } else {
 360     corrupted(_p, "Unexpected character");
 361   }
 362   _line_no ++;
 363   return true;
 364 }
 365 
 366 int HashtableTextDump::skip(char must_be_char) {
 367   corrupted_if(remain() < 1);
 368   corrupted_if(*_p++ != must_be_char);
 369   return 0;
 370 }
 371 
 372 void HashtableTextDump::skip_past(char c) {
 373   for (;;) {
 374     corrupted_if(remain() < 1);
 375     if (*_p++ == c) {
 376       return;
 377     }
 378   }
 379 }
 380 
 381 void HashtableTextDump::check_version(const char* ver) {
 382   int len = (int)strlen(ver);
 383   corrupted_if(remain() < len);
 384   if (strncmp(_p, ver, len) != 0) {
 385     quit("wrong version of hashtable dump file", _filename);
 386   }
 387   _p += len;
 388   skip_newline();
 389 }
 390 
 391 void HashtableTextDump::scan_prefix_type() {
 392   _p ++;
 393   if (strncmp(_p, "SECTION: String", 15) == 0) {
 394     _p += 15;
 395     _prefix_type = StringPrefix;
 396   } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) {
 397     _p += 15;
 398     _prefix_type = SymbolPrefix;
 399   } else {
 400     _prefix_type = Unknown;
 401   }
 402   skip_newline();
 403 }
 404 
 405 int HashtableTextDump::scan_prefix(int* utf8_length) {
 406   if (*_p == '@') {
 407     scan_prefix_type();
 408   }
 409 
 410   switch (_prefix_type) {
 411   case SymbolPrefix:
 412     *utf8_length = scan_symbol_prefix(); break;
 413   case StringPrefix:
 414     *utf8_length = scan_string_prefix(); break;
 415   default:
 416     tty->print_cr("Shared input data type: Unknown.");
 417     corrupted(_p, "Unknown data type");
 418   }
 419 
 420   return _prefix_type;
 421 }
 422 
 423 int HashtableTextDump::scan_string_prefix() {
 424   // Expect /[0-9]+: /
 425   int utf8_length = 0;
 426   get_num(':', &utf8_length);
 427   if (*_p != ' ') {
 428     corrupted(_p, "Wrong prefix format for string");
 429   }
 430   _p++;
 431   return utf8_length;
 432 }
 433 
 434 int HashtableTextDump::scan_symbol_prefix() {
 435   // Expect /[0-9]+ (-|)[0-9]+: /
 436   int utf8_length = 0;
 437   get_num(' ', &utf8_length);
 438   if (*_p == '-') {
 439     _p++;
 440   }
 441   int ref_num;
 442   get_num(':', &ref_num);
 443   if (*_p != ' ') {
 444     corrupted(_p, "Wrong prefix format for symbol");
 445   }
 446   _p++;
 447   return utf8_length;
 448 }
 449 
 450 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) {
 451   jchar value = 0;
 452 
 453   corrupted_if(from + count > end);
 454 
 455   for (int i=0; i<count; i++) {
 456     char c = *from++;
 457     switch (c) {
 458     case '0': case '1': case '2': case '3': case '4':
 459     case '5': case '6': case '7': case '8': case '9':
 460       value = (value << 4) + c - '0';
 461       break;
 462     case 'a': case 'b': case 'c':
 463     case 'd': case 'e': case 'f':
 464       value = (value << 4) + 10 + c - 'a';
 465       break;
 466     case 'A': case 'B': case 'C':
 467     case 'D': case 'E': case 'F':
 468       value = (value << 4) + 10 + c - 'A';
 469       break;
 470     default:
 471       ShouldNotReachHere();
 472     }
 473   }
 474   return value;
 475 }
 476 
 477 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
 478   // cache in local vars
 479   const char* from = _p;
 480   const char* end = _end;
 481   char* to = utf8_buffer;
 482   int n = utf8_length;
 483 
 484   for (; n > 0 && from < end; n--) {
 485     if (*from != '\\') {
 486       *to++ = *from++;
 487     } else {
 488       corrupted_if(from + 2 > end);
 489       char c = from[1];
 490       from += 2;
 491       switch (c) {
 492       case 'x':
 493         {
 494           jchar value = unescape(from, end, 2);
 495           from += 2;
 496           assert(value <= 0xff, "sanity");
 497           *to++ = (char)(value & 0xff);
 498         }
 499         break;
 500       case 't':  *to++ = '\t'; break;
 501       case 'n':  *to++ = '\n'; break;
 502       case 'r':  *to++ = '\r'; break;
 503       case '\\': *to++ = '\\'; break;
 504       default:
 505         corrupted(_p, "Unsupported character");
 506       }
 507     }
 508   }
 509   corrupted_if(n > 0); // expected more chars but file has ended
 510   _p = from;
 511   skip_newline();
 512 }
 513 
 514 // NOTE: the content is NOT the same as
 515 // UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
 516 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
 517 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
 518 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
 519   const char *c = utf8_string;
 520   const char *end = c + utf8_length;
 521   for (; c < end; c++) {
 522     switch (*c) {
 523     case '\t': st->print("\\t"); break;
 524     case '\r': st->print("\\r"); break;
 525     case '\n': st->print("\\n"); break;
 526     case '\\': st->print("\\\\"); break;
 527     default:
 528       if (isprint(*c)) {
 529         st->print("%c", *c);
 530       } else {
 531         st->print("\\x%02x", ((unsigned int)*c) & 0xff);
 532       }
 533     }
 534   }
 535 }