1 /*
   2  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/javaClasses.hpp"
  27 #include "classfile/compactHashtable.inline.hpp"
  28 #include "memory/metaspaceShared.hpp"
  29 #include "prims/jvm.h"
  30 #include "utilities/numberSeq.hpp"
  31 #include <sys/stat.h>
  32 
  33 /////////////////////////////////////////////////////
  34 //
  35 // The compact hash table writer implementations
  36 //
  37 CompactHashtableWriter::CompactHashtableWriter(int table_type,
  38                                                int num_entries,
  39                                                CompactHashtableStats* stats) {
  40   assert(DumpSharedSpaces, "dump-time only");
  41   _type = table_type;
  42   _num_entries = num_entries;
  43   _num_buckets = number_of_buckets(_num_entries);
  44   _buckets = NEW_C_HEAP_ARRAY(Entry*, _num_buckets, mtSymbol);
  45   memset(_buckets, 0, sizeof(Entry*) * _num_buckets);
  46 
  47   /* bucket sizes table */
  48   _bucket_sizes = NEW_C_HEAP_ARRAY(juint, _num_buckets, mtSymbol);
  49   memset(_bucket_sizes, 0, sizeof(juint) * _num_buckets);
  50 
  51   stats->hashentry_count = _num_entries;
  52   // Compact buckets' entries will have only the 4-byte offset, but
  53   // we don't know how many there will be at this point. So use a
  54   // conservative estimate here. The size is adjusted later when we
  55   // write out the buckets.
  56   stats->hashentry_bytes = _num_entries * 8;
  57   stats->bucket_count    = _num_buckets;
  58   stats->bucket_bytes    = (_num_buckets + 1) * (sizeof(juint));
  59   _stats = stats;
  60 
  61   // See compactHashtable.hpp for table layout
  62   _required_bytes = sizeof(juint) * 2; // _base_address, written as 2 juints
  63   _required_bytes+= sizeof(juint) +    // num_entries
  64                     sizeof(juint) +    // num_buckets
  65                     stats->hashentry_bytes +
  66                     stats->bucket_bytes;
  67 }
  68 
  69 CompactHashtableWriter::~CompactHashtableWriter() {
  70   for (int index = 0; index < _num_buckets; index++) {
  71     Entry* next = NULL;
  72     for (Entry* tent = _buckets[index]; tent; tent = next) {
  73       next = tent->next();
  74       delete tent;
  75     }
  76   }
  77 
  78   FREE_C_HEAP_ARRAY(juint, _bucket_sizes);
  79   FREE_C_HEAP_ARRAY(Entry*, _buckets);
  80 }
  81 
  82 // Calculate the number of buckets in the temporary hash table
  83 int CompactHashtableWriter::number_of_buckets(int num_entries) {
  84   const int buksize = (int)SharedSymbolTableBucketSize;
  85   int num_buckets = (num_entries + buksize - 1) / buksize;
  86   num_buckets = (num_buckets + 1) & (~0x01);
  87 
  88   return num_buckets;
  89 }
  90 
  91 // Add a symbol entry to the temporary hash table
  92 void CompactHashtableWriter::add(unsigned int hash, Entry* entry) {
  93   int index = hash % _num_buckets;
  94   entry->set_next(_buckets[index]);
  95   _buckets[index] = entry;
  96   _bucket_sizes[index] ++;
  97 }
  98 
  99 // Write the compact table's bucket infos
 100 juint* CompactHashtableWriter::dump_table(juint* p, juint** first_bucket,
 101                                           NumberSeq* summary) {
 102   int index;
 103   juint* compact_table = p;
 104   // Compute the start of the buckets, include the compact_bucket_infos table
 105   // and the table end offset.
 106   juint offset = _num_buckets + 1;
 107   *first_bucket = compact_table + offset;
 108 
 109   for (index = 0; index < _num_buckets; index++) {
 110     int bucket_size = _bucket_sizes[index];
 111     if (bucket_size == 1) {
 112       // bucket with one entry is compacted and only has the symbol offset
 113       compact_table[index] = BUCKET_INFO(offset, COMPACT_BUCKET_TYPE);
 114       offset += bucket_size; // each entry contains symbol offset only
 115     } else {
 116       // regular bucket, each entry is a symbol (hash, offset) pair
 117       compact_table[index] = BUCKET_INFO(offset, REGULAR_BUCKET_TYPE);
 118       offset += bucket_size * 2; // each hash entry is 2 juints
 119     }
 120     if (offset & ~BUCKET_OFFSET_MASK) {
 121       vm_exit_during_initialization("CompactHashtableWriter::dump_table: Overflow! "
 122                                     "Too many symbols.");
 123     }
 124     summary->add(bucket_size);
 125   }
 126   // Mark the end of the table
 127   compact_table[_num_buckets] = BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE);
 128 
 129   return compact_table;
 130 }
 131 
 132 // Write the compact table's entries
 133 juint* CompactHashtableWriter::dump_buckets(juint* compact_table, juint* p,
 134                                             NumberSeq* summary) {
 135   uintx base_address = 0;
 136   uintx max_delta = 0;
 137   int num_compact_buckets = 0;
 138   if (_type == CompactHashtable<Symbol*, char>::_symbol_table) {
 139     base_address = uintx(MetaspaceShared::shared_rs()->base());
 140     max_delta    = uintx(MetaspaceShared::shared_rs()->size());
 141     assert(max_delta <= MAX_SHARED_DELTA, "range check");
 142   } else {
 143     assert((_type == CompactHashtable<oop, char>::_string_table), "unknown table");
 144     assert(UseCompressedOops, "UseCompressedOops is required");
 145   }
 146 
 147   assert(p != NULL, "sanity");
 148   for (int index = 0; index < _num_buckets; index++) {
 149     juint count = 0;
 150     int bucket_size = _bucket_sizes[index];
 151     int bucket_type = BUCKET_TYPE(compact_table[index]);
 152 
 153     if (bucket_size == 1) {
 154       assert(bucket_type == COMPACT_BUCKET_TYPE, "Bad bucket type");
 155       num_compact_buckets ++;
 156     }
 157     for (Entry* tent = _buckets[index]; tent;
 158          tent = tent->next()) {
 159       if (bucket_type == REGULAR_BUCKET_TYPE) {
 160         *p++ = juint(tent->hash()); // write entry hash
 161       }
 162       if (_type == CompactHashtable<Symbol*, char>::_symbol_table) {
 163         uintx deltax = uintx(tent->value()) - base_address;
 164         assert(deltax < max_delta, "range check");
 165         juint delta = juint(deltax);
 166         *p++ = delta; // write entry offset
 167       } else {
 168         *p++ = oopDesc::encode_heap_oop(tent->string());
 169       }
 170       count ++;
 171     }
 172     assert(count == _bucket_sizes[index], "sanity");
 173   }
 174 
 175   // Adjust the hashentry_bytes in CompactHashtableStats. Each compact
 176   // bucket saves 4-byte.
 177   _stats->hashentry_bytes -= num_compact_buckets * 4;
 178 
 179   return p;
 180 }
 181 
 182 // Write the compact table
 183 void CompactHashtableWriter::dump(char** top, char* end) {
 184   NumberSeq summary;
 185   char* old_top = *top;
 186   juint* p = (juint*)(*top);
 187 
 188   uintx base_address = uintx(MetaspaceShared::shared_rs()->base());
 189 
 190   // Now write the following at the beginning of the table:
 191   //      base_address (uintx)
 192   //      num_entries  (juint)
 193   //      num_buckets  (juint)
 194   *p++ = high(base_address);
 195   *p++ = low (base_address); // base address
 196   *p++ = _num_entries;  // number of entries in the table
 197   *p++ = _num_buckets;  // number of buckets in the table
 198 
 199   juint* first_bucket = NULL;
 200   juint* compact_table = dump_table(p, &first_bucket, &summary);
 201   juint* bucket_end = dump_buckets(compact_table, first_bucket, &summary);
 202 
 203   assert(bucket_end <= (juint*)end, "cannot write past end");
 204   *top = (char*)bucket_end;
 205 
 206   if (PrintSharedSpaces) {
 207     double avg_cost = 0.0;
 208     if (_num_entries > 0) {
 209       avg_cost = double(_required_bytes)/double(_num_entries);
 210     }
 211     tty->print_cr("Shared %s table stats -------- base: " PTR_FORMAT,
 212                   table_name(), (intptr_t)base_address);
 213     tty->print_cr("Number of entries       : %9d", _num_entries);
 214     tty->print_cr("Total bytes used        : %9d", (int)((*top) - old_top));
 215     tty->print_cr("Average bytes per entry : %9.3f", avg_cost);
 216     tty->print_cr("Average bucket size     : %9.3f", summary.avg());
 217     tty->print_cr("Variance of bucket size : %9.3f", summary.variance());
 218     tty->print_cr("Std. dev. of bucket size: %9.3f", summary.sd());
 219     tty->print_cr("Maximum bucket size     : %9d", (int)summary.maximum());
 220   }
 221 }
 222 
 223 const char* CompactHashtableWriter::table_name() {
 224   switch (_type) {
 225   case CompactHashtable<Symbol*, char>::_symbol_table: return "symbol";
 226   case CompactHashtable<oop, char>::_string_table: return "string";
 227   default:
 228     ;
 229   }
 230   return "unknown";
 231 }
 232 
 233 /////////////////////////////////////////////////////////////
 234 //
 235 // The CompactHashtable implementation
 236 //
 237 template <class T, class N> const char* CompactHashtable<T, N>::init(
 238                            CompactHashtableType type, const char* buffer) {
 239   assert(!DumpSharedSpaces, "run-time only");
 240   _type = type;
 241   juint*p = (juint*)buffer;
 242   juint upper = *p++;
 243   juint lower = *p++;
 244   _base_address = uintx(jlong_from(upper, lower));
 245   _entry_count = *p++;
 246   _bucket_count = *p++;
 247   _buckets = p;
 248   _table_end_offset = BUCKET_OFFSET(p[_bucket_count]); // located at the end of the bucket_info table
 249 
 250   juint *end = _buckets + _table_end_offset;
 251   return (const char*)end;
 252 }
 253 
 254 template <class T, class N> void CompactHashtable<T, N>::symbols_do(SymbolClosure *cl) {
 255   assert(!DumpSharedSpaces, "run-time only");
 256   for (juint i = 0; i < _bucket_count; i ++) {
 257     juint bucket_info = _buckets[i];
 258     juint bucket_offset = BUCKET_OFFSET(bucket_info);
 259     int   bucket_type = BUCKET_TYPE(bucket_info);
 260     juint* bucket = _buckets + bucket_offset;
 261     juint* bucket_end = _buckets;
 262 
 263     Symbol* sym;
 264     if (bucket_type == COMPACT_BUCKET_TYPE) {
 265       sym = (Symbol*)((void*)(_base_address + bucket[0]));
 266       cl->do_symbol(&sym);
 267     } else {
 268       bucket_end += BUCKET_OFFSET(_buckets[i + 1]);
 269       while (bucket < bucket_end) {
 270         sym = (Symbol*)((void*)(_base_address + bucket[1]));
 271         cl->do_symbol(&sym);
 272         bucket += 2;
 273       }
 274     }
 275   }
 276 }
 277 
 278 template <class T, class N> void CompactHashtable<T, N>::oops_do(OopClosure* f) {
 279   assert(!DumpSharedSpaces, "run-time only");
 280   assert(_type == _string_table || _bucket_count == 0, "sanity");
 281   for (juint i = 0; i < _bucket_count; i ++) {
 282     juint bucket_info = _buckets[i];
 283     juint bucket_offset = BUCKET_OFFSET(bucket_info);
 284     int   bucket_type = BUCKET_TYPE(bucket_info);
 285     juint* bucket = _buckets + bucket_offset;
 286     juint* bucket_end = _buckets;
 287 
 288     narrowOop o;
 289     if (bucket_type == COMPACT_BUCKET_TYPE) {
 290       o = (narrowOop)bucket[0];
 291       f->do_oop(&o);
 292     } else {
 293       bucket_end += BUCKET_OFFSET(_buckets[i + 1]);
 294       while (bucket < bucket_end) {
 295         o = (narrowOop)bucket[1];
 296         f->do_oop(&o);
 297         bucket += 2;
 298       }
 299     }
 300   }
 301 }
 302 
// Explicitly instantiate these types. The CompactHashtable member templates
// above are defined in this .cpp file, so the two specializations referenced
// in this file (Symbol* and oop) are instantiated here.
template class CompactHashtable<Symbol*, char>;
template class CompactHashtable<oop, char>;
 306 
// Fallback for platforms whose headers do not define O_BINARY, so that the
// open() call in HashtableTextDump can always pass it.
#ifndef O_BINARY       // if defined (Win32) use binary files.
#define O_BINARY 0     // otherwise do nothing.
#endif
 310 
 311 ////////////////////////////////////////////////////////
 312 //
 313 // HashtableTextDump
 314 //
 315 HashtableTextDump::HashtableTextDump(const char* filename) : _fd(-1) {
 316   struct stat st;
 317   if (os::stat(filename, &st) != 0) {
 318     quit("Unable to get hashtable dump file size", filename);
 319   }
 320   _size = st.st_size;
 321   _fd = open(filename, O_RDONLY | O_BINARY, 0);
 322   if (_fd < 0) {
 323     quit("Unable to open hashtable dump file", filename);
 324   }
 325   _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false);
 326   if (_base == NULL) {
 327     quit("Unable to map hashtable dump file", filename);
 328   }
 329   _p = _base;
 330   _end = _base + st.st_size;
 331   _filename = filename;
 332   _prefix_type = Unknown;
 333   _line_no = 1;
 334 }
 335 
 336 HashtableTextDump::~HashtableTextDump() {
 337   os::unmap_memory((char*)_base, _size);
 338   if (_fd >= 0) {
 339     close(_fd);
 340   }
 341 }
 342 
 343 void HashtableTextDump::quit(const char* err, const char* msg) {
 344   vm_exit_during_initialization(err, msg);
 345 }
 346 
 347 void HashtableTextDump::corrupted(const char *p, const char* msg) {
 348   char info[100];
 349   jio_snprintf(info, sizeof(info),
 350                "%s. Corrupted at line %d (file pos %d)",
 351                msg, _line_no, (int)(p - _base));
 352   quit(info, _filename);
 353 }
 354 
 355 bool HashtableTextDump::skip_newline() {
 356   if (_p[0] == '\r' && _p[1] == '\n') {
 357     _p += 2;
 358   } else if (_p[0] == '\n') {
 359     _p += 1;
 360   } else {
 361     corrupted(_p, "Unexpected character");
 362   }
 363   _line_no ++;
 364   return true;
 365 }
 366 
 367 int HashtableTextDump::skip(char must_be_char) {
 368   corrupted_if(remain() < 1);
 369   corrupted_if(*_p++ != must_be_char);
 370   return 0;
 371 }
 372 
 373 void HashtableTextDump::skip_past(char c) {
 374   for (;;) {
 375     corrupted_if(remain() < 1);
 376     if (*_p++ == c) {
 377       return;
 378     }
 379   }
 380 }
 381 
 382 void HashtableTextDump::check_version(const char* ver) {
 383   int len = (int)strlen(ver);
 384   corrupted_if(remain() < len);
 385   if (strncmp(_p, ver, len) != 0) {
 386     quit("wrong version of hashtable dump file", _filename);
 387   }
 388   _p += len;
 389   skip_newline();
 390 }
 391 
 392 void HashtableTextDump::scan_prefix_type() {
 393   _p ++;
 394   if (strncmp(_p, "SECTION: String", 15) == 0) {
 395     _p += 15;
 396     _prefix_type = StringPrefix;
 397   } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) {
 398     _p += 15;
 399     _prefix_type = SymbolPrefix;
 400   } else {
 401     _prefix_type = Unknown;
 402   }
 403   skip_newline();
 404 }
 405 
 406 int HashtableTextDump::scan_prefix(int* utf8_length) {
 407   if (*_p == '@') {
 408     scan_prefix_type();
 409   }
 410 
 411   switch (_prefix_type) {
 412   case SymbolPrefix:
 413     *utf8_length = scan_symbol_prefix(); break;
 414   case StringPrefix:
 415     *utf8_length = scan_string_prefix(); break;
 416   default:
 417     tty->print_cr("Shared input data type: Unknown.");
 418     corrupted(_p, "Unknown data type");
 419   }
 420 
 421   return _prefix_type;
 422 }
 423 
 424 int HashtableTextDump::scan_string_prefix() {
 425   // Expect /[0-9]+: /
 426   int utf8_length = 0;
 427   get_num(':', &utf8_length);
 428   if (*_p != ' ') {
 429     corrupted(_p, "Wrong prefix format for string");
 430   }
 431   _p++;
 432   return utf8_length;
 433 }
 434 
 435 int HashtableTextDump::scan_symbol_prefix() {
 436   // Expect /[0-9]+ (-|)[0-9]+: /
 437   int utf8_length = 0;
 438   get_num(' ', &utf8_length);
 439   if (*_p == '-') {
 440     _p++;
 441   }
 442   int ref_num;
 443   get_num(':', &ref_num);
 444   if (*_p != ' ') {
 445     corrupted(_p, "Wrong prefix format for symbol");
 446   }
 447   _p++;
 448   return utf8_length;
 449 }
 450 
 451 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) {
 452   jchar value = 0;
 453 
 454   corrupted_if(from + count > end);
 455 
 456   for (int i=0; i<count; i++) {
 457     char c = *from++;
 458     switch (c) {
 459     case '0': case '1': case '2': case '3': case '4':
 460     case '5': case '6': case '7': case '8': case '9':
 461       value = (value << 4) + c - '0';
 462       break;
 463     case 'a': case 'b': case 'c':
 464     case 'd': case 'e': case 'f':
 465       value = (value << 4) + 10 + c - 'a';
 466       break;
 467     case 'A': case 'B': case 'C':
 468     case 'D': case 'E': case 'F':
 469       value = (value << 4) + 10 + c - 'A';
 470       break;
 471     default:
 472       ShouldNotReachHere();
 473     }
 474   }
 475   return value;
 476 }
 477 
 478 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
 479   // cache in local vars
 480   const char* from = _p;
 481   const char* end = _end;
 482   char* to = utf8_buffer;
 483   int n = utf8_length;
 484 
 485   for (; n > 0 && from < end; n--) {
 486     if (*from != '\\') {
 487       *to++ = *from++;
 488     } else {
 489       corrupted_if(from + 2 > end);
 490       char c = from[1];
 491       from += 2;
 492       switch (c) {
 493       case 'x':
 494         {
 495           jchar value = unescape(from, end, 2);
 496           from += 2;
 497           assert(value <= 0xff, "sanity");
 498           *to++ = (char)(value & 0xff);
 499         }
 500         break;
 501       case 't':  *to++ = '\t'; break;
 502       case 'n':  *to++ = '\n'; break;
 503       case 'r':  *to++ = '\r'; break;
 504       case '\\': *to++ = '\\'; break;
 505       default:
 506         corrupted(_p, "Unsupported character");
 507       }
 508     }
 509   }
 510   corrupted_if(n > 0); // expected more chars but file has ended
 511   _p = from;
 512   skip_newline();
 513 }
 514 
 515 // NOTE: the content is NOT the same as
 516 // UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
 517 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
 518 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
 519 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
 520   const char *c = utf8_string;
 521   const char *end = c + utf8_length;
 522   for (; c < end; c++) {
 523     switch (*c) {
 524     case '\t': st->print("\\t"); break;
 525     case '\r': st->print("\\r"); break;
 526     case '\n': st->print("\\n"); break;
 527     case '\\': st->print("\\\\"); break;
 528     default:
 529       if (isprint(*c)) {
 530         st->print("%c", *c);
 531       } else {
 532         st->print("\\x%02x", ((unsigned int)*c) & 0xff);
 533       }
 534     }
 535   }
 536 }