1 /*
   2  * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "jvm.h"
  27 #include "classfile/compactHashtable.inline.hpp"
  28 #include "classfile/javaClasses.hpp"
  29 #include "logging/logMessage.hpp"
  30 #include "memory/heapShared.inline.hpp"
  31 #include "memory/metadataFactory.hpp"
  32 #include "memory/metaspaceShared.hpp"
  33 #include "oops/compressedOops.inline.hpp"
  34 #include "runtime/vmThread.hpp"
  35 #include "utilities/numberSeq.hpp"
  36 #include <sys/stat.h>
  37 
  38 /////////////////////////////////////////////////////
  39 //
  40 // The compact hash table writer implementations
  41 //
  42 CompactHashtableWriter::CompactHashtableWriter(int num_buckets,
  43                                                CompactHashtableStats* stats) {
  44   assert(DumpSharedSpaces, "dump-time only");
  45   assert(num_buckets > 0, "no buckets");
  46   _num_buckets = num_buckets;
  47   _num_entries = 0;
  48   _buckets = NEW_C_HEAP_ARRAY(GrowableArray<Entry>*, _num_buckets, mtSymbol);
  49   for (int i=0; i<_num_buckets; i++) {
  50     _buckets[i] = new (ResourceObj::C_HEAP, mtSymbol) GrowableArray<Entry>(0, true, mtSymbol);
  51   }
  52 
  53   _stats = stats;
  54   _compact_buckets = NULL;
  55   _compact_entries = NULL;
  56   _num_empty_buckets = 0;
  57   _num_value_only_buckets = 0;
  58   _num_other_buckets = 0;
  59 }
  60 
  61 CompactHashtableWriter::~CompactHashtableWriter() {
  62   for (int index = 0; index < _num_buckets; index++) {
  63     GrowableArray<Entry>* bucket = _buckets[index];
  64     delete bucket;
  65   }
  66 
  67   FREE_C_HEAP_ARRAY(GrowableArray<Entry>*, _buckets);
  68 }
  69 
  70 // Add a symbol entry to the temporary hash table
  71 void CompactHashtableWriter::add(unsigned int hash, u4 value) {
  72   int index = hash % _num_buckets;
  73   _buckets[index]->append_if_missing(Entry(hash, value));
  74   _num_entries++;
  75 }
  76 
  77 void CompactHashtableWriter::allocate_table() {
  78   int entries_space = 0;
  79   for (int index = 0; index < _num_buckets; index++) {
  80     GrowableArray<Entry>* bucket = _buckets[index];
  81     int bucket_size = bucket->length();
  82     if (bucket_size == 1) {
  83       entries_space++;
  84     } else {
  85       entries_space += 2 * bucket_size;
  86     }
  87   }
  88 
  89   if (entries_space & ~BUCKET_OFFSET_MASK) {
  90     vm_exit_during_initialization("CompactHashtableWriter::allocate_table: Overflow! "
  91                                   "Too many entries.");
  92   }
  93 
  94   _compact_buckets = MetaspaceShared::new_ro_array<u4>(_num_buckets + 1);
  95   _compact_entries = MetaspaceShared::new_ro_array<u4>(entries_space);
  96 
  97   _stats->bucket_count    = _num_buckets;
  98   _stats->bucket_bytes    = _compact_buckets->size() * BytesPerWord;
  99   _stats->hashentry_count = _num_entries;
 100   _stats->hashentry_bytes = _compact_entries->size() * BytesPerWord;
 101 }
 102 
 103 // Write the compact table's buckets
 104 void CompactHashtableWriter::dump_table(NumberSeq* summary) {
 105   u4 offset = 0;
 106   for (int index = 0; index < _num_buckets; index++) {
 107     GrowableArray<Entry>* bucket = _buckets[index];
 108     int bucket_size = bucket->length();
 109     if (bucket_size == 1) {
 110       // bucket with one entry is compacted and only has the symbol offset
 111       _compact_buckets->at_put(index, BUCKET_INFO(offset, VALUE_ONLY_BUCKET_TYPE));
 112 
 113       Entry ent = bucket->at(0);
 114       _compact_entries->at_put(offset++, ent.value());
 115       _num_value_only_buckets++;
 116     } else {
 117       // regular bucket, each entry is a symbol (hash, offset) pair
 118       _compact_buckets->at_put(index, BUCKET_INFO(offset, REGULAR_BUCKET_TYPE));
 119 
 120       for (int i=0; i<bucket_size; i++) {
 121         Entry ent = bucket->at(i);
 122         _compact_entries->at_put(offset++, u4(ent.hash())); // write entry hash
 123         _compact_entries->at_put(offset++, ent.value());
 124       }
 125       if (bucket_size == 0) {
 126         _num_empty_buckets++;
 127       } else {
 128         _num_other_buckets++;
 129       }
 130     }
 131     summary->add(bucket_size);
 132   }
 133 
 134   // Mark the end of the buckets
 135   _compact_buckets->at_put(_num_buckets, BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE));
 136   assert(offset == (u4)_compact_entries->length(), "sanity");
 137 }
 138 
 139 
 140 // Write the compact table
 141 void CompactHashtableWriter::dump(SimpleCompactHashtable *cht, const char* table_name) {
 142   NumberSeq summary;
 143   allocate_table();
 144   dump_table(&summary);
 145 
 146   int table_bytes = _stats->bucket_bytes + _stats->hashentry_bytes;
 147   address base_address = address(MetaspaceShared::shared_rs()->base());
 148   cht->init(base_address,  _num_entries, _num_buckets,
 149             _compact_buckets->data(), _compact_entries->data());
 150 
 151   LogMessage(cds, hashtables) msg;
 152   if (msg.is_info()) {
 153     double avg_cost = 0.0;
 154     if (_num_entries > 0) {
 155       avg_cost = double(table_bytes)/double(_num_entries);
 156     }
 157     msg.info("Shared %s table stats -------- base: " PTR_FORMAT,
 158                          table_name, (intptr_t)base_address);
 159     msg.info("Number of entries       : %9d", _num_entries);
 160     msg.info("Total bytes used        : %9d", table_bytes);
 161     msg.info("Average bytes per entry : %9.3f", avg_cost);
 162     msg.info("Average bucket size     : %9.3f", summary.avg());
 163     msg.info("Variance of bucket size : %9.3f", summary.variance());
 164     msg.info("Std. dev. of bucket size: %9.3f", summary.sd());
 165     msg.info("Empty buckets           : %9d", _num_empty_buckets);
 166     msg.info("Value_Only buckets      : %9d", _num_value_only_buckets);
 167     msg.info("Other buckets           : %9d", _num_other_buckets);
 168   }
 169 }
 170 
 171 /////////////////////////////////////////////////////////////
 172 //
 173 // Customization for dumping Symbol and String tables
 174 
 175 void CompactSymbolTableWriter::add(unsigned int hash, Symbol *symbol) {
 176   uintx deltax = MetaspaceShared::object_delta(symbol);
 177   // When the symbols are stored into the archive, we already check that
 178   // they won't be more than MAX_SHARED_DELTA from the base address, or
 179   // else the dumping would have been aborted.
 180   assert(deltax <= MAX_SHARED_DELTA, "must not be");
 181   u4 delta = u4(deltax);
 182 
 183   CompactHashtableWriter::add(hash, delta);
 184 }
 185 
 186 void CompactStringTableWriter::add(unsigned int hash, oop string) {
 187   CompactHashtableWriter::add(hash, CompressedOops::encode(string));
 188 }
 189 
 190 void CompactSymbolTableWriter::dump(CompactHashtable<Symbol*, char> *cht) {
 191   CompactHashtableWriter::dump(cht, "symbol");
 192 }
 193 
 194 void CompactStringTableWriter::dump(CompactHashtable<oop, char> *cht) {
 195   CompactHashtableWriter::dump(cht, "string");
 196 }
 197 
 198 /////////////////////////////////////////////////////////////
 199 //
 200 // The CompactHashtable implementation
 201 //
 202 
 203 void SimpleCompactHashtable::serialize(SerializeClosure* soc) {
 204   soc->do_ptr((void**)&_base_address);
 205   soc->do_u4(&_entry_count);
 206   soc->do_u4(&_bucket_count);
 207   soc->do_ptr((void**)&_buckets);
 208   soc->do_ptr((void**)&_entries);
 209 }
 210 
 211 bool SimpleCompactHashtable::exists(u4 value) {
 212   assert(!DumpSharedSpaces, "run-time only");
 213 
 214   if (_entry_count == 0) {
 215     return false;
 216   }
 217 
 218   unsigned int hash = (unsigned int)value;
 219   int index = hash % _bucket_count;
 220   u4 bucket_info = _buckets[index];
 221   u4 bucket_offset = BUCKET_OFFSET(bucket_info);
 222   int bucket_type = BUCKET_TYPE(bucket_info);
 223   u4* entry = _entries + bucket_offset;
 224 
 225   if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
 226     return (entry[0] == value);
 227   } else {
 228     u4*entry_max = _entries + BUCKET_OFFSET(_buckets[index + 1]);
 229     while (entry <entry_max) {
 230       if (entry[1] == value) {
 231         return true;
 232       }
 233       entry += 2;
 234     }
 235     return false;
 236   }
 237 }
 238 
 239 template <class I>
 240 inline void SimpleCompactHashtable::iterate(const I& iterator) {
 241   for (u4 i = 0; i < _bucket_count; i++) {
 242     u4 bucket_info = _buckets[i];
 243     u4 bucket_offset = BUCKET_OFFSET(bucket_info);
 244     int bucket_type = BUCKET_TYPE(bucket_info);
 245     u4* entry = _entries + bucket_offset;
 246 
 247     if (bucket_type == VALUE_ONLY_BUCKET_TYPE) {
 248       iterator.do_value(_base_address, entry[0]);
 249     } else {
 250       u4*entry_max = _entries + BUCKET_OFFSET(_buckets[i + 1]);
 251       while (entry < entry_max) {
 252         iterator.do_value(_base_address, entry[1]);
 253         entry += 2;
 254       }
 255     }
 256   }
 257 }
 258 
 259 template <class T, class N> void CompactHashtable<T, N>::serialize(SerializeClosure* soc) {
 260   SimpleCompactHashtable::serialize(soc);
 261   soc->do_u4(&_type);
 262 }
 263 
 264 class CompactHashtable_SymbolIterator {
 265   SymbolClosure* const _closure;
 266 public:
 267   CompactHashtable_SymbolIterator(SymbolClosure *cl) : _closure(cl) {}
 268   inline void do_value(address base_address, u4 offset) const {
 269     Symbol* sym = (Symbol*)((void*)(base_address + offset));
 270     _closure->do_symbol(&sym);
 271   }
 272 };
 273 
 274 template <class T, class N> void CompactHashtable<T, N>::symbols_do(SymbolClosure *cl) {
 275   CompactHashtable_SymbolIterator iterator(cl);
 276   iterate(iterator);
 277 }
 278 
 279 class CompactHashtable_OopIterator {
 280   OopClosure* const _closure;
 281 public:
 282   CompactHashtable_OopIterator(OopClosure *cl) : _closure(cl) {}
 283   inline void do_value(address base_address, u4 offset) const {
 284     narrowOop v = (narrowOop)offset;
 285     oop obj = HeapShared::decode_with_archived_oop_encoding_mode(v);
 286     _closure->do_oop(&obj);
 287   }
 288 };
 289 
 290 template <class T, class N> void CompactHashtable<T, N>::oops_do(OopClosure* cl) {
 291   assert(_type == _string_table || _bucket_count == 0, "sanity");
 292   CompactHashtable_OopIterator iterator(cl);
 293   iterate(iterator);
 294 }
 295 
// Explicitly instantiate these types: the CompactHashtable member
// templates are defined in this file, so the Symbol* and oop variants are
// instantiated here.
template class CompactHashtable<Symbol*, char>;
template class CompactHashtable<oop, char>;

// O_BINARY is OR-ed into the open() flags by the HashtableTextDump
// constructor. Where the platform headers do not define it, define it as
// 0 so that it has no effect.
#ifndef O_BINARY       // if defined (Win32) use binary files.
#define O_BINARY 0     // otherwise do nothing.
#endif
 303 
 304 ////////////////////////////////////////////////////////
 305 //
 306 // HashtableTextDump
 307 //
 308 HashtableTextDump::HashtableTextDump(const char* filename) : _fd(-1) {
 309   struct stat st;
 310   if (os::stat(filename, &st) != 0) {
 311     quit("Unable to get hashtable dump file size", filename);
 312   }
 313   _size = st.st_size;
 314   _fd = open(filename, O_RDONLY | O_BINARY, 0);
 315   if (_fd < 0) {
 316     quit("Unable to open hashtable dump file", filename);
 317   }
 318   _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false);
 319   if (_base == NULL) {
 320     quit("Unable to map hashtable dump file", filename);
 321   }
 322   _p = _base;
 323   _end = _base + st.st_size;
 324   _filename = filename;
 325   _prefix_type = Unknown;
 326   _line_no = 1;
 327 }
 328 
 329 HashtableTextDump::~HashtableTextDump() {
 330   os::unmap_memory((char*)_base, _size);
 331   if (_fd >= 0) {
 332     close(_fd);
 333   }
 334 }
 335 
 336 void HashtableTextDump::quit(const char* err, const char* msg) {
 337   vm_exit_during_initialization(err, msg);
 338 }
 339 
 340 void HashtableTextDump::corrupted(const char *p, const char* msg) {
 341   char info[100];
 342   jio_snprintf(info, sizeof(info),
 343                "%s. Corrupted at line %d (file pos %d)",
 344                msg, _line_no, (int)(p - _base));
 345   quit(info, _filename);
 346 }
 347 
 348 bool HashtableTextDump::skip_newline() {
 349   if (_p[0] == '\r' && _p[1] == '\n') {
 350     _p += 2;
 351   } else if (_p[0] == '\n') {
 352     _p += 1;
 353   } else {
 354     corrupted(_p, "Unexpected character");
 355   }
 356   _line_no++;
 357   return true;
 358 }
 359 
 360 int HashtableTextDump::skip(char must_be_char) {
 361   corrupted_if(remain() < 1, "Truncated");
 362   corrupted_if(*_p++ != must_be_char, "Unexpected character");
 363   return 0;
 364 }
 365 
 366 void HashtableTextDump::skip_past(char c) {
 367   for (;;) {
 368     corrupted_if(remain() < 1, "Truncated");
 369     if (*_p++ == c) {
 370       return;
 371     }
 372   }
 373 }
 374 
 375 void HashtableTextDump::check_version(const char* ver) {
 376   int len = (int)strlen(ver);
 377   corrupted_if(remain() < len, "Truncated");
 378   if (strncmp(_p, ver, len) != 0) {
 379     quit("wrong version of hashtable dump file", _filename);
 380   }
 381   _p += len;
 382   skip_newline();
 383 }
 384 
 385 void HashtableTextDump::scan_prefix_type() {
 386   _p++;
 387   if (strncmp(_p, "SECTION: String", 15) == 0) {
 388     _p += 15;
 389     _prefix_type = StringPrefix;
 390   } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) {
 391     _p += 15;
 392     _prefix_type = SymbolPrefix;
 393   } else {
 394     _prefix_type = Unknown;
 395   }
 396   skip_newline();
 397 }
 398 
 399 int HashtableTextDump::scan_prefix(int* utf8_length) {
 400   if (*_p == '@') {
 401     scan_prefix_type();
 402   }
 403 
 404   switch (_prefix_type) {
 405   case SymbolPrefix:
 406     *utf8_length = scan_symbol_prefix(); break;
 407   case StringPrefix:
 408     *utf8_length = scan_string_prefix(); break;
 409   default:
 410     tty->print_cr("Shared input data type: Unknown.");
 411     corrupted(_p, "Unknown data type");
 412   }
 413 
 414   return _prefix_type;
 415 }
 416 
 417 int HashtableTextDump::scan_string_prefix() {
 418   // Expect /[0-9]+: /
 419   int utf8_length = 0;
 420   get_num(':', &utf8_length);
 421   if (*_p != ' ') {
 422     corrupted(_p, "Wrong prefix format for string");
 423   }
 424   _p++;
 425   return utf8_length;
 426 }
 427 
 428 int HashtableTextDump::scan_symbol_prefix() {
 429   // Expect /[0-9]+ (-|)[0-9]+: /
 430   int utf8_length = 0;
 431   get_num(' ', &utf8_length);
 432   if (*_p == '-') {
 433     _p++;
 434   }
 435   int ref_num;
 436   get_num(':', &ref_num);
 437   if (*_p != ' ') {
 438     corrupted(_p, "Wrong prefix format for symbol");
 439   }
 440   _p++;
 441   return utf8_length;
 442 }
 443 
 444 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) {
 445   jchar value = 0;
 446 
 447   corrupted_if(from + count > end, "Truncated");
 448 
 449   for (int i=0; i<count; i++) {
 450     char c = *from++;
 451     switch (c) {
 452     case '0': case '1': case '2': case '3': case '4':
 453     case '5': case '6': case '7': case '8': case '9':
 454       value = (value << 4) + c - '0';
 455       break;
 456     case 'a': case 'b': case 'c':
 457     case 'd': case 'e': case 'f':
 458       value = (value << 4) + 10 + c - 'a';
 459       break;
 460     case 'A': case 'B': case 'C':
 461     case 'D': case 'E': case 'F':
 462       value = (value << 4) + 10 + c - 'A';
 463       break;
 464     default:
 465       ShouldNotReachHere();
 466     }
 467   }
 468   return value;
 469 }
 470 
 471 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
 472   // cache in local vars
 473   const char* from = _p;
 474   const char* end = _end;
 475   char* to = utf8_buffer;
 476   int n = utf8_length;
 477 
 478   for (; n > 0 && from < end; n--) {
 479     if (*from != '\\') {
 480       *to++ = *from++;
 481     } else {
 482       corrupted_if(from + 2 > end, "Truncated");
 483       char c = from[1];
 484       from += 2;
 485       switch (c) {
 486       case 'x':
 487         {
 488           jchar value = unescape(from, end, 2);
 489           from += 2;
 490           assert(value <= 0xff, "sanity");
 491           *to++ = (char)(value & 0xff);
 492         }
 493         break;
 494       case 't':  *to++ = '\t'; break;
 495       case 'n':  *to++ = '\n'; break;
 496       case 'r':  *to++ = '\r'; break;
 497       case '\\': *to++ = '\\'; break;
 498       default:
 499         corrupted(_p, "Unsupported character");
 500       }
 501     }
 502   }
 503   corrupted_if(n > 0, "Truncated"); // expected more chars but file has ended
 504   _p = from;
 505   skip_newline();
 506 }
 507 
 508 // NOTE: the content is NOT the same as
 509 // UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
 510 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
 511 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
 512 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
 513   const char *c = utf8_string;
 514   const char *end = c + utf8_length;
 515   for (; c < end; c++) {
 516     switch (*c) {
 517     case '\t': st->print("\\t"); break;
 518     case '\r': st->print("\\r"); break;
 519     case '\n': st->print("\\n"); break;
 520     case '\\': st->print("\\\\"); break;
 521     default:
 522       if (isprint(*c)) {
 523         st->print("%c", *c);
 524       } else {
 525         st->print("\\x%02x", ((unsigned int)*c) & 0xff);
 526       }
 527     }
 528   }
 529 }