1 /*
   2  * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "jvm.h"
  27 #include "classfile/compactHashtable.hpp"
  28 #include "classfile/javaClasses.hpp"
  29 #include "logging/logMessage.hpp"
  30 #include "memory/dynamicArchive.hpp"
  31 #include "memory/heapShared.inline.hpp"
  32 #include "memory/metadataFactory.hpp"
  33 #include "memory/metaspaceShared.hpp"
  34 #include "runtime/globals.hpp"
  35 #include "runtime/vmThread.hpp"
  36 #include "utilities/numberSeq.hpp"
  37 #include <sys/stat.h>
  38 
  39 #if INCLUDE_CDS
  40 /////////////////////////////////////////////////////
  41 //
  42 // The compact hash table writer implementations
  43 //
  44 CompactHashtableWriter::CompactHashtableWriter(int num_entries,
  45                                                CompactHashtableStats* stats) {
  46   Arguments::assert_is_dumping_archive();
  47   assert(num_entries >= 0, "sanity");
  48   _num_buckets = calculate_num_buckets(num_entries);
  49   assert(_num_buckets > 0, "no buckets");
  50 
  51   _num_entries_written = 0;
  52   _buckets = NEW_C_HEAP_ARRAY(GrowableArray<Entry>*, _num_buckets, mtSymbol);
  53   for (int i=0; i<_num_buckets; i++) {
  54     _buckets[i] = new (ResourceObj::C_HEAP, mtSymbol) GrowableArray<Entry>(0, true, mtSymbol);
  55   }
  56 
  57   _stats = stats;
  58   _compact_buckets = NULL;
  59   _compact_entries = NULL;
  60   _num_empty_buckets = 0;
  61   _num_value_only_buckets = 0;
  62   _num_other_buckets = 0;
  63 }
  64 
  65 CompactHashtableWriter::~CompactHashtableWriter() {
  66   for (int index = 0; index < _num_buckets; index++) {
  67     GrowableArray<Entry>* bucket = _buckets[index];
  68     delete bucket;
  69   }
  70 
  71   FREE_C_HEAP_ARRAY(GrowableArray<Entry>*, _buckets);
  72 }
  73 
  74 size_t CompactHashtableWriter::estimate_size(int num_entries) {
  75   int num_buckets = calculate_num_buckets(num_entries);
  76   size_t bucket_bytes = MetaspaceShared::ro_array_bytesize<u4>(num_buckets + 1);
  77 
  78   // In worst case, we have no VALUE_ONLY_BUCKET_TYPE, so each entry takes 2 slots
  79   int entries_space = 2 * num_entries;
  80   size_t entry_bytes = MetaspaceShared::ro_array_bytesize<u4>(entries_space);
  81 
  82   return bucket_bytes
  83        + entry_bytes
  84        + SimpleCompactHashtable::calculate_header_size();
  85 }
  86 
  87 // Add a symbol entry to the temporary hash table
  88 void CompactHashtableWriter::add(unsigned int hash, u4 value) {
  89   int index = hash % _num_buckets;
  90   _buckets[index]->append_if_missing(Entry(hash, value));
  91   _num_entries_written++;
  92 }
  93 
  94 void CompactHashtableWriter::allocate_table() {
  95   int entries_space = 0;
  96   for (int index = 0; index < _num_buckets; index++) {
  97     GrowableArray<Entry>* bucket = _buckets[index];
  98     int bucket_size = bucket->length();
  99     if (bucket_size == 1) {
 100       entries_space++;
 101     } else if (bucket_size > 1) {
 102       entries_space += 2 * bucket_size;
 103     }
 104   }
 105 
 106   if (entries_space & ~BUCKET_OFFSET_MASK) {
 107     vm_exit_during_initialization("CompactHashtableWriter::allocate_table: Overflow! "
 108                                   "Too many entries.");
 109   }
 110 
 111   _compact_buckets = MetaspaceShared::new_ro_array<u4>(_num_buckets + 1);
 112   _compact_entries = MetaspaceShared::new_ro_array<u4>(entries_space);
 113 
 114   _stats->bucket_count    = _num_buckets;
 115   _stats->bucket_bytes    = _compact_buckets->size() * BytesPerWord;
 116   _stats->hashentry_count = _num_entries_written;
 117   _stats->hashentry_bytes = _compact_entries->size() * BytesPerWord;
 118 }
 119 
 120 // Write the compact table's buckets
 121 void CompactHashtableWriter::dump_table(NumberSeq* summary) {
 122   u4 offset = 0;
 123   for (int index = 0; index < _num_buckets; index++) {
 124     GrowableArray<Entry>* bucket = _buckets[index];
 125     int bucket_size = bucket->length();
 126     if (bucket_size == 1) {
 127       // bucket with one entry is compacted and only has the symbol offset
 128       _compact_buckets->at_put(index, BUCKET_INFO(offset, VALUE_ONLY_BUCKET_TYPE));
 129 
 130       Entry ent = bucket->at(0);
 131       _compact_entries->at_put(offset++, ent.value());
 132       _num_value_only_buckets++;
 133     } else {
 134       // regular bucket, each entry is a symbol (hash, offset) pair
 135       _compact_buckets->at_put(index, BUCKET_INFO(offset, REGULAR_BUCKET_TYPE));
 136 
 137       for (int i=0; i<bucket_size; i++) {
 138         Entry ent = bucket->at(i);
 139         _compact_entries->at_put(offset++, u4(ent.hash())); // write entry hash
 140         _compact_entries->at_put(offset++, ent.value());
 141       }
 142       if (bucket_size == 0) {
 143         _num_empty_buckets++;
 144       } else {
 145         _num_other_buckets++;
 146       }
 147     }
 148     summary->add(bucket_size);
 149   }
 150 
 151   // Mark the end of the buckets
 152   _compact_buckets->at_put(_num_buckets, BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE));
 153   assert(offset == (u4)_compact_entries->length(), "sanity");
 154 }
 155 
 156 
 157 // Write the compact table
 158 void CompactHashtableWriter::dump(SimpleCompactHashtable *cht, const char* table_name) {
 159   NumberSeq summary;
 160   allocate_table();
 161   dump_table(&summary);
 162 
 163   int table_bytes = _stats->bucket_bytes + _stats->hashentry_bytes;
 164   address base_address = address(SharedBaseAddress);
 165   cht->init(base_address,  _num_entries_written, _num_buckets,
 166             _compact_buckets->data(), _compact_entries->data());
 167 
 168   LogMessage(cds, hashtables) msg;
 169   if (msg.is_info()) {
 170     double avg_cost = 0.0;
 171     if (_num_entries_written > 0) {
 172       avg_cost = double(table_bytes)/double(_num_entries_written);
 173     }
 174     msg.info("Shared %s table stats -------- base: " PTR_FORMAT,
 175                          table_name, (intptr_t)base_address);
 176     msg.info("Number of entries       : %9d", _num_entries_written);
 177     msg.info("Total bytes used        : %9d", table_bytes);
 178     msg.info("Average bytes per entry : %9.3f", avg_cost);
 179     msg.info("Average bucket size     : %9.3f", summary.avg());
 180     msg.info("Variance of bucket size : %9.3f", summary.variance());
 181     msg.info("Std. dev. of bucket size: %9.3f", summary.sd());
 182     msg.info("Maximum bucket size     : %9d", (int)summary.maximum());
 183     msg.info("Empty buckets           : %9d", _num_empty_buckets);
 184     msg.info("Value_Only buckets      : %9d", _num_value_only_buckets);
 185     msg.info("Other buckets           : %9d", _num_other_buckets);
 186   }
 187 }
 188 
 189 /////////////////////////////////////////////////////////////
 190 //
 191 // The CompactHashtable implementation
 192 //
 193 
 194 void SimpleCompactHashtable::init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) {
 195   _bucket_count = bucket_count;
 196   _entry_count = entry_count;
 197   _base_address = base_address;
 198   if (DynamicDumpSharedSpaces) {
 199     _buckets = DynamicArchive::buffer_to_target(buckets);
 200     _entries = DynamicArchive::buffer_to_target(entries);
 201   } else {
 202     _buckets = buckets;
 203     _entries = entries;
 204   }
 205 }
 206 
 207 size_t SimpleCompactHashtable::calculate_header_size() {
 208   // We have 5 fields. Each takes up sizeof(intptr_t). See WriteClosure::do_u4
 209   size_t bytes = sizeof(intptr_t) * 5;
 210   return bytes;
 211 }
 212 
 213 void SimpleCompactHashtable::serialize_header(SerializeClosure* soc) {
 214   // NOTE: if you change this function, you MUST change the number 5 in
 215   // calculate_header_size() accordingly.
 216   soc->do_u4(&_entry_count);
 217   soc->do_u4(&_bucket_count);
 218   soc->do_ptr((void**)&_buckets);
 219   soc->do_ptr((void**)&_entries);
 220   if (soc->reading()) {
 221     _base_address = (address)SharedBaseAddress;
 222   }
 223 }
 224 #endif // INCLUDE_CDS
 225 
 226 #ifndef O_BINARY       // if defined (Win32) use binary files.
 227 #define O_BINARY 0     // otherwise do nothing.
 228 #endif
 229 
 230 ////////////////////////////////////////////////////////
 231 //
 232 // HashtableTextDump
 233 //
 234 HashtableTextDump::HashtableTextDump(const char* filename) : _fd(-1) {
 235   struct stat st;
 236   if (os::stat(filename, &st) != 0) {
 237     quit("Unable to get hashtable dump file size", filename);
 238   }
 239   _size = st.st_size;
 240   _fd = os::open(filename, O_RDONLY | O_BINARY, 0);
 241   if (_fd < 0) {
 242     quit("Unable to open hashtable dump file", filename);
 243   }
 244   _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false);
 245   if (_base == NULL) {
 246     quit("Unable to map hashtable dump file", filename);
 247   }
 248   _p = _base;
 249   _end = _base + st.st_size;
 250   _filename = filename;
 251   _prefix_type = Unknown;
 252   _line_no = 1;
 253 }
 254 
 255 HashtableTextDump::~HashtableTextDump() {
 256   os::unmap_memory((char*)_base, _size);
 257   if (_fd >= 0) {
 258     close(_fd);
 259   }
 260 }
 261 
 262 void HashtableTextDump::quit(const char* err, const char* msg) {
 263   vm_exit_during_initialization(err, msg);
 264 }
 265 
 266 void HashtableTextDump::corrupted(const char *p, const char* msg) {
 267   char info[100];
 268   jio_snprintf(info, sizeof(info),
 269                "%s. Corrupted at line %d (file pos %d)",
 270                msg, _line_no, (int)(p - _base));
 271   quit(info, _filename);
 272 }
 273 
 274 bool HashtableTextDump::skip_newline() {
 275   if (_p[0] == '\r' && _p[1] == '\n') {
 276     _p += 2;
 277   } else if (_p[0] == '\n') {
 278     _p += 1;
 279   } else {
 280     corrupted(_p, "Unexpected character");
 281   }
 282   _line_no++;
 283   return true;
 284 }
 285 
 286 int HashtableTextDump::skip(char must_be_char) {
 287   corrupted_if(remain() < 1, "Truncated");
 288   corrupted_if(*_p++ != must_be_char, "Unexpected character");
 289   return 0;
 290 }
 291 
 292 void HashtableTextDump::skip_past(char c) {
 293   for (;;) {
 294     corrupted_if(remain() < 1, "Truncated");
 295     if (*_p++ == c) {
 296       return;
 297     }
 298   }
 299 }
 300 
 301 void HashtableTextDump::check_version(const char* ver) {
 302   int len = (int)strlen(ver);
 303   corrupted_if(remain() < len, "Truncated");
 304   if (strncmp(_p, ver, len) != 0) {
 305     quit("wrong version of hashtable dump file", _filename);
 306   }
 307   _p += len;
 308   skip_newline();
 309 }
 310 
 311 void HashtableTextDump::scan_prefix_type() {
 312   _p++;
 313   if (strncmp(_p, "SECTION: String", 15) == 0) {
 314     _p += 15;
 315     _prefix_type = StringPrefix;
 316   } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) {
 317     _p += 15;
 318     _prefix_type = SymbolPrefix;
 319   } else {
 320     _prefix_type = Unknown;
 321   }
 322   skip_newline();
 323 }
 324 
 325 int HashtableTextDump::scan_prefix(int* utf8_length) {
 326   if (*_p == '@') {
 327     scan_prefix_type();
 328   }
 329 
 330   switch (_prefix_type) {
 331   case SymbolPrefix:
 332     *utf8_length = scan_symbol_prefix(); break;
 333   case StringPrefix:
 334     *utf8_length = scan_string_prefix(); break;
 335   default:
 336     tty->print_cr("Shared input data type: Unknown.");
 337     corrupted(_p, "Unknown data type");
 338   }
 339 
 340   return _prefix_type;
 341 }
 342 
 343 int HashtableTextDump::scan_string_prefix() {
 344   // Expect /[0-9]+: /
 345   int utf8_length = 0;
 346   get_num(':', &utf8_length);
 347   if (*_p != ' ') {
 348     corrupted(_p, "Wrong prefix format for string");
 349   }
 350   _p++;
 351   return utf8_length;
 352 }
 353 
 354 int HashtableTextDump::scan_symbol_prefix() {
 355   // Expect /[0-9]+ (-|)[0-9]+: /
 356   int utf8_length = 0;
 357   get_num(' ', &utf8_length);
 358   if (*_p == '-') {
 359     _p++;
 360   }
 361   int ref_num;
 362   get_num(':', &ref_num);
 363   if (*_p != ' ') {
 364     corrupted(_p, "Wrong prefix format for symbol");
 365   }
 366   _p++;
 367   return utf8_length;
 368 }
 369 
 370 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) {
 371   jchar value = 0;
 372 
 373   corrupted_if(from + count > end, "Truncated");
 374 
 375   for (int i=0; i<count; i++) {
 376     char c = *from++;
 377     switch (c) {
 378     case '0': case '1': case '2': case '3': case '4':
 379     case '5': case '6': case '7': case '8': case '9':
 380       value = (value << 4) + c - '0';
 381       break;
 382     case 'a': case 'b': case 'c':
 383     case 'd': case 'e': case 'f':
 384       value = (value << 4) + 10 + c - 'a';
 385       break;
 386     case 'A': case 'B': case 'C':
 387     case 'D': case 'E': case 'F':
 388       value = (value << 4) + 10 + c - 'A';
 389       break;
 390     default:
 391       ShouldNotReachHere();
 392     }
 393   }
 394   return value;
 395 }
 396 
 397 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
 398   // cache in local vars
 399   const char* from = _p;
 400   const char* end = _end;
 401   char* to = utf8_buffer;
 402   int n = utf8_length;
 403 
 404   for (; n > 0 && from < end; n--) {
 405     if (*from != '\\') {
 406       *to++ = *from++;
 407     } else {
 408       corrupted_if(from + 2 > end, "Truncated");
 409       char c = from[1];
 410       from += 2;
 411       switch (c) {
 412       case 'x':
 413         {
 414           jchar value = unescape(from, end, 2);
 415           from += 2;
 416           assert(value <= 0xff, "sanity");
 417           *to++ = (char)(value & 0xff);
 418         }
 419         break;
 420       case 't':  *to++ = '\t'; break;
 421       case 'n':  *to++ = '\n'; break;
 422       case 'r':  *to++ = '\r'; break;
 423       case '\\': *to++ = '\\'; break;
 424       default:
 425         corrupted(_p, "Unsupported character");
 426       }
 427     }
 428   }
 429   corrupted_if(n > 0, "Truncated"); // expected more chars but file has ended
 430   _p = from;
 431   skip_newline();
 432 }
 433 
 434 // NOTE: the content is NOT the same as
 435 // UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
 436 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
 437 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
 438 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
 439   const char *c = utf8_string;
 440   const char *end = c + utf8_length;
 441   for (; c < end; c++) {
 442     switch (*c) {
 443     case '\t': st->print("\\t"); break;
 444     case '\r': st->print("\\r"); break;
 445     case '\n': st->print("\\n"); break;
 446     case '\\': st->print("\\\\"); break;
 447     default:
 448       if (isprint(*c)) {
 449         st->print("%c", *c);
 450       } else {
 451         st->print("\\x%02x", ((unsigned int)*c) & 0xff);
 452       }
 453     }
 454   }
 455 }