1 /*
   2  * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "jvm.h"
  27 #include "classfile/compactHashtable.hpp"
  28 #include "classfile/javaClasses.hpp"
  29 #include "logging/logMessage.hpp"
  30 #include "memory/heapShared.inline.hpp"
  31 #include "memory/metadataFactory.hpp"
  32 #include "memory/metaspaceShared.hpp"
  33 #include "oops/compressedOops.inline.hpp"
  34 #include "runtime/vmThread.hpp"
  35 #include "utilities/numberSeq.hpp"
  36 #include <sys/stat.h>
  37 
  38 #if INCLUDE_CDS
  39 /////////////////////////////////////////////////////
  40 //
  41 // The compact hash table writer implementations
  42 //
  43 CompactHashtableWriter::CompactHashtableWriter(int num_buckets,
  44                                                CompactHashtableStats* stats) {
  45   assert(DumpSharedSpaces, "dump-time only");
  46   assert(num_buckets > 0, "no buckets");
  47   _num_buckets = num_buckets;
  48   _num_entries = 0;
  49   _buckets = NEW_C_HEAP_ARRAY(GrowableArray<Entry>*, _num_buckets, mtSymbol);
  50   for (int i=0; i<_num_buckets; i++) {
  51     _buckets[i] = new (ResourceObj::C_HEAP, mtSymbol) GrowableArray<Entry>(0, true, mtSymbol);
  52   }
  53 
  54   _stats = stats;
  55   _compact_buckets = NULL;
  56   _compact_entries = NULL;
  57   _num_empty_buckets = 0;
  58   _num_value_only_buckets = 0;
  59   _num_other_buckets = 0;
  60 }
  61 
  62 CompactHashtableWriter::~CompactHashtableWriter() {
  63   for (int index = 0; index < _num_buckets; index++) {
  64     GrowableArray<Entry>* bucket = _buckets[index];
  65     delete bucket;
  66   }
  67 
  68   FREE_C_HEAP_ARRAY(GrowableArray<Entry>*, _buckets);
  69 }
  70 
  71 // Add a symbol entry to the temporary hash table
  72 void CompactHashtableWriter::add(unsigned int hash, u4 value) {
  73   int index = hash % _num_buckets;
  74   _buckets[index]->append_if_missing(Entry(hash, value));
  75   _num_entries++;
  76 }
  77 
  78 void CompactHashtableWriter::allocate_table() {
  79   int entries_space = 0;
  80   for (int index = 0; index < _num_buckets; index++) {
  81     GrowableArray<Entry>* bucket = _buckets[index];
  82     int bucket_size = bucket->length();
  83     if (bucket_size == 1) {
  84       entries_space++;
  85     } else {
  86       entries_space += 2 * bucket_size;
  87     }
  88   }
  89 
  90   if (entries_space & ~BUCKET_OFFSET_MASK) {
  91     vm_exit_during_initialization("CompactHashtableWriter::allocate_table: Overflow! "
  92                                   "Too many entries.");
  93   }
  94 
  95   _compact_buckets = MetaspaceShared::new_ro_array<u4>(_num_buckets + 1);
  96   _compact_entries = MetaspaceShared::new_ro_array<u4>(entries_space);
  97 
  98   _stats->bucket_count    = _num_buckets;
  99   _stats->bucket_bytes    = _compact_buckets->size() * BytesPerWord;
 100   _stats->hashentry_count = _num_entries;
 101   _stats->hashentry_bytes = _compact_entries->size() * BytesPerWord;
 102 }
 103 
 104 // Write the compact table's buckets
 105 void CompactHashtableWriter::dump_table(NumberSeq* summary) {
 106   u4 offset = 0;
 107   for (int index = 0; index < _num_buckets; index++) {
 108     GrowableArray<Entry>* bucket = _buckets[index];
 109     int bucket_size = bucket->length();
 110     if (bucket_size == 1) {
 111       // bucket with one entry is compacted and only has the symbol offset
 112       _compact_buckets->at_put(index, BUCKET_INFO(offset, VALUE_ONLY_BUCKET_TYPE));
 113 
 114       Entry ent = bucket->at(0);
 115       _compact_entries->at_put(offset++, ent.value());
 116       _num_value_only_buckets++;
 117     } else {
 118       // regular bucket, each entry is a symbol (hash, offset) pair
 119       _compact_buckets->at_put(index, BUCKET_INFO(offset, REGULAR_BUCKET_TYPE));
 120 
 121       for (int i=0; i<bucket_size; i++) {
 122         Entry ent = bucket->at(i);
 123         _compact_entries->at_put(offset++, u4(ent.hash())); // write entry hash
 124         _compact_entries->at_put(offset++, ent.value());
 125       }
 126       if (bucket_size == 0) {
 127         _num_empty_buckets++;
 128       } else {
 129         _num_other_buckets++;
 130       }
 131     }
 132     summary->add(bucket_size);
 133   }
 134 
 135   // Mark the end of the buckets
 136   _compact_buckets->at_put(_num_buckets, BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE));
 137   assert(offset == (u4)_compact_entries->length(), "sanity");
 138 }
 139 
 140 
 141 // Write the compact table
 142 void CompactHashtableWriter::dump(SimpleCompactHashtable *cht, const char* table_name) {
 143   NumberSeq summary;
 144   allocate_table();
 145   dump_table(&summary);
 146 
 147   int table_bytes = _stats->bucket_bytes + _stats->hashentry_bytes;
 148   address base_address = address(MetaspaceShared::shared_rs()->base());
 149   cht->init(base_address,  _num_entries, _num_buckets,
 150             _compact_buckets->data(), _compact_entries->data());
 151 
 152   LogMessage(cds, hashtables) msg;
 153   if (msg.is_info()) {
 154     double avg_cost = 0.0;
 155     if (_num_entries > 0) {
 156       avg_cost = double(table_bytes)/double(_num_entries);
 157     }
 158     msg.info("Shared %s table stats -------- base: " PTR_FORMAT,
 159                          table_name, (intptr_t)base_address);
 160     msg.info("Number of entries       : %9d", _num_entries);
 161     msg.info("Total bytes used        : %9d", table_bytes);
 162     msg.info("Average bytes per entry : %9.3f", avg_cost);
 163     msg.info("Average bucket size     : %9.3f", summary.avg());
 164     msg.info("Variance of bucket size : %9.3f", summary.variance());
 165     msg.info("Std. dev. of bucket size: %9.3f", summary.sd());
 166     msg.info("Empty buckets           : %9d", _num_empty_buckets);
 167     msg.info("Value_Only buckets      : %9d", _num_value_only_buckets);
 168     msg.info("Other buckets           : %9d", _num_other_buckets);
 169   }
 170 }
 171 
 172 /////////////////////////////////////////////////////////////
 173 //
 174 // The CompactHashtable implementation
 175 //
 176 
// Hands each header field of the table to the given SerializeClosure:
// base address, entry count, bucket count, then the bucket and entry array
// pointers.
// NOTE(review): presumably the same closure interface is used for both
// writing and reading the archive, in which case this field order is part of
// the archive layout and must not be changed - confirm against the
// SerializeClosure implementations.
void SimpleCompactHashtable::serialize_header(SerializeClosure* soc) {
  soc->do_ptr((void**)&_base_address);
  soc->do_u4(&_entry_count);
  soc->do_u4(&_bucket_count);
  soc->do_ptr((void**)&_buckets);
  soc->do_ptr((void**)&_entries);
}
 184 #endif // INCLUDE_CDS
 185 
// O_BINARY is OR-ed into the open() flags used by the HashtableTextDump
// constructor below. Where the platform headers do not define it, it is
// defined as 0 so that it has no effect on those flags.
#ifndef O_BINARY       // if defined (Win32) use binary files.
#define O_BINARY 0     // otherwise do nothing.
#endif
 189 
 190 ////////////////////////////////////////////////////////
 191 //
 192 // HashtableTextDump
 193 //
 194 HashtableTextDump::HashtableTextDump(const char* filename) : _fd(-1) {
 195   struct stat st;
 196   if (os::stat(filename, &st) != 0) {
 197     quit("Unable to get hashtable dump file size", filename);
 198   }
 199   _size = st.st_size;
 200   _fd = open(filename, O_RDONLY | O_BINARY, 0);
 201   if (_fd < 0) {
 202     quit("Unable to open hashtable dump file", filename);
 203   }
 204   _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false);
 205   if (_base == NULL) {
 206     quit("Unable to map hashtable dump file", filename);
 207   }
 208   _p = _base;
 209   _end = _base + st.st_size;
 210   _filename = filename;
 211   _prefix_type = Unknown;
 212   _line_no = 1;
 213 }
 214 
 215 HashtableTextDump::~HashtableTextDump() {
 216   os::unmap_memory((char*)_base, _size);
 217   if (_fd >= 0) {
 218     close(_fd);
 219   }
 220 }
 221 
// Reports a fatal problem with the dump file by forwarding both strings,
// unchanged, to vm_exit_during_initialization().
//
// err - description of the error.
// msg - additional detail; the callers in this file pass the file name.
void HashtableTextDump::quit(const char* err, const char* msg) {
  vm_exit_during_initialization(err, msg);
}
 225 
 226 void HashtableTextDump::corrupted(const char *p, const char* msg) {
 227   char info[100];
 228   jio_snprintf(info, sizeof(info),
 229                "%s. Corrupted at line %d (file pos %d)",
 230                msg, _line_no, (int)(p - _base));
 231   quit(info, _filename);
 232 }
 233 
 234 bool HashtableTextDump::skip_newline() {
 235   if (_p[0] == '\r' && _p[1] == '\n') {
 236     _p += 2;
 237   } else if (_p[0] == '\n') {
 238     _p += 1;
 239   } else {
 240     corrupted(_p, "Unexpected character");
 241   }
 242   _line_no++;
 243   return true;
 244 }
 245 
 246 int HashtableTextDump::skip(char must_be_char) {
 247   corrupted_if(remain() < 1, "Truncated");
 248   corrupted_if(*_p++ != must_be_char, "Unexpected character");
 249   return 0;
 250 }
 251 
 252 void HashtableTextDump::skip_past(char c) {
 253   for (;;) {
 254     corrupted_if(remain() < 1, "Truncated");
 255     if (*_p++ == c) {
 256       return;
 257     }
 258   }
 259 }
 260 
 261 void HashtableTextDump::check_version(const char* ver) {
 262   int len = (int)strlen(ver);
 263   corrupted_if(remain() < len, "Truncated");
 264   if (strncmp(_p, ver, len) != 0) {
 265     quit("wrong version of hashtable dump file", _filename);
 266   }
 267   _p += len;
 268   skip_newline();
 269 }
 270 
 271 void HashtableTextDump::scan_prefix_type() {
 272   _p++;
 273   if (strncmp(_p, "SECTION: String", 15) == 0) {
 274     _p += 15;
 275     _prefix_type = StringPrefix;
 276   } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) {
 277     _p += 15;
 278     _prefix_type = SymbolPrefix;
 279   } else {
 280     _prefix_type = Unknown;
 281   }
 282   skip_newline();
 283 }
 284 
 285 int HashtableTextDump::scan_prefix(int* utf8_length) {
 286   if (*_p == '@') {
 287     scan_prefix_type();
 288   }
 289 
 290   switch (_prefix_type) {
 291   case SymbolPrefix:
 292     *utf8_length = scan_symbol_prefix(); break;
 293   case StringPrefix:
 294     *utf8_length = scan_string_prefix(); break;
 295   default:
 296     tty->print_cr("Shared input data type: Unknown.");
 297     corrupted(_p, "Unknown data type");
 298   }
 299 
 300   return _prefix_type;
 301 }
 302 
 303 int HashtableTextDump::scan_string_prefix() {
 304   // Expect /[0-9]+: /
 305   int utf8_length = 0;
 306   get_num(':', &utf8_length);
 307   if (*_p != ' ') {
 308     corrupted(_p, "Wrong prefix format for string");
 309   }
 310   _p++;
 311   return utf8_length;
 312 }
 313 
 314 int HashtableTextDump::scan_symbol_prefix() {
 315   // Expect /[0-9]+ (-|)[0-9]+: /
 316   int utf8_length = 0;
 317   get_num(' ', &utf8_length);
 318   if (*_p == '-') {
 319     _p++;
 320   }
 321   int ref_num;
 322   get_num(':', &ref_num);
 323   if (*_p != ' ') {
 324     corrupted(_p, "Wrong prefix format for symbol");
 325   }
 326   _p++;
 327   return utf8_length;
 328 }
 329 
 330 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) {
 331   jchar value = 0;
 332 
 333   corrupted_if(from + count > end, "Truncated");
 334 
 335   for (int i=0; i<count; i++) {
 336     char c = *from++;
 337     switch (c) {
 338     case '0': case '1': case '2': case '3': case '4':
 339     case '5': case '6': case '7': case '8': case '9':
 340       value = (value << 4) + c - '0';
 341       break;
 342     case 'a': case 'b': case 'c':
 343     case 'd': case 'e': case 'f':
 344       value = (value << 4) + 10 + c - 'a';
 345       break;
 346     case 'A': case 'B': case 'C':
 347     case 'D': case 'E': case 'F':
 348       value = (value << 4) + 10 + c - 'A';
 349       break;
 350     default:
 351       ShouldNotReachHere();
 352     }
 353   }
 354   return value;
 355 }
 356 
 357 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
 358   // cache in local vars
 359   const char* from = _p;
 360   const char* end = _end;
 361   char* to = utf8_buffer;
 362   int n = utf8_length;
 363 
 364   for (; n > 0 && from < end; n--) {
 365     if (*from != '\\') {
 366       *to++ = *from++;
 367     } else {
 368       corrupted_if(from + 2 > end, "Truncated");
 369       char c = from[1];
 370       from += 2;
 371       switch (c) {
 372       case 'x':
 373         {
 374           jchar value = unescape(from, end, 2);
 375           from += 2;
 376           assert(value <= 0xff, "sanity");
 377           *to++ = (char)(value & 0xff);
 378         }
 379         break;
 380       case 't':  *to++ = '\t'; break;
 381       case 'n':  *to++ = '\n'; break;
 382       case 'r':  *to++ = '\r'; break;
 383       case '\\': *to++ = '\\'; break;
 384       default:
 385         corrupted(_p, "Unsupported character");
 386       }
 387     }
 388   }
 389   corrupted_if(n > 0, "Truncated"); // expected more chars but file has ended
 390   _p = from;
 391   skip_newline();
 392 }
 393 
 394 // NOTE: the content is NOT the same as
 395 // UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
 396 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
 397 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
 398 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
 399   const char *c = utf8_string;
 400   const char *end = c + utf8_length;
 401   for (; c < end; c++) {
 402     switch (*c) {
 403     case '\t': st->print("\\t"); break;
 404     case '\r': st->print("\\r"); break;
 405     case '\n': st->print("\\n"); break;
 406     case '\\': st->print("\\\\"); break;
 407     default:
 408       if (isprint(*c)) {
 409         st->print("%c", *c);
 410       } else {
 411         st->print("\\x%02x", ((unsigned int)*c) & 0xff);
 412       }
 413     }
 414   }
 415 }