New src/share/vm/classfile/compactHashtable.cpp

   1 /*
   2  * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/javaClasses.hpp"
  27 #include "memory/metaspaceShared.hpp"
  28 #include "utilities/numberSeq.hpp"
  29 #include <sys/stat.h>
  30 
  31 /////////////////////////////////////////////////////
  32 //
  33 // The compact hash table writer implementations
  34 //
  35 CompactHashtableWriter::CompactHashtableWriter(const char* table_name,
  36                                                int num_entries,
  37                                                CompactHashtableStats* stats) {
  38   assert(DumpSharedSpaces, "dump-time only");
  39   _table_name = table_name;
  40   _num_entries = num_entries;
  41   _num_buckets = number_of_buckets(_num_entries);
  42   _buckets = NEW_C_HEAP_ARRAY(Entry*, _num_buckets, mtSymbol);
  43   memset(_buckets, 0, sizeof(Entry*) * _num_buckets);
  44 
  45   /* bucket sizes table */
  46   _bucket_sizes = NEW_C_HEAP_ARRAY(juint, _num_buckets, mtSymbol);
  47   memset(_bucket_sizes, 0, sizeof(juint) * _num_buckets);
  48 
  49   stats->hashentry_count = _num_entries;
  50   // Compact buckets' entries will have only the 4-byte offset, but
  51   // we don't know how many there will be at this point. So use a
  52   // conservative estimate here. The size is adjusted later when we
  53   // write out the buckets.
  54   stats->hashentry_bytes = _num_entries * 8;
  55   stats->bucket_count    = _num_buckets;
  56   stats->bucket_bytes    = _num_buckets * (sizeof(juint) + sizeof(jushort));
  57 
  58   // See compactHashtable.hpp for table layout
  59   _required_bytes = sizeof(juint) * 2; // _base_address, written as 2 juints
  60   _required_bytes+= sizeof(juint) +    // num_entries
  61                     sizeof(juint) +    // num_buckets
  62                     sizeof(juint) +    // table_end_offset
  63                     stats->hashentry_bytes +
  64                     stats->bucket_bytes;
  65 }
  66 
  67 CompactHashtableWriter::~CompactHashtableWriter() {
  68   for (int index = 0; index < _num_buckets; index++) {
  69     Entry* next = NULL;
  70     for (Entry* tent = _buckets[index]; tent; tent = next) {
  71       next = tent->next();
  72       delete tent;
  73     }
  74   }
  75 
  76   FREE_C_HEAP_ARRAY(juint, _bucket_sizes);
  77   FREE_C_HEAP_ARRAY(Entry*, _buckets);
  78 }
  79 
  80 // Calculate the number of buckets in the temporary hash table
  81 int CompactHashtableWriter::number_of_buckets(int num_entries) {
  82   const int buksize = (int)SharedSymbolTableBucketSize;
  83   int num_buckets = (num_entries + buksize - 1) / buksize;
  84   num_buckets = (num_buckets + 1) & (~0x01);
  85 
  86   return num_buckets;
  87 }
  88 
  89 // Add a symbol entry to the temporary hash table
  90 void CompactHashtableWriter::add(unsigned int hash, Entry* entry) {
  91   int index = hash % _num_buckets;
  92   entry->set_next(_buckets[index]);
  93   _buckets[index] = entry;
  94   _bucket_sizes[index] ++;
  95 }
  96 
  97 // Write the compact table's bucket infos
  98 juint* CompactHashtableWriter::dump_table(juint* p, NumberSeq* summary) {
  99   int index;
 100   juint* compact_table = p;
 101   // The start of the buckets, skip the compact_bucket_infos table
 102   juint offset = _num_buckets;
 103 
 104   for (index = 0; index < _num_buckets; index++) {
 105     int bucket_size = _bucket_sizes[index];
 106     if (bucket_size == 1) {
 107       // bucket with one entry is compacted and only has the symbol offset
 108       compact_table[index] = BUCKET_INFO(offset, COMPACT_BUCKET_TYPE);
 109       offset += bucket_size; // each entry contains symbol offset only
 110     } else {
 111       // regular bucket, each entry is a symbol (hash, offset) pair
 112       compact_table[index] = BUCKET_INFO(offset, REGULAR_BUCKET_TYPE);
 113       offset += bucket_size * 2; // each hash entry is 2 juints
 114     }
 115     if (offset & ~BUCKET_OFFSET_MASK) {
 116       vm_exit_during_initialization("CompactHashtableWriter::dump_table: Overflow! "
 117                                     "Too many symbols.");
 118     }
 119     summary->add(bucket_size);
 120   }
 121 
 122   return compact_table;
 123 }
 124 
 125 // Write the compact table's entries
 126 juint* CompactHashtableWriter::dump_buckets(juint* compact_table, juint* p,
 127                                             NumberSeq* summary) {
 128   uintx base_address = uintx(MetaspaceShared::shared_rs()->base());
 129   uintx max_delta    = uintx(MetaspaceShared::shared_rs()->size());
 130   assert(max_delta <= 0x7fffffff, "range check");
 131   int num_compact_buckets = 0;
 132 
 133   for (int index = 0; index < _num_buckets; index++) {
 134     juint count = 0;
 135     int bucket_size = _bucket_sizes[index];
 136     int bucket_type = BUCKET_TYPE(compact_table[index]);
 137 
 138     if (bucket_size == 1) {
 139       assert(bucket_type == COMPACT_BUCKET_TYPE, "Bad bucket type");
 140       num_compact_buckets ++;
 141     }
 142     for (Entry* tent = _buckets[index]; tent;
 143          tent = tent->next()) {
 144       if (bucket_type == REGULAR_BUCKET_TYPE) {
 145         *p++ = juint(tent->hash()); // write symbol hash 
 146       }
 147       uintx deltax = uintx(tent->value()) - base_address;
 148       assert(deltax < max_delta, "range check");
 149       juint delta = juint(deltax);
 150       *p++ = delta; // write symbol offset
 151       count ++;
 152     }
 153     assert(count == _bucket_sizes[index], "sanity");
 154   }
 155   
 156   // Adjust the hashentry_bytes in CompactHashtableStats. Each compact
 157   // bucket saves 4-byte.
 158   CompactHashtableStats* stats = &MetaspaceShared::stats()->symbol;
 159   stats->hashentry_bytes -= num_compact_buckets * 4;
 160 
 161   return p;
 162 }
 163 
 164 // Write the compact table
 165 void CompactHashtableWriter::dump(char** top, char* end) {
 166   NumberSeq summary;
 167   char* old_top = *top;
 168   juint* patch_addr;
 169   juint* p = (juint*)(*top);
 170 
 171   uintx base_address = uintx(MetaspaceShared::shared_rs()->base());
 172 
 173 #ifdef _LP64
 174   *p++ = juint(base_address >> 32);
 175 #else
 176   *p++ = 0;
 177 #endif
 178   *p++ = juint(base_address & 0xffffffff); // base address
 179   *p++ = _num_entries;  // number of entries in the table
 180   *p++ = _num_buckets;  // number of buckets in the table
 181   patch_addr = p;
 182   *p++ = 0;             // table_end offset, patched after dumping the table
 183 
 184   juint* compact_table = dump_table(p, &summary);
 185   juint* first_bucket = compact_table + _num_buckets;
 186   juint* bucket_end = dump_buckets(compact_table, first_bucket, &summary);
 187   // patch the the table end offset
 188   *patch_addr = bucket_end - compact_table;
 189 
 190   assert(bucket_end <= (juint*)end, "cannot write past end");
 191   *top = (char*)bucket_end;
 192 
 193   if (PrintSharedSpaces) {
 194     double avg_cost = 0.0;
 195     if (_num_entries > 0) {
 196       avg_cost = double(_required_bytes)/double(_num_entries);
 197     }
 198     tty->print_cr("Shared %s table stats -------- base: " PTR_FORMAT, _table_name, (intptr_t)base_address);
 199     tty->print_cr("Number of entries       : %9d", _num_entries);
 200     tty->print_cr("Total bytes used        : %9d", (int)((*top) - old_top));
 201     tty->print_cr("Average bytes per entry : %9.3f", avg_cost);
 202     tty->print_cr("Average bucket size     : %9.3f", summary.avg());
 203     tty->print_cr("Variance of bucket size : %9.3f", summary.variance());
 204     tty->print_cr("Std. dev. of bucket size: %9.3f", summary.sd());
 205     tty->print_cr("Maximum bucket size     : %9d", (int)summary.maximum());
 206   }
 207 }
 208 
 209 /////////////////////////////////////////////////////////////
 210 //
 211 // The CompactHashtable implementation
 212 //
 213 template <class T, class N> const char* CompactHashtable<T, N>::init(const char* buffer) {
 214   assert(!DumpSharedSpaces, "run-time only");
 215   juint*p = (juint*)buffer;
 216   juint upper = *p++;
 217   juint lower = *p++;
 218 #ifdef _LP64
 219   _base_address = (uintx(upper) << 32 ) + uintx(lower);
 220 #else
 221   _base_address = uintx(lower);
 222 #endif
 223   _entry_count = *p++;
 224   _bucket_count = *p++;
 225   _table_end_offset = *p++;
 226   _buckets = p;
 227 
 228   juint *end = _buckets + _table_end_offset;
 229   return (const char*)end;
 230 }
 231 
// Explicit instantiation: the member templates above are defined in this
// file, so the specializations that are used must be instantiated here.
template class CompactHashtable<Symbol*, char>;
 234 
 235 #ifndef O_BINARY       // if defined (Win32) use binary files.
 236 #define O_BINARY 0     // otherwise do nothing.
 237 #endif
 238 
 239 ////////////////////////////////////////////////////////
 240 //
 241 // HashtableTextDump
 242 //
 243 HashtableTextDump::HashtableTextDump(const char* filename) : _fd(-1) {
 244   struct stat st;
 245   if (os::stat(filename, &st) != 0) {
 246     quit("Unable to get hashtable dump file size", filename);
 247   }
 248   _size = st.st_size;
 249   _fd = open(filename, O_RDONLY | O_BINARY, 0);
 250   if (_fd < 0) {
 251     quit("Unable to open hashtable dump file", filename);
 252   }
 253   _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false);
 254   if (_base == NULL) {
 255     quit("Unable to map hashtable dump file", filename);
 256   }
 257   _p = _base;
 258   _end = _base + st.st_size;
 259   _filename = filename;
 260 }
 261 
 262 HashtableTextDump::~HashtableTextDump() {
 263   os::unmap_memory((char*)_base, _size);
 264   if (_fd >= 0) {
 265     close(_fd);
 266   }
 267 }
 268 
// Reports a fatal problem with the dump file by forwarding err and msg to
// vm_exit_during_initialization().
void HashtableTextDump::quit(const char* err, const char* msg) {
  vm_exit_during_initialization(err, msg);
}
 272 
 273 void HashtableTextDump::corrupted(const char *p) {
 274   char info[60];
 275   sprintf(info, "corrupted at pos %d", (int)(p - _base));
 276   quit(info, _filename);
 277 }
 278 
 279 bool HashtableTextDump::skip_newline() {
 280   if (_p[0] == '\r' && _p[1] == '\n') {
 281     _p += 2;
 282   } else if (_p[0] == '\n') {
 283     _p += 1;
 284   } else {
 285     corrupted(_p);
 286   }
 287   return true;
 288 }
 289 
 290 int HashtableTextDump::skip(char must_be_char) {
 291   corrupted_if(remain() < 1);
 292   corrupted_if(*_p++ != must_be_char);
 293   return 0;
 294 }
 295 
 296 void HashtableTextDump::skip_past(char c) {
 297   for (;;) {
 298     corrupted_if(remain() < 1);
 299     if (*_p++ == c) {
 300       return;
 301     }
 302   }
 303 }
 304 
 305 void HashtableTextDump::check_version(const char* ver) {
 306   int len = (int)strlen(ver);
 307   corrupted_if(remain() < len);
 308   if (strncmp(_p, ver, len) != 0) {
 309     quit("wrong version of hashtable dump file", _filename);
 310   }
 311   _p += len;
 312   skip_newline();
 313 }
 314 
 315 
 316 int HashtableTextDump::scan_prefix() {
 317   // Expect /[0-9]+: /
 318   int utf8_length = get_num(':');
 319   if (*_p != ' ') {
 320     corrupted(_p);
 321   }
 322   _p++;
 323   return utf8_length;
 324 }
 325 
 326 int HashtableTextDump::scan_prefix2() {
 327   // Expect /[0-9]+ (-|)[0-9]+: /
 328   int utf8_length = get_num(' ');
 329   if (*_p == '-') {
 330     _p++;
 331   }
 332   (void)get_num(':');
 333   if (*_p != ' ') {
 334     corrupted(_p);
 335   }
 336   _p++;
 337   return utf8_length;
 338 }
 339 
 340 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) {
 341   jchar value = 0;
 342 
 343   corrupted_if(from + count > end);
 344 
 345   for (int i=0; i<count; i++) {
 346     char c = *from++;
 347     switch (c) {
 348     case '0': case '1': case '2': case '3': case '4':
 349     case '5': case '6': case '7': case '8': case '9':
 350       value = (value << 4) + c - '0';
 351       break;
 352     case 'a': case 'b': case 'c':
 353     case 'd': case 'e': case 'f':
 354       value = (value << 4) + 10 + c - 'a';
 355       break;
 356     case 'A': case 'B': case 'C':
 357     case 'D': case 'E': case 'F':
 358       value = (value << 4) + 10 + c - 'A';
 359       break;
 360     default:
 361       ShouldNotReachHere();
 362     }
 363   }
 364   return value;
 365 }
 366 
 367 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) {
 368   // cache in local vars
 369   const char* from = _p;
 370   const char* end = _end;
 371   char* to = utf8_buffer;
 372   int n = utf8_length;
 373 
 374   for (; n > 0 && from < end; n--) {
 375     if (*from != '\\') {
 376       *to++ = *from++;
 377     } else {
 378       corrupted_if(from + 2 > end);
 379       char c = from[1];
 380       from += 2;
 381       switch (c) {
 382       case 'x':
 383         {
 384           jchar value = unescape(from, end, 2);
 385           from += 2;
 386           assert(value <= 0xff, "sanity");
 387           *to++ = (char)(value & 0xff);
 388         }
 389         break;
 390       case 't':  *to++ = '\t'; break;
 391       case 'n':  *to++ = '\n'; break;
 392       case 'r':  *to++ = '\r'; break;
 393       case '\\': *to++ = '\\'; break;
 394       default:
 395         ShouldNotReachHere();
 396       }
 397     }
 398   }
 399   corrupted_if(n > 0); // expected more chars but file has ended
 400   _p = from;
 401   skip_newline();
 402 }
 403 
 404 // NOTE: the content is NOT the same as
 405 // UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen).
 406 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily
 407 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8()
 408 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) {
 409   const char *c = utf8_string;
 410   const char *end = c + utf8_length;
 411   for (; c < end; c++) {
 412     switch (*c) {
 413     case '\t': st->print("\\t"); break;
 414     case '\r': st->print("\\r"); break;
 415     case '\n': st->print("\\n"); break;
 416     case '\\': st->print("\\\\"); break;
 417     default:
 418       if (isprint(*c)) {
 419         st->print("%c", *c);
 420       } else {
 421         st->print("\\x%02x", ((unsigned int)*c) & 0xff);
 422       }
 423     }
 424   }
 425 }