1 /* 2 * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "jvm.h" 27 #include "classfile/compactHashtable.hpp" 28 #include "classfile/javaClasses.hpp" 29 #include "logging/logMessage.hpp" 30 #include "memory/dynamicArchive.hpp" 31 #include "memory/heapShared.inline.hpp" 32 #include "memory/metadataFactory.hpp" 33 #include "memory/metaspaceShared.hpp" 34 #include "runtime/globals.hpp" 35 #include "runtime/vmThread.hpp" 36 #include "utilities/numberSeq.hpp" 37 #include <sys/stat.h> 38 39 #if INCLUDE_CDS 40 ///////////////////////////////////////////////////// 41 // 42 // The compact hash table writer implementations 43 // 44 CompactHashtableWriter::CompactHashtableWriter(int num_entries, 45 CompactHashtableStats* stats) { 46 Arguments::assert_is_dumping_archive(); 47 assert(num_entries >= 0, "sanity"); 48 _num_buckets = calculate_num_buckets(num_entries); 49 assert(_num_buckets > 0, "no buckets"); 50 51 _num_entries_written = 0; 52 _buckets = NEW_C_HEAP_ARRAY(GrowableArray<Entry>*, _num_buckets, mtSymbol); 53 for (int i=0; i<_num_buckets; i++) { 54 _buckets[i] = new (ResourceObj::C_HEAP, mtSymbol) GrowableArray<Entry>(0, true, mtSymbol); 55 } 56 57 _stats = stats; 58 _compact_buckets = NULL; 59 _compact_entries = NULL; 60 _num_empty_buckets = 0; 61 _num_value_only_buckets = 0; 62 _num_other_buckets = 0; 63 } 64 65 CompactHashtableWriter::~CompactHashtableWriter() { 66 for (int index = 0; index < _num_buckets; index++) { 67 GrowableArray<Entry>* bucket = _buckets[index]; 68 delete bucket; 69 } 70 71 FREE_C_HEAP_ARRAY(GrowableArray<Entry>*, _buckets); 72 } 73 74 size_t CompactHashtableWriter::estimate_size(int num_entries) { 75 int num_buckets = calculate_num_buckets(num_entries); 76 size_t bucket_bytes = MetaspaceShared::ro_array_bytesize<u4>(num_buckets + 1); 77 78 // In worst case, we have no VALUE_ONLY_BUCKET_TYPE, so each entry takes 2 slots 79 int entries_space = 2 * num_entries; 80 size_t entry_bytes = MetaspaceShared::ro_array_bytesize<u4>(entries_space); 81 82 return bucket_bytes 83 + entry_bytes 84 + SimpleCompactHashtable::calculate_header_size(); 85 } 86 87 // Add a symbol entry to the temporary hash table 88 void CompactHashtableWriter::add(unsigned int hash, u4 value) { 89 int index = hash % _num_buckets; 90 _buckets[index]->append_if_missing(Entry(hash, value)); 91 _num_entries_written++; 92 } 93 94 void CompactHashtableWriter::allocate_table() { 95 int entries_space = 0; 96 for (int index = 0; index < _num_buckets; index++) { 97 GrowableArray<Entry>* bucket = _buckets[index]; 98 int bucket_size = bucket->length(); 99 if (bucket_size == 1) { 100 entries_space++; 101 } else if (bucket_size > 1) { 102 entries_space += 2 * bucket_size; 103 } 104 } 105 106 if (entries_space & ~BUCKET_OFFSET_MASK) { 107 vm_exit_during_initialization("CompactHashtableWriter::allocate_table: Overflow! " 108 "Too many entries."); 109 } 110 111 _compact_buckets = MetaspaceShared::new_ro_array<u4>(_num_buckets + 1); 112 _compact_entries = MetaspaceShared::new_ro_array<u4>(entries_space); 113 114 _stats->bucket_count = _num_buckets; 115 _stats->bucket_bytes = _compact_buckets->size() * BytesPerWord; 116 _stats->hashentry_count = _num_entries_written; 117 _stats->hashentry_bytes = _compact_entries->size() * BytesPerWord; 118 } 119 120 // Write the compact table's buckets 121 void CompactHashtableWriter::dump_table(NumberSeq* summary) { 122 u4 offset = 0; 123 for (int index = 0; index < _num_buckets; index++) { 124 GrowableArray<Entry>* bucket = _buckets[index]; 125 int bucket_size = bucket->length(); 126 if (bucket_size == 1) { 127 // bucket with one entry is compacted and only has the symbol offset 128 _compact_buckets->at_put(index, BUCKET_INFO(offset, VALUE_ONLY_BUCKET_TYPE)); 129 130 Entry ent = bucket->at(0); 131 _compact_entries->at_put(offset++, ent.value()); 132 _num_value_only_buckets++; 133 } else { 134 // regular bucket, each entry is a symbol (hash, offset) pair 135 _compact_buckets->at_put(index, BUCKET_INFO(offset, REGULAR_BUCKET_TYPE)); 136 137 for (int i=0; i<bucket_size; i++) { 138 Entry ent = bucket->at(i); 139 _compact_entries->at_put(offset++, u4(ent.hash())); // write entry hash 140 _compact_entries->at_put(offset++, ent.value()); 141 } 142 if (bucket_size == 0) { 143 _num_empty_buckets++; 144 } else { 145 _num_other_buckets++; 146 } 147 } 148 summary->add(bucket_size); 149 } 150 151 // Mark the end of the buckets 152 _compact_buckets->at_put(_num_buckets, BUCKET_INFO(offset, TABLEEND_BUCKET_TYPE)); 153 assert(offset == (u4)_compact_entries->length(), "sanity"); 154 } 155 156 157 // Write the compact table 158 void CompactHashtableWriter::dump(SimpleCompactHashtable *cht, const char* table_name) { 159 NumberSeq summary; 160 allocate_table(); 161 dump_table(&summary); 162 163 int table_bytes = _stats->bucket_bytes + _stats->hashentry_bytes; 164 address base_address = address(SharedBaseAddress); 165 cht->init(base_address, _num_entries_written, _num_buckets, 166 _compact_buckets->data(), _compact_entries->data()); 167 168 LogMessage(cds, hashtables) msg; 169 if (msg.is_info()) { 170 double avg_cost = 0.0; 171 if (_num_entries_written > 0) { 172 avg_cost = double(table_bytes)/double(_num_entries_written); 173 } 174 msg.info("Shared %s table stats -------- base: " PTR_FORMAT, 175 table_name, (intptr_t)base_address); 176 msg.info("Number of entries : %9d", _num_entries_written); 177 msg.info("Total bytes used : %9d", table_bytes); 178 msg.info("Average bytes per entry : %9.3f", avg_cost); 179 msg.info("Average bucket size : %9.3f", summary.avg()); 180 msg.info("Variance of bucket size : %9.3f", summary.variance()); 181 msg.info("Std. dev. of bucket size: %9.3f", summary.sd()); 182 msg.info("Maximum bucket size : %9d", (int)summary.maximum()); 183 msg.info("Empty buckets : %9d", _num_empty_buckets); 184 msg.info("Value_Only buckets : %9d", _num_value_only_buckets); 185 msg.info("Other buckets : %9d", _num_other_buckets); 186 } 187 } 188 189 ///////////////////////////////////////////////////////////// 190 // 191 // The CompactHashtable implementation 192 // 193 194 void SimpleCompactHashtable::init(address base_address, u4 entry_count, u4 bucket_count, u4* buckets, u4* entries) { 195 _bucket_count = bucket_count; 196 _entry_count = entry_count; 197 _base_address = base_address; 198 if (DynamicDumpSharedSpaces) { 199 _buckets = DynamicArchive::buffer_to_target(buckets); 200 _entries = DynamicArchive::buffer_to_target(entries); 201 } else { 202 _buckets = buckets; 203 _entries = entries; 204 } 205 } 206 207 size_t SimpleCompactHashtable::calculate_header_size() { 208 // We have 5 fields. Each takes up sizeof(intptr_t). See WriteClosure::do_u4 209 size_t bytes = sizeof(intptr_t) * 5; 210 return bytes; 211 } 212 213 void SimpleCompactHashtable::serialize_header(SerializeClosure* soc) { 214 // NOTE: if you change this function, you MUST change the number 5 in 215 // calculate_header_size() accordingly. 216 soc->do_u4(&_entry_count); 217 soc->do_u4(&_bucket_count); 218 soc->do_ptr((void**)&_buckets); 219 soc->do_ptr((void**)&_entries); 220 if (soc->reading()) { 221 _base_address = (address)SharedBaseAddress; 222 } 223 } 224 #endif // INCLUDE_CDS 225 226 #ifndef O_BINARY // if defined (Win32) use binary files. 227 #define O_BINARY 0 // otherwise do nothing. 228 #endif 229 230 //////////////////////////////////////////////////////// 231 // 232 // HashtableTextDump 233 // 234 HashtableTextDump::HashtableTextDump(const char* filename) : _fd(-1) { 235 struct stat st; 236 if (os::stat(filename, &st) != 0) { 237 quit("Unable to get hashtable dump file size", filename); 238 } 239 _size = st.st_size; 240 _fd = os::open(filename, O_RDONLY | O_BINARY, 0); 241 if (_fd < 0) { 242 quit("Unable to open hashtable dump file", filename); 243 } 244 _base = os::map_memory(_fd, filename, 0, NULL, _size, true, false); 245 if (_base == NULL) { 246 quit("Unable to map hashtable dump file", filename); 247 } 248 _p = _base; 249 _end = _base + st.st_size; 250 _filename = filename; 251 _prefix_type = Unknown; 252 _line_no = 1; 253 } 254 255 HashtableTextDump::~HashtableTextDump() { 256 os::unmap_memory((char*)_base, _size); 257 if (_fd >= 0) { 258 close(_fd); 259 } 260 } 261 262 void HashtableTextDump::quit(const char* err, const char* msg) { 263 vm_exit_during_initialization(err, msg); 264 } 265 266 void HashtableTextDump::corrupted(const char *p, const char* msg) { 267 char info[100]; 268 jio_snprintf(info, sizeof(info), 269 "%s. Corrupted at line %d (file pos %d)", 270 msg, _line_no, (int)(p - _base)); 271 quit(info, _filename); 272 } 273 274 bool HashtableTextDump::skip_newline() { 275 if (_p[0] == '\r' && _p[1] == '\n') { 276 _p += 2; 277 } else if (_p[0] == '\n') { 278 _p += 1; 279 } else { 280 corrupted(_p, "Unexpected character"); 281 } 282 _line_no++; 283 return true; 284 } 285 286 int HashtableTextDump::skip(char must_be_char) { 287 corrupted_if(remain() < 1, "Truncated"); 288 corrupted_if(*_p++ != must_be_char, "Unexpected character"); 289 return 0; 290 } 291 292 void HashtableTextDump::skip_past(char c) { 293 for (;;) { 294 corrupted_if(remain() < 1, "Truncated"); 295 if (*_p++ == c) { 296 return; 297 } 298 } 299 } 300 301 void HashtableTextDump::check_version(const char* ver) { 302 int len = (int)strlen(ver); 303 corrupted_if(remain() < len, "Truncated"); 304 if (strncmp(_p, ver, len) != 0) { 305 quit("wrong version of hashtable dump file", _filename); 306 } 307 _p += len; 308 skip_newline(); 309 } 310 311 void HashtableTextDump::scan_prefix_type() { 312 _p++; 313 if (strncmp(_p, "SECTION: String", 15) == 0) { 314 _p += 15; 315 _prefix_type = StringPrefix; 316 } else if (strncmp(_p, "SECTION: Symbol", 15) == 0) { 317 _p += 15; 318 _prefix_type = SymbolPrefix; 319 } else { 320 _prefix_type = Unknown; 321 } 322 skip_newline(); 323 } 324 325 int HashtableTextDump::scan_prefix(int* utf8_length) { 326 if (*_p == '@') { 327 scan_prefix_type(); 328 } 329 330 switch (_prefix_type) { 331 case SymbolPrefix: 332 *utf8_length = scan_symbol_prefix(); break; 333 case StringPrefix: 334 *utf8_length = scan_string_prefix(); break; 335 default: 336 tty->print_cr("Shared input data type: Unknown."); 337 corrupted(_p, "Unknown data type"); 338 } 339 340 return _prefix_type; 341 } 342 343 int HashtableTextDump::scan_string_prefix() { 344 // Expect /[0-9]+: / 345 int utf8_length = 0; 346 get_num(':', &utf8_length); 347 if (*_p != ' ') { 348 corrupted(_p, "Wrong prefix format for string"); 349 } 350 _p++; 351 return utf8_length; 352 } 353 354 int HashtableTextDump::scan_symbol_prefix() { 355 // Expect /[0-9]+ (-|)[0-9]+: / 356 int utf8_length = 0; 357 get_num(' ', &utf8_length); 358 if (*_p == '-') { 359 _p++; 360 } 361 int ref_num; 362 get_num(':', &ref_num); 363 if (*_p != ' ') { 364 corrupted(_p, "Wrong prefix format for symbol"); 365 } 366 _p++; 367 return utf8_length; 368 } 369 370 jchar HashtableTextDump::unescape(const char* from, const char* end, int count) { 371 jchar value = 0; 372 373 corrupted_if(from + count > end, "Truncated"); 374 375 for (int i=0; i<count; i++) { 376 char c = *from++; 377 switch (c) { 378 case '0': case '1': case '2': case '3': case '4': 379 case '5': case '6': case '7': case '8': case '9': 380 value = (value << 4) + c - '0'; 381 break; 382 case 'a': case 'b': case 'c': 383 case 'd': case 'e': case 'f': 384 value = (value << 4) + 10 + c - 'a'; 385 break; 386 case 'A': case 'B': case 'C': 387 case 'D': case 'E': case 'F': 388 value = (value << 4) + 10 + c - 'A'; 389 break; 390 default: 391 ShouldNotReachHere(); 392 } 393 } 394 return value; 395 } 396 397 void HashtableTextDump::get_utf8(char* utf8_buffer, int utf8_length) { 398 // cache in local vars 399 const char* from = _p; 400 const char* end = _end; 401 char* to = utf8_buffer; 402 int n = utf8_length; 403 404 for (; n > 0 && from < end; n--) { 405 if (*from != '\\') { 406 *to++ = *from++; 407 } else { 408 corrupted_if(from + 2 > end, "Truncated"); 409 char c = from[1]; 410 from += 2; 411 switch (c) { 412 case 'x': 413 { 414 jchar value = unescape(from, end, 2); 415 from += 2; 416 assert(value <= 0xff, "sanity"); 417 *to++ = (char)(value & 0xff); 418 } 419 break; 420 case 't': *to++ = '\t'; break; 421 case 'n': *to++ = '\n'; break; 422 case 'r': *to++ = '\r'; break; 423 case '\\': *to++ = '\\'; break; 424 default: 425 corrupted(_p, "Unsupported character"); 426 } 427 } 428 } 429 corrupted_if(n > 0, "Truncated"); // expected more chars but file has ended 430 _p = from; 431 skip_newline(); 432 } 433 434 // NOTE: the content is NOT the same as 435 // UTF8::as_quoted_ascii(const char* utf8_str, int utf8_length, char* buf, int buflen). 436 // We want to escape \r\n\t so that output [1] is more readable; [2] can be more easily 437 // parsed by scripts; [3] quickly processed by HashtableTextDump::get_utf8() 438 void HashtableTextDump::put_utf8(outputStream* st, const char* utf8_string, int utf8_length) { 439 const char *c = utf8_string; 440 const char *end = c + utf8_length; 441 for (; c < end; c++) { 442 switch (*c) { 443 case '\t': st->print("\\t"); break; 444 case '\r': st->print("\\r"); break; 445 case '\n': st->print("\\n"); break; 446 case '\\': st->print("\\\\"); break; 447 default: 448 if (isprint(*c)) { 449 st->print("%c", *c); 450 } else { 451 st->print("\\x%02x", ((unsigned int)*c) & 0xff); 452 } 453 } 454 } 455 }