1 /*
   2  * Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 # include "incls/_precompiled.incl"
  26 # include "incls/_symbolTable.cpp.incl"
  27 
  28 // --------------------------------------------------------------------------
  29 
// Storage for the static SymbolTable::_the_table pointer; it starts out NULL.
// NOTE(review): presumably this is what the_table() returns - confirm in the header.
SymbolTable* SymbolTable::_the_table = NULL;
  31 
  32 // Lookup a symbol in a bucket.
  33 
  34 symbolOop SymbolTable::lookup(int index, const char* name,
  35                               int len, unsigned int hash) {
  36   for (HashtableEntry* e = bucket(index); e != NULL; e = e->next()) {
  37     if (e->hash() == hash) {
  38       symbolOop sym = symbolOop(e->literal());
  39       if (sym->equals(name, len)) {
  40         return sym;
  41       }
  42     }
  43   }
  44   return NULL;
  45 }
  46 
  47 
// We take care not to block while holding the SymbolTable_lock.
// Otherwise, the system might deadlock, since the symbol table is used
// during compilation (VM_thread). The lock-free synchronization is
// simplified by the fact that we do not delete entries in the symbol
// table during normal execution (only during safepoints).
  54 
  55 symbolOop SymbolTable::lookup(const char* name, int len, TRAPS) {
  56   unsigned int hashValue = hash_symbol(name, len);
  57   int index = the_table()->hash_to_index(hashValue);
  58 
  59   symbolOop s = the_table()->lookup(index, name, len, hashValue);
  60 
  61   // Found
  62   if (s != NULL) return s;
  63 
  64   // Otherwise, add to symbol to table
  65   return the_table()->basic_add(index, (u1*)name, len, hashValue, CHECK_NULL);
  66 }
  67 
  68 symbolOop SymbolTable::lookup(symbolHandle sym, int begin, int end, TRAPS) {
  69   char* buffer;
  70   int index, len;
  71   unsigned int hashValue;
  72   char* name;
  73   {
  74     debug_only(No_Safepoint_Verifier nsv;)
  75 
  76     name = (char*)sym->base() + begin;
  77     len = end - begin;
  78     hashValue = hash_symbol(name, len);
  79     index = the_table()->hash_to_index(hashValue);
  80     symbolOop s = the_table()->lookup(index, name, len, hashValue);
  81 
  82     // Found
  83     if (s != NULL) return s;
  84   }
  85 
  86   // Otherwise, add to symbol to table. Copy to a C string first.
  87   char stack_buf[128];
  88   ResourceMark rm(THREAD);
  89   if (len <= 128) {
  90     buffer = stack_buf;
  91   } else {
  92     buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len);
  93   }
  94   for (int i=0; i<len; i++) {
  95     buffer[i] = name[i];
  96   }
  97   // Make sure there is no safepoint in the code above since name can't move.
  98   // We can't include the code in No_Safepoint_Verifier because of the
  99   // ResourceMark.
 100 
 101   return the_table()->basic_add(index, (u1*)buffer, len, hashValue, CHECK_NULL);
 102 }
 103 
 104 symbolOop SymbolTable::lookup_only(const char* name, int len,
 105                                    unsigned int& hash) {
 106   hash = hash_symbol(name, len);
 107   int index = the_table()->hash_to_index(hash);
 108 
 109   return the_table()->lookup(index, name, len, hash);
 110 }
 111 
 112 // Suggestion: Push unicode-based lookup all the way into the hashing
 113 // and probing logic, so there is no need for convert_to_utf8 until
 114 // an actual new symbolOop is created.
 115 symbolOop SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) {
 116   int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
 117   char stack_buf[128];
 118   if (utf8_length < (int) sizeof(stack_buf)) {
 119     char* chars = stack_buf;
 120     UNICODE::convert_to_utf8(name, utf16_length, chars);
 121     return lookup(chars, utf8_length, THREAD);
 122   } else {
 123     ResourceMark rm(THREAD);
 124     char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
 125     UNICODE::convert_to_utf8(name, utf16_length, chars);
 126     return lookup(chars, utf8_length, THREAD);
 127   }
 128 }
 129 
 130 symbolOop SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
 131                                            unsigned int& hash) {
 132   int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
 133   char stack_buf[128];
 134   if (utf8_length < (int) sizeof(stack_buf)) {
 135     char* chars = stack_buf;
 136     UNICODE::convert_to_utf8(name, utf16_length, chars);
 137     return lookup_only(chars, utf8_length, hash);
 138   } else {
 139     ResourceMark rm;
 140     char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
 141     UNICODE::convert_to_utf8(name, utf16_length, chars);
 142     return lookup_only(chars, utf8_length, hash);
 143   }
 144 }
 145 
 146 void SymbolTable::add(constantPoolHandle cp, int names_count,
 147                       const char** names, int* lengths, int* cp_indices,
 148                       unsigned int* hashValues, TRAPS) {
 149   SymbolTable* table = the_table();
 150   bool added = table->basic_add(cp, names_count, names, lengths,
 151                                 cp_indices, hashValues, CHECK);
 152   if (!added) {
 153     // do it the hard way
 154     for (int i=0; i<names_count; i++) {
 155       int index = table->hash_to_index(hashValues[i]);
 156       symbolOop sym = table->basic_add(index, (u1*)names[i], lengths[i],
 157                                        hashValues[i], CHECK);
 158       cp->symbol_at_put(cp_indices[i], sym);
 159     }
 160   }
 161 }
 162 
 163 symbolOop SymbolTable::basic_add(int index, u1 *name, int len,
 164                                  unsigned int hashValue, TRAPS) {
 165   assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
 166          "proposed name of symbol must be stable");
 167 
 168   // We assume that lookup() has been called already, that it failed,
 169   // and symbol was not found.  We create the symbol here.
 170   symbolKlass* sk  = (symbolKlass*) Universe::symbolKlassObj()->klass_part();
 171   symbolOop s_oop = sk->allocate_symbol(name, len, CHECK_NULL);
 172   symbolHandle sym (THREAD, s_oop);
 173 
 174   // Allocation must be done before grapping the SymbolTable_lock lock
 175   MutexLocker ml(SymbolTable_lock, THREAD);
 176 
 177   assert(sym->equals((char*)name, len), "symbol must be properly initialized");
 178 
 179   // Since look-up was done lock-free, we need to check if another
 180   // thread beat us in the race to insert the symbol.
 181 
 182   symbolOop test = lookup(index, (char*)name, len, hashValue);
 183   if (test != NULL) {
 184     // A race occurred and another thread introduced the symbol, this one
 185     // will be dropped and collected.
 186     return test;
 187   }
 188 
 189   HashtableEntry* entry = new_entry(hashValue, sym());
 190   add_entry(index, entry);
 191   return sym();
 192 }
 193 
 194 bool SymbolTable::basic_add(constantPoolHandle cp, int names_count,
 195                             const char** names, int* lengths,
 196                             int* cp_indices, unsigned int* hashValues,
 197                             TRAPS) {
 198   symbolKlass* sk  = (symbolKlass*) Universe::symbolKlassObj()->klass_part();
 199   symbolOop sym_oops[symbol_alloc_batch_size];
 200   bool allocated = sk->allocate_symbols(names_count, names, lengths,
 201                                         sym_oops, CHECK_false);
 202   if (!allocated) {
 203     return false;
 204   }
 205   symbolHandle syms[symbol_alloc_batch_size];
 206   int i;
 207   for (i=0; i<names_count; i++) {
 208     syms[i] = symbolHandle(THREAD, sym_oops[i]);
 209   }
 210 
 211   // Allocation must be done before grabbing the SymbolTable_lock lock
 212   MutexLocker ml(SymbolTable_lock, THREAD);
 213 
 214   for (i=0; i<names_count; i++) {
 215     assert(syms[i]->equals(names[i], lengths[i]), "symbol must be properly initialized");
 216     // Since look-up was done lock-free, we need to check if another
 217     // thread beat us in the race to insert the symbol.
 218     int index = hash_to_index(hashValues[i]);
 219     symbolOop test = lookup(index, names[i], lengths[i], hashValues[i]);
 220     if (test != NULL) {
 221       // A race occurred and another thread introduced the symbol, this one
 222       // will be dropped and collected. Use test instead.
 223       cp->symbol_at_put(cp_indices[i], test);
 224     } else {
 225       symbolOop sym = syms[i]();
 226       HashtableEntry* entry = new_entry(hashValues[i], sym);
 227       add_entry(index, entry);
 228       cp->symbol_at_put(cp_indices[i], sym);
 229     }
 230   }
 231 
 232   return true;
 233 }
 234 
 235 
 236 void SymbolTable::verify() {
 237   for (int i = 0; i < the_table()->table_size(); ++i) {
 238     HashtableEntry* p = the_table()->bucket(i);
 239     for ( ; p != NULL; p = p->next()) {
 240       symbolOop s = symbolOop(p->literal());
 241       guarantee(s != NULL, "symbol is NULL");
 242       s->verify();
 243       guarantee(s->is_perm(), "symbol not in permspace");
 244       unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length());
 245       guarantee(p->hash() == h, "broken hash in symbol table entry");
 246       guarantee(the_table()->hash_to_index(h) == i,
 247                 "wrong index in symbol table");
 248     }
 249   }
 250 }
 251 
 252 
 253 //---------------------------------------------------------------------------
 254 // Non-product code
 255 
 256 #ifndef PRODUCT
 257 
 258 void SymbolTable::print_histogram() {
 259   MutexLocker ml(SymbolTable_lock);
 260   const int results_length = 100;
 261   int results[results_length];
 262   int i,j;
 263 
 264   // initialize results to zero
 265   for (j = 0; j < results_length; j++) {
 266     results[j] = 0;
 267   }
 268 
 269   int total = 0;
 270   int max_symbols = 0;
 271   int out_of_range = 0;
 272   for (i = 0; i < the_table()->table_size(); i++) {
 273     HashtableEntry* p = the_table()->bucket(i);
 274     for ( ; p != NULL; p = p->next()) {
 275       int counter = symbolOop(p->literal())->utf8_length();
 276       total += counter;
 277       if (counter < results_length) {
 278         results[counter]++;
 279       } else {
 280         out_of_range++;
 281       }
 282       max_symbols = MAX2(max_symbols, counter);
 283     }
 284   }
 285   tty->print_cr("Symbol Table:");
 286   tty->print_cr("%8s %5d", "Total  ", total);
 287   tty->print_cr("%8s %5d", "Maximum", max_symbols);
 288   tty->print_cr("%8s %3.2f", "Average",
 289           ((float) total / (float) the_table()->table_size()));
 290   tty->print_cr("%s", "Histogram:");
 291   tty->print_cr(" %s %29s", "Length", "Number chains that length");
 292   for (i = 0; i < results_length; i++) {
 293     if (results[i] > 0) {
 294       tty->print_cr("%6d %10d", i, results[i]);
 295     }
 296   }
 297   int line_length = 70;
 298   tty->print_cr("%s %30s", " Length", "Number chains that length");
 299   for (i = 0; i < results_length; i++) {
 300     if (results[i] > 0) {
 301       tty->print("%4d", i);
 302       for (j = 0; (j < results[i]) && (j < line_length);  j++) {
 303         tty->print("%1s", "*");
 304       }
 305       if (j == line_length) {
 306         tty->print("%1s", "+");
 307       }
 308       tty->cr();
 309     }
 310   }
 311   tty->print_cr(" %s %d: %d\n", "Number chains longer than",
 312                     results_length, out_of_range);
 313 }
 314 
 315 #endif // PRODUCT
 316 
 317 // --------------------------------------------------------------------------
 318 
 319 #ifdef ASSERT
 320 class StableMemoryChecker : public StackObj {
 321   enum { _bufsize = wordSize*4 };
 322 
 323   address _region;
 324   jint    _size;
 325   u1      _save_buf[_bufsize];
 326 
 327   int sample(u1* save_buf) {
 328     if (_size <= _bufsize) {
 329       memcpy(save_buf, _region, _size);
 330       return _size;
 331     } else {
 332       // copy head and tail
 333       memcpy(&save_buf[0],          _region,                      _bufsize/2);
 334       memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2);
 335       return (_bufsize/2)*2;
 336     }
 337   }
 338 
 339  public:
 340   StableMemoryChecker(const void* region, jint size) {
 341     _region = (address) region;
 342     _size   = size;
 343     sample(_save_buf);
 344   }
 345 
 346   bool verify() {
 347     u1 check_buf[sizeof(_save_buf)];
 348     int check_size = sample(check_buf);
 349     return (0 == memcmp(_save_buf, check_buf, check_size));
 350   }
 351 
 352   void set_region(const void* region) { _region = (address) region; }
 353 };
 354 #endif
 355 
 356 
 357 // --------------------------------------------------------------------------
 358 
 359 
 360 // Compute the hash value for a java.lang.String object which would
 361 // contain the characters passed in. This hash value is used for at
 362 // least two purposes.
 363 //
 364 // (a) As the hash value used by the StringTable for bucket selection
 365 //     and comparison (stored in the HashtableEntry structures).  This
 366 //     is used in the String.intern() method.
 367 //
// (b) As the hash value used by the String object itself, in
//     String.hashCode().  This value is normally calculated in Java code
//     in the String.hashCode() method, but is precomputed for String
//     objects in the shared archive file.
 372 //
 373 //     For this reason, THIS ALGORITHM MUST MATCH String.hashCode().
 374 
 375 int StringTable::hash_string(jchar* s, int len) {
 376   unsigned h = 0;
 377   while (len-- > 0) {
 378     h = 31*h + (unsigned) *s;
 379     s++;
 380   }
 381   return h;
 382 }
 383 
 384 
// Storage for the static StringTable::_the_table pointer; it starts out NULL.
// NOTE(review): presumably this is what the_table() returns - confirm in the header.
StringTable* StringTable::_the_table = NULL;
 386 
 387 oop StringTable::lookup(int index, jchar* name,
 388                         int len, unsigned int hash) {
 389   for (HashtableEntry* l = bucket(index); l != NULL; l = l->next()) {
 390     if (l->hash() == hash) {
 391       if (java_lang_String::equals(l->literal(), name, len)) {
 392         return l->literal();
 393       }
 394     }
 395   }
 396   return NULL;
 397 }
 398 
 399 
 400 oop StringTable::basic_add(int index, Handle string_or_null, jchar* name,
 401                            int len, unsigned int hashValue, TRAPS) {
 402   debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
 403   assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
 404          "proposed name of symbol must be stable");
 405 
 406   Handle string;
 407   // try to reuse the string if possible
 408   if (!string_or_null.is_null() && string_or_null()->is_perm()) {
 409     string = string_or_null;
 410   } else {
 411     string = java_lang_String::create_tenured_from_unicode(name, len, CHECK_NULL);
 412   }
 413 
 414   // Allocation must be done before grapping the SymbolTable_lock lock
 415   MutexLocker ml(StringTable_lock, THREAD);
 416 
 417   assert(java_lang_String::equals(string(), name, len),
 418          "string must be properly initialized");
 419 
 420   // Since look-up was done lock-free, we need to check if another
 421   // thread beat us in the race to insert the symbol.
 422 
 423   oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int)
 424   if (test != NULL) {
 425     // Entry already added
 426     return test;
 427   }
 428 
 429   HashtableEntry* entry = new_entry(hashValue, string());
 430   add_entry(index, entry);
 431   return string();
 432 }
 433 
 434 
 435 oop StringTable::lookup(symbolOop symbol) {
 436   ResourceMark rm;
 437   int length;
 438   jchar* chars = symbol->as_unicode(length);
 439   unsigned int hashValue = hash_string(chars, length);
 440   int index = the_table()->hash_to_index(hashValue);
 441   return the_table()->lookup(index, chars, length, hashValue);
 442 }
 443 
 444 
 445 oop StringTable::intern(Handle string_or_null, jchar* name,
 446                         int len, TRAPS) {
 447   unsigned int hashValue = hash_string(name, len);
 448   int index = the_table()->hash_to_index(hashValue);
 449   oop string = the_table()->lookup(index, name, len, hashValue);
 450 
 451   // Found
 452   if (string != NULL) return string;
 453 
 454   // Otherwise, add to symbol to table
 455   return the_table()->basic_add(index, string_or_null, name, len,
 456                                 hashValue, CHECK_NULL);
 457 }
 458 
 459 oop StringTable::intern(symbolOop symbol, TRAPS) {
 460   if (symbol == NULL) return NULL;
 461   ResourceMark rm(THREAD);
 462   int length;
 463   jchar* chars = symbol->as_unicode(length);
 464   Handle string;
 465   oop result = intern(string, chars, length, CHECK_NULL);
 466   return result;
 467 }
 468 
 469 
 470 oop StringTable::intern(oop string, TRAPS)
 471 {
 472   if (string == NULL) return NULL;
 473   ResourceMark rm(THREAD);
 474   int length;
 475   Handle h_string (THREAD, string);
 476   jchar* chars = java_lang_String::as_unicode_string(string, length);
 477   oop result = intern(h_string, chars, length, CHECK_NULL);
 478   return result;
 479 }
 480 
 481 
 482 oop StringTable::intern(const char* utf8_string, TRAPS) {
 483   if (utf8_string == NULL) return NULL;
 484   ResourceMark rm(THREAD);
 485   int length = UTF8::unicode_length(utf8_string);
 486   jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
 487   UTF8::convert_to_unicode(utf8_string, chars, length);
 488   Handle string;
 489   oop result = intern(string, chars, length, CHECK_NULL);
 490   return result;
 491 }
 492 
 493 void StringTable::verify() {
 494   for (int i = 0; i < the_table()->table_size(); ++i) {
 495     HashtableEntry* p = the_table()->bucket(i);
 496     for ( ; p != NULL; p = p->next()) {
 497       oop s = p->literal();
 498       guarantee(s != NULL, "interned string is NULL");
 499       guarantee(s->is_perm(), "interned string not in permspace");
 500 
 501       int length;
 502       jchar* chars = java_lang_String::as_unicode_string(s, length);
 503       unsigned int h = hash_string(chars, length);
 504       guarantee(p->hash() == h, "broken hash in string table entry");
 505       guarantee(the_table()->hash_to_index(h) == i,
 506                 "wrong index in string table");
 507     }
 508   }
 509 }