1 /*
   2  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/javaClasses.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "classfile/systemDictionary.hpp"
  29 #include "gc_interface/collectedHeap.inline.hpp"
  30 #include "memory/filemap.hpp"
  31 #include "memory/gcLocker.inline.hpp"
  32 #include "oops/oop.inline.hpp"
  33 #include "oops/oop.inline2.hpp"
  34 #include "oops/symbolKlass.hpp"
  35 #include "runtime/mutexLocker.hpp"
  36 #include "utilities/hashtable.inline.hpp"
  37 
  38 // --------------------------------------------------------------------------
  39 
// Definition of the static table pointer that the_table() callers in this
// file go through. It starts out NULL; the code that installs the real
// table is not in this file section.
SymbolTable* SymbolTable::_the_table = NULL;
  41 
  42 // Lookup a symbol in a bucket.
  43 
  44 symbolOop SymbolTable::lookup(int index, const char* name,
  45                               int len, unsigned int hash) {
  46   for (HashtableEntry* e = bucket(index); e != NULL; e = e->next()) {
  47     if (e->hash() == hash) {
  48       symbolOop sym = symbolOop(e->literal());
  49       if (sym->equals(name, len)) {
  50         return sym;
  51       }
  52     }
  53   }
  54   return NULL;
  55 }
  56 
  57 
// We take care not to block while holding the SymbolTable_lock.
// Otherwise, the system might deadlock, since the symbol table is used
// during compilation (VM_thread). The lock-free synchronization is
// simplified by the fact that we do not delete entries in the symbol
// table during normal execution (only during safepoints).
  64 
  65 symbolOop SymbolTable::lookup(const char* name, int len, TRAPS) {
  66   unsigned int hashValue = hash_symbol(name, len);
  67   int index = the_table()->hash_to_index(hashValue);
  68 
  69   symbolOop s = the_table()->lookup(index, name, len, hashValue);
  70 
  71   // Found
  72   if (s != NULL) return s;
  73 
  74   // Otherwise, add to symbol to table
  75   return the_table()->basic_add(index, (u1*)name, len, hashValue, CHECK_NULL);
  76 }
  77 
  78 symbolOop SymbolTable::lookup(symbolHandle sym, int begin, int end, TRAPS) {
  79   char* buffer;
  80   int index, len;
  81   unsigned int hashValue;
  82   char* name;
  83   {
  84     debug_only(No_Safepoint_Verifier nsv;)
  85 
  86     name = (char*)sym->base() + begin;
  87     len = end - begin;
  88     hashValue = hash_symbol(name, len);
  89     index = the_table()->hash_to_index(hashValue);
  90     symbolOop s = the_table()->lookup(index, name, len, hashValue);
  91 
  92     // Found
  93     if (s != NULL) return s;
  94   }
  95 
  96   // Otherwise, add to symbol to table. Copy to a C string first.
  97   char stack_buf[128];
  98   ResourceMark rm(THREAD);
  99   if (len <= 128) {
 100     buffer = stack_buf;
 101   } else {
 102     buffer = NEW_RESOURCE_ARRAY_IN_THREAD(THREAD, char, len);
 103   }
 104   for (int i=0; i<len; i++) {
 105     buffer[i] = name[i];
 106   }
 107   // Make sure there is no safepoint in the code above since name can't move.
 108   // We can't include the code in No_Safepoint_Verifier because of the
 109   // ResourceMark.
 110 
 111   return the_table()->basic_add(index, (u1*)buffer, len, hashValue, CHECK_NULL);
 112 }
 113 
 114 symbolOop SymbolTable::lookup_only(const char* name, int len,
 115                                    unsigned int& hash) {
 116   hash = hash_symbol(name, len);
 117   int index = the_table()->hash_to_index(hash);
 118 
 119   return the_table()->lookup(index, name, len, hash);
 120 }
 121 
 122 // Suggestion: Push unicode-based lookup all the way into the hashing
 123 // and probing logic, so there is no need for convert_to_utf8 until
 124 // an actual new symbolOop is created.
 125 symbolOop SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) {
 126   int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
 127   char stack_buf[128];
 128   if (utf8_length < (int) sizeof(stack_buf)) {
 129     char* chars = stack_buf;
 130     UNICODE::convert_to_utf8(name, utf16_length, chars);
 131     return lookup(chars, utf8_length, THREAD);
 132   } else {
 133     ResourceMark rm(THREAD);
 134     char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
 135     UNICODE::convert_to_utf8(name, utf16_length, chars);
 136     return lookup(chars, utf8_length, THREAD);
 137   }
 138 }
 139 
 140 symbolOop SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length,
 141                                            unsigned int& hash) {
 142   int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length);
 143   char stack_buf[128];
 144   if (utf8_length < (int) sizeof(stack_buf)) {
 145     char* chars = stack_buf;
 146     UNICODE::convert_to_utf8(name, utf16_length, chars);
 147     return lookup_only(chars, utf8_length, hash);
 148   } else {
 149     ResourceMark rm;
 150     char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);;
 151     UNICODE::convert_to_utf8(name, utf16_length, chars);
 152     return lookup_only(chars, utf8_length, hash);
 153   }
 154 }
 155 
 156 void SymbolTable::add(constantPoolHandle cp, int names_count,
 157                       const char** names, int* lengths, int* cp_indices,
 158                       unsigned int* hashValues, TRAPS) {
 159   SymbolTable* table = the_table();
 160   bool added = table->basic_add(cp, names_count, names, lengths,
 161                                 cp_indices, hashValues, CHECK);
 162   if (!added) {
 163     // do it the hard way
 164     for (int i=0; i<names_count; i++) {
 165       int index = table->hash_to_index(hashValues[i]);
 166       symbolOop sym = table->basic_add(index, (u1*)names[i], lengths[i],
 167                                        hashValues[i], CHECK);
 168       cp->symbol_at_put(cp_indices[i], sym);
 169     }
 170   }
 171 }
 172 
 173 symbolOop SymbolTable::basic_add(int index, u1 *name, int len,
 174                                  unsigned int hashValue, TRAPS) {
 175   assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
 176          "proposed name of symbol must be stable");
 177 
 178   // We assume that lookup() has been called already, that it failed,
 179   // and symbol was not found.  We create the symbol here.
 180   symbolKlass* sk  = (symbolKlass*) Universe::symbolKlassObj()->klass_part();
 181   symbolOop s_oop = sk->allocate_symbol(name, len, CHECK_NULL);
 182   symbolHandle sym (THREAD, s_oop);
 183 
 184   // Allocation must be done before grapping the SymbolTable_lock lock
 185   MutexLocker ml(SymbolTable_lock, THREAD);
 186 
 187   assert(sym->equals((char*)name, len), "symbol must be properly initialized");
 188 
 189   // Since look-up was done lock-free, we need to check if another
 190   // thread beat us in the race to insert the symbol.
 191 
 192   symbolOop test = lookup(index, (char*)name, len, hashValue);
 193   if (test != NULL) {
 194     // A race occurred and another thread introduced the symbol, this one
 195     // will be dropped and collected.
 196     return test;
 197   }
 198 
 199   HashtableEntry* entry = new_entry(hashValue, sym());
 200   add_entry(index, entry);
 201   return sym();
 202 }
 203 
 204 bool SymbolTable::basic_add(constantPoolHandle cp, int names_count,
 205                             const char** names, int* lengths,
 206                             int* cp_indices, unsigned int* hashValues,
 207                             TRAPS) {
 208   symbolKlass* sk  = (symbolKlass*) Universe::symbolKlassObj()->klass_part();
 209   symbolOop sym_oops[symbol_alloc_batch_size];
 210   bool allocated = sk->allocate_symbols(names_count, names, lengths,
 211                                         sym_oops, CHECK_false);
 212   if (!allocated) {
 213     return false;
 214   }
 215   symbolHandle syms[symbol_alloc_batch_size];
 216   int i;
 217   for (i=0; i<names_count; i++) {
 218     syms[i] = symbolHandle(THREAD, sym_oops[i]);
 219   }
 220 
 221   // Allocation must be done before grabbing the SymbolTable_lock lock
 222   MutexLocker ml(SymbolTable_lock, THREAD);
 223 
 224   for (i=0; i<names_count; i++) {
 225     assert(syms[i]->equals(names[i], lengths[i]), "symbol must be properly initialized");
 226     // Since look-up was done lock-free, we need to check if another
 227     // thread beat us in the race to insert the symbol.
 228     int index = hash_to_index(hashValues[i]);
 229     symbolOop test = lookup(index, names[i], lengths[i], hashValues[i]);
 230     if (test != NULL) {
 231       // A race occurred and another thread introduced the symbol, this one
 232       // will be dropped and collected. Use test instead.
 233       cp->symbol_at_put(cp_indices[i], test);
 234     } else {
 235       symbolOop sym = syms[i]();
 236       HashtableEntry* entry = new_entry(hashValues[i], sym);
 237       add_entry(index, entry);
 238       cp->symbol_at_put(cp_indices[i], sym);
 239     }
 240   }
 241 
 242   return true;
 243 }
 244 
 245 
 246 void SymbolTable::verify() {
 247   for (int i = 0; i < the_table()->table_size(); ++i) {
 248     HashtableEntry* p = the_table()->bucket(i);
 249     for ( ; p != NULL; p = p->next()) {
 250       symbolOop s = symbolOop(p->literal());
 251       guarantee(s != NULL, "symbol is NULL");
 252       s->verify();
 253       guarantee(s->is_perm(), "symbol not in permspace");
 254       unsigned int h = hash_symbol((char*)s->bytes(), s->utf8_length());
 255       guarantee(p->hash() == h, "broken hash in symbol table entry");
 256       guarantee(the_table()->hash_to_index(h) == i,
 257                 "wrong index in symbol table");
 258     }
 259   }
 260 }
 261 
 262 
 263 //---------------------------------------------------------------------------
 264 // Non-product code
 265 
 266 #ifndef PRODUCT
 267 
 268 void SymbolTable::print_histogram() {
 269   MutexLocker ml(SymbolTable_lock);
 270   const int results_length = 100;
 271   int results[results_length];
 272   int i,j;
 273 
 274   // initialize results to zero
 275   for (j = 0; j < results_length; j++) {
 276     results[j] = 0;
 277   }
 278 
 279   int total = 0;
 280   int max_symbols = 0;
 281   int out_of_range = 0;
 282   for (i = 0; i < the_table()->table_size(); i++) {
 283     HashtableEntry* p = the_table()->bucket(i);
 284     for ( ; p != NULL; p = p->next()) {
 285       int counter = symbolOop(p->literal())->utf8_length();
 286       total += counter;
 287       if (counter < results_length) {
 288         results[counter]++;
 289       } else {
 290         out_of_range++;
 291       }
 292       max_symbols = MAX2(max_symbols, counter);
 293     }
 294   }
 295   tty->print_cr("Symbol Table:");
 296   tty->print_cr("%8s %5d", "Total  ", total);
 297   tty->print_cr("%8s %5d", "Maximum", max_symbols);
 298   tty->print_cr("%8s %3.2f", "Average",
 299           ((float) total / (float) the_table()->table_size()));
 300   tty->print_cr("%s", "Histogram:");
 301   tty->print_cr(" %s %29s", "Length", "Number chains that length");
 302   for (i = 0; i < results_length; i++) {
 303     if (results[i] > 0) {
 304       tty->print_cr("%6d %10d", i, results[i]);
 305     }
 306   }
 307   int line_length = 70;
 308   tty->print_cr("%s %30s", " Length", "Number chains that length");
 309   for (i = 0; i < results_length; i++) {
 310     if (results[i] > 0) {
 311       tty->print("%4d", i);
 312       for (j = 0; (j < results[i]) && (j < line_length);  j++) {
 313         tty->print("%1s", "*");
 314       }
 315       if (j == line_length) {
 316         tty->print("%1s", "+");
 317       }
 318       tty->cr();
 319     }
 320   }
 321   tty->print_cr(" %s %d: %d\n", "Number chains longer than",
 322                     results_length, out_of_range);
 323 }
 324 
 325 #endif // PRODUCT
 326 
 327 // --------------------------------------------------------------------------
 328 
 329 #ifdef ASSERT
 330 class StableMemoryChecker : public StackObj {
 331   enum { _bufsize = wordSize*4 };
 332 
 333   address _region;
 334   jint    _size;
 335   u1      _save_buf[_bufsize];
 336 
 337   int sample(u1* save_buf) {
 338     if (_size <= _bufsize) {
 339       memcpy(save_buf, _region, _size);
 340       return _size;
 341     } else {
 342       // copy head and tail
 343       memcpy(&save_buf[0],          _region,                      _bufsize/2);
 344       memcpy(&save_buf[_bufsize/2], _region + _size - _bufsize/2, _bufsize/2);
 345       return (_bufsize/2)*2;
 346     }
 347   }
 348 
 349  public:
 350   StableMemoryChecker(const void* region, jint size) {
 351     _region = (address) region;
 352     _size   = size;
 353     sample(_save_buf);
 354   }
 355 
 356   bool verify() {
 357     u1 check_buf[sizeof(_save_buf)];
 358     int check_size = sample(check_buf);
 359     return (0 == memcmp(_save_buf, check_buf, check_size));
 360   }
 361 
 362   void set_region(const void* region) { _region = (address) region; }
 363 };
 364 #endif
 365 
 366 
 367 // --------------------------------------------------------------------------
 368 
 369 
 370 // Compute the hash value for a java.lang.String object which would
 371 // contain the characters passed in. This hash value is used for at
 372 // least two purposes.
 373 //
 374 // (a) As the hash value used by the StringTable for bucket selection
 375 //     and comparison (stored in the HashtableEntry structures).  This
 376 //     is used in the String.intern() method.
 377 //
// (b) As the hash value used by the String object itself, in
//     String.hashCode().  This value is normally calculated in Java code
//     in the String.hashCode() method, but is precomputed for String
//     objects in the shared archive file.
 382 //
 383 //     For this reason, THIS ALGORITHM MUST MATCH String.hashCode().
 384 
 385 int StringTable::hash_string(jchar* s, int len) {
 386   unsigned h = 0;
 387   while (len-- > 0) {
 388     h = 31*h + (unsigned) *s;
 389     s++;
 390   }
 391   return h;
 392 }
 393 
 394 
// Definition of the static table pointer that the_table() callers in this
// file go through. It starts out NULL; the code that installs the real
// table is not in this file section.
StringTable* StringTable::_the_table = NULL;
 396 
 397 oop StringTable::lookup(int index, jchar* name,
 398                         int len, unsigned int hash) {
 399   for (HashtableEntry* l = bucket(index); l != NULL; l = l->next()) {
 400     if (l->hash() == hash) {
 401       if (java_lang_String::equals(l->literal(), name, len)) {
 402         return l->literal();
 403       }
 404     }
 405   }
 406   return NULL;
 407 }
 408 
 409 
 410 oop StringTable::basic_add(int index, Handle string_or_null, jchar* name,
 411                            int len, unsigned int hashValue, TRAPS) {
 412   debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
 413   assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
 414          "proposed name of symbol must be stable");
 415 
 416   Handle string;
 417   // try to reuse the string if possible
 418   if (!string_or_null.is_null() && string_or_null()->is_perm()) {
 419     string = string_or_null;
 420   } else {
 421     string = java_lang_String::create_tenured_from_unicode(name, len, CHECK_NULL);
 422   }
 423 
 424   // Allocation must be done before grapping the SymbolTable_lock lock
 425   MutexLocker ml(StringTable_lock, THREAD);
 426 
 427   assert(java_lang_String::equals(string(), name, len),
 428          "string must be properly initialized");
 429 
 430   // Since look-up was done lock-free, we need to check if another
 431   // thread beat us in the race to insert the symbol.
 432 
 433   oop test = lookup(index, name, len, hashValue); // calls lookup(u1*, int)
 434   if (test != NULL) {
 435     // Entry already added
 436     return test;
 437   }
 438 
 439   HashtableEntry* entry = new_entry(hashValue, string());
 440   add_entry(index, entry);
 441   return string();
 442 }
 443 
 444 
 445 oop StringTable::lookup(symbolOop symbol) {
 446   ResourceMark rm;
 447   int length;
 448   jchar* chars = symbol->as_unicode(length);
 449   unsigned int hashValue = hash_string(chars, length);
 450   int index = the_table()->hash_to_index(hashValue);
 451   return the_table()->lookup(index, chars, length, hashValue);
 452 }
 453 
 454 
 455 oop StringTable::intern(Handle string_or_null, jchar* name,
 456                         int len, TRAPS) {
 457   unsigned int hashValue = hash_string(name, len);
 458   int index = the_table()->hash_to_index(hashValue);
 459   oop string = the_table()->lookup(index, name, len, hashValue);
 460 
 461   // Found
 462   if (string != NULL) return string;
 463 
 464   // Otherwise, add to symbol to table
 465   return the_table()->basic_add(index, string_or_null, name, len,
 466                                 hashValue, CHECK_NULL);
 467 }
 468 
 469 oop StringTable::intern(symbolOop symbol, TRAPS) {
 470   if (symbol == NULL) return NULL;
 471   ResourceMark rm(THREAD);
 472   int length;
 473   jchar* chars = symbol->as_unicode(length);
 474   Handle string;
 475   oop result = intern(string, chars, length, CHECK_NULL);
 476   return result;
 477 }
 478 
 479 
 480 oop StringTable::intern(oop string, TRAPS)
 481 {
 482   if (string == NULL) return NULL;
 483   ResourceMark rm(THREAD);
 484   int length;
 485   Handle h_string (THREAD, string);
 486   jchar* chars = java_lang_String::as_unicode_string(string, length);
 487   oop result = intern(h_string, chars, length, CHECK_NULL);
 488   return result;
 489 }
 490 
 491 
 492 oop StringTable::intern(const char* utf8_string, TRAPS) {
 493   if (utf8_string == NULL) return NULL;
 494   ResourceMark rm(THREAD);
 495   int length = UTF8::unicode_length(utf8_string);
 496   jchar* chars = NEW_RESOURCE_ARRAY(jchar, length);
 497   UTF8::convert_to_unicode(utf8_string, chars, length);
 498   Handle string;
 499   oop result = intern(string, chars, length, CHECK_NULL);
 500   return result;
 501 }
 502 
 503 void StringTable::verify() {
 504   for (int i = 0; i < the_table()->table_size(); ++i) {
 505     HashtableEntry* p = the_table()->bucket(i);
 506     for ( ; p != NULL; p = p->next()) {
 507       oop s = p->literal();
 508       guarantee(s != NULL, "interned string is NULL");
 509       guarantee(s->is_perm(), "interned string not in permspace");
 510 
 511       int length;
 512       jchar* chars = java_lang_String::as_unicode_string(s, length);
 513       unsigned int h = hash_string(chars, length);
 514       guarantee(p->hash() == h, "broken hash in string table entry");
 515       guarantee(the_table()->hash_to_index(h) == i,
 516                 "wrong index in string table");
 517     }
 518   }
 519 }