1 /*
   2  * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 
  26 #include "precompiled.hpp"
  27 #include "classfile/altHashing.hpp"
  28 #include "classfile/classLoaderData.hpp"
  29 #include "gc/shared/collectedHeap.hpp"
  30 #include "logging/log.hpp"
  31 #include "logging/logStream.hpp"
  32 #include "memory/allocation.inline.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "oops/symbol.hpp"
  35 #include "runtime/atomic.hpp"
  36 #include "runtime/os.hpp"
  37 #include "utilities/utf8.hpp"
  38 
  39 uint32_t Symbol::pack_length_and_refcount(int length, int refcount) {
  40   STATIC_ASSERT(max_symbol_length == ((1 << 16) - 1));
  41   STATIC_ASSERT(PERM_REFCOUNT == ((1 << 16) - 1));
  42   assert(length >= 0, "negative length");
  43   assert(length <= max_symbol_length, "too long symbol");
  44   assert(refcount >= 0, "negative refcount");
  45   assert(refcount <= PERM_REFCOUNT, "invalid refcount");
  46   uint32_t hi = length;
  47   uint32_t lo = refcount;
  48   return (hi << 16) | lo;
  49 }
  50 
  51 Symbol::Symbol(const u1* name, int length, int refcount) {
  52   _length_and_refcount =  pack_length_and_refcount(length, refcount);
  53   _identity_hash = (short)os::random();
  54   for (int i = 0; i < length; i++) {
  55     byte_at_put(i, name[i]);
  56   }
  57 }
  58 
  59 void* Symbol::operator new(size_t sz, int len, TRAPS) throw() {
  60   int alloc_size = size(len)*wordSize;
  61   address res = (address) AllocateHeap(alloc_size, mtSymbol);
  62   return res;
  63 }
  64 
  65 void* Symbol::operator new(size_t sz, int len, Arena* arena, TRAPS) throw() {
  66   int alloc_size = size(len)*wordSize;
  67   address res = (address)arena->Amalloc_4(alloc_size);
  68   return res;
  69 }
  70 
  71 void Symbol::operator delete(void *p) {
  72   assert(((Symbol*)p)->refcount() == 0, "should not call this");
  73   FreeHeap(p);
  74 }
  75 
  76 // ------------------------------------------------------------------
  77 // Symbol::contains_byte_at
  78 //
  79 // Tests if the symbol contains the given byte at the given position.
  80 bool Symbol::contains_byte_at(int position, char code_byte) const {
  81   if (position < 0)  return false;  // can happen with ends_with
  82   if (position >= utf8_length()) return false;
  83   return code_byte == char_at(position);
  84 }
  85 
  86 // ------------------------------------------------------------------
  87 // Symbol::contains_utf8_at
  88 //
  89 // Tests if the symbol contains the given utf8 substring
  90 // at the given byte position.
  91 bool Symbol::contains_utf8_at(int position, const char* substring, int len) const {
  92   assert(len > 0 && substring != NULL && (int) strlen(substring) >= len,
  93          "substring must be valid");
  94   if (len == 1)  return contains_byte_at(position, substring[0]);
  95   if (position < 0)  return false;  // can happen with ends_with
  96   if (position + len > utf8_length()) return false;
  97   while (len-- > 0) {
  98     if (substring[len] != char_at(position + len))
  99       return false;
 100   }
 101   assert(len == -1, "we should be at the beginning");
 102   return true;
 103 }
 104 
 105 bool Symbol::is_Q_signature() const {
 106   return utf8_length() > 2 && char_at(0) == 'Q' && ends_with(';');
 107 }
 108 
 109 bool Symbol::is_Q_array_signature() const {
 110   int l = utf8_length();
 111   if (l < 2 || char_at(0) != '[' || char_at(l - 1) != ';') {
 112     return false;
 113   }
 114   for (int i = 1; i < (l - 2); i++) {
 115     char c = char_at(i);
 116     if (c == 'Q') {
 117       return true;
 118     }
 119     if (c != '[') {
 120       return false;
 121     }
 122   }
 123   return false;
 124 }
 125 
 126 bool Symbol::is_Q_singledim_array_signature() const {
 127   return utf8_length() > 3 && char_at(0) == '[' && char_at(1) == 'Q' && ends_with(';');
 128 }
 129 
 130 Symbol* Symbol::fundamental_name(TRAPS) {
 131   if ((char_at(0) == 'Q' || char_at(0) == 'L') && ends_with(';')) {
 132     return SymbolTable::lookup(this, 1, utf8_length() - 1, CHECK_NULL);
 133   } else {
 134     // reference count is incremented to be consistent with the behavior with
 135     // the SymbolTable::lookup() call above
 136     this->increment_refcount();
 137     return this;
 138   }
 139 }
 140 
 141 bool Symbol::is_same_fundamental_type(Symbol* s) const {
 142   if (this == s) return true;
 143   if (utf8_length() < 3) return false;
 144   int offset1, offset2, len;
 145   if (ends_with(';')) {
 146     if (char_at(0) != 'Q' && char_at(0) != 'L') return false;
 147     offset1 = 1;
 148     len = utf8_length() - 2;
 149   } else {
 150     offset1 = 0;
 151     len = utf8_length();
 152   }
 153   if (ends_with(';')) {
 154     if (s->char_at(0) != 'Q' && s->char_at(0) != 'L') return false;
 155     offset2 = 1;
 156   } else {
 157     offset2 = 0;
 158   }
 159   if ((offset2 + len) > s->utf8_length()) return false;
 160   if ((utf8_length() - offset1 * 2) != (s->utf8_length() - offset2 * 2))
 161     return false;
 162   int l = len;
 163   while (l-- > 0) {
 164     if (char_at(offset1 + l) != s->char_at(offset2 + l))
 165       return false;
 166   }
 167   return true;
 168 }
 169 
// ------------------------------------------------------------------
// Symbol::index_of_at
//
// Searches this symbol's utf8 bytes, starting at byte index i, for the
// first occurrence of the len-byte string str.
// Returns the byte index (relative to the start of the symbol) of the
// first match, or -1 if there is none.  A len <= 0 returns 0 regardless
// of i.
int Symbol::index_of_at(int i, const char* str, int len) const {
  assert(i >= 0 && i <= utf8_length(), "oob");
  if (len <= 0)  return 0;
  char first_char = str[0];
  address bytes = (address) ((Symbol*)this)->base();
  // Last position at which a len-byte match could still start.
  // NOTE(review): when len > utf8_length() this pointer lies before
  // 'bytes'; the 'scan > limit' test below then rejects the search.
  address limit = bytes + utf8_length() - len;  // inclusive limit
  address scan = bytes + i;
  if (scan > limit)
    return -1;
  for (; scan <= limit; scan++) {
    // Jump to the next candidate: the next occurrence of the first byte
    // of str within [scan, limit].
    scan = (address) memchr(scan, first_char, (limit + 1 - scan));
    if (scan == NULL)
      return -1;  // not found
    assert(scan >= bytes+i && scan <= limit, "scan oob");
    // Candidate found; check the full string at this position.
    if (memcmp(scan, str, len) == 0)
      return (int)(scan - bytes);
  }
  return -1;
}
 194 
 195 
 196 char* Symbol::as_C_string(char* buf, int size) const {
 197   if (size > 0) {
 198     int len = MIN2(size - 1, utf8_length());
 199     for (int i = 0; i < len; i++) {
 200       buf[i] = char_at(i);
 201     }
 202     buf[len] = '\0';
 203   }
 204   return buf;
 205 }
 206 
 207 char* Symbol::as_C_string() const {
 208   int len = utf8_length();
 209   char* str = NEW_RESOURCE_ARRAY(char, len + 1);
 210   return as_C_string(str, len + 1);
 211 }
 212 
 213 void Symbol::print_utf8_on(outputStream* st) const {
 214   st->print("%s", as_C_string());
 215 }
 216 
 217 void Symbol::print_symbol_on(outputStream* st) const {
 218   char *s;
 219   st = st ? st : tty;
 220   {
 221     // ResourceMark may not affect st->print(). If st is a string
 222     // stream it could resize, using the same resource arena.
 223     ResourceMark rm;
 224     s = as_quoted_ascii();
 225     s = os::strdup(s);
 226   }
 227   if (s == NULL) {
 228     st->print("(null)");
 229   } else {
 230     st->print("%s", s);
 231     os::free(s);
 232   }
 233 }
 234 
 235 char* Symbol::as_quoted_ascii() const {
 236   const char *ptr = (const char *)&_body[0];
 237   int quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
 238   char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
 239   UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
 240   return result;
 241 }
 242 
 243 jchar* Symbol::as_unicode(int& length) const {
 244   Symbol* this_ptr = (Symbol*)this;
 245   length = UTF8::unicode_length((char*)this_ptr->bytes(), utf8_length());
 246   jchar* result = NEW_RESOURCE_ARRAY(jchar, length);
 247   if (length > 0) {
 248     UTF8::convert_to_unicode((char*)this_ptr->bytes(), result, length);
 249   }
 250   return result;
 251 }
 252 
 253 const char* Symbol::as_klass_external_name(char* buf, int size) const {
 254   if (size > 0) {
 255     char* str    = as_C_string(buf, size);
 256     int   length = (int)strlen(str);
 257     // Turn all '/'s into '.'s (also for array klasses)
 258     for (int index = 0; index < length; index++) {
 259       if (str[index] == '/') {
 260         str[index] = '.';
 261       }
 262     }
 263     return str;
 264   } else {
 265     return buf;
 266   }
 267 }
 268 
 269 const char* Symbol::as_klass_external_name() const {
 270   char* str    = as_C_string();
 271   int   length = (int)strlen(str);
 272   // Turn all '/'s into '.'s (also for array klasses)
 273   for (int index = 0; index < length; index++) {
 274     if (str[index] == '/') {
 275       str[index] = '.';
 276     }
 277   }
 278   return str;
 279 }
 280 
 281 static void print_class(outputStream *os, char *class_str, int len) {
 282   for (int i = 0; i < len; ++i) {
 283     if (class_str[i] == '/') {
 284       os->put('.');
 285     } else {
 286       os->put(class_str[i]);
 287     }
 288   }
 289 }
 290 
 291 static void print_array(outputStream *os, char *array_str, int len) {
 292   int dimensions = 0;
 293   for (int i = 0; i < len; ++i) {
 294     if (array_str[i] == '[') {
 295       dimensions++;
 296     } else if (array_str[i] == 'L' || array_str[i] == 'Q') {
 297       // Expected format: L<type name>;. Skip 'L' and ';' delimiting the type name.
 298       print_class(os, array_str+i+1, len-i-2);
 299       break;
 300     } else {
 301       os->print("%s", type2name(char2type(array_str[i])));
 302     }
 303   }
 304   for (int i = 0; i < dimensions; ++i) {
 305     os->print("[]");
 306   }
 307 }
 308 
 309 void Symbol::print_as_signature_external_return_type(outputStream *os) {
 310   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
 311     if (ss.at_return_type()) {
 312       if (ss.is_array()) {
 313         print_array(os, (char*)ss.raw_bytes(), (int)ss.raw_length());
 314       } else if (ss.is_object()) {
 315         // Expected format: L<type name>;. Skip 'L' and ';' delimiting the class name.
 316         print_class(os, (char*)ss.raw_bytes()+1, (int)ss.raw_length()-2);
 317       } else {
 318         os->print("%s", type2name(ss.type()));
 319       }
 320     }
 321   }
 322 }
 323 
 324 void Symbol::print_as_signature_external_parameters(outputStream *os) {
 325   bool first = true;
 326   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
 327     if (ss.at_return_type()) break;
 328     if (!first) { os->print(", "); }
 329     if (ss.is_array()) {
 330       print_array(os, (char*)ss.raw_bytes(), (int)ss.raw_length());
 331     } else if (ss.is_object()) {
 332       // Skip 'L' and ';'.
 333       print_class(os, (char*)ss.raw_bytes()+1, (int)ss.raw_length()-2);
 334     } else {
 335       os->print("%s", type2name(ss.type()));
 336     }
 337     first = false;
 338   }
 339 }
 340 
// Increment refcount while checking for zero.  If the Symbol's refcount becomes zero
// a thread could be concurrently removing the Symbol.  This is used during SymbolTable
// lookup to avoid reviving a dead Symbol.
//
// Returns true if the symbol may be used by the caller (its refcount was
// incremented, or it is permanent), and false if the refcount was already zero.
bool Symbol::try_increment_refcount() {
  uint32_t found = _length_and_refcount;
  while (true) {
    // Work from the most recently observed value of the packed word.
    uint32_t old_value = found;
    int refc = extract_refcount(old_value);
    if (refc == PERM_REFCOUNT) {
      return true;  // sticky max or created permanent
    } else if (refc == 0) {
      return false; // dead, can't revive.
    } else {
      // The refcount occupies the low bits of the packed word (see
      // pack_length_and_refcount), so adding one bumps only the refcount.
      // The swap only takes effect if the word still equals old_value.
      found = Atomic::cmpxchg(old_value + 1, &_length_and_refcount, old_value);
      if (found == old_value) {
        return true; // successfully updated.
      }
      // refcount changed, try again.
    }
  }
}
 362 
 363 // The increment_refcount() is called when not doing lookup. It is assumed that you
 364 // have a symbol with a non-zero refcount and it can't become zero while referenced by
 365 // this caller.
 366 void Symbol::increment_refcount() {
 367   if (!try_increment_refcount()) {
 368 #ifdef ASSERT
 369     print();
 370     fatal("refcount has gone to zero");
 371 #endif
 372   }
 373 #ifndef PRODUCT
 374   if (refcount() != PERM_REFCOUNT) { // not a permanent symbol
 375     NOT_PRODUCT(Atomic::inc(&_total_count);)
 376   }
 377 #endif
 378 }
 379 
// Decrement refcount potentially while racing increment, so we need
// to check the value after attempting to decrement so that if another
// thread increments to PERM_REFCOUNT the value is not decremented.
//
// A permanent symbol is left unchanged.  A refcount that is already zero
// is a fatal error in ASSERT builds and is silently ignored otherwise.
void Symbol::decrement_refcount() {
  uint32_t found = _length_and_refcount;
  while (true) {
    // Work from the most recently observed value of the packed word.
    uint32_t old_value = found;
    int refc = extract_refcount(old_value);
    if (refc == PERM_REFCOUNT) {
      return;  // refcount is permanent, permanent is sticky
    } else if (refc == 0) {
#ifdef ASSERT
      print();
      fatal("refcount underflow");
#endif
      return;
    } else {
      // refc is non-zero here, so subtracting one from the packed word
      // lowers only the refcount field.  The swap only takes effect if
      // the word still equals old_value.
      found = Atomic::cmpxchg(old_value - 1, &_length_and_refcount, old_value);
      if (found == old_value) {
        return;  // successfully updated.
      }
      // refcount changed, try again.
    }
  }
}
 405 
// Sets this symbol's refcount to PERM_REFCOUNT while keeping its length.
// An already permanent symbol is left unchanged.  A refcount of zero is a
// fatal error in ASSERT builds and is silently ignored otherwise.
void Symbol::make_permanent() {
  uint32_t found = _length_and_refcount;
  while (true) {
    // Work from the most recently observed value of the packed word.
    uint32_t old_value = found;
    int refc = extract_refcount(old_value);
    if (refc == PERM_REFCOUNT) {
      return;  // refcount is permanent, permanent is sticky
    } else if (refc == 0) {
#ifdef ASSERT
      print();
      // NOTE(review): the message is shared with decrement_refcount();
      // here it means the symbol is already dead.
      fatal("refcount underflow");
#endif
      return;
    } else {
      // Rebuild the packed word with the same length and the permanent
      // refcount.  The swap only takes effect if the word still equals
      // old_value.
      int len = extract_length(old_value);
      found = Atomic::cmpxchg(pack_length_and_refcount(len, PERM_REFCOUNT), &_length_and_refcount, old_value);
      if (found == old_value) {
        return;  // successfully updated.
      }
      // refcount changed, try again.
    }
  }
}
 429 
 430 void Symbol::metaspace_pointers_do(MetaspaceClosure* it) {
 431   if (log_is_enabled(Trace, cds)) {
 432     LogStream trace_stream(Log(cds)::trace());
 433     trace_stream.print("Iter(Symbol): %p ", this);
 434     print_value_on(&trace_stream);
 435     trace_stream.cr();
 436   }
 437 }
 438 
 439 void Symbol::print_on(outputStream* st) const {
 440   st->print("Symbol: '");
 441   print_symbol_on(st);
 442   st->print("'");
 443   st->print(" count %d", refcount());
 444 }
 445 
 446 // The print_value functions are present in all builds, to support the
 447 // disassembler and error reporting.
 448 void Symbol::print_value_on(outputStream* st) const {
 449   st->print("'");
 450   for (int i = 0; i < utf8_length(); i++) {
 451     st->print("%c", char_at(i));
 452   }
 453   st->print("'");
 454 }
 455 
// Plausibility check for a pointer that might refer to a Symbol.
// Returns false as soon as any check fails.  The checks are ordered so
// that s is only dereferenced after its own memory has passed
// os::is_readable_range().
bool Symbol::is_valid(Symbol* s) {
  // Reject pointers that are not MetaWord-aligned or that are numerically
  // below the minimum page size.
  if (!is_aligned(s, sizeof(MetaWord))) return false;
  if ((size_t)s < os::min_page_size()) return false;

  // The range [s, s + 1) spans one Symbol object; it must pass before
  // s->utf8_length() is read below.
  if (!os::is_readable_range(s, s + 1)) return false;

  // Symbols are not allocated in Java heap.
  if (Universe::heap()->is_in_reserved(s)) return false;

  int len = s->utf8_length();
  if (len < 0) return false;

  // Finally, the len bytes of the symbol's body must pass the same check.
  jbyte* bytes = (jbyte*) s->bytes();
  return os::is_readable_range(bytes, bytes + len);
}
 471 
 472 void Symbol::print_Qvalue_on(outputStream* st) const {
 473   if (this == NULL) {
 474     st->print("NULL");
 475   } else {
 476     st->print("'Q");
 477     for (int i = 0; i < utf8_length(); i++) {
 478       st->print("%c", char_at(i));
 479     }
 480     st->print(";'");
 481   }
 482 }
 483 
// SymbolTable prints this in its statistics.
// Defined only in non-PRODUCT builds; incremented by
// Symbol::increment_refcount() for symbols that are not permanent.
NOT_PRODUCT(size_t Symbol::_total_count = 0;)