1 /*
   2  * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 
  26 #include "precompiled.hpp"
  27 #include "classfile/altHashing.hpp"
  28 #include "classfile/classLoaderData.hpp"
  29 #include "gc/shared/collectedHeap.hpp"
  30 #include "logging/log.hpp"
  31 #include "logging/logStream.hpp"
  32 #include "memory/allocation.inline.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "oops/symbol.hpp"
  36 #include "runtime/atomic.hpp"
  37 #include "runtime/os.hpp"
  38 #include "utilities/utf8.hpp"
  39 
  40 uint32_t Symbol::pack_length_and_refcount(int length, int refcount) {
  41   STATIC_ASSERT(max_symbol_length == ((1 << 16) - 1));
  42   STATIC_ASSERT(PERM_REFCOUNT == ((1 << 16) - 1));
  43   assert(length >= 0, "negative length");
  44   assert(length <= max_symbol_length, "too long symbol");
  45   assert(refcount >= 0, "negative refcount");
  46   assert(refcount <= PERM_REFCOUNT, "invalid refcount");
  47   uint32_t hi = length;
  48   uint32_t lo = refcount;
  49   return (hi << 16) | lo;
  50 }
  51 
  52 Symbol::Symbol(const u1* name, int length, int refcount) {
  53   _length_and_refcount =  pack_length_and_refcount(length, refcount);
  54   _identity_hash = (short)os::random();
  55   for (int i = 0; i < length; i++) {
  56     byte_at_put(i, name[i]);
  57   }
  58 }
  59 
  60 void* Symbol::operator new(size_t sz, int len) throw() {
  61   int alloc_size = size(len)*wordSize;
  62   address res = (address) AllocateHeap(alloc_size, mtSymbol);
  63   return res;
  64 }
  65 
  66 void* Symbol::operator new(size_t sz, int len, Arena* arena) throw() {
  67   int alloc_size = size(len)*wordSize;
  68   address res = (address)arena->Amalloc_4(alloc_size);
  69   return res;
  70 }
  71 
  72 void Symbol::operator delete(void *p) {
  73   assert(((Symbol*)p)->refcount() == 0, "should not call this");
  74   FreeHeap(p);
  75 }
  76 
  77 void Symbol::set_permanent() {
  78   // This is called at a safepoint during dumping of a dynamic CDS archive.
  79   assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
  80   _length_and_refcount =  pack_length_and_refcount(length(), PERM_REFCOUNT);
  81 }
  82 
  83 
  84 // ------------------------------------------------------------------
  85 // Symbol::contains_byte_at
  86 //
  87 // Tests if the symbol contains the given byte at the given position.
  88 bool Symbol::contains_byte_at(int position, char code_byte) const {
  89   if (position < 0)  return false;  // can happen with ends_with
  90   if (position >= utf8_length()) return false;
  91   return code_byte == char_at(position);
  92 }
  93 
  94 // ------------------------------------------------------------------
  95 // Symbol::contains_utf8_at
  96 //
  97 // Tests if the symbol contains the given utf8 substring
  98 // at the given byte position.
  99 bool Symbol::contains_utf8_at(int position, const char* substring, int len) const {
 100   assert(len > 0 && substring != NULL && (int) strlen(substring) >= len,
 101          "substring must be valid");
 102   if (len == 1)  return contains_byte_at(position, substring[0]);
 103   if (position < 0)  return false;  // can happen with ends_with
 104   if (position + len > utf8_length()) return false;
 105   while (len-- > 0) {
 106     if (substring[len] != char_at(position + len))
 107       return false;
 108   }
 109   assert(len == -1, "we should be at the beginning");
 110   return true;
 111 }
 112 
 113 bool Symbol::is_Q_signature() const {
 114   return utf8_length() > 2 && char_at(0) == 'Q' && ends_with(';');
 115 }
 116 
 117 bool Symbol::is_Q_array_signature() const {
 118   int l = utf8_length();
 119   if (l < 2 || char_at(0) != '[' || char_at(l - 1) != ';') {
 120     return false;
 121   }
 122   for (int i = 1; i < (l - 2); i++) {
 123     char c = char_at(i);
 124     if (c == 'Q') {
 125       return true;
 126     }
 127     if (c != '[') {
 128       return false;
 129     }
 130   }
 131   return false;
 132 }
 133 
 134 bool Symbol::is_Q_method_signature() const {
 135   assert(SignatureVerifier::is_valid_method_signature(this), "must be");
 136   int len = utf8_length();
 137   if (len > 4 && char_at(0) == '(') {
 138     for (int i=1; i<len-3; i++) { // Must end with ")Qx;", where x is at least one character or more.
 139       if (char_at(i) == ')' && char_at(i+1) == 'Q') {
 140         return true;
 141       }
 142     }
 143   }
 144   return false;
 145 }
 146 
 147 bool Symbol::is_Q_singledim_array_signature() const {
 148   return utf8_length() > 3 && char_at(0) == '[' && char_at(1) == 'Q' && ends_with(';');
 149 }
 150 
 151 Symbol* Symbol::fundamental_name(TRAPS) {
 152   if ((char_at(0) == 'Q' || char_at(0) == 'L') && ends_with(';')) {
 153     return SymbolTable::new_symbol(this, 1, utf8_length() - 1);
 154   } else {
 155     // reference count is incremented to be consistent with the behavior with
 156     // the SymbolTable::new_symbol() call above
 157     this->increment_refcount();
 158     return this;
 159   }
 160 }
 161 
 162 bool Symbol::is_same_fundamental_type(Symbol* s) const {
 163   if (this == s) return true;
 164   if (utf8_length() < 3) return false;
 165   int offset1, offset2, len;
 166   if (ends_with(';')) {
 167     if (char_at(0) != 'Q' && char_at(0) != 'L') return false;
 168     offset1 = 1;
 169     len = utf8_length() - 2;
 170   } else {
 171     offset1 = 0;
 172     len = utf8_length();
 173   }
 174   if (ends_with(';')) {
 175     if (s->char_at(0) != 'Q' && s->char_at(0) != 'L') return false;
 176     offset2 = 1;
 177   } else {
 178     offset2 = 0;
 179   }
 180   if ((offset2 + len) > s->utf8_length()) return false;
 181   if ((utf8_length() - offset1 * 2) != (s->utf8_length() - offset2 * 2))
 182     return false;
 183   int l = len;
 184   while (l-- > 0) {
 185     if (char_at(offset1 + l) != s->char_at(offset2 + l))
 186       return false;
 187   }
 188   return true;
 189 }
 190 
 191 // ------------------------------------------------------------------
 192 // Symbol::index_of
 193 //
 194 // Finds if the given string is a substring of this symbol's utf8 bytes.
 195 // Return -1 on failure.  Otherwise return the first index where str occurs.
 196 int Symbol::index_of_at(int i, const char* str, int len) const {
 197   assert(i >= 0 && i <= utf8_length(), "oob");
 198   if (len <= 0)  return 0;
 199   char first_char = str[0];
 200   address bytes = (address) ((Symbol*)this)->base();
 201   address limit = bytes + utf8_length() - len;  // inclusive limit
 202   address scan = bytes + i;
 203   if (scan > limit)
 204     return -1;
 205   for (; scan <= limit; scan++) {
 206     scan = (address) memchr(scan, first_char, (limit + 1 - scan));
 207     if (scan == NULL)
 208       return -1;  // not found
 209     assert(scan >= bytes+i && scan <= limit, "scan oob");
 210     if (memcmp(scan, str, len) == 0)
 211       return (int)(scan - bytes);
 212   }
 213   return -1;
 214 }
 215 
 216 
 217 char* Symbol::as_C_string(char* buf, int size) const {
 218   if (size > 0) {
 219     int len = MIN2(size - 1, utf8_length());
 220     for (int i = 0; i < len; i++) {
 221       buf[i] = char_at(i);
 222     }
 223     buf[len] = '\0';
 224   }
 225   return buf;
 226 }
 227 
 228 char* Symbol::as_C_string() const {
 229   int len = utf8_length();
 230   char* str = NEW_RESOURCE_ARRAY(char, len + 1);
 231   return as_C_string(str, len + 1);
 232 }
 233 
 234 void Symbol::print_utf8_on(outputStream* st) const {
 235   st->print("%s", as_C_string());
 236 }
 237 
 238 void Symbol::print_symbol_on(outputStream* st) const {
 239   char *s;
 240   st = st ? st : tty;
 241   {
 242     // ResourceMark may not affect st->print(). If st is a string
 243     // stream it could resize, using the same resource arena.
 244     ResourceMark rm;
 245     s = as_quoted_ascii();
 246     s = os::strdup(s);
 247   }
 248   if (s == NULL) {
 249     st->print("(null)");
 250   } else {
 251     st->print("%s", s);
 252     os::free(s);
 253   }
 254 }
 255 
 256 char* Symbol::as_quoted_ascii() const {
 257   const char *ptr = (const char *)&_body[0];
 258   int quoted_length = UTF8::quoted_ascii_length(ptr, utf8_length());
 259   char* result = NEW_RESOURCE_ARRAY(char, quoted_length + 1);
 260   UTF8::as_quoted_ascii(ptr, utf8_length(), result, quoted_length + 1);
 261   return result;
 262 }
 263 
 264 jchar* Symbol::as_unicode(int& length) const {
 265   Symbol* this_ptr = (Symbol*)this;
 266   length = UTF8::unicode_length((char*)this_ptr->bytes(), utf8_length());
 267   jchar* result = NEW_RESOURCE_ARRAY(jchar, length);
 268   if (length > 0) {
 269     UTF8::convert_to_unicode((char*)this_ptr->bytes(), result, length);
 270   }
 271   return result;
 272 }
 273 
 274 const char* Symbol::as_klass_external_name(char* buf, int size) const {
 275   if (size > 0) {
 276     char* str    = as_C_string(buf, size);
 277     int   length = (int)strlen(str);
 278     // Turn all '/'s into '.'s (also for array klasses)
 279     for (int index = 0; index < length; index++) {
 280       if (str[index] == '/') {
 281         str[index] = '.';
 282       }
 283     }
 284     return str;
 285   } else {
 286     return buf;
 287   }
 288 }
 289 
 290 const char* Symbol::as_klass_external_name() const {
 291   char* str    = as_C_string();
 292   int   length = (int)strlen(str);
 293   // Turn all '/'s into '.'s (also for array klasses)
 294   for (int index = 0; index < length; index++) {
 295     if (str[index] == '/') {
 296       str[index] = '.';
 297     }
 298   }
 299   return str;
 300 }
 301 
 302 static void print_class(outputStream *os, char *class_str, int len) {
 303   for (int i = 0; i < len; ++i) {
 304     if (class_str[i] == '/') {
 305       os->put('.');
 306     } else {
 307       os->put(class_str[i]);
 308     }
 309   }
 310 }
 311 
 312 static void print_array(outputStream *os, char *array_str, int len) {
 313   int dimensions = 0;
 314   for (int i = 0; i < len; ++i) {
 315     if (array_str[i] == '[') {
 316       dimensions++;
 317     } else if (array_str[i] == 'L' || array_str[i] == 'Q') {
 318       // Expected format: L<type name>;. Skip 'L' and ';' delimiting the type name.
 319       print_class(os, array_str+i+1, len-i-2);
 320       break;
 321     } else {
 322       os->print("%s", type2name(char2type(array_str[i])));
 323     }
 324   }
 325   for (int i = 0; i < dimensions; ++i) {
 326     os->print("[]");
 327   }
 328 }
 329 
 330 void Symbol::print_as_signature_external_return_type(outputStream *os) {
 331   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
 332     if (ss.at_return_type()) {
 333       if (ss.is_array()) {
 334         print_array(os, (char*)ss.raw_bytes(), (int)ss.raw_length());
 335       } else if (ss.is_object()) {
 336         // Expected format: L<type name>;. Skip 'L' and ';' delimiting the class name.
 337         print_class(os, (char*)ss.raw_bytes()+1, (int)ss.raw_length()-2);
 338       } else {
 339         os->print("%s", type2name(ss.type()));
 340       }
 341     }
 342   }
 343 }
 344 
 345 void Symbol::print_as_signature_external_parameters(outputStream *os) {
 346   bool first = true;
 347   for (SignatureStream ss(this); !ss.is_done(); ss.next()) {
 348     if (ss.at_return_type()) break;
 349     if (!first) { os->print(", "); }
 350     if (ss.is_array()) {
 351       print_array(os, (char*)ss.raw_bytes(), (int)ss.raw_length());
 352     } else if (ss.is_object()) {
 353       // Skip 'L' and ';'.
 354       print_class(os, (char*)ss.raw_bytes()+1, (int)ss.raw_length()-2);
 355     } else {
 356       os->print("%s", type2name(ss.type()));
 357     }
 358     first = false;
 359   }
 360 }
 361 
 362 // Increment refcount while checking for zero.  If the Symbol's refcount becomes zero
 363 // a thread could be concurrently removing the Symbol.  This is used during SymbolTable
 364 // lookup to avoid reviving a dead Symbol.
 365 bool Symbol::try_increment_refcount() {
 366   uint32_t found = _length_and_refcount;
 367   while (true) {
 368     uint32_t old_value = found;
 369     int refc = extract_refcount(old_value);
 370     if (refc == PERM_REFCOUNT) {
 371       return true;  // sticky max or created permanent
 372     } else if (refc == 0) {
 373       return false; // dead, can't revive.
 374     } else {
 375       found = Atomic::cmpxchg(old_value + 1, &_length_and_refcount, old_value);
 376       if (found == old_value) {
 377         return true; // successfully updated.
 378       }
 379       // refcount changed, try again.
 380     }
 381   }
 382 }
 383 
 384 // The increment_refcount() is called when not doing lookup. It is assumed that you
 385 // have a symbol with a non-zero refcount and it can't become zero while referenced by
 386 // this caller.
 387 void Symbol::increment_refcount() {
 388   if (!try_increment_refcount()) {
 389 #ifdef ASSERT
 390     print();
 391     fatal("refcount has gone to zero");
 392 #endif
 393   }
 394 #ifndef PRODUCT
 395   if (refcount() != PERM_REFCOUNT) { // not a permanent symbol
 396     NOT_PRODUCT(Atomic::inc(&_total_count);)
 397   }
 398 #endif
 399 }
 400 
 401 // Decrement refcount potentially while racing increment, so we need
 402 // to check the value after attempting to decrement so that if another
 403 // thread increments to PERM_REFCOUNT the value is not decremented.
 404 void Symbol::decrement_refcount() {
 405   uint32_t found = _length_and_refcount;
 406   while (true) {
 407     uint32_t old_value = found;
 408     int refc = extract_refcount(old_value);
 409     if (refc == PERM_REFCOUNT) {
 410       return;  // refcount is permanent, permanent is sticky
 411     } else if (refc == 0) {
 412 #ifdef ASSERT
 413       print();
 414       fatal("refcount underflow");
 415 #endif
 416       return;
 417     } else {
 418       found = Atomic::cmpxchg(old_value - 1, &_length_and_refcount, old_value);
 419       if (found == old_value) {
 420         return;  // successfully updated.
 421       }
 422       // refcount changed, try again.
 423     }
 424   }
 425 }
 426 
 427 void Symbol::make_permanent() {
 428   uint32_t found = _length_and_refcount;
 429   while (true) {
 430     uint32_t old_value = found;
 431     int refc = extract_refcount(old_value);
 432     if (refc == PERM_REFCOUNT) {
 433       return;  // refcount is permanent, permanent is sticky
 434     } else if (refc == 0) {
 435 #ifdef ASSERT
 436       print();
 437       fatal("refcount underflow");
 438 #endif
 439       return;
 440     } else {
 441       int len = extract_length(old_value);
 442       found = Atomic::cmpxchg(pack_length_and_refcount(len, PERM_REFCOUNT), &_length_and_refcount, old_value);
 443       if (found == old_value) {
 444         return;  // successfully updated.
 445       }
 446       // refcount changed, try again.
 447     }
 448   }
 449 }
 450 
 451 void Symbol::metaspace_pointers_do(MetaspaceClosure* it) {
 452   if (log_is_enabled(Trace, cds)) {
 453     LogStream trace_stream(Log(cds)::trace());
 454     trace_stream.print("Iter(Symbol): %p ", this);
 455     print_value_on(&trace_stream);
 456     trace_stream.cr();
 457   }
 458 }
 459 
 460 void Symbol::print_on(outputStream* st) const {
 461   st->print("Symbol: '");
 462   print_symbol_on(st);
 463   st->print("'");
 464   st->print(" count %d", refcount());
 465 }
 466 
 467 void Symbol::print() const { print_on(tty); }
 468 
 469 // The print_value functions are present in all builds, to support the
 470 // disassembler and error reporting.
 471 void Symbol::print_value_on(outputStream* st) const {
 472   st->print("'");
 473   for (int i = 0; i < utf8_length(); i++) {
 474     st->print("%c", char_at(i));
 475   }
 476   st->print("'");
 477 }
 478 
 479 void Symbol::print_value() const { print_value_on(tty); }
 480 
 481 bool Symbol::is_valid(Symbol* s) {
 482   if (!is_aligned(s, sizeof(MetaWord))) return false;
 483   if ((size_t)s < os::min_page_size()) return false;
 484 
 485   if (!os::is_readable_range(s, s + 1)) return false;
 486 
 487   // Symbols are not allocated in Java heap.
 488   if (Universe::heap()->is_in_reserved(s)) return false;
 489 
 490   int len = s->utf8_length();
 491   if (len < 0) return false;
 492 
 493   jbyte* bytes = (jbyte*) s->bytes();
 494   return os::is_readable_range(bytes, bytes + len);
 495 }
 496 
 497 void Symbol::print_Qvalue_on(outputStream* st) const {
 498   if (this == NULL) {
 499     st->print("NULL");
 500   } else {
 501     st->print("'Q");
 502     for (int i = 0; i < utf8_length(); i++) {
 503       st->print("%c", char_at(i));
 504     }
 505     st->print(";'");
 506   }
 507 }
 508 
 509 // SymbolTable prints this in its statistics
 510 NOT_PRODUCT(size_t Symbol::_total_count = 0;)