1 /*
   2  * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/altHashing.hpp"
  27 #include "classfile/javaClasses.inline.hpp"
  28 #include "gc/g1/g1CollectedHeap.inline.hpp"
  29 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
  30 #include "gc/g1/g1StringDedup.hpp"
  31 #include "gc/g1/g1StringDedupTable.hpp"
  32 #include "gc/shared/gcLocker.hpp"
  33 #include "memory/padded.inline.hpp"
  34 #include "oops/oop.inline.hpp"
  35 #include "oops/typeArrayOop.hpp"
  36 #include "runtime/mutexLocker.hpp"
  37 
  38 //
  39 // Freelist in the deduplication table entry cache. Links table
  40 // entries together using their _next fields.
  41 //
  42 class G1StringDedupEntryFreeList : public CHeapObj<mtGC> {
  43 private:
  44   G1StringDedupEntry* _list;
  45   size_t              _length;
  46 
  47 public:
  48   G1StringDedupEntryFreeList() :
  49     _list(NULL),
  50     _length(0) {
  51   }
  52 
  53   void add(G1StringDedupEntry* entry) {
  54     entry->set_next(_list);
  55     _list = entry;
  56     _length++;
  57   }
  58 
  59   G1StringDedupEntry* remove() {
  60     G1StringDedupEntry* entry = _list;
  61     if (entry != NULL) {
  62       _list = entry->next();
  63       _length--;
  64     }
  65     return entry;
  66   }
  67 
  68   size_t length() {
  69     return _length;
  70   }
  71 };
  72 
  73 //
  74 // Cache of deduplication table entries. This cache provides fast allocation and
  75 // reuse of table entries to lower the pressure on the underlying allocator.
  76 // But more importantly, it provides fast/deferred freeing of table entries. This
  77 // is important because freeing of table entries is done during stop-the-world
  78 // phases and it is not uncommon for large number of entries to be freed at once.
// Table entries that are freed during these phases are placed onto a freelist in
  80 // the cache. The deduplication thread, which executes in a concurrent phase, will
  81 // later reuse or free the underlying memory for these entries.
  82 //
  83 // The cache allows for single-threaded allocations and multi-threaded frees.
  84 // Allocations are synchronized by StringDedupTable_lock as part of a table
  85 // modification.
  86 //
class G1StringDedupEntryCache : public CHeapObj<mtGC> {
private:
  // One freelist per GC worker to allow lock less freeing of
  // entries while doing a parallel scan of the table. Using
  // PaddedEnd to avoid false sharing.
  PaddedEnd<G1StringDedupEntryFreeList>* _lists;
  // Number of freelists in _lists. Set from ParallelGCThreads
  // when the cache is constructed.
  size_t                                 _nlists;

public:
  G1StringDedupEntryCache();
  // The cache is not expected to be destroyed, the destructor
  // calls ShouldNotReachHere().
  ~G1StringDedupEntryCache();

  // Get a table entry from the cache freelist, or allocate a new
  // entry if the cache is empty.
  G1StringDedupEntry* alloc();

  // Insert a table entry into the cache freelist that belongs to
  // the given worker. The entry's object and hash are cleared.
  void free(G1StringDedupEntry* entry, uint worker_id);

  // Returns current number of entries in the cache, summed over
  // all freelists.
  size_t size();

  // If the cache has grown above the given max size, trim it down
  // and deallocate the memory occupied by the trimmed-off entries.
  void trim(size_t max_size);
};
 113 
 114 G1StringDedupEntryCache::G1StringDedupEntryCache() {
 115   _nlists = (size_t)ParallelGCThreads;
 116   _lists = PaddedArray<G1StringDedupEntryFreeList, mtGC>::create_unfreeable((uint)_nlists);
 117 }
 118 
 119 G1StringDedupEntryCache::~G1StringDedupEntryCache() {
 120   ShouldNotReachHere();
 121 }
 122 
 123 G1StringDedupEntry* G1StringDedupEntryCache::alloc() {
 124   for (size_t i = 0; i < _nlists; i++) {
 125     G1StringDedupEntry* entry = _lists[i].remove();
 126     if (entry != NULL) {
 127       return entry;
 128     }
 129   }
 130   return new G1StringDedupEntry();
 131 }
 132 
 133 void G1StringDedupEntryCache::free(G1StringDedupEntry* entry, uint worker_id) {
 134   assert(entry->obj() != NULL, "Double free");
 135   assert(worker_id < _nlists, "Invalid worker id");
 136   entry->set_obj(NULL);
 137   entry->set_hash(0);
 138   _lists[worker_id].add(entry);
 139 }
 140 
 141 size_t G1StringDedupEntryCache::size() {
 142   size_t size = 0;
 143   for (size_t i = 0; i < _nlists; i++) {
 144     size += _lists[i].length();
 145   }
 146   return size;
 147 }
 148 
 149 void G1StringDedupEntryCache::trim(size_t max_size) {
 150   size_t cache_size = 0;
 151   for (size_t i = 0; i < _nlists; i++) {
 152     G1StringDedupEntryFreeList* list = &_lists[i];
 153     cache_size += list->length();
 154     while (cache_size > max_size) {
 155       G1StringDedupEntry* entry = list->remove();
 156       assert(entry != NULL, "Should not be null");
 157       cache_size--;
 158       delete entry;
 159     }
 160   }
 161 }
 162 
// The installed table and the entry cache. Both are first allocated by
// create(); _table is later replaced by finish_resize() and finish_rehash().
G1StringDedupTable*      G1StringDedupTable::_table = NULL;
G1StringDedupEntryCache* G1StringDedupTable::_entry_cache = NULL;

// Sizing policy. The load factors are multiplied with the table size in the
// constructor to get the grow and shrink thresholds. The rehash threshold is
// compared against the number of entries visited during a bucket lookup.
const size_t             G1StringDedupTable::_min_size = (1 << 10);   // 1024
const size_t             G1StringDedupTable::_max_size = (1 << 24);   // 16777216
const double             G1StringDedupTable::_grow_load_factor = 2.0; // Grow table at 200% load
const double             G1StringDedupTable::_shrink_load_factor = _grow_load_factor / 3.0; // Shrink table at 67% load
const double             G1StringDedupTable::_max_cache_factor = 0.1; // Cache a maximum of 10% of the table size
const uintx              G1StringDedupTable::_rehash_multiple = 60;   // Hash bucket has 60 times more collisions than expected
const uintx              G1StringDedupTable::_rehash_threshold = (uintx)(_rehash_multiple * _grow_load_factor);

// Statistics counters, reported by print_statistics().
uintx                    G1StringDedupTable::_entries_added = 0;
uintx                    G1StringDedupTable::_entries_removed = 0;
uintx                    G1StringDedupTable::_resize_count = 0;
uintx                    G1StringDedupTable::_rehash_count = 0;
 178 
 179 G1StringDedupTable::G1StringDedupTable(size_t size, jint hash_seed) :
 180   _size(size),
 181   _entries(0),
 182   _grow_threshold((uintx)(size * _grow_load_factor)),
 183   _shrink_threshold((uintx)(size * _shrink_load_factor)),
 184   _rehash_needed(false),
 185   _hash_seed(hash_seed) {
 186   assert(is_power_of_2(size), "Table size must be a power of 2");
 187   _buckets = NEW_C_HEAP_ARRAY(G1StringDedupEntry*, _size, mtGC);
 188   memset(_buckets, 0, _size * sizeof(G1StringDedupEntry*));
 189 }
 190 
 191 G1StringDedupTable::~G1StringDedupTable() {
 192   FREE_C_HEAP_ARRAY(G1StringDedupEntry*, _buckets);
 193 }
 194 
 195 void G1StringDedupTable::create() {
 196   assert(_table == NULL, "One string deduplication table allowed");
 197   _entry_cache = new G1StringDedupEntryCache();
 198   _table = new G1StringDedupTable(_min_size);
 199 }
 200 
 201 void G1StringDedupTable::add(typeArrayOop value, unsigned int hash, G1StringDedupEntry** list) {
 202   G1StringDedupEntry* entry = _entry_cache->alloc();
 203   entry->set_obj(value);
 204   entry->set_hash(hash);
 205   entry->set_next(*list);
 206   *list = entry;
 207   _entries++;
 208 }
 209 
 210 void G1StringDedupTable::remove(G1StringDedupEntry** pentry, uint worker_id) {
 211   G1StringDedupEntry* entry = *pentry;
 212   *pentry = entry->next();
 213   _entry_cache->free(entry, worker_id);
 214 }
 215 
 216 void G1StringDedupTable::transfer(G1StringDedupEntry** pentry, G1StringDedupTable* dest) {
 217   G1StringDedupEntry* entry = *pentry;
 218   *pentry = entry->next();
 219   unsigned int hash = entry->hash();
 220   size_t index = dest->hash_to_index(hash);
 221   G1StringDedupEntry** list = dest->bucket(index);
 222   entry->set_next(*list);
 223   *list = entry;
 224 }
 225 
 226 bool G1StringDedupTable::equals(typeArrayOop value1, typeArrayOop value2) {
 227   return (value1 == value2 ||
 228           (value1->length() == value2->length() &&
 229            (!memcmp(value1->base(T_CHAR),
 230                     value2->base(T_CHAR),
 231                     value1->length() * sizeof(jchar)))));
 232 }
 233 
 234 typeArrayOop G1StringDedupTable::lookup(typeArrayOop value, unsigned int hash,
 235                                         G1StringDedupEntry** list, uintx &count) {
 236   for (G1StringDedupEntry* entry = *list; entry != NULL; entry = entry->next()) {
 237     if (entry->hash() == hash) {
 238       typeArrayOop existing_value = entry->obj();
 239       if (equals(value, existing_value)) {
 240         // Match found
 241         return existing_value;
 242       }
 243     }
 244     count++;
 245   }
 246 
 247   // Not found
 248   return NULL;
 249 }
 250 
 251 typeArrayOop G1StringDedupTable::lookup_or_add_inner(typeArrayOop value, unsigned int hash) {
 252   size_t index = hash_to_index(hash);
 253   G1StringDedupEntry** list = bucket(index);
 254   uintx count = 0;
 255 
 256   // Lookup in list
 257   typeArrayOop existing_value = lookup(value, hash, list, count);
 258 
 259   // Check if rehash is needed
 260   if (count > _rehash_threshold) {
 261     _rehash_needed = true;
 262   }
 263 
 264   if (existing_value == NULL) {
 265     // Not found, add new entry
 266     add(value, hash, list);
 267 
 268     // Update statistics
 269     _entries_added++;
 270   }
 271 
 272   return existing_value;
 273 }
 274 
 275 unsigned int G1StringDedupTable::hash_code(typeArrayOop value) {
 276   unsigned int hash;
 277   int length = value->length();
 278   const jchar* data = (jchar*)value->base(T_CHAR);
 279 
 280   if (use_java_hash()) {
 281     hash = java_lang_String::hash_code(data, length);
 282   } else {
 283     hash = AltHashing::murmur3_32(_table->_hash_seed, data, length);
 284   }
 285 
 286   return hash;
 287 }
 288 
 289 void G1StringDedupTable::deduplicate(oop java_string, G1StringDedupStat& stat) {
 290   assert(java_lang_String::is_instance(java_string), "Must be a string");
 291   No_Safepoint_Verifier nsv;
 292 
 293   stat.inc_inspected();
 294 
 295   typeArrayOop value = java_lang_String::value(java_string);
 296   if (value == NULL) {
 297     // String has no value
 298     stat.inc_skipped();
 299     return;
 300   }
 301 
 302   unsigned int hash = 0;
 303 
 304   if (use_java_hash()) {
 305     // Get hash code from cache
 306     hash = java_lang_String::hash(java_string);
 307   }
 308 
 309   if (hash == 0) {
 310     // Compute hash
 311     hash = hash_code(value);
 312     stat.inc_hashed();
 313 
 314     if (use_java_hash() && hash != 0) {
 315       // Store hash code in cache
 316       java_lang_String::set_hash(java_string, hash);
 317     }
 318   }
 319 
 320   typeArrayOop existing_value = lookup_or_add(value, hash);
 321   if (existing_value == value) {
 322     // Same value, already known
 323     stat.inc_known();
 324     return;
 325   }
 326 
 327   // Get size of value array
 328   uintx size_in_bytes = value->size() * HeapWordSize;
 329   stat.inc_new(size_in_bytes);
 330 
 331   if (existing_value != NULL) {
 332     // Enqueue the reference to make sure it is kept alive. Concurrent mark might
 333     // otherwise declare it dead if there are no other strong references to this object.
 334     G1SATBCardTableModRefBS::enqueue(existing_value);
 335 
 336     // Existing value found, deduplicate string
 337     java_lang_String::set_value(java_string, existing_value);
 338 
 339     if (G1CollectedHeap::heap()->is_in_young(value)) {
 340       stat.inc_deduped_young(size_in_bytes);
 341     } else {
 342       stat.inc_deduped_old(size_in_bytes);
 343     }
 344   }
 345 }
 346 
 347 G1StringDedupTable* G1StringDedupTable::prepare_resize() {
 348   size_t size = _table->_size;
 349 
 350   // Check if the hashtable needs to be resized
 351   if (_table->_entries > _table->_grow_threshold) {
 352     // Grow table, double the size
 353     size *= 2;
 354     if (size > _max_size) {
 355       // Too big, don't resize
 356       return NULL;
 357     }
 358   } else if (_table->_entries < _table->_shrink_threshold) {
 359     // Shrink table, half the size
 360     size /= 2;
 361     if (size < _min_size) {
 362       // Too small, don't resize
 363       return NULL;
 364     }
 365   } else if (StringDeduplicationResizeALot) {
 366     // Force grow
 367     size *= 2;
 368     if (size > _max_size) {
 369       // Too big, force shrink instead
 370       size /= 4;
 371     }
 372   } else {
 373     // Resize not needed
 374     return NULL;
 375   }
 376 
 377   // Update statistics
 378   _resize_count++;
 379 
 380   // Allocate the new table. The new table will be populated by workers
 381   // calling unlink_or_oops_do() and finally installed by finish_resize().
 382   return new G1StringDedupTable(size, _table->_hash_seed);
 383 }
 384 
 385 void G1StringDedupTable::finish_resize(G1StringDedupTable* resized_table) {
 386   assert(resized_table != NULL, "Invalid table");
 387 
 388   resized_table->_entries = _table->_entries;
 389 
 390   // Free old table
 391   delete _table;
 392 
 393   // Install new table
 394   _table = resized_table;
 395 }
 396 
 397 void G1StringDedupTable::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl, uint worker_id) {
 398   // The table is divided into partitions to allow lock-less parallel processing by
 399   // multiple worker threads. A worker thread first claims a partition, which ensures
 400   // exclusive access to that part of the table, then continues to process it. To allow
 401   // shrinking of the table in parallel we also need to make sure that the same worker
 402   // thread processes all partitions where entries will hash to the same destination
 403   // partition. Since the table size is always a power of two and we always shrink by
 404   // dividing the table in half, we know that for a given partition there is only one
 405   // other partition whoes entries will hash to the same destination partition. That
 406   // other partition is always the sibling partition in the second half of the table.
 407   // For example, if the table is divided into 8 partitions, the sibling of partition 0
 408   // is partition 4, the sibling of partition 1 is partition 5, etc.
 409   size_t table_half = _table->_size / 2;
 410 
 411   // Let each partition be one page worth of buckets
 412   size_t partition_size = MIN2(table_half, os::vm_page_size() / sizeof(G1StringDedupEntry*));
 413   assert(table_half % partition_size == 0, "Invalid partition size");
 414 
 415   // Number of entries removed during the scan
 416   uintx removed = 0;
 417 
 418   for (;;) {
 419     // Grab next partition to scan
 420     size_t partition_begin = cl->claim_table_partition(partition_size);
 421     size_t partition_end = partition_begin + partition_size;
 422     if (partition_begin >= table_half) {
 423       // End of table
 424       break;
 425     }
 426 
 427     // Scan the partition followed by the sibling partition in the second half of the table
 428     removed += unlink_or_oops_do(cl, partition_begin, partition_end, worker_id);
 429     removed += unlink_or_oops_do(cl, table_half + partition_begin, table_half + partition_end, worker_id);
 430   }
 431 
 432   // Delayed update avoid contention on the table lock
 433   if (removed > 0) {
 434     MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
 435     _table->_entries -= removed;
 436     _entries_removed += removed;
 437   }
 438 }
 439 
 440 uintx G1StringDedupTable::unlink_or_oops_do(G1StringDedupUnlinkOrOopsDoClosure* cl,
 441                                             size_t partition_begin,
 442                                             size_t partition_end,
 443                                             uint worker_id) {
 444   uintx removed = 0;
 445   for (size_t bucket = partition_begin; bucket < partition_end; bucket++) {
 446     G1StringDedupEntry** entry = _table->bucket(bucket);
 447     while (*entry != NULL) {
 448       oop* p = (oop*)(*entry)->obj_addr();
 449       if (cl->is_alive(*p)) {
 450         cl->keep_alive(p);
 451         if (cl->is_resizing()) {
 452           // We are resizing the table, transfer entry to the new table
 453           _table->transfer(entry, cl->resized_table());
 454         } else {
 455           if (cl->is_rehashing()) {
 456             // We are rehashing the table, rehash the entry but keep it
 457             // in the table. We can't transfer entries into the new table
 458             // at this point since we don't have exclusive access to all
 459             // destination partitions. finish_rehash() will do a single
 460             // threaded transfer of all entries.
 461             typeArrayOop value = (typeArrayOop)*p;
 462             unsigned int hash = hash_code(value);
 463             (*entry)->set_hash(hash);
 464           }
 465 
 466           // Move to next entry
 467           entry = (*entry)->next_addr();
 468         }
 469       } else {
 470         // Not alive, remove entry from table
 471         _table->remove(entry, worker_id);
 472         removed++;
 473       }
 474     }
 475   }
 476 
 477   return removed;
 478 }
 479 
 480 G1StringDedupTable* G1StringDedupTable::prepare_rehash() {
 481   if (!_table->_rehash_needed && !StringDeduplicationRehashALot) {
 482     // Rehash not needed
 483     return NULL;
 484   }
 485 
 486   // Update statistics
 487   _rehash_count++;
 488 
 489   // Compute new hash seed
 490   _table->_hash_seed = AltHashing::compute_seed();
 491 
 492   // Allocate the new table, same size and hash seed
 493   return new G1StringDedupTable(_table->_size, _table->_hash_seed);
 494 }
 495 
 496 void G1StringDedupTable::finish_rehash(G1StringDedupTable* rehashed_table) {
 497   assert(rehashed_table != NULL, "Invalid table");
 498 
 499   // Move all newly rehashed entries into the correct buckets in the new table
 500   for (size_t bucket = 0; bucket < _table->_size; bucket++) {
 501     G1StringDedupEntry** entry = _table->bucket(bucket);
 502     while (*entry != NULL) {
 503       _table->transfer(entry, rehashed_table);
 504     }
 505   }
 506 
 507   rehashed_table->_entries = _table->_entries;
 508 
 509   // Free old table
 510   delete _table;
 511 
 512   // Install new table
 513   _table = rehashed_table;
 514 }
 515 
 516 void G1StringDedupTable::verify() {
 517   for (size_t bucket = 0; bucket < _table->_size; bucket++) {
 518     // Verify entries
 519     G1StringDedupEntry** entry = _table->bucket(bucket);
 520     while (*entry != NULL) {
 521       typeArrayOop value = (*entry)->obj();
 522       guarantee(value != NULL, "Object must not be NULL");
 523       guarantee(G1CollectedHeap::heap()->is_in_reserved(value), "Object must be on the heap");
 524       guarantee(!value->is_forwarded(), "Object must not be forwarded");
 525       guarantee(value->is_typeArray(), "Object must be a typeArrayOop");
 526       unsigned int hash = hash_code(value);
 527       guarantee((*entry)->hash() == hash, "Table entry has inorrect hash");
 528       guarantee(_table->hash_to_index(hash) == bucket, "Table entry has incorrect index");
 529       entry = (*entry)->next_addr();
 530     }
 531 
 532     // Verify that we do not have entries with identical oops or identical arrays.
 533     // We only need to compare entries in the same bucket. If the same oop or an
 534     // identical array has been inserted more than once into different/incorrect
 535     // buckets the verification step above will catch that.
 536     G1StringDedupEntry** entry1 = _table->bucket(bucket);
 537     while (*entry1 != NULL) {
 538       typeArrayOop value1 = (*entry1)->obj();
 539       G1StringDedupEntry** entry2 = (*entry1)->next_addr();
 540       while (*entry2 != NULL) {
 541         typeArrayOop value2 = (*entry2)->obj();
 542         guarantee(!equals(value1, value2), "Table entries must not have identical arrays");
 543         entry2 = (*entry2)->next_addr();
 544       }
 545       entry1 = (*entry1)->next_addr();
 546     }
 547   }
 548 }
 549 
 550 void G1StringDedupTable::trim_entry_cache() {
 551   MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
 552   size_t max_cache_size = (size_t)(_table->_size * _max_cache_factor);
 553   _entry_cache->trim(max_cache_size);
 554 }
 555 
 556 void G1StringDedupTable::print_statistics(outputStream* st) {
 557   st->print_cr(
 558     "   [Table]\n"
 559     "      [Memory Usage: "G1_STRDEDUP_BYTES_FORMAT_NS"]\n"
 560     "      [Size: "SIZE_FORMAT", Min: "SIZE_FORMAT", Max: "SIZE_FORMAT"]\n"
 561     "      [Entries: "UINTX_FORMAT", Load: "G1_STRDEDUP_PERCENT_FORMAT_NS", Cached: " UINTX_FORMAT ", Added: "UINTX_FORMAT", Removed: "UINTX_FORMAT"]\n"
 562     "      [Resize Count: "UINTX_FORMAT", Shrink Threshold: "UINTX_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT_NS"), Grow Threshold: "UINTX_FORMAT"("G1_STRDEDUP_PERCENT_FORMAT_NS")]\n"
 563     "      [Rehash Count: "UINTX_FORMAT", Rehash Threshold: "UINTX_FORMAT", Hash Seed: 0x%x]\n"
 564     "      [Age Threshold: "UINTX_FORMAT"]",
 565     G1_STRDEDUP_BYTES_PARAM(_table->_size * sizeof(G1StringDedupEntry*) + (_table->_entries + _entry_cache->size()) * sizeof(G1StringDedupEntry)),
 566     _table->_size, _min_size, _max_size,
 567     _table->_entries, (double)_table->_entries / (double)_table->_size * 100.0, _entry_cache->size(), _entries_added, _entries_removed,
 568     _resize_count, _table->_shrink_threshold, _shrink_load_factor * 100.0, _table->_grow_threshold, _grow_load_factor * 100.0,
 569     _rehash_count, _rehash_threshold, _table->_hash_seed,
 570     StringDeduplicationAgeThreshold);
 571 }