181 _entries(0),
182 _grow_threshold((uintx)(size * _grow_load_factor)),
183 _shrink_threshold((uintx)(size * _shrink_load_factor)),
184 _rehash_needed(false),
185 _hash_seed(hash_seed) {
186 assert(is_power_of_2(size), "Table size must be a power of 2");
187 _buckets = NEW_C_HEAP_ARRAY(G1StringDedupEntry*, _size, mtGC);
188 memset(_buckets, 0, _size * sizeof(G1StringDedupEntry*));
189 }
190
191 G1StringDedupTable::~G1StringDedupTable() {
192 FREE_C_HEAP_ARRAY(G1StringDedupEntry*, _buckets);
193 }
194
195 void G1StringDedupTable::create() {
196 assert(_table == NULL, "One string deduplication table allowed");
197 _entry_cache = new G1StringDedupEntryCache();
198 _table = new G1StringDedupTable(_min_size);
199 }
200
201 void G1StringDedupTable::add(typeArrayOop value, unsigned int hash, G1StringDedupEntry** list) {
202 G1StringDedupEntry* entry = _entry_cache->alloc();
203 entry->set_obj(value);
204 entry->set_hash(hash);
205 entry->set_next(*list);
206 *list = entry;
207 _entries++;
208 }
209
210 void G1StringDedupTable::remove(G1StringDedupEntry** pentry, uint worker_id) {
211 G1StringDedupEntry* entry = *pentry;
212 *pentry = entry->next();
213 _entry_cache->free(entry, worker_id);
214 }
215
216 void G1StringDedupTable::transfer(G1StringDedupEntry** pentry, G1StringDedupTable* dest) {
217 G1StringDedupEntry* entry = *pentry;
218 *pentry = entry->next();
219 unsigned int hash = entry->hash();
220 size_t index = dest->hash_to_index(hash);
221 G1StringDedupEntry** list = dest->bucket(index);
222 entry->set_next(*list);
223 *list = entry;
224 }
225
226 bool G1StringDedupTable::equals(typeArrayOop value1, typeArrayOop value2) {
227 return (value1 == value2 ||
228 (value1->length() == value2->length() &&
229 (!memcmp(value1->base(T_CHAR),
230 value2->base(T_CHAR),
231 value1->length() * sizeof(jchar)))));
232 }
233
234 typeArrayOop G1StringDedupTable::lookup(typeArrayOop value, unsigned int hash,
235 G1StringDedupEntry** list, uintx &count) {
236 for (G1StringDedupEntry* entry = *list; entry != NULL; entry = entry->next()) {
237 if (entry->hash() == hash) {
238 typeArrayOop existing_value = entry->obj();
239 if (equals(value, existing_value)) {
240 // Match found
241 return existing_value;
242 }
243 }
244 count++;
245 }
246
247 // Not found
248 return NULL;
249 }
250
251 typeArrayOop G1StringDedupTable::lookup_or_add_inner(typeArrayOop value, unsigned int hash) {
252 size_t index = hash_to_index(hash);
253 G1StringDedupEntry** list = bucket(index);
254 uintx count = 0;
255
256 // Lookup in list
257 typeArrayOop existing_value = lookup(value, hash, list, count);
258
259 // Check if rehash is needed
260 if (count > _rehash_threshold) {
261 _rehash_needed = true;
262 }
263
264 if (existing_value == NULL) {
265 // Not found, add new entry
266 add(value, hash, list);
267
268 // Update statistics
269 _entries_added++;
270 }
271
272 return existing_value;
273 }
274
275 unsigned int G1StringDedupTable::hash_code(typeArrayOop value) {
276 unsigned int hash;
277 int length = value->length();
278 const jchar* data = (jchar*)value->base(T_CHAR);
279
280 if (use_java_hash()) {
281 hash = java_lang_String::hash_code(data, length);
282 } else {
283 hash = AltHashing::murmur3_32(_table->_hash_seed, data, length);
284 }
285
286 return hash;
287 }
288
289 void G1StringDedupTable::deduplicate(oop java_string, G1StringDedupStat& stat) {
290 assert(java_lang_String::is_instance(java_string), "Must be a string");
291 No_Safepoint_Verifier nsv;
292
293 stat.inc_inspected();
294
295 typeArrayOop value = java_lang_String::value(java_string);
296 if (value == NULL) {
297 // String has no value
298 stat.inc_skipped();
299 return;
300 }
301
302 unsigned int hash = 0;
303
304 if (use_java_hash()) {
305 // Get hash code from cache
306 hash = java_lang_String::hash(java_string);
307 }
308
309 if (hash == 0) {
310 // Compute hash
311 hash = hash_code(value);
312 stat.inc_hashed();
313
314 if (use_java_hash() && hash != 0) {
315 // Store hash code in cache
316 java_lang_String::set_hash(java_string, hash);
317 }
318 }
319
320 typeArrayOop existing_value = lookup_or_add(value, hash);
321 if (existing_value == value) {
322 // Same value, already known
323 stat.inc_known();
324 return;
325 }
326
327 // Get size of value array
328 uintx size_in_bytes = value->size() * HeapWordSize;
329 stat.inc_new(size_in_bytes);
330
331 if (existing_value != NULL) {
332 // Enqueue the reference to make sure it is kept alive. Concurrent mark might
333 // otherwise declare it dead if there are no other strong references to this object.
334 G1SATBCardTableModRefBS::enqueue(existing_value);
335
336 // Existing value found, deduplicate string
337 java_lang_String::set_value(java_string, existing_value);
338
339 if (G1CollectedHeap::heap()->is_in_young(value)) {
340 stat.inc_deduped_young(size_in_bytes);
442 size_t partition_end,
443 uint worker_id) {
444 uintx removed = 0;
445 for (size_t bucket = partition_begin; bucket < partition_end; bucket++) {
446 G1StringDedupEntry** entry = _table->bucket(bucket);
447 while (*entry != NULL) {
448 oop* p = (oop*)(*entry)->obj_addr();
449 if (cl->is_alive(*p)) {
450 cl->keep_alive(p);
451 if (cl->is_resizing()) {
452 // We are resizing the table, transfer entry to the new table
453 _table->transfer(entry, cl->resized_table());
454 } else {
455 if (cl->is_rehashing()) {
456 // We are rehashing the table, rehash the entry but keep it
457 // in the table. We can't transfer entries into the new table
458 // at this point since we don't have exclusive access to all
459 // destination partitions. finish_rehash() will do a single
460 // threaded transfer of all entries.
461 typeArrayOop value = (typeArrayOop)*p;
462 unsigned int hash = hash_code(value);
463 (*entry)->set_hash(hash);
464 }
465
466 // Move to next entry
467 entry = (*entry)->next_addr();
468 }
469 } else {
470 // Not alive, remove entry from table
471 _table->remove(entry, worker_id);
472 removed++;
473 }
474 }
475 }
476
477 return removed;
478 }
479
480 G1StringDedupTable* G1StringDedupTable::prepare_rehash() {
481 if (!_table->_rehash_needed && !StringDeduplicationRehashALot) {
482 // Rehash not needed
506
507 rehashed_table->_entries = _table->_entries;
508
509 // Free old table
510 delete _table;
511
512 // Install new table
513 _table = rehashed_table;
514 }
515
516 void G1StringDedupTable::verify() {
517 for (size_t bucket = 0; bucket < _table->_size; bucket++) {
518 // Verify entries
519 G1StringDedupEntry** entry = _table->bucket(bucket);
520 while (*entry != NULL) {
521 typeArrayOop value = (*entry)->obj();
522 guarantee(value != NULL, "Object must not be NULL");
523 guarantee(G1CollectedHeap::heap()->is_in_reserved(value), "Object must be on the heap");
524 guarantee(!value->is_forwarded(), "Object must not be forwarded");
525 guarantee(value->is_typeArray(), "Object must be a typeArrayOop");
526 unsigned int hash = hash_code(value);
527 guarantee((*entry)->hash() == hash, "Table entry has inorrect hash");
528 guarantee(_table->hash_to_index(hash) == bucket, "Table entry has incorrect index");
529 entry = (*entry)->next_addr();
530 }
531
532 // Verify that we do not have entries with identical oops or identical arrays.
533 // We only need to compare entries in the same bucket. If the same oop or an
534 // identical array has been inserted more than once into different/incorrect
535 // buckets the verification step above will catch that.
536 G1StringDedupEntry** entry1 = _table->bucket(bucket);
537 while (*entry1 != NULL) {
538 typeArrayOop value1 = (*entry1)->obj();
539 G1StringDedupEntry** entry2 = (*entry1)->next_addr();
540 while (*entry2 != NULL) {
541 typeArrayOop value2 = (*entry2)->obj();
542 guarantee(!equals(value1, value2), "Table entries must not have identical arrays");
543 entry2 = (*entry2)->next_addr();
544 }
545 entry1 = (*entry1)->next_addr();
546 }
547 }
548 }
549
550 void G1StringDedupTable::trim_entry_cache() {
551 MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
552 size_t max_cache_size = (size_t)(_table->_size * _max_cache_factor);
553 _entry_cache->trim(max_cache_size);
554 }
555
556 void G1StringDedupTable::print_statistics(outputStream* st) {
557 st->print_cr(
558 " [Table]\n"
559 " [Memory Usage: " G1_STRDEDUP_BYTES_FORMAT_NS "]\n"
560 " [Size: " SIZE_FORMAT ", Min: " SIZE_FORMAT ", Max: " SIZE_FORMAT "]\n"
561 " [Entries: " UINTX_FORMAT ", Load: " G1_STRDEDUP_PERCENT_FORMAT_NS ", Cached: " UINTX_FORMAT ", Added: " UINTX_FORMAT ", Removed: " UINTX_FORMAT "]\n"
562 " [Resize Count: " UINTX_FORMAT ", Shrink Threshold: " UINTX_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT_NS "), Grow Threshold: " UINTX_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT_NS ")]\n"
|
181 _entries(0),
182 _grow_threshold((uintx)(size * _grow_load_factor)),
183 _shrink_threshold((uintx)(size * _shrink_load_factor)),
184 _rehash_needed(false),
185 _hash_seed(hash_seed) {
186 assert(is_power_of_2(size), "Table size must be a power of 2");
187 _buckets = NEW_C_HEAP_ARRAY(G1StringDedupEntry*, _size, mtGC);
188 memset(_buckets, 0, _size * sizeof(G1StringDedupEntry*));
189 }
190
191 G1StringDedupTable::~G1StringDedupTable() {
192 FREE_C_HEAP_ARRAY(G1StringDedupEntry*, _buckets);
193 }
194
195 void G1StringDedupTable::create() {
196 assert(_table == NULL, "One string deduplication table allowed");
197 _entry_cache = new G1StringDedupEntryCache();
198 _table = new G1StringDedupTable(_min_size);
199 }
200
201 void G1StringDedupTable::add(typeArrayOop value, bool latin1, unsigned int hash, G1StringDedupEntry** list) {
202 G1StringDedupEntry* entry = _entry_cache->alloc();
203 entry->set_obj(value);
204 entry->set_hash(hash);
205 entry->set_latin1(latin1);
206 entry->set_next(*list);
207 *list = entry;
208 _entries++;
209 }
210
211 void G1StringDedupTable::remove(G1StringDedupEntry** pentry, uint worker_id) {
212 G1StringDedupEntry* entry = *pentry;
213 *pentry = entry->next();
214 _entry_cache->free(entry, worker_id);
215 }
216
217 void G1StringDedupTable::transfer(G1StringDedupEntry** pentry, G1StringDedupTable* dest) {
218 G1StringDedupEntry* entry = *pentry;
219 *pentry = entry->next();
220 unsigned int hash = entry->hash();
221 size_t index = dest->hash_to_index(hash);
222 G1StringDedupEntry** list = dest->bucket(index);
223 entry->set_next(*list);
224 *list = entry;
225 }
226
227 bool G1StringDedupTable::equals(typeArrayOop value1, typeArrayOop value2) {
228 return (value1 == value2 ||
229 (value1->length() == value2->length() &&
230 (!memcmp(value1->base(T_BYTE),
231 value2->base(T_BYTE),
232 value1->length() * sizeof(jbyte)))));
233 }
234
235 typeArrayOop G1StringDedupTable::lookup(typeArrayOop value, bool latin1, unsigned int hash,
236 G1StringDedupEntry** list, uintx &count) {
237 for (G1StringDedupEntry* entry = *list; entry != NULL; entry = entry->next()) {
238 if (entry->hash() == hash && entry->latin1() == latin1) {
239 typeArrayOop existing_value = entry->obj();
240 if (equals(value, existing_value)) {
241 // Match found
242 return existing_value;
243 }
244 }
245 count++;
246 }
247
248 // Not found
249 return NULL;
250 }
251
252 typeArrayOop G1StringDedupTable::lookup_or_add_inner(typeArrayOop value, bool latin1, unsigned int hash) {
253 size_t index = hash_to_index(hash);
254 G1StringDedupEntry** list = bucket(index);
255 uintx count = 0;
256
257 // Lookup in list
258 typeArrayOop existing_value = lookup(value, latin1, hash, list, count);
259
260 // Check if rehash is needed
261 if (count > _rehash_threshold) {
262 _rehash_needed = true;
263 }
264
265 if (existing_value == NULL) {
266 // Not found, add new entry
267 add(value, latin1, hash, list);
268
269 // Update statistics
270 _entries_added++;
271 }
272
273 return existing_value;
274 }
275
276 unsigned int G1StringDedupTable::hash_code(typeArrayOop value, bool latin1) {
277 unsigned int hash;
278 int length = value->length();
279 if (latin1) {
280 const jbyte* data = (jbyte*)value->base(T_BYTE);
281 if (use_java_hash()) {
282 hash = java_lang_String::hash_code(data, length);
283 } else {
284 hash = AltHashing::murmur3_32(_table->_hash_seed, data, length);
285 }
286 } else {
287 length /= sizeof(jchar) / sizeof(jbyte); // Convert number of bytes to number of chars
288 const jchar* data = (jchar*)value->base(T_CHAR);
289 if (use_java_hash()) {
290 hash = java_lang_String::hash_code(data, length);
291 } else {
292 hash = AltHashing::murmur3_32(_table->_hash_seed, data, length);
293 }
294 }
295
296 return hash;
297 }
298
299 void G1StringDedupTable::deduplicate(oop java_string, G1StringDedupStat& stat) {
300 assert(java_lang_String::is_instance(java_string), "Must be a string");
301 No_Safepoint_Verifier nsv;
302
303 stat.inc_inspected();
304
305 typeArrayOop value = java_lang_String::value(java_string);
306 if (value == NULL) {
307 // String has no value
308 stat.inc_skipped();
309 return;
310 }
311
312 bool latin1 = java_lang_String::is_latin1(java_string);
313 unsigned int hash = 0;
314
315 if (use_java_hash()) {
316 // Get hash code from cache
317 hash = java_lang_String::hash(java_string);
318 }
319
320 if (hash == 0) {
321 // Compute hash
322 hash = hash_code(value, latin1);
323 stat.inc_hashed();
324
325 if (use_java_hash() && hash != 0) {
326 // Store hash code in cache
327 java_lang_String::set_hash(java_string, hash);
328 }
329 }
330
331 typeArrayOop existing_value = lookup_or_add(value, latin1, hash);
332 if (existing_value == value) {
333 // Same value, already known
334 stat.inc_known();
335 return;
336 }
337
338 // Get size of value array
339 uintx size_in_bytes = value->size() * HeapWordSize;
340 stat.inc_new(size_in_bytes);
341
342 if (existing_value != NULL) {
343 // Enqueue the reference to make sure it is kept alive. Concurrent mark might
344 // otherwise declare it dead if there are no other strong references to this object.
345 G1SATBCardTableModRefBS::enqueue(existing_value);
346
347 // Existing value found, deduplicate string
348 java_lang_String::set_value(java_string, existing_value);
349
350 if (G1CollectedHeap::heap()->is_in_young(value)) {
351 stat.inc_deduped_young(size_in_bytes);
453 size_t partition_end,
454 uint worker_id) {
455 uintx removed = 0;
456 for (size_t bucket = partition_begin; bucket < partition_end; bucket++) {
457 G1StringDedupEntry** entry = _table->bucket(bucket);
458 while (*entry != NULL) {
459 oop* p = (oop*)(*entry)->obj_addr();
460 if (cl->is_alive(*p)) {
461 cl->keep_alive(p);
462 if (cl->is_resizing()) {
463 // We are resizing the table, transfer entry to the new table
464 _table->transfer(entry, cl->resized_table());
465 } else {
466 if (cl->is_rehashing()) {
467 // We are rehashing the table, rehash the entry but keep it
468 // in the table. We can't transfer entries into the new table
469 // at this point since we don't have exclusive access to all
470 // destination partitions. finish_rehash() will do a single
471 // threaded transfer of all entries.
472 typeArrayOop value = (typeArrayOop)*p;
473 bool latin1 = (*entry)->latin1();
474 unsigned int hash = hash_code(value, latin1);
475 (*entry)->set_hash(hash);
476 }
477
478 // Move to next entry
479 entry = (*entry)->next_addr();
480 }
481 } else {
482 // Not alive, remove entry from table
483 _table->remove(entry, worker_id);
484 removed++;
485 }
486 }
487 }
488
489 return removed;
490 }
491
492 G1StringDedupTable* G1StringDedupTable::prepare_rehash() {
493 if (!_table->_rehash_needed && !StringDeduplicationRehashALot) {
494 // Rehash not needed
518
519 rehashed_table->_entries = _table->_entries;
520
521 // Free old table
522 delete _table;
523
524 // Install new table
525 _table = rehashed_table;
526 }
527
528 void G1StringDedupTable::verify() {
529 for (size_t bucket = 0; bucket < _table->_size; bucket++) {
530 // Verify entries
531 G1StringDedupEntry** entry = _table->bucket(bucket);
532 while (*entry != NULL) {
533 typeArrayOop value = (*entry)->obj();
534 guarantee(value != NULL, "Object must not be NULL");
535 guarantee(G1CollectedHeap::heap()->is_in_reserved(value), "Object must be on the heap");
536 guarantee(!value->is_forwarded(), "Object must not be forwarded");
537 guarantee(value->is_typeArray(), "Object must be a typeArrayOop");
538 bool latin1 = (*entry)->latin1();
539 unsigned int hash = hash_code(value, latin1);
540 guarantee((*entry)->hash() == hash, "Table entry has inorrect hash");
541 guarantee(_table->hash_to_index(hash) == bucket, "Table entry has incorrect index");
542 entry = (*entry)->next_addr();
543 }
544
545 // Verify that we do not have entries with identical oops or identical arrays.
546 // We only need to compare entries in the same bucket. If the same oop or an
547 // identical array has been inserted more than once into different/incorrect
548 // buckets the verification step above will catch that.
549 G1StringDedupEntry** entry1 = _table->bucket(bucket);
550 while (*entry1 != NULL) {
551 typeArrayOop value1 = (*entry1)->obj();
552 bool latin1_1 = (*entry1)->latin1();
553 G1StringDedupEntry** entry2 = (*entry1)->next_addr();
554 while (*entry2 != NULL) {
555 typeArrayOop value2 = (*entry2)->obj();
556 bool latin1_2 = (*entry2)->latin1();
557 guarantee(latin1_1 != latin1_2 || !equals(value1, value2), "Table entries must not have identical arrays");
558 entry2 = (*entry2)->next_addr();
559 }
560 entry1 = (*entry1)->next_addr();
561 }
562 }
563 }
564
565 void G1StringDedupTable::trim_entry_cache() {
566 MutexLockerEx ml(StringDedupTable_lock, Mutex::_no_safepoint_check_flag);
567 size_t max_cache_size = (size_t)(_table->_size * _max_cache_factor);
568 _entry_cache->trim(max_cache_size);
569 }
570
571 void G1StringDedupTable::print_statistics(outputStream* st) {
572 st->print_cr(
573 " [Table]\n"
574 " [Memory Usage: " G1_STRDEDUP_BYTES_FORMAT_NS "]\n"
575 " [Size: " SIZE_FORMAT ", Min: " SIZE_FORMAT ", Max: " SIZE_FORMAT "]\n"
576 " [Entries: " UINTX_FORMAT ", Load: " G1_STRDEDUP_PERCENT_FORMAT_NS ", Cached: " UINTX_FORMAT ", Added: " UINTX_FORMAT ", Removed: " UINTX_FORMAT "]\n"
577 " [Resize Count: " UINTX_FORMAT ", Shrink Threshold: " UINTX_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT_NS "), Grow Threshold: " UINTX_FORMAT "(" G1_STRDEDUP_PERCENT_FORMAT_NS ")]\n"
|