1 /*
   2  * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "gc/shared/oopStorage.inline.hpp"
  27 #include "gc/shared/oopStorageParState.inline.hpp"
  28 #include "logging/log.hpp"
  29 #include "logging/logStream.hpp"
  30 #include "memory/allocation.inline.hpp"
  31 #include "runtime/atomic.hpp"
  32 #include "runtime/globals.hpp"
  33 #include "runtime/handles.inline.hpp"
  34 #include "runtime/mutex.hpp"
  35 #include "runtime/mutexLocker.hpp"
  36 #include "runtime/orderAccess.hpp"
  37 #include "runtime/safepoint.hpp"
  38 #include "runtime/stubRoutines.hpp"
  39 #include "runtime/thread.hpp"
  40 #include "utilities/align.hpp"
  41 #include "utilities/count_trailing_zeros.hpp"
  42 #include "utilities/debug.hpp"
  43 #include "utilities/globalDefinitions.hpp"
  44 #include "utilities/macros.hpp"
  45 #include "utilities/ostream.hpp"
  46 #include "utilities/spinYield.hpp"
  47 
  48 OopStorage::AllocateEntry::AllocateEntry() : _prev(NULL), _next(NULL) {}
  49 
  50 OopStorage::AllocateEntry::~AllocateEntry() {
  51   assert(_prev == NULL, "deleting attached block");
  52   assert(_next == NULL, "deleting attached block");
  53 }
  54 
  55 OopStorage::AllocateList::AllocateList(const AllocateEntry& (*get_entry)(const Block& block)) :
  56   _head(NULL), _tail(NULL), _get_entry(get_entry)
  57 {}
  58 
  59 OopStorage::AllocateList::~AllocateList() {
  60   // ~OopStorage() empties its lists before destroying them.
  61   assert(_head == NULL, "deleting non-empty block list");
  62   assert(_tail == NULL, "deleting non-empty block list");
  63 }
  64 
  65 void OopStorage::AllocateList::push_front(const Block& block) {
  66   const Block* old = _head;
  67   if (old == NULL) {
  68     assert(_tail == NULL, "invariant");
  69     _head = _tail = █
  70   } else {
  71     _get_entry(block)._next = old;
  72     _get_entry(*old)._prev = █
  73     _head = █
  74   }
  75 }
  76 
  77 void OopStorage::AllocateList::push_back(const Block& block) {
  78   const Block* old = _tail;
  79   if (old == NULL) {
  80     assert(_head == NULL, "invariant");
  81     _head = _tail = █
  82   } else {
  83     _get_entry(*old)._next = █
  84     _get_entry(block)._prev = old;
  85     _tail = █
  86   }
  87 }
  88 
  89 void OopStorage::AllocateList::unlink(const Block& block) {
  90   const AllocateEntry& block_entry = _get_entry(block);
  91   const Block* prev_blk = block_entry._prev;
  92   const Block* next_blk = block_entry._next;
  93   block_entry._prev = NULL;
  94   block_entry._next = NULL;
  95   if ((prev_blk == NULL) && (next_blk == NULL)) {
  96     assert(_head == &block, "invariant");
  97     assert(_tail == &block, "invariant");
  98     _head = _tail = NULL;
  99   } else if (prev_blk == NULL) {
 100     assert(_head == &block, "invariant");
 101     _get_entry(*next_blk)._prev = NULL;
 102     _head = next_blk;
 103   } else if (next_blk == NULL) {
 104     assert(_tail == &block, "invariant");
 105     _get_entry(*prev_blk)._next = NULL;
 106     _tail = prev_blk;
 107   } else {
 108     _get_entry(*next_blk)._prev = prev_blk;
 109     _get_entry(*prev_blk)._next = next_blk;
 110   }
 111 }
 112 
 113 OopStorage::ActiveArray::ActiveArray(size_t size) :
 114   _size(size),
 115   _block_count(0),
 116   _refcount(0)
 117 {}
 118 
 119 OopStorage::ActiveArray::~ActiveArray() {
 120   assert(_refcount == 0, "precondition");
 121 }
 122 
 123 OopStorage::ActiveArray* OopStorage::ActiveArray::create(size_t size, AllocFailType alloc_fail) {
 124   size_t size_in_bytes = blocks_offset() + sizeof(Block*) * size;
 125   void* mem = NEW_C_HEAP_ARRAY3(char, size_in_bytes, mtGC, CURRENT_PC, alloc_fail);
 126   if (mem == NULL) return NULL;
 127   return new (mem) ActiveArray(size);
 128 }
 129 
 130 void OopStorage::ActiveArray::destroy(ActiveArray* ba) {
 131   ba->~ActiveArray();
 132   FREE_C_HEAP_ARRAY(char, ba);
 133 }
 134 
 135 size_t OopStorage::ActiveArray::size() const {
 136   return _size;
 137 }
 138 
 139 size_t OopStorage::ActiveArray::block_count() const {
 140   return _block_count;
 141 }
 142 
 143 size_t OopStorage::ActiveArray::block_count_acquire() const {
 144   return OrderAccess::load_acquire(&_block_count);
 145 }
 146 
 147 void OopStorage::ActiveArray::increment_refcount() const {
 148   int new_value = Atomic::add(1, &_refcount);
 149   assert(new_value >= 1, "negative refcount %d", new_value - 1);
 150 }
 151 
 152 bool OopStorage::ActiveArray::decrement_refcount() const {
 153   int new_value = Atomic::sub(1, &_refcount);
 154   assert(new_value >= 0, "negative refcount %d", new_value);
 155   return new_value == 0;
 156 }
 157 
 158 bool OopStorage::ActiveArray::push(Block* block) {
 159   size_t index = _block_count;
 160   if (index < _size) {
 161     block->set_active_index(index);
 162     *block_ptr(index) = block;
 163     // Use a release_store to ensure all the setup is complete before
 164     // making the block visible.
 165     OrderAccess::release_store(&_block_count, index + 1);
 166     return true;
 167   } else {
 168     return false;
 169   }
 170 }
 171 
 172 void OopStorage::ActiveArray::remove(Block* block) {
 173   assert(_block_count > 0, "array is empty");
 174   size_t index = block->active_index();
 175   assert(*block_ptr(index) == block, "block not present");
 176   size_t last_index = _block_count - 1;
 177   Block* last_block = *block_ptr(last_index);
 178   last_block->set_active_index(index);
 179   *block_ptr(index) = last_block;
 180   _block_count = last_index;
 181 }
 182 
 183 void OopStorage::ActiveArray::copy_from(const ActiveArray* from) {
 184   assert(_block_count == 0, "array must be empty");
 185   size_t count = from->_block_count;
 186   assert(count <= _size, "precondition");
 187   Block* const* from_ptr = from->block_ptr(0);
 188   Block** to_ptr = block_ptr(0);
 189   for (size_t i = 0; i < count; ++i) {
 190     Block* block = *from_ptr++;
 191     assert(block->active_index() == i, "invariant");
 192     *to_ptr++ = block;
 193   }
 194   _block_count = count;
 195 }
 196 
 197 // Blocks start with an array of BitsPerWord oop entries.  That array
 198 // is divided into conceptual BytesPerWord sections of BitsPerByte
 199 // entries.  Blocks are allocated aligned on section boundaries, for
 200 // the convenience of mapping from an entry to the containing block;
 201 // see block_for_ptr().  Aligning on section boundary rather than on
 202 // the full _data wastes a lot less space, but makes for a bit more
 203 // work in block_for_ptr().
 204 
 205 const unsigned section_size = BitsPerByte;
 206 const unsigned section_count = BytesPerWord;
 207 const unsigned block_alignment = sizeof(oop) * section_size;
 208 
 209 OopStorage::Block::Block(const OopStorage* owner, void* memory) :
 210   _data(),
 211   _allocated_bitmask(0),
 212   _owner(owner),
 213   _memory(memory),
 214   _active_index(0),
 215   _allocate_entry(),
 216   _deferred_updates_next(NULL),
 217   _release_refcount(0)
 218 {
 219   STATIC_ASSERT(_data_pos == 0);
 220   STATIC_ASSERT(section_size * section_count == ARRAY_SIZE(_data));
 221   assert(offset_of(Block, _data) == _data_pos, "invariant");
 222   assert(owner != NULL, "NULL owner");
 223   assert(is_aligned(this, block_alignment), "misaligned block");
 224 }
 225 
 226 OopStorage::Block::~Block() {
 227   assert(_release_refcount == 0, "deleting block while releasing");
 228   assert(_deferred_updates_next == NULL, "deleting block with deferred update");
 229   // Clear fields used by block_for_ptr and entry validation, which
 230   // might help catch bugs.  Volatile to prevent dead-store elimination.
 231   const_cast<uintx volatile&>(_allocated_bitmask) = 0;
 232   const_cast<OopStorage* volatile&>(_owner) = NULL;
 233 }
 234 
 235 const OopStorage::AllocateEntry& OopStorage::Block::get_allocate_entry(const Block& block) {
 236   return block._allocate_entry;
 237 }
 238 
 239 size_t OopStorage::Block::allocation_size() {
 240   // _data must be first member, so aligning Block aligns _data.
 241   STATIC_ASSERT(_data_pos == 0);
 242   return sizeof(Block) + block_alignment - sizeof(void*);
 243 }
 244 
 245 size_t OopStorage::Block::allocation_alignment_shift() {
 246   return exact_log2(block_alignment);
 247 }
 248 
 249 inline bool is_full_bitmask(uintx bitmask) { return ~bitmask == 0; }
 250 inline bool is_empty_bitmask(uintx bitmask) { return bitmask == 0; }
 251 
 252 bool OopStorage::Block::is_full() const {
 253   return is_full_bitmask(allocated_bitmask());
 254 }
 255 
 256 bool OopStorage::Block::is_empty() const {
 257   return is_empty_bitmask(allocated_bitmask());
 258 }
 259 
 260 uintx OopStorage::Block::bitmask_for_entry(const oop* ptr) const {
 261   return bitmask_for_index(get_index(ptr));
 262 }
 263 
 264 // A block is deletable if
 265 // (1) It is empty.
 266 // (2) There is not a release() operation currently operating on it.
 267 // (3) It is not in the deferred updates list.
 268 // The order of tests is important for proper interaction between release()
 269 // and concurrent deletion.
 270 bool OopStorage::Block::is_deletable() const {
 271   return (OrderAccess::load_acquire(&_allocated_bitmask) == 0) &&
 272          (OrderAccess::load_acquire(&_release_refcount) == 0) &&
 273          (OrderAccess::load_acquire(&_deferred_updates_next) == NULL);
 274 }
 275 
 276 OopStorage::Block* OopStorage::Block::deferred_updates_next() const {
 277   return _deferred_updates_next;
 278 }
 279 
 280 void OopStorage::Block::set_deferred_updates_next(Block* block) {
 281   _deferred_updates_next = block;
 282 }
 283 
 284 bool OopStorage::Block::contains(const oop* ptr) const {
 285   const oop* base = get_pointer(0);
 286   return (base <= ptr) && (ptr < (base + ARRAY_SIZE(_data)));
 287 }
 288 
 289 size_t OopStorage::Block::active_index() const {
 290   return _active_index;
 291 }
 292 
 293 void OopStorage::Block::set_active_index(size_t index) {
 294   _active_index = index;
 295 }
 296 
 297 size_t OopStorage::Block::active_index_safe(const Block* block) {
 298   STATIC_ASSERT(sizeof(intptr_t) == sizeof(block->_active_index));
 299   assert(CanUseSafeFetchN(), "precondition");
 300   return SafeFetchN((intptr_t*)&block->_active_index, 0);
 301 }
 302 
 303 unsigned OopStorage::Block::get_index(const oop* ptr) const {
 304   assert(contains(ptr), PTR_FORMAT " not in block " PTR_FORMAT, p2i(ptr), p2i(this));
 305   return static_cast<unsigned>(ptr - get_pointer(0));
 306 }
 307 
 308 oop* OopStorage::Block::allocate() {
 309   // Use CAS loop because release may change bitmask outside of lock.
 310   uintx allocated = allocated_bitmask();
 311   while (true) {
 312     assert(!is_full_bitmask(allocated), "attempt to allocate from full block");
 313     unsigned index = count_trailing_zeros(~allocated);
 314     uintx new_value = allocated | bitmask_for_index(index);
 315     uintx fetched = Atomic::cmpxchg(new_value, &_allocated_bitmask, allocated);
 316     if (fetched == allocated) {
 317       return get_pointer(index); // CAS succeeded; return entry for index.
 318     }
 319     allocated = fetched;       // CAS failed; retry with latest value.
 320   }
 321 }
 322 
 323 OopStorage::Block* OopStorage::Block::new_block(const OopStorage* owner) {
 324   // _data must be first member: aligning block => aligning _data.
 325   STATIC_ASSERT(_data_pos == 0);
 326   size_t size_needed = allocation_size();
 327   void* memory = NEW_C_HEAP_ARRAY_RETURN_NULL(char, size_needed, mtGC);
 328   if (memory == NULL) {
 329     return NULL;
 330   }
 331   void* block_mem = align_up(memory, block_alignment);
 332   assert(sizeof(Block) + pointer_delta(block_mem, memory, 1) <= size_needed,
 333          "allocated insufficient space for aligned block");
 334   return ::new (block_mem) Block(owner, memory);
 335 }
 336 
 337 void OopStorage::Block::delete_block(const Block& block) {
 338   void* memory = block._memory;
 339   block.Block::~Block();
 340   FREE_C_HEAP_ARRAY(char, memory);
 341 }
 342 
 343 // This can return a false positive if ptr is not contained by some
 344 // block.  For some uses, it is a precondition that ptr is valid,
 345 // e.g. contained in some block in owner's _active_array.  Other uses
 346 // require additional validation of the result.
 347 OopStorage::Block*
 348 OopStorage::Block::block_for_ptr(const OopStorage* owner, const oop* ptr) {
 349   assert(CanUseSafeFetchN(), "precondition");
 350   STATIC_ASSERT(_data_pos == 0);
 351   // Const-ness of ptr is not related to const-ness of containing block.
 352   // Blocks are allocated section-aligned, so get the containing section.
 353   oop* section_start = align_down(const_cast<oop*>(ptr), block_alignment);
 354   // Start with a guess that the containing section is the last section,
 355   // so the block starts section_count-1 sections earlier.
 356   oop* section = section_start - (section_size * (section_count - 1));
 357   // Walk up through the potential block start positions, looking for
 358   // the owner in the expected location.  If we're below the actual block
 359   // start position, the value at the owner position will be some oop
 360   // (possibly NULL), which can never match the owner.
 361   intptr_t owner_addr = reinterpret_cast<intptr_t>(owner);
 362   for (unsigned i = 0; i < section_count; ++i, section += section_size) {
 363     Block* candidate = reinterpret_cast<Block*>(section);
 364     intptr_t* candidate_owner_addr
 365       = reinterpret_cast<intptr_t*>(&candidate->_owner);
 366     if (SafeFetchN(candidate_owner_addr, 0) == owner_addr) {
 367       return candidate;
 368     }
 369   }
 370   return NULL;
 371 }
 372 
 373 //////////////////////////////////////////////////////////////////////////////
 374 // Allocation
 375 //
 376 // Allocation involves the _allocate_list, which contains a subset of the
 377 // blocks owned by a storage object.  This is a doubly-linked list, linked
 378 // through dedicated fields in the blocks.  Full blocks are removed from this
 379 // list, though they are still present in the _active_array.  Empty blocks are
 380 // kept at the end of the _allocate_list, to make it easy for empty block
 381 // deletion to find them.
 382 //
 383 // allocate(), and delete_empty_blocks_concurrent() lock the
 384 // _allocate_mutex while performing any list and array modifications.
 385 //
 386 // allocate() and release() update a block's _allocated_bitmask using CAS
 387 // loops.  This prevents loss of updates even though release() performs
 388 // its updates without any locking.
 389 //
 390 // allocate() obtains the entry from the first block in the _allocate_list,
 391 // and updates that block's _allocated_bitmask to indicate the entry is in
 392 // use.  If this makes the block full (all entries in use), the block is
 393 // removed from the _allocate_list so it won't be considered by future
 394 // allocations until some entries in it are released.
 395 //
 396 // release() is performed lock-free. release() first looks up the block for
 397 // the entry, using address alignment to find the enclosing block (thereby
 398 // avoiding iteration over the _active_array).  Once the block has been
 399 // determined, its _allocated_bitmask needs to be updated, and its position in
 400 // the _allocate_list may need to be updated.  There are two cases:
 401 //
 402 // (a) If the block is neither full nor would become empty with the release of
 403 // the entry, only its _allocated_bitmask needs to be updated.  But if the CAS
 404 // update fails, the applicable case may change for the retry.
 405 //
 406 // (b) Otherwise, the _allocate_list also needs to be modified.  This requires
 407 // locking the _allocate_mutex.  To keep the release() operation lock-free,
 408 // rather than updating the _allocate_list itself, it instead performs a
 409 // lock-free push of the block onto the _deferred_updates list.  Entries on
 410 // that list are processed by allocate() and delete_empty_blocks_XXX(), while
 411 // they already hold the necessary lock.  That processing makes the block's
 412 // list state consistent with its current _allocated_bitmask.  The block is
 413 // added to the _allocate_list if not already present and the bitmask is not
// full.  The block is moved to the end of the _allocate_list if the bitmask
 415 // is empty, for ease of empty block deletion processing.
 416 
 417 oop* OopStorage::allocate() {
 418   MutexLockerEx ml(_allocate_mutex, Mutex::_no_safepoint_check_flag);
 419   // Do some deferred update processing every time we allocate.
 420   // Continue processing deferred updates if _allocate_list is empty,
 421   // in the hope that we'll get a block from that, rather than
 422   // allocating a new block.
 423   while (reduce_deferred_updates() && (_allocate_list.head() == NULL)) {}
 424 
 425   // Use the first block in _allocate_list for the allocation.
 426   Block* block = _allocate_list.head();
 427   if (block == NULL) {
 428     // No available blocks; make a new one, and add to storage.
 429     {
 430       MutexUnlockerEx mul(_allocate_mutex, Mutex::_no_safepoint_check_flag);
 431       block = Block::new_block(this);
 432     }
 433     if (block == NULL) {
 434       while (_allocate_list.head() == NULL) {
 435         if (!reduce_deferred_updates()) {
 436           // Failed to make new block, no other thread made a block
 437           // available while the mutex was released, and didn't get
 438           // one from a deferred update either, so return failure.
 439           log_info(oopstorage, ref)("%s: failed block allocation", name());
 440           return NULL;
 441         }
 442       }
 443     } else {
 444       // Add new block to storage.
 445       log_info(oopstorage, blocks)("%s: new block " PTR_FORMAT, name(), p2i(block));
 446 
 447       // Add new block to the _active_array, growing if needed.
 448       if (!_active_array->push(block)) {
 449         if (expand_active_array()) {
 450           guarantee(_active_array->push(block), "push failed after expansion");
 451         } else {
 452           log_info(oopstorage, blocks)("%s: failed active array expand", name());
 453           Block::delete_block(*block);
 454           return NULL;
 455         }
 456       }
 457       // Add to end of _allocate_list.  The mutex release allowed
 458       // other threads to add blocks to the _allocate_list.  We prefer
 459       // to allocate from non-empty blocks, to allow empty blocks to
 460       // be deleted.
 461       _allocate_list.push_back(*block);
 462     }
 463     block = _allocate_list.head();
 464   }
 465   // Allocate from first block.
 466   assert(block != NULL, "invariant");
 467   assert(!block->is_full(), "invariant");
 468   if (block->is_empty()) {
 469     // Transitioning from empty to not empty.
 470     log_debug(oopstorage, blocks)("%s: block not empty " PTR_FORMAT, name(), p2i(block));
 471   }
 472   oop* result = block->allocate();
 473   assert(result != NULL, "allocation failed");
 474   assert(!block->is_empty(), "postcondition");
 475   Atomic::inc(&_allocation_count); // release updates outside lock.
 476   if (block->is_full()) {
 477     // Transitioning from not full to full.
 478     // Remove full blocks from consideration by future allocates.
 479     log_debug(oopstorage, blocks)("%s: block full " PTR_FORMAT, name(), p2i(block));
 480     _allocate_list.unlink(*block);
 481   }
 482   log_info(oopstorage, ref)("%s: allocated " PTR_FORMAT, name(), p2i(result));
 483   return result;
 484 }
 485 
 486 // Create a new, larger, active array with the same content as the
 487 // current array, and then replace, relinquishing the old array.
 488 // Return true if the array was successfully expanded, false to
 489 // indicate allocation failure.
 490 bool OopStorage::expand_active_array() {
 491   assert_lock_strong(_allocate_mutex);
 492   ActiveArray* old_array = _active_array;
 493   size_t new_size = 2 * old_array->size();
 494   log_info(oopstorage, blocks)("%s: expand active array " SIZE_FORMAT,
 495                                name(), new_size);
 496   ActiveArray* new_array = ActiveArray::create(new_size, AllocFailStrategy::RETURN_NULL);
 497   if (new_array == NULL) return false;
 498   new_array->copy_from(old_array);
 499   replace_active_array(new_array);
 500   relinquish_block_array(old_array);
 501   return true;
 502 }
 503 
 504 OopStorage::ProtectActive::ProtectActive() : _enter(0), _exit() {}
 505 
 506 // Begin read-side critical section.
 507 uint OopStorage::ProtectActive::read_enter() {
 508   return Atomic::add(2u, &_enter);
 509 }
 510 
 511 // End read-side critical section.
 512 void OopStorage::ProtectActive::read_exit(uint enter_value) {
 513   Atomic::add(2u, &_exit[enter_value & 1]);
 514 }
 515 
 516 // Wait until all readers that entered the critical section before
 517 // synchronization have exited that critical section.
 518 void OopStorage::ProtectActive::write_synchronize() {
 519   SpinYield spinner;
 520   // Determine old and new exit counters, based on bit0 of the
 521   // on-entry _enter counter.
 522   uint value = OrderAccess::load_acquire(&_enter);
 523   volatile uint* new_ptr = &_exit[(value + 1) & 1];
 524   // Atomically change the in-use exit counter to the new counter, by
 525   // adding 1 to the _enter counter (flipping bit0 between 0 and 1)
 526   // and initializing the new exit counter to that enter value.  Note:
 527   // The new exit counter is not being used by read operations until
 528   // this change succeeds.
 529   uint old;
 530   do {
 531     old = value;
 532     *new_ptr = ++value;
 533     value = Atomic::cmpxchg(value, &_enter, old);
 534   } while (old != value);
 535   // Readers that entered the critical section before we changed the
 536   // selected exit counter will use the old exit counter.  Readers
 537   // entering after the change will use the new exit counter.  Wait
 538   // for all the critical sections started before the change to
 539   // complete, e.g. for the value of old_ptr to catch up with old.
 540   volatile uint* old_ptr = &_exit[old & 1];
 541   while (old != OrderAccess::load_acquire(old_ptr)) {
 542     spinner.wait();
 543   }
 544 }
 545 
 546 // Make new_array the _active_array.  Increments new_array's refcount
 547 // to account for the new reference.  The assignment is atomic wrto
 548 // obtain_active_array; once this function returns, it is safe for the
 549 // caller to relinquish the old array.
 550 void OopStorage::replace_active_array(ActiveArray* new_array) {
 551   // Caller has the old array that is the current value of _active_array.
 552   // Update new_array refcount to account for the new reference.
 553   new_array->increment_refcount();
 554   // Install new_array, ensuring its initialization is complete first.
 555   OrderAccess::release_store(&_active_array, new_array);
 556   // Wait for any readers that could read the old array from _active_array.
 557   _protect_active.write_synchronize();
 558   // All obtain critical sections that could see the old array have
 559   // completed, having incremented the refcount of the old array.  The
 560   // caller can now safely relinquish the old array.
 561 }
 562 
 563 // Atomically (wrto replace_active_array) get the active array and
 564 // increment its refcount.  This provides safe access to the array,
 565 // even if an allocate operation expands and replaces the value of
 566 // _active_array.  The caller must relinquish the array when done
 567 // using it.
 568 OopStorage::ActiveArray* OopStorage::obtain_active_array() const {
 569   uint enter_value = _protect_active.read_enter();
 570   ActiveArray* result = OrderAccess::load_acquire(&_active_array);
 571   result->increment_refcount();
 572   _protect_active.read_exit(enter_value);
 573   return result;
 574 }
 575 
 576 // Decrement refcount of array and destroy if refcount is zero.
 577 void OopStorage::relinquish_block_array(ActiveArray* array) const {
 578   if (array->decrement_refcount()) {
 579     assert(array != _active_array, "invariant");
 580     ActiveArray::destroy(array);
 581   }
 582 }
 583 
 584 class OopStorage::WithActiveArray : public StackObj {
 585   const OopStorage* _storage;
 586   ActiveArray* _active_array;
 587 
 588 public:
 589   WithActiveArray(const OopStorage* storage) :
 590     _storage(storage),
 591     _active_array(storage->obtain_active_array())
 592   {}
 593 
 594   ~WithActiveArray() {
 595     _storage->relinquish_block_array(_active_array);
 596   }
 597 
 598   ActiveArray& active_array() const {
 599     return *_active_array;
 600   }
 601 };
 602 
 603 OopStorage::Block* OopStorage::find_block_or_null(const oop* ptr) const {
 604   assert(ptr != NULL, "precondition");
 605   return Block::block_for_ptr(this, ptr);
 606 }
 607 
 608 static void log_release_transitions(uintx releasing,
 609                                     uintx old_allocated,
 610                                     const OopStorage* owner,
 611                                     const void* block) {
 612   Log(oopstorage, blocks) log;
 613   LogStream ls(log.debug());
 614   if (is_full_bitmask(old_allocated)) {
 615     ls.print_cr("%s: block not full " PTR_FORMAT, owner->name(), p2i(block));
 616   }
 617   if (releasing == old_allocated) {
 618     ls.print_cr("%s: block empty " PTR_FORMAT, owner->name(), p2i(block));
 619   }
 620 }
 621 
 622 void OopStorage::Block::release_entries(uintx releasing, Block* volatile* deferred_list) {
 623   assert(releasing != 0, "preconditon");
 624   // Prevent empty block deletion when transitioning to empty.
 625   Atomic::inc(&_release_refcount);
 626 
 627   // Atomically update allocated bitmask.
 628   uintx old_allocated = _allocated_bitmask;
 629   while (true) {
 630     assert((releasing & ~old_allocated) == 0, "releasing unallocated entries");
 631     uintx new_value = old_allocated ^ releasing;
 632     uintx fetched = Atomic::cmpxchg(new_value, &_allocated_bitmask, old_allocated);
 633     if (fetched == old_allocated) break; // Successful update.
 634     old_allocated = fetched;             // Retry with updated bitmask.
 635   }
 636 
 637   // Now that the bitmask has been updated, if we have a state transition
 638   // (updated bitmask is empty or old bitmask was full), atomically push
 639   // this block onto the deferred updates list.  Some future call to
 640   // reduce_deferred_updates will make any needed changes related to this
 641   // block and _allocate_list.  This deferral avoids list updates and the
 642   // associated locking here.
 643   if ((releasing == old_allocated) || is_full_bitmask(old_allocated)) {
 644     // Log transitions.  Both transitions are possible in a single update.
 645     if (log_is_enabled(Debug, oopstorage, blocks)) {
 646       log_release_transitions(releasing, old_allocated, _owner, this);
 647     }
 648     // Attempt to claim responsibility for adding this block to the deferred
 649     // list, by setting the link to non-NULL by self-looping.  If this fails,
 650     // then someone else has made such a claim and the deferred update has not
 651     // yet been processed and will include our change, so we don't need to do
 652     // anything further.
 653     if (Atomic::replace_if_null(this, &_deferred_updates_next)) {
 654       // Successfully claimed.  Push, with self-loop for end-of-list.
 655       Block* head = *deferred_list;
 656       while (true) {
 657         _deferred_updates_next = (head == NULL) ? this : head;
 658         Block* fetched = Atomic::cmpxchg(this, deferred_list, head);
 659         if (fetched == head) break; // Successful update.
 660         head = fetched;             // Retry with updated head.
 661       }
 662       log_debug(oopstorage, blocks)("%s: deferred update " PTR_FORMAT,
 663                                     _owner->name(), p2i(this));
 664     }
 665   }
 666   // Release hold on empty block deletion.
 667   Atomic::dec(&_release_refcount);
 668 }
 669 
 670 // Process one available deferred update.  Returns true if one was processed.
 671 bool OopStorage::reduce_deferred_updates() {
 672   assert_locked_or_safepoint(_allocate_mutex);
 673   // Atomically pop a block off the list, if any available.
 674   // No ABA issue because this is only called by one thread at a time.
 675   // The atomicity is wrto pushes by release().
 676   Block* block = OrderAccess::load_acquire(&_deferred_updates);
 677   while (true) {
 678     if (block == NULL) return false;
 679     // Try atomic pop of block from list.
 680     Block* tail = block->deferred_updates_next();
 681     if (block == tail) tail = NULL; // Handle self-loop end marker.
 682     Block* fetched = Atomic::cmpxchg(tail, &_deferred_updates, block);
 683     if (fetched == block) break; // Update successful.
 684     block = fetched;             // Retry with updated block.
 685   }
 686   block->set_deferred_updates_next(NULL); // Clear tail after updating head.
 687   // Ensure bitmask read after pop is complete, including clearing tail, for
 688   // ordering with release().  Without this, we may be processing a stale
 689   // bitmask state here while blocking a release() operation from recording
 690   // the deferred update needed for its bitmask change.
 691   OrderAccess::storeload();
 692   // Process popped block.
 693   uintx allocated = block->allocated_bitmask();
 694 
 695   // Make membership in list consistent with bitmask state.
 696   if ((_allocate_list.ctail() != NULL) &&
 697       ((_allocate_list.ctail() == block) ||
 698        (_allocate_list.next(*block) != NULL))) {
 699     // Block is in the allocate list.
 700     assert(!is_full_bitmask(allocated), "invariant");
 701   } else if (!is_full_bitmask(allocated)) {
 702     // Block is not in the allocate list, but now should be.
 703     _allocate_list.push_front(*block);
 704   } // Else block is full and not in list, which is correct.
 705 
 706   // Move empty block to end of list, for possible deletion.
 707   if (is_empty_bitmask(allocated)) {
 708     _allocate_list.unlink(*block);
 709     _allocate_list.push_back(*block);
 710   }
 711 
 712   log_debug(oopstorage, blocks)("%s: processed deferred update " PTR_FORMAT,
 713                                 name(), p2i(block));
 714   return true;              // Processed one pending update.
 715 }
 716 
 717 inline void check_release_entry(const oop* entry) {
 718   assert(entry != NULL, "Releasing NULL");
 719   assert(*entry == NULL, "Releasing uncleared entry: " PTR_FORMAT, p2i(entry));
 720 }
 721 
 722 void OopStorage::release(const oop* ptr) {
 723   check_release_entry(ptr);
 724   Block* block = find_block_or_null(ptr);
 725   assert(block != NULL, "%s: invalid release " PTR_FORMAT, name(), p2i(ptr));
 726   log_info(oopstorage, ref)("%s: released " PTR_FORMAT, name(), p2i(ptr));
 727   block->release_entries(block->bitmask_for_entry(ptr), &_deferred_updates);
 728   Atomic::dec(&_allocation_count);
 729 }
 730 
 731 void OopStorage::release(const oop* const* ptrs, size_t size) {
 732   size_t i = 0;
 733   while (i < size) {
 734     check_release_entry(ptrs[i]);
 735     Block* block = find_block_or_null(ptrs[i]);
 736     assert(block != NULL, "%s: invalid release " PTR_FORMAT, name(), p2i(ptrs[i]));
 737     log_info(oopstorage, ref)("%s: released " PTR_FORMAT, name(), p2i(ptrs[i]));
 738     size_t count = 0;
 739     uintx releasing = 0;
 740     for ( ; i < size; ++i) {
 741       const oop* entry = ptrs[i];
 742       check_release_entry(entry);
 743       // If entry not in block, finish block and resume outer loop with entry.
 744       if (!block->contains(entry)) break;
 745       // Add entry to releasing bitmap.
 746       log_info(oopstorage, ref)("%s: released " PTR_FORMAT, name(), p2i(entry));
 747       uintx entry_bitmask = block->bitmask_for_entry(entry);
 748       assert((releasing & entry_bitmask) == 0,
 749              "Duplicate entry: " PTR_FORMAT, p2i(entry));
 750       releasing |= entry_bitmask;
 751       ++count;
 752     }
 753     // Release the contiguous entries that are in block.
 754     block->release_entries(releasing, &_deferred_updates);
 755     Atomic::sub(count, &_allocation_count);
 756   }
 757 }
 758 
 759 const char* dup_name(const char* name) {
 760   char* dup = NEW_C_HEAP_ARRAY(char, strlen(name) + 1, mtGC);
 761   strcpy(dup, name);
 762   return dup;
 763 }
 764 
// Size passed to ActiveArray::create() when a storage object constructs
// its initial active array.
const size_t initial_active_array_size = 8;
 766 
// Construct a storage object.
//
// name           - identifying name; a private copy is made via dup_name().
// allocate_mutex - stored as _allocate_mutex.
// active_mutex   - stored as _active_mutex; must have lower rank than
//                  allocate_mutex.
//
// The new storage starts with a freshly created active array, no deferred
// updates, a zero allocation count, and no concurrent iteration active.
OopStorage::OopStorage(const char* name,
                       Mutex* allocate_mutex,
                       Mutex* active_mutex) :
  _name(dup_name(name)),
  _active_array(ActiveArray::create(initial_active_array_size)),
  _allocate_list(&Block::get_allocate_entry),
  _deferred_updates(NULL),
  _allocate_mutex(allocate_mutex),
  _active_mutex(active_mutex),
  _allocation_count(0),
  _concurrent_iteration_active(false)
{
  // Take a reference on the active array; the destructor drops it again.
  _active_array->increment_refcount();
  // Validate the supplied mutexes: the active mutex must rank below the
  // allocate mutex, and neither may be configured as
  // Mutex::_safepoint_check_always.
  assert(_active_mutex->rank() < _allocate_mutex->rank(),
         "%s: active_mutex must have lower rank than allocate_mutex", _name);
  assert(_active_mutex->_safepoint_check_required != Mutex::_safepoint_check_always,
         "%s: active mutex requires safepoint check", _name);
  assert(_allocate_mutex->_safepoint_check_required != Mutex::_safepoint_check_always,
         "%s: allocate mutex requires safepoint check", _name);
}
 787 
// Log and delete the given block.  The block must be empty (asserted).
// This function only destroys the block via Block::delete_block(); it does
// not itself touch _active_array or _allocate_list.
void OopStorage::delete_empty_block(const Block& block) {
  assert(block.is_empty(), "discarding non-empty block");
  log_info(oopstorage, blocks)("%s: delete empty block " PTR_FORMAT, name(), p2i(&block));
  Block::delete_block(block);
}
 793 
 794 OopStorage::~OopStorage() {
 795   Block* block;
 796   while ((block = _deferred_updates) != NULL) {
 797     _deferred_updates = block->deferred_updates_next();
 798     block->set_deferred_updates_next(NULL);
 799   }
 800   while ((block = _allocate_list.head()) != NULL) {
 801     _allocate_list.unlink(*block);
 802   }
 803   bool unreferenced = _active_array->decrement_refcount();
 804   assert(unreferenced, "deleting storage while _active_array is referenced");
 805   for (size_t i = _active_array->block_count(); 0 < i; ) {
 806     block = _active_array->at(--i);
 807     Block::delete_block(*block);
 808   }
 809   ActiveArray::destroy(_active_array);
 810   FREE_C_HEAP_ARRAY(char, _name);
 811 }
 812 
 813 void OopStorage::delete_empty_blocks_safepoint() {
 814   assert_at_safepoint();
 815   // Process any pending release updates, which may make more empty
 816   // blocks available for deletion.
 817   while (reduce_deferred_updates()) {}
 818   // Don't interfere with a concurrent iteration.
 819   if (_concurrent_iteration_active) return;
 820   // Delete empty (and otherwise deletable) blocks from end of _allocate_list.
 821   for (Block* block = _allocate_list.tail();
 822        (block != NULL) && block->is_deletable();
 823        block = _allocate_list.tail()) {
 824     _active_array->remove(block);
 825     _allocate_list.unlink(*block);
 826     delete_empty_block(*block);
 827   }
 828 }
 829 
 830 void OopStorage::delete_empty_blocks_concurrent() {
 831   MutexLockerEx ml(_allocate_mutex, Mutex::_no_safepoint_check_flag);
 832   // Other threads could be adding to the empty block count while we
 833   // release the mutex across the block deletions.  Set an upper bound
 834   // on how many blocks we'll try to release, so other threads can't
 835   // cause an unbounded stay in this function.
 836   size_t limit = block_count();
 837 
 838   for (size_t i = 0; i < limit; ++i) {
 839     // Additional updates might become available while we dropped the
 840     // lock.  But limit number processed to limit lock duration.
 841     reduce_deferred_updates();
 842 
 843     Block* block = _allocate_list.tail();
 844     if ((block == NULL) || !block->is_deletable()) {
 845       // No block to delete, so done.  There could be more pending
 846       // deferred updates that could give us more work to do; deal with
 847       // that in some later call, to limit lock duration here.
 848       return;
 849     }
 850 
 851     {
 852       MutexLockerEx aml(_active_mutex, Mutex::_no_safepoint_check_flag);
 853       // Don't interfere with a concurrent iteration.
 854       if (_concurrent_iteration_active) return;
 855       _active_array->remove(block);
 856     }
 857     // Remove block from _allocate_list and delete it.
 858     _allocate_list.unlink(*block);
 859     // Release mutex while deleting block.
 860     MutexUnlockerEx ul(_allocate_mutex, Mutex::_no_safepoint_check_flag);
 861     delete_empty_block(*block);
 862   }
 863 }
 864 
 865 OopStorage::EntryStatus OopStorage::allocation_status(const oop* ptr) const {
 866   const Block* block = find_block_or_null(ptr);
 867   if (block != NULL) {
 868     // Prevent block deletion and _active_array modification.
 869     MutexLockerEx ml(_allocate_mutex, Mutex::_no_safepoint_check_flag);
 870     // Block could be a false positive, so get index carefully.
 871     size_t index = Block::active_index_safe(block);
 872     if ((index < _active_array->block_count()) &&
 873         (block == _active_array->at(index)) &&
 874         block->contains(ptr)) {
 875       if ((block->allocated_bitmask() & block->bitmask_for_entry(ptr)) != 0) {
 876         return ALLOCATED_ENTRY;
 877       } else {
 878         return UNALLOCATED_ENTRY;
 879       }
 880     }
 881   }
 882   return INVALID_ENTRY;
 883 }
 884 
// Return the current value of _allocation_count.  This is a plain read
// of the counter; no lock is taken here.
size_t OopStorage::allocation_count() const {
  return _allocation_count;
}
 888 
 889 size_t OopStorage::block_count() const {
 890   WithActiveArray wab(this);
 891   // Count access is racy, but don't care.
 892   return wab.active_array().block_count();
 893 }
 894 
 895 size_t OopStorage::total_memory_usage() const {
 896   size_t total_size = sizeof(OopStorage);
 897   total_size += strlen(name()) + 1;
 898   total_size += sizeof(ActiveArray);
 899   WithActiveArray wab(this);
 900   const ActiveArray& blocks = wab.active_array();
 901   // Count access is racy, but don't care.
 902   total_size += blocks.block_count() * Block::allocation_size();
 903   total_size += blocks.size() * sizeof(Block*);
 904   return total_size;
 905 }
 906 
 907 // Parallel iteration support
 908 
 909 uint OopStorage::BasicParState::default_estimated_thread_count(bool concurrent) {
 910   uint configured = concurrent ? ConcGCThreads : ParallelGCThreads;
 911   return MAX2(1u, configured);  // Never estimate zero threads.
 912 }
 913 
// Set up the shared state for a parallel iteration over storage.
//
// storage                - the storage to iterate; its active array is
//                          obtained here and relinquished by the destructor.
// estimated_thread_count - must be positive (asserted); used when sizing
//                          the segments handed out by claim_next_segment().
// concurrent             - whether this is a concurrent iteration; only
//                          then does update_iteration_state() modify the
//                          storage's _concurrent_iteration_active flag.
OopStorage::BasicParState::BasicParState(const OopStorage* storage,
                                         uint estimated_thread_count,
                                         bool concurrent) :
  _storage(storage),
  _active_array(_storage->obtain_active_array()),
  _block_count(0),              // initialized properly below
  _next_block(0),
  _estimated_thread_count(estimated_thread_count),
  _concurrent(concurrent)
{
  assert(estimated_thread_count > 0, "estimated thread count must be positive");
  // Mark the iteration as started before sampling the block count.
  update_iteration_state(true);
  // Get the block count *after* iteration state updated, so concurrent
  // empty block deletion is suppressed and can't reduce the count.  But
  // ensure the count we use was written after the block with that count
  // was fully initialized; see ActiveArray::push.
  _block_count = _active_array->block_count_acquire();
}
 932 
// End the iteration: hand the active array obtained by the constructor
// back to the storage, then mark the iteration as finished (which only
// changes storage state for a concurrent iteration).
OopStorage::BasicParState::~BasicParState() {
  _storage->relinquish_block_array(_active_array);
  update_iteration_state(false);
}
 937 
 938 void OopStorage::BasicParState::update_iteration_state(bool value) {
 939   if (_concurrent) {
 940     MutexLockerEx ml(_storage->_active_mutex, Mutex::_no_safepoint_check_flag);
 941     assert(_storage->_concurrent_iteration_active != value, "precondition");
 942     _storage->_concurrent_iteration_active = value;
 943   }
 944 }
 945 
 946 bool OopStorage::BasicParState::claim_next_segment(IterationData* data) {
 947   data->_processed += data->_segment_end - data->_segment_start;
 948   size_t start = OrderAccess::load_acquire(&_next_block);
 949   if (start >= _block_count) {
 950     return finish_iteration(data); // No more blocks available.
 951   }
 952   // Try to claim several at a time, but not *too* many.  We want to
 953   // avoid deciding there are many available and selecting a large
 954   // quantity, get delayed, and then end up claiming most or all of
 955   // the remaining largish amount of work, leaving nothing for other
 956   // threads to do.  But too small a step can lead to contention
 957   // over _next_block, esp. when the work per block is small.
 958   size_t max_step = 10;
 959   size_t remaining = _block_count - start;
 960   size_t step = MIN2(max_step, 1 + (remaining / _estimated_thread_count));
 961   // Atomic::add with possible overshoot.  This can perform better
 962   // than a CAS loop on some platforms when there is contention.
 963   // We can cope with the uncertainty by recomputing start/end from
 964   // the result of the add, and dealing with potential overshoot.
 965   size_t end = Atomic::add(step, &_next_block);
 966   // _next_block may have changed, so recompute start from result of add.
 967   start = end - step;
 968   // _next_block may have changed so much that end has overshot.
 969   end = MIN2(end, _block_count);
 970   // _next_block may have changed so much that even start has overshot.
 971   if (start < _block_count) {
 972     // Record claimed segment for iteration.
 973     data->_segment_start = start;
 974     data->_segment_end = end;
 975     return true;                // Success.
 976   } else {
 977     // No more blocks to claim.
 978     return finish_iteration(data);
 979   }
 980 }
 981 
 982 bool OopStorage::BasicParState::finish_iteration(const IterationData* data) const {
 983   log_debug(oopstorage, blocks, stats)
 984            ("Parallel iteration on %s: blocks = " SIZE_FORMAT
 985             ", processed = " SIZE_FORMAT " (%2.f%%)",
 986             _storage->name(), _block_count, data->_processed,
 987             percent_of(data->_processed, _block_count));
 988   return false;
 989 }
 990 
// Return the storage's name: the private copy of the constructor's name
// argument, owned by this object.
const char* OopStorage::name() const { return _name; }
 992 
 993 #ifndef PRODUCT
 994 
 995 void OopStorage::print_on(outputStream* st) const {
 996   size_t allocations = _allocation_count;
 997   size_t blocks = _active_array->block_count();
 998 
 999   double data_size = section_size * section_count;
1000   double alloc_percentage = percent_of((double)allocations, blocks * data_size);
1001 
1002   st->print("%s: " SIZE_FORMAT " entries in " SIZE_FORMAT " blocks (%.F%%), " SIZE_FORMAT " bytes",
1003             name(), allocations, blocks, alloc_percentage, total_memory_usage());
1004   if (_concurrent_iteration_active) {
1005     st->print(", concurrent iteration active");
1006   }
1007 }
1008 
1009 #endif // !PRODUCT