1 /*
   2  * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "gc/shared/ptrQueue.hpp"
  27 #include "logging/log.hpp"
  28 #include "memory/allocation.hpp"
  29 #include "memory/allocation.inline.hpp"
  30 #include "runtime/atomic.hpp"
  31 #include "runtime/mutex.hpp"
  32 #include "runtime/mutexLocker.hpp"
  33 #include "runtime/orderAccess.hpp"
  34 #include "runtime/thread.inline.hpp"
  35 #include "utilities/globalCounter.inline.hpp"
  36 
  37 #include <new>
  38 
  39 PtrQueue::PtrQueue(PtrQueueSet* qset, bool active) :
  40   _qset(qset),
  41   _active(active),
  42   _index(0),
  43   _capacity_in_bytes(0),
  44   _buf(NULL)
  45 {}
  46 
  47 PtrQueue::~PtrQueue() {
  48   assert(_buf == NULL, "queue must be flushed before delete");
  49 }
  50 
  51 void PtrQueue::flush_impl() {
  52   if (_buf != NULL) {
  53     BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
  54     if (is_empty()) {
  55       // No work to do.
  56       qset()->deallocate_buffer(node);
  57     } else {
  58       qset()->enqueue_completed_buffer(node);
  59     }
  60     _buf = NULL;
  61     set_index(0);
  62   }
  63 }
  64 
  65 void PtrQueue::enqueue_known_active(void* ptr) {
  66   while (_index == 0) {
  67     handle_zero_index();
  68   }
  69 
  70   assert(_buf != NULL, "postcondition");
  71   assert(index() > 0, "postcondition");
  72   assert(index() <= capacity(), "invariant");
  73   _index -= _element_size;
  74   _buf[index()] = ptr;
  75 }
  76 
  77 void PtrQueue::handle_zero_index() {
  78   assert(index() == 0, "precondition");
  79 
  80   if (_buf != NULL) {
  81     handle_completed_buffer();
  82   } else {
  83     // Bootstrapping kludge; lazily initialize capacity.  The initial
  84     // thread's queues are constructed before the second phase of the
  85     // two-phase initialization of the associated qsets.  As a result,
  86     // we can't initialize _capacity_in_bytes in the queue constructor.
  87     if (_capacity_in_bytes == 0) {
  88       _capacity_in_bytes = index_to_byte_index(qset()->buffer_size());
  89     }
  90     allocate_buffer();
  91   }
  92 }
  93 
  94 void PtrQueue::allocate_buffer() {
  95   _buf = qset()->allocate_buffer();
  96   reset();
  97 }
  98 
  99 void PtrQueue::enqueue_completed_buffer() {
 100   assert(_buf != NULL, "precondition");
 101   BufferNode* node = BufferNode::make_node_from_buffer(_buf, index());
 102   qset()->enqueue_completed_buffer(node);
 103   allocate_buffer();
 104 }
 105 
 106 BufferNode* BufferNode::allocate(size_t size) {
 107   size_t byte_size = size * sizeof(void*);
 108   void* data = NEW_C_HEAP_ARRAY(char, buffer_offset() + byte_size, mtGC);
 109   return new (data) BufferNode;
 110 }
 111 
 112 void BufferNode::deallocate(BufferNode* node) {
 113   node->~BufferNode();
 114   FREE_C_HEAP_ARRAY(char, node);
 115 }
 116 
 117 BufferNode::Allocator::Allocator(const char* name, size_t buffer_size) :
 118   _buffer_size(buffer_size),
 119   _pending_list(),
 120   _free_list(),
 121   _pending_count(0),
 122   _free_count(0),
 123   _transfer_lock(false)
 124 {
 125   strncpy(_name, name, sizeof(_name) - 1);
 126   _name[sizeof(_name) - 1] = '\0';
 127 }
 128 
 129 BufferNode::Allocator::~Allocator() {
 130   delete_list(_free_list.pop_all());
 131   delete_list(_pending_list.pop_all());
 132 }
 133 
 134 void BufferNode::Allocator::delete_list(BufferNode* list) {
 135   while (list != NULL) {
 136     BufferNode* next = list->next();
 137     DEBUG_ONLY(list->set_next(NULL);)
 138     BufferNode::deallocate(list);
 139     list = next;
 140   }
 141 }
 142 
 143 size_t BufferNode::Allocator::free_count() const {
 144   return Atomic::load(&_free_count);
 145 }
 146 
 147 BufferNode* BufferNode::Allocator::allocate() {
 148   BufferNode* node;
 149   {
 150     // Protect against ABA; see release().
 151     GlobalCounter::CriticalSection cs(Thread::current());
 152     node = _free_list.pop();
 153   }
 154   if (node == NULL) {
 155     node = BufferNode::allocate(_buffer_size);
 156   } else {
 157     // Decrement count after getting buffer from free list.  This, along
 158     // with incrementing count before adding to free list, ensures count
 159     // never underflows.
 160     size_t count = Atomic::sub(1u, &_free_count);
 161     assert((count + 1) != 0, "_free_count underflow");
 162   }
 163   return node;
 164 }
 165 
 166 // To solve the ABA problem for lock-free stack pop, allocate does the
 167 // pop inside a critical section, and release synchronizes on the
 168 // critical sections before adding to the _free_list.  But we don't
 169 // want to make every release have to do a synchronize.  Instead, we
 170 // initially place released nodes on the _pending_list, and transfer
 171 // them to the _free_list in batches.  Only one transfer at a time is
 172 // permitted, with a lock bit to control access to that phase.  A
 173 // transfer takes all the nodes from the _pending_list, synchronizes on
 174 // the _free_list pops, and then adds the former pending nodes to the
 175 // _free_list.  While that's happening, other threads might be adding
 176 // other nodes to the _pending_list, to be dealt with by some later
 177 // transfer.
 178 void BufferNode::Allocator::release(BufferNode* node) {
 179   assert(node != NULL, "precondition");
 180   assert(node->next() == NULL, "precondition");
 181 
 182   // Desired minimum transfer batch size.  There is relatively little
 183   // importance to the specific number.  It shouldn't be too big, else
 184   // we're wasting space when the release rate is low.  If the release
 185   // rate is high, we might accumulate more than this before being
 186   // able to start a new transfer, but that's okay.  Also note that
 187   // the allocation rate and the release rate are going to be fairly
 188   // similar, due to how the buffers are used.
 189   const size_t trigger_transfer = 10;
 190 
 191   // Add to pending list. Update count first so no underflow in transfer.
 192   size_t pending_count = Atomic::add(1u, &_pending_count);
 193   _pending_list.push(*node);
 194   if (pending_count > trigger_transfer) {
 195     try_transfer_pending();
 196   }
 197 }
 198 
 199 // Try to transfer nodes from _pending_list to _free_list, with a
 200 // synchronization delay for any in-progress pops from the _free_list,
 201 // to solve ABA there.  Return true if performed a (possibly empty)
 202 // transfer, false if blocked from doing so by some other thread's
 203 // in-progress transfer.
 204 bool BufferNode::Allocator::try_transfer_pending() {
 205   // Attempt to claim the lock.
 206   if (Atomic::load(&_transfer_lock) || // Skip CAS if likely to fail.
 207       Atomic::cmpxchg(true, &_transfer_lock, false)) {
 208     return false;
 209   }
 210   // Have the lock; perform the transfer.
 211 
 212   // Claim all the pending nodes.
 213   BufferNode* first = _pending_list.pop_all();
 214   if (first != NULL) {
 215     // Prepare to add the claimed nodes, and update _pending_count.
 216     BufferNode* last = first;
 217     size_t count = 1;
 218     for (BufferNode* next = first->next(); next != NULL; next = next->next()) {
 219       last = next;
 220       ++count;
 221     }
 222     Atomic::sub(count, &_pending_count);
 223 
 224     // Wait for any in-progress pops, to avoid ABA for them.
 225     GlobalCounter::write_synchronize();
 226 
 227     // Add synchronized nodes to _free_list.
 228     // Update count first so no underflow in allocate().
 229     Atomic::add(count, &_free_count);
 230     _free_list.prepend(*first, *last);
 231     log_trace(gc, ptrqueue, freelist)
 232              ("Transferred %s pending to free: " SIZE_FORMAT, name(), count);
 233   }
 234   OrderAccess::release_store(&_transfer_lock, false);
 235   return true;
 236 }
 237 
 238 size_t BufferNode::Allocator::reduce_free_list(size_t remove_goal) {
 239   try_transfer_pending();
 240   size_t removed = 0;
 241   for ( ; removed < remove_goal; ++removed) {
 242     BufferNode* node = _free_list.pop();
 243     if (node == NULL) break;
 244     BufferNode::deallocate(node);
 245   }
 246   size_t new_count = Atomic::sub(removed, &_free_count);
 247   log_debug(gc, ptrqueue, freelist)
 248            ("Reduced %s free list by " SIZE_FORMAT " to " SIZE_FORMAT,
 249             name(), removed, new_count);
 250   return removed;
 251 }
 252 
 253 PtrQueueSet::PtrQueueSet(bool notify_when_complete) :
 254   _allocator(NULL),
 255   _cbl_mon(NULL),
 256   _completed_buffers_head(NULL),
 257   _completed_buffers_tail(NULL),
 258   _n_completed_buffers(0),
 259   _process_completed_buffers_threshold(ProcessCompletedBuffersThresholdNever),
 260   _process_completed_buffers(false),
 261   _notify_when_complete(notify_when_complete),
 262   _all_active(false)
 263 {}
 264 
 265 PtrQueueSet::~PtrQueueSet() {
 266   // There are presently only a couple (derived) instances ever
 267   // created, and they are permanent, so no harm currently done by
 268   // doing nothing here.
 269 }
 270 
 271 void PtrQueueSet::initialize(Monitor* cbl_mon,
 272                              BufferNode::Allocator* allocator) {
 273   assert(cbl_mon != NULL && allocator != NULL, "Init order issue?");
 274   _cbl_mon = cbl_mon;
 275   _allocator = allocator;
 276 }
 277 
 278 void** PtrQueueSet::allocate_buffer() {
 279   BufferNode* node = _allocator->allocate();
 280   return BufferNode::make_buffer_from_node(node);
 281 }
 282 
 283 void PtrQueueSet::deallocate_buffer(BufferNode* node) {
 284   _allocator->release(node);
 285 }
 286 
 287 void PtrQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
 288   MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 289   cbn->set_next(NULL);
 290   if (_completed_buffers_tail == NULL) {
 291     assert(_completed_buffers_head == NULL, "Well-formedness");
 292     _completed_buffers_head = cbn;
 293     _completed_buffers_tail = cbn;
 294   } else {
 295     _completed_buffers_tail->set_next(cbn);
 296     _completed_buffers_tail = cbn;
 297   }
 298   _n_completed_buffers++;
 299 
 300   if (!_process_completed_buffers &&
 301       (_n_completed_buffers > _process_completed_buffers_threshold)) {
 302     _process_completed_buffers = true;
 303     if (_notify_when_complete) {
 304       _cbl_mon->notify();
 305     }
 306   }
 307   assert_completed_buffers_list_len_correct_locked();
 308 }
 309 
 310 BufferNode* PtrQueueSet::get_completed_buffer(size_t stop_at) {
 311   MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 312 
 313   if (_n_completed_buffers <= stop_at) {
 314     return NULL;
 315   }
 316 
 317   assert(_n_completed_buffers > 0, "invariant");
 318   assert(_completed_buffers_head != NULL, "invariant");
 319   assert(_completed_buffers_tail != NULL, "invariant");
 320 
 321   BufferNode* bn = _completed_buffers_head;
 322   _n_completed_buffers--;
 323   _completed_buffers_head = bn->next();
 324   if (_completed_buffers_head == NULL) {
 325     assert(_n_completed_buffers == 0, "invariant");
 326     _completed_buffers_tail = NULL;
 327     _process_completed_buffers = false;
 328   }
 329   assert_completed_buffers_list_len_correct_locked();
 330   bn->set_next(NULL);
 331   return bn;
 332 }
 333 
 334 void PtrQueueSet::abandon_completed_buffers() {
 335   BufferNode* buffers_to_delete = NULL;
 336   {
 337     MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 338     buffers_to_delete = _completed_buffers_head;
 339     _completed_buffers_head = NULL;
 340     _completed_buffers_tail = NULL;
 341     _n_completed_buffers = 0;
 342     _process_completed_buffers = false;
 343   }
 344   while (buffers_to_delete != NULL) {
 345     BufferNode* bn = buffers_to_delete;
 346     buffers_to_delete = bn->next();
 347     bn->set_next(NULL);
 348     deallocate_buffer(bn);
 349   }
 350 }
 351 
 352 #ifdef ASSERT
 353 
 354 void PtrQueueSet::assert_completed_buffers_list_len_correct_locked() {
 355   assert_lock_strong(_cbl_mon);
 356   size_t n = 0;
 357   for (BufferNode* bn = _completed_buffers_head; bn != NULL; bn = bn->next()) {
 358     ++n;
 359   }
 360   assert(n == _n_completed_buffers,
 361          "Completed buffer length is wrong: counted: " SIZE_FORMAT
 362          ", expected: " SIZE_FORMAT, n, _n_completed_buffers);
 363 }
 364 
 365 #endif // ASSERT
 366 
 367 // Merge lists of buffers. Notify the processing threads.
 368 // The source queue is emptied as a result. The queues
 369 // must share the monitor.
 370 void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) {
 371   assert(_cbl_mon == src->_cbl_mon, "Should share the same lock");
 372   MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 373   if (_completed_buffers_tail == NULL) {
 374     assert(_completed_buffers_head == NULL, "Well-formedness");
 375     _completed_buffers_head = src->_completed_buffers_head;
 376     _completed_buffers_tail = src->_completed_buffers_tail;
 377   } else {
 378     assert(_completed_buffers_head != NULL, "Well formedness");
 379     if (src->_completed_buffers_head != NULL) {
 380       _completed_buffers_tail->set_next(src->_completed_buffers_head);
 381       _completed_buffers_tail = src->_completed_buffers_tail;
 382     }
 383   }
 384   _n_completed_buffers += src->_n_completed_buffers;
 385 
 386   src->_n_completed_buffers = 0;
 387   src->_completed_buffers_head = NULL;
 388   src->_completed_buffers_tail = NULL;
 389   src->_process_completed_buffers = false;
 390 
 391   assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL ||
 392          _completed_buffers_head != NULL && _completed_buffers_tail != NULL,
 393          "Sanity");
 394   assert_completed_buffers_list_len_correct_locked();
 395 }
 396 
 397 void PtrQueueSet::notify_if_necessary() {
 398   MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
 399   if (_n_completed_buffers > _process_completed_buffers_threshold) {
 400     _process_completed_buffers = true;
 401     if (_notify_when_complete)
 402       _cbl_mon->notify();
 403   }
 404 }