--- old/src/hotspot/share/gc/shared/ptrQueue.cpp	2019-01-15 17:45:45.298047544 -0500
+++ new/src/hotspot/share/gc/shared/ptrQueue.cpp	2019-01-15 17:45:45.042034023 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,12 +24,15 @@
 #include "precompiled.hpp"
 #include "gc/shared/ptrQueue.hpp"
+#include "logging/log.hpp"
 #include "memory/allocation.hpp"
 #include "memory/allocation.inline.hpp"
 #include "runtime/atomic.hpp"
 #include "runtime/mutex.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/thread.inline.hpp"
+#include "utilities/globalCounter.inline.hpp"
 #include <new>
@@ -85,20 +88,29 @@
   FREE_C_HEAP_ARRAY(char, node);
 }
 
-BufferNode::Allocator::Allocator(size_t buffer_size, Mutex* lock) :
+BufferNode::Allocator::Allocator(const char* name, size_t buffer_size) :
   _buffer_size(buffer_size),
-  _lock(lock),
-  _free_list(NULL),
-  _free_count(0)
+  _pending_list(),
+  _free_list(),
+  _pending_count(0),
+  _free_count(0),
+  _transfer_lock(false)
 {
-  assert(lock != NULL, "precondition");
+  strncpy(_name, name, sizeof(_name));
+  _name[sizeof(_name) - 1] = '\0';
 }
 
 BufferNode::Allocator::~Allocator() {
-  while (_free_list != NULL) {
-    BufferNode* node = _free_list;
-    _free_list = node->next();
-    BufferNode::deallocate(node);
+  delete_list(_free_list.pop_all());
+  delete_list(_pending_list.pop_all());
+}
+
+void BufferNode::Allocator::delete_list(BufferNode* list) {
+  while (list != NULL) {
+    BufferNode* next = list->next();
+    DEBUG_ONLY(list->set_next(NULL);)
+    BufferNode::deallocate(list);
+    list = next;
   }
 }
@@ -107,55 +119,109 @@
 }
 
 BufferNode* BufferNode::Allocator::allocate() {
-  BufferNode* node = NULL;
+  BufferNode* node;
   {
-    MutexLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
-    node = _free_list;
-    if (node != NULL) {
-      _free_list = node->next();
-      --_free_count;
-      node->set_next(NULL);
-      node->set_index(0);
-      return node;
-    }
-  }
-  return BufferNode::allocate(_buffer_size);
-}
-
+    // Protect against ABA; see release().
+    GlobalCounter::CriticalSection cs(Thread::current());
+    node = _free_list.pop();
+  }
+  if (node == NULL) {
+    node = BufferNode::allocate(_buffer_size);
+  } else {
+    // Decrement count after getting buffer from free list. This, along
+    // with incrementing count before adding to free list, ensures count
+    // never underflows.
+    size_t count = Atomic::sub(1u, &_free_count);
+    assert((count + 1) != 0, "_free_count underflow");
+  }
+  return node;
+}
+
+// To solve the ABA problem for lock-free stack pop, allocate does the
+// pop inside a critical section, and release synchronizes on the
+// critical sections before adding to the _free_list. But we don't
+// want to make every release have to do a synchronize. Instead, we
+// initially place released nodes on the _pending_list, and transfer
+// them to the _free_list in batches. Only one transfer at a time is
+// permitted, with a lock bit to control access to that phase. A
+// transfer takes all the nodes from the _pending_list, synchronizes on
+// the _free_list pops, and then adds the former pending nodes to the
+// _free_list. While that's happening, other threads might be adding
+// other nodes to the _pending_list, to be dealt with by some later
+// transfer.
 void BufferNode::Allocator::release(BufferNode* node) {
-  MutexLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
-  node->set_next(_free_list);
-  _free_list = node;
-  ++_free_count;
+  assert(node != NULL, "precondition");
+  assert(node->next() == NULL, "precondition");
+
+  // Desired minimum transfer batch size. There is relatively little
+  // importance to the specific number. It shouldn't be too big, else
+  // we're wasting space when the release rate is low. If the release
+  // rate is high, we might accumulate more than this before being
+  // able to start a new transfer, but that's okay. Also note that
+  // the allocation rate and the release rate are going to be fairly
+  // similar, due to how the buffers are used.
+  const size_t trigger_transfer = 10;
+
+  // Add to pending list. Update count first so no underflow in transfer.
+  size_t pending_count = Atomic::add(1u, &_pending_count);
+  _pending_list.push(*node);
+  if (pending_count > trigger_transfer) {
+    try_transfer_pending();
+  }
 }
 
-void BufferNode::Allocator::reduce_free_list() {
-  BufferNode* head = NULL;
-  {
-    MutexLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
-    // For now, delete half.
-    size_t remove = _free_count / 2;
-    if (remove > 0) {
-      head = _free_list;
-      BufferNode* tail = head;
-      BufferNode* prev = NULL;
-      for (size_t i = 0; i < remove; ++i) {
-        assert(tail != NULL, "free list size is wrong");
-        prev = tail;
-        tail = tail->next();
-      }
-      assert(prev != NULL, "invariant");
-      assert(prev->next() == tail, "invariant");
-      prev->set_next(NULL);
-      _free_list = tail;
-      _free_count -= remove;
+// Try to transfer nodes from _pending_list to _free_list, with a
+// synchronization delay for any in-progress pops from the _free_list,
+// to solve ABA there. Return true if performed a (possibly empty)
+// transfer, false if blocked from doing so by some other thread's
+// in-progress transfer.
+bool BufferNode::Allocator::try_transfer_pending() {
+  // Attempt to claim the lock.
+  if (Atomic::load(&_transfer_lock) || // Skip CAS if likely to fail.
+      Atomic::cmpxchg(true, &_transfer_lock, false)) {
+    return false;
+  }
+  // Have the lock; perform the transfer.
+
+  // Claim all the pending nodes.
+  BufferNode* first = _pending_list.pop_all();
+  if (first != NULL) {
+    // Prepare to add the claimed nodes, and update _pending_count.
+    BufferNode* last = first;
+    size_t count = 1;
+    for (BufferNode* next = first->next(); next != NULL; next = next->next()) {
+      last = next;
+      ++count;
     }
+    Atomic::sub(count, &_pending_count);
+
+    // Wait for any in-progress pops, to avoid ABA for them.
+    GlobalCounter::write_synchronize();
+
+    // Add synchronized nodes to _free_list.
+    // Update count first so no underflow in allocate().
+    Atomic::add(count, &_free_count);
+    _free_list.prepend(*first, *last);
+    log_trace(gc, ptrqueue, freelist)
+             ("Transferred %s pending to free: " SIZE_FORMAT, name(), count);
+  }
+  OrderAccess::release_store(&_transfer_lock, false);
+  return true;
+}
+
+size_t BufferNode::Allocator::reduce_free_list(size_t remove_goal) {
+  try_transfer_pending();
+  size_t removed = 0;
+  for ( ; removed < remove_goal; ++removed) {
+    BufferNode* node = _free_list.pop();
+    if (node == NULL) break;
+    BufferNode::deallocate(node);
   }
-  while (head != NULL) {
-    BufferNode* next = head->next();
-    BufferNode::deallocate(head);
-    head = next;
-  }
+  size_t new_count = Atomic::sub(removed, &_free_count);
+  log_debug(gc, ptrqueue, freelist)
+            ("Reduced %s free list by " SIZE_FORMAT " to " SIZE_FORMAT,
+             name(), removed, new_count);
+  return removed;
 }
 
 PtrQueueSet::PtrQueueSet(bool notify_when_complete) :
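
Editor's aside (not part of the patch): the comment block above describes the allocate/release/transfer scheme in prose; the following is a minimal standalone sketch of that shape using only the C++ standard library. GlobalCounter's RCU-style critical sections and write_synchronize() are replaced by a crude active-reader count, and every name below (FreeListAllocator, ReaderCount, LockFreeStack, Node) is a hypothetical stand-in rather than a HotSpot API.

// Illustrative sketch only -- not HotSpot code. Mirrors the shape of the
// scheme described in the comment block: pop inside a read section,
// release to a pending list, batched transfer after a synchronization wait.
#include <atomic>
#include <cassert>
#include <cstddef>

struct Node { std::atomic<Node*> next{nullptr}; };

// Treiber stack. pop() is ABA-safe here only because callers run it inside a
// ReaderCount::Section and recycled nodes are re-added after synchronize().
class LockFreeStack {
  std::atomic<Node*> _top{nullptr};
public:
  void push(Node& n) { prepend(n, n); }
  void prepend(Node& first, Node& last) {
    Node* old_top = _top.load();
    do {
      last.next.store(old_top);
    } while (!_top.compare_exchange_weak(old_top, &first));
  }
  Node* pop() {
    Node* n = _top.load();
    while (n != nullptr && !_top.compare_exchange_weak(n, n->next.load())) {}
    return n;
  }
  Node* pop_all() { return _top.exchange(nullptr); }
};

// Crude stand-in for GlobalCounter: readers announce themselves, and
// synchronize() spins until no pop is in flight.
class ReaderCount {
  std::atomic<size_t> _active{0};
public:
  struct Section {
    ReaderCount& rc;
    explicit Section(ReaderCount& r) : rc(r) { rc._active.fetch_add(1); }
    ~Section() { rc._active.fetch_sub(1); }
  };
  void synchronize() const { while (_active.load() != 0) { /* spin */ } }
};

class FreeListAllocator {
  LockFreeStack _free_list;
  LockFreeStack _pending_list;
  std::atomic<size_t> _free_count{0};
  std::atomic<size_t> _pending_count{0};
  std::atomic<bool> _transfer_lock{false};
  ReaderCount _readers;
  static constexpr size_t trigger_transfer = 10;  // batch threshold, as in the patch

public:
  Node* allocate() {
    Node* n;
    {
      // Pop inside a read section, so a concurrent transfer cannot recycle a
      // node onto the free list while this pop may still be examining it.
      ReaderCount::Section cs(_readers);
      n = _free_list.pop();
    }
    if (n == nullptr) return new Node();  // free list empty: make a new node
    _free_count.fetch_sub(1);             // decrement after a successful pop
    n->next.store(nullptr);
    return n;
  }

  void release(Node* n) {
    assert(n != nullptr && n->next.load() == nullptr);
    size_t pending = _pending_count.fetch_add(1) + 1;  // count first: no underflow
    _pending_list.push(*n);
    if (pending > trigger_transfer) try_transfer_pending();
  }

  bool try_transfer_pending() {
    bool expected = false;                // only one transfer at a time
    if (_transfer_lock.load() ||
        !_transfer_lock.compare_exchange_strong(expected, true)) {
      return false;
    }
    Node* first = _pending_list.pop_all();
    if (first != nullptr) {
      Node* last = first;
      size_t count = 1;
      for (Node* p = last->next.load(); p != nullptr; p = p->next.load()) {
        last = p;
        ++count;
      }
      _pending_count.fetch_sub(count);
      _readers.synchronize();             // wait out in-flight pops (ABA guard)
      _free_count.fetch_add(count);       // increment before nodes become poppable
      _free_list.prepend(*first, *last);
    }
    _transfer_lock.store(false);
    return true;
  }
};

Batching the transfer amortizes the synchronization wait over many released nodes, so individual release() calls stay cheap; only the thread that wins the transfer lock ever waits on the synchronize step.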