--- old/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	2019-06-19 14:01:12.119171016 -0400
+++ new/src/hotspot/share/gc/g1/g1CollectedHeap.cpp	2019-06-19 14:01:11.679147435 -0400
@@ -1677,7 +1677,6 @@
   _card_table = ct;
 
   G1BarrierSet::satb_mark_queue_set().initialize(this,
-                                                 SATB_Q_CBL_mon,
                                                  &bs->satb_mark_queue_buffer_allocator(),
                                                  G1SATBProcessCompletedThreshold,
                                                  G1SATBBufferEnqueueingThresholdPercent);
--- old/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp	2019-06-19 14:01:13.319235328 -0400
+++ new/src/hotspot/share/gc/g1/g1ConcurrentMark.cpp	2019-06-19 14:01:12.867211104 -0400
@@ -2419,11 +2419,12 @@
     abort_marking_if_regular_check_fail();
   }
 
-  _draining_satb_buffers = false;
+  // Can't assert qset is empty here, even if not aborted.  If concurrent,
+  // some other thread might be adding to the queue.  If not concurrent,
+  // some other thread might have won the race for the last buffer, but
+  // has not yet decremented the count.
 
-  assert(has_aborted() ||
-         _cm->concurrent() ||
-         satb_mq_set.completed_buffers_num() == 0, "invariant");
+  _draining_satb_buffers = false;
 
   // again, this was a potentially expensive operation, decrease the
   // limits to get the regular clock call early
--- old/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	2019-06-19 14:01:14.467296854 -0400
+++ new/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	2019-06-19 14:01:14.019272844 -0400
@@ -78,7 +78,14 @@
 }
 
 G1DirtyCardQueueSet::G1DirtyCardQueueSet(bool notify_when_complete) :
-  PtrQueueSet(notify_when_complete),
+  PtrQueueSet(),
+  _cbl_mon(NULL),
+  _completed_buffers_head(NULL),
+  _completed_buffers_tail(NULL),
+  _n_completed_buffers(0),
+  _process_completed_buffers_threshold(ProcessCompletedBuffersThresholdNever),
+  _process_completed_buffers(false),
+  _notify_when_complete(notify_when_complete),
   _max_completed_buffers(MaxCompletedBuffersUnlimited),
   _completed_buffers_padding(0),
   _free_ids(NULL),
@@ -90,6 +97,7 @@
 }
 
 G1DirtyCardQueueSet::~G1DirtyCardQueueSet() {
+  abandon_completed_buffers();
   delete _free_ids;
 }
 
@@ -101,7 +109,9 @@
 void G1DirtyCardQueueSet::initialize(Monitor* cbl_mon,
                                      BufferNode::Allocator* allocator,
                                      bool init_free_ids) {
-  PtrQueueSet::initialize(cbl_mon, allocator);
+  PtrQueueSet::initialize(allocator);
+  assert(_cbl_mon == NULL, "Init order issue?");
+  _cbl_mon = cbl_mon;
   if (init_free_ids) {
     _free_ids = new G1FreeIdSet(0, num_par_ids());
   }
@@ -111,6 +121,123 @@
   G1ThreadLocalData::dirty_card_queue(t).handle_zero_index();
 }
 
+void G1DirtyCardQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
+  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  cbn->set_next(NULL);
+  if (_completed_buffers_tail == NULL) {
+    assert(_completed_buffers_head == NULL, "Well-formedness");
+    _completed_buffers_head = cbn;
+    _completed_buffers_tail = cbn;
+  } else {
+    _completed_buffers_tail->set_next(cbn);
+    _completed_buffers_tail = cbn;
+  }
+  _n_completed_buffers++;
+
+  if (!process_completed_buffers() &&
+      (_n_completed_buffers > process_completed_buffers_threshold())) {
+    set_process_completed_buffers(true);
+    if (_notify_when_complete) {
+      _cbl_mon->notify_all();
+    }
+  }
+  assert_completed_buffers_list_len_correct_locked();
+}
+
+BufferNode* G1DirtyCardQueueSet::get_completed_buffer(size_t stop_at) {
+  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+
+  if (_n_completed_buffers <= stop_at) {
+    return NULL;
+  }
+
+  assert(_n_completed_buffers > 0, "invariant");
+  assert(_completed_buffers_head != NULL, "invariant");
+  assert(_completed_buffers_tail != NULL, "invariant");
+
+  BufferNode* bn = _completed_buffers_head;
+  _n_completed_buffers--;
+  _completed_buffers_head = bn->next();
+  if (_completed_buffers_head == NULL) {
+    assert(_n_completed_buffers == 0, "invariant");
+    _completed_buffers_tail = NULL;
+    set_process_completed_buffers(false);
+  }
+  assert_completed_buffers_list_len_correct_locked();
+  bn->set_next(NULL);
+  return bn;
+}
+
+void G1DirtyCardQueueSet::abandon_completed_buffers() {
+  BufferNode* buffers_to_delete = NULL;
+  {
+    MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+    buffers_to_delete = _completed_buffers_head;
+    _completed_buffers_head = NULL;
+    _completed_buffers_tail = NULL;
+    _n_completed_buffers = 0;
+    set_process_completed_buffers(false);
+  }
+  while (buffers_to_delete != NULL) {
+    BufferNode* bn = buffers_to_delete;
+    buffers_to_delete = bn->next();
+    bn->set_next(NULL);
+    deallocate_buffer(bn);
+  }
+}
+
+void G1DirtyCardQueueSet::notify_if_necessary() {
+  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  if (_n_completed_buffers > process_completed_buffers_threshold()) {
+    set_process_completed_buffers(true);
+    if (_notify_when_complete)
+      _cbl_mon->notify();
+  }
+}
+
+#ifdef ASSERT
+void G1DirtyCardQueueSet::assert_completed_buffers_list_len_correct_locked() {
+  assert_lock_strong(_cbl_mon);
+  size_t n = 0;
+  for (BufferNode* bn = _completed_buffers_head; bn != NULL; bn = bn->next()) {
+    ++n;
+  }
+  assert(n == _n_completed_buffers,
+         "Completed buffer length is wrong: counted: " SIZE_FORMAT
+         ", expected: " SIZE_FORMAT, n, _n_completed_buffers);
+}
+#endif // ASSERT
+
+// Merge lists of buffers. Notify the processing threads.
+// The source queue is emptied as a result. The queues
+// must share the monitor.
+void G1DirtyCardQueueSet::merge_bufferlists(G1DirtyCardQueueSet *src) {
+  assert(_cbl_mon == src->_cbl_mon, "Should share the same lock");
+  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
+  if (_completed_buffers_tail == NULL) {
+    assert(_completed_buffers_head == NULL, "Well-formedness");
+    _completed_buffers_head = src->_completed_buffers_head;
+    _completed_buffers_tail = src->_completed_buffers_tail;
+  } else {
+    assert(_completed_buffers_head != NULL, "Well formedness");
+    if (src->_completed_buffers_head != NULL) {
+      _completed_buffers_tail->set_next(src->_completed_buffers_head);
+      _completed_buffers_tail = src->_completed_buffers_tail;
+    }
+  }
+  _n_completed_buffers += src->_n_completed_buffers;
+
+  src->_n_completed_buffers = 0;
+  src->_completed_buffers_head = NULL;
+  src->_completed_buffers_tail = NULL;
+  src->set_process_completed_buffers(false);
+
+  assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL ||
+         _completed_buffers_head != NULL && _completed_buffers_tail != NULL,
+         "Sanity");
+  assert_completed_buffers_list_len_correct_locked();
+}
+
 bool G1DirtyCardQueueSet::apply_closure_to_buffer(G1CardTableEntryClosure* cl,
                                                   BufferNode* node,
                                                   bool consume,
--- old/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp	2019-06-19 14:01:15.587356878 -0400
+++ new/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp	2019-06-19 14:01:15.143333083 -0400
@@ -76,6 +76,21 @@
 };
 
 class G1DirtyCardQueueSet: public PtrQueueSet {
+  Monitor* _cbl_mon;  // Protects the fields below.
+  BufferNode* _completed_buffers_head;
+  BufferNode* _completed_buffers_tail;
+  volatile size_t _n_completed_buffers;
+
+  size_t _process_completed_buffers_threshold;
+  volatile bool _process_completed_buffers;
+
+  // If true, notify_all on _cbl_mon when the threshold is reached.
+  bool _notify_when_complete;
+
+  void assert_completed_buffers_list_len_correct_locked() NOT_DEBUG_RETURN;
+
+  void abandon_completed_buffers();
+
   // Apply the closure to the elements of "node" from it's index to
   // buffer_size.  If all closure applications return true, then
   // returns true.  Stops processing after the first closure
@@ -111,7 +126,7 @@
   // mutator must start doing some of the concurrent refinement work,
   size_t _max_completed_buffers;
   size_t _completed_buffers_padding;
-  static const size_t MaxCompletedBuffersUnlimited = ~size_t(0);
+  static const size_t MaxCompletedBuffersUnlimited = SIZE_MAX;
 
   G1FreeIdSet* _free_ids;
 
@@ -142,6 +157,34 @@
   // it can be reused in place.
   bool process_or_enqueue_completed_buffer(BufferNode* node);
 
+  virtual void enqueue_completed_buffer(BufferNode* node);
+
+  // If the number of completed buffers is > stop_at, then remove and
+  // return a completed buffer from the list.  Otherwise, return NULL.
+  BufferNode* get_completed_buffer(size_t stop_at = 0);
+
+  // The number of buffers in the list.  Racy...
+  size_t completed_buffers_num() const { return _n_completed_buffers; }
+
+  bool process_completed_buffers() { return _process_completed_buffers; }
+  void set_process_completed_buffers(bool x) { _process_completed_buffers = x; }
+
+  // Get/Set the number of completed buffers that triggers log processing.
+  // Log processing should be done when the number of buffers exceeds the
+  // threshold.
+  void set_process_completed_buffers_threshold(size_t sz) {
+    _process_completed_buffers_threshold = sz;
+  }
+  size_t process_completed_buffers_threshold() const {
+    return _process_completed_buffers_threshold;
+  }
+  static const size_t ProcessCompletedBuffersThresholdNever = SIZE_MAX;
+
+  // Notify the consumer if the number of buffers crossed the threshold
+  void notify_if_necessary();
+
+  void merge_bufferlists(G1DirtyCardQueueSet* src);
+
   // Apply G1RefineCardConcurrentlyClosure to completed buffers until there are stop_at
   // completed buffers remaining.
   bool refine_completed_buffer_concurrently(uint worker_i, size_t stop_at);
@@ -150,13 +193,13 @@
   // must never return false. Must only be called during GC.
   bool apply_closure_during_gc(G1CardTableEntryClosure* cl, uint worker_i);
 
-  void reset_for_par_iteration() { _cur_par_buffer_node = completed_buffers_head(); }
+  void reset_for_par_iteration() { _cur_par_buffer_node = _completed_buffers_head; }
   // Applies the current closure to all completed buffers, non-consumptively.
   // Can be used in parallel, all callers using the iteration state initialized
   // by reset_for_par_iteration.
   void par_apply_closure_to_all_completed_buffers(G1CardTableEntryClosure* cl);
 
-  // If a full collection is happening, reset partial logs, and ignore
+  // If a full collection is happening, reset partial logs, and release
   // completed ones: the full collection will make them all irrelevant.
   void abandon_logs();
 
--- old/src/hotspot/share/gc/g1/g1SATBMarkQueueSet.cpp	2019-06-19 14:01:16.723417760 -0400
+++ new/src/hotspot/share/gc/g1/g1SATBMarkQueueSet.cpp	2019-06-19 14:01:16.271393536 -0400
@@ -35,12 +35,10 @@
 G1SATBMarkQueueSet::G1SATBMarkQueueSet() : _g1h(NULL) {}
 
 void G1SATBMarkQueueSet::initialize(G1CollectedHeap* g1h,
-                                    Monitor* cbl_mon,
                                     BufferNode::Allocator* allocator,
                                     size_t process_completed_buffers_threshold,
                                     uint buffer_enqueue_threshold_percentage) {
-  SATBMarkQueueSet::initialize(cbl_mon,
-                               allocator,
+  SATBMarkQueueSet::initialize(allocator,
                                process_completed_buffers_threshold,
                                buffer_enqueue_threshold_percentage);
   _g1h = g1h;
--- old/src/hotspot/share/gc/g1/g1SATBMarkQueueSet.hpp	2019-06-19 14:01:17.867479071 -0400
+++ new/src/hotspot/share/gc/g1/g1SATBMarkQueueSet.hpp	2019-06-19 14:01:17.415454847 -0400
@@ -38,7 +38,6 @@
   G1SATBMarkQueueSet();
 
   void initialize(G1CollectedHeap* g1h,
-                  Monitor* cbl_mon,
                   BufferNode::Allocator* allocator,
                   size_t process_completed_buffers_threshold,
                   uint buffer_enqueue_threshold_percentage);
--- old/src/hotspot/share/gc/shared/ptrQueue.cpp	2019-06-19 14:01:19.011540382 -0400
+++ new/src/hotspot/share/gc/shared/ptrQueue.cpp	2019-06-19 14:01:18.563516372 -0400
@@ -250,28 +250,15 @@
   return removed;
 }
 
-PtrQueueSet::PtrQueueSet(bool notify_when_complete) :
+PtrQueueSet::PtrQueueSet() :
   _allocator(NULL),
-  _cbl_mon(NULL),
-  _completed_buffers_head(NULL),
-  _completed_buffers_tail(NULL),
-  _n_completed_buffers(0),
-  _process_completed_buffers_threshold(ProcessCompletedBuffersThresholdNever),
-  _process_completed_buffers(false),
-  _notify_when_complete(notify_when_complete),
   _all_active(false)
 {}
 
-PtrQueueSet::~PtrQueueSet() {
-  // There are presently only a couple (derived) instances ever
-  // created, and they are permanent, so no harm currently done by
-  // doing nothing here.
-}
+PtrQueueSet::~PtrQueueSet() {}
 
-void PtrQueueSet::initialize(Monitor* cbl_mon,
-                             BufferNode::Allocator* allocator) {
-  assert(cbl_mon != NULL && allocator != NULL, "Init order issue?");
-  _cbl_mon = cbl_mon;
+void PtrQueueSet::initialize(BufferNode::Allocator* allocator) {
+  assert(allocator != NULL, "Init order issue?");
   _allocator = allocator;
 }
 
@@ -284,121 +271,3 @@
   _allocator->release(node);
 }
 
-void PtrQueueSet::enqueue_completed_buffer(BufferNode* cbn) {
-  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
-  cbn->set_next(NULL);
-  if (_completed_buffers_tail == NULL) {
-    assert(_completed_buffers_head == NULL, "Well-formedness");
-    _completed_buffers_head = cbn;
-    _completed_buffers_tail = cbn;
-  } else {
-    _completed_buffers_tail->set_next(cbn);
-    _completed_buffers_tail = cbn;
-  }
-  _n_completed_buffers++;
-
-  if (!_process_completed_buffers &&
-      (_n_completed_buffers > _process_completed_buffers_threshold)) {
-    _process_completed_buffers = true;
-    if (_notify_when_complete) {
-      _cbl_mon->notify();
-    }
-  }
-  assert_completed_buffers_list_len_correct_locked();
-}
-
-BufferNode* PtrQueueSet::get_completed_buffer(size_t stop_at) {
-  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
-
-  if (_n_completed_buffers <= stop_at) {
-    return NULL;
-  }
-
-  assert(_n_completed_buffers > 0, "invariant");
-  assert(_completed_buffers_head != NULL, "invariant");
-  assert(_completed_buffers_tail != NULL, "invariant");
-
-  BufferNode* bn = _completed_buffers_head;
-  _n_completed_buffers--;
-  _completed_buffers_head = bn->next();
-  if (_completed_buffers_head == NULL) {
-    assert(_n_completed_buffers == 0, "invariant");
-    _completed_buffers_tail = NULL;
-    _process_completed_buffers = false;
-  }
-  assert_completed_buffers_list_len_correct_locked();
-  bn->set_next(NULL);
-  return bn;
-}
-
-void PtrQueueSet::abandon_completed_buffers() {
-  BufferNode* buffers_to_delete = NULL;
-  {
-    MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
-    buffers_to_delete = _completed_buffers_head;
-    _completed_buffers_head = NULL;
-    _completed_buffers_tail = NULL;
-    _n_completed_buffers = 0;
-    _process_completed_buffers = false;
-  }
-  while (buffers_to_delete != NULL) {
-    BufferNode* bn = buffers_to_delete;
-    buffers_to_delete = bn->next();
-    bn->set_next(NULL);
-    deallocate_buffer(bn);
-  }
-}
-
-#ifdef ASSERT
-
-void PtrQueueSet::assert_completed_buffers_list_len_correct_locked() {
-  assert_lock_strong(_cbl_mon);
-  size_t n = 0;
-  for (BufferNode* bn = _completed_buffers_head; bn != NULL; bn = bn->next()) {
-    ++n;
-  }
-  assert(n == _n_completed_buffers,
-         "Completed buffer length is wrong: counted: " SIZE_FORMAT
-         ", expected: " SIZE_FORMAT, n, _n_completed_buffers);
-}
-
-#endif // ASSERT
-
-// Merge lists of buffers. Notify the processing threads.
-// The source queue is emptied as a result. The queues
-// must share the monitor.
-void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) {
-  assert(_cbl_mon == src->_cbl_mon, "Should share the same lock");
-  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
-  if (_completed_buffers_tail == NULL) {
-    assert(_completed_buffers_head == NULL, "Well-formedness");
-    _completed_buffers_head = src->_completed_buffers_head;
-    _completed_buffers_tail = src->_completed_buffers_tail;
-  } else {
-    assert(_completed_buffers_head != NULL, "Well formedness");
-    if (src->_completed_buffers_head != NULL) {
-      _completed_buffers_tail->set_next(src->_completed_buffers_head);
-      _completed_buffers_tail = src->_completed_buffers_tail;
-    }
-  }
-  _n_completed_buffers += src->_n_completed_buffers;
-
-  src->_n_completed_buffers = 0;
-  src->_completed_buffers_head = NULL;
-  src->_completed_buffers_tail = NULL;
-  src->_process_completed_buffers = false;
-
-  assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL ||
-         _completed_buffers_head != NULL && _completed_buffers_tail != NULL,
-         "Sanity");
-  assert_completed_buffers_list_len_correct_locked();
-}
-
-void PtrQueueSet::notify_if_necessary() {
-  MutexLocker x(_cbl_mon, Mutex::_no_safepoint_check_flag);
-  if (_n_completed_buffers > _process_completed_buffers_threshold) {
-    _process_completed_buffers = true;
-    if (_notify_when_complete)
-      _cbl_mon->notify();
-  }
-}
--- old/src/hotspot/share/gc/shared/ptrQueue.hpp	2019-06-19 14:01:20.187603408 -0400
+++ new/src/hotspot/share/gc/shared/ptrQueue.hpp	2019-06-19 14:01:19.739579398 -0400
@@ -296,35 +296,16 @@
 class PtrQueueSet {
   BufferNode::Allocator* _allocator;
 
-  Monitor* _cbl_mon;  // Protects the fields below.
-  BufferNode* _completed_buffers_head;
-  BufferNode* _completed_buffers_tail;
-  volatile size_t _n_completed_buffers;
-
-  size_t _process_completed_buffers_threshold;
-  volatile bool _process_completed_buffers;
-
-  // If true, notify_all on _cbl_mon when the threshold is reached.
-  bool _notify_when_complete;
-
-  void assert_completed_buffers_list_len_correct_locked() NOT_DEBUG_RETURN;
-
 protected:
   bool _all_active;
 
   // Create an empty ptr queue set.
-  PtrQueueSet(bool notify_when_complete = false);
+  PtrQueueSet();
   ~PtrQueueSet();
 
   // Because of init-order concerns, we can't pass these as constructor
   // arguments.
-  void initialize(Monitor* cbl_mon, BufferNode::Allocator* allocator);
-
-  // For (unlocked!) iteration over the completed buffers.
-  BufferNode* completed_buffers_head() const { return _completed_buffers_head; }
-
-  // Deallocate all of the completed buffers.
-  void abandon_completed_buffers();
+  void initialize(BufferNode::Allocator* allocator);
 
 public:
 
@@ -339,38 +320,13 @@
   // is ready to be processed by the collector.  It need not be full.
 
   // Adds node to the completed buffer list.
-  void enqueue_completed_buffer(BufferNode* node);
-
-  // If the number of completed buffers is > stop_at, then remove and
-  // return a completed buffer from the list.  Otherwise, return NULL.
-  BufferNode* get_completed_buffer(size_t stop_at = 0);
-
-  bool process_completed_buffers() { return _process_completed_buffers; }
-  void set_process_completed_buffers(bool x) { _process_completed_buffers = x; }
+  virtual void enqueue_completed_buffer(BufferNode* node) = 0;
 
   bool is_active() { return _all_active; }
 
   size_t buffer_size() const {
     return _allocator->buffer_size();
   }
-
-  // Get/Set the number of completed buffers that triggers log processing.
-  // Log processing should be done when the number of buffers exceeds the
-  // threshold.
-  void set_process_completed_buffers_threshold(size_t sz) {
-    _process_completed_buffers_threshold = sz;
-  }
-  size_t process_completed_buffers_threshold() const {
-    return _process_completed_buffers_threshold;
-  }
-  static const size_t ProcessCompletedBuffersThresholdNever = ~size_t(0);
-
-  size_t completed_buffers_num() const { return _n_completed_buffers; }
-
-  void merge_bufferlists(PtrQueueSet* src);
-
-  // Notify the consumer if the number of buffers crossed the threshold
-  void notify_if_necessary();
 };
 
 #endif // SHARE_GC_SHARED_PTRQUEUE_HPP
--- old/src/hotspot/share/gc/shared/satbMarkQueue.cpp	2019-06-19 14:01:21.355666005 -0400
+++ new/src/hotspot/share/gc/shared/satbMarkQueue.cpp	2019-06-19 14:01:20.895641352 -0400
@@ -28,12 +28,15 @@
 #include "logging/log.hpp"
 #include "memory/allocation.inline.hpp"
 #include "oops/oop.inline.hpp"
+#include "runtime/atomic.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/orderAccess.hpp"
 #include "runtime/os.hpp"
 #include "runtime/safepoint.hpp"
 #include "runtime/thread.hpp"
 #include "runtime/threadSMR.hpp"
 #include "runtime/vmThread.hpp"
+#include "utilities/globalCounter.inline.hpp"
 
 SATBMarkQueue::SATBMarkQueue(SATBMarkQueueSet* qset) :
   // SATB queues are only active during marking cycles. We create
@@ -107,15 +110,66 @@
 
 SATBMarkQueueSet::SATBMarkQueueSet() :
   PtrQueueSet(),
+  _list(),
+  _count_and_process_flag(0),
+  _process_completed_buffers_threshold(SIZE_MAX),
   _buffer_enqueue_threshold(0)
 {}
 
-void SATBMarkQueueSet::initialize(Monitor* cbl_mon,
-                                  BufferNode::Allocator* allocator,
+SATBMarkQueueSet::~SATBMarkQueueSet() {
+  abandon_completed_buffers();
+}
+
+// _count_and_process_flag has flag in least significant bit, count in
+// remaining bits.  _process_completed_buffers_threshold is scaled
+// accordingly, with the lsbit set, so a _count_and_process_flag value
+// is directly comparable with the recorded threshold value.  The
+// process flag is set whenever the count exceeds the threshold, and
+// remains set until the count is reduced to zero.
+
+// Increment count.  If count > threshold, set flag, else maintain flag.
+static void increment_count(volatile size_t* cfptr, size_t threshold) {
+  size_t old;
+  size_t value = Atomic::load(cfptr);
+  do {
+    old = value;
+    value += 2;
+    assert(value > old, "overflow");
+    if (value > threshold) value |= 1;
+    value = Atomic::cmpxchg(value, cfptr, old);
+  } while (value != old);
+}
+
+// Decrement count.  If count == 0, clear flag, else maintain flag.
+static void decrement_count(volatile size_t* cfptr) {
+  size_t old;
+  size_t value = Atomic::load(cfptr);
+  do {
+    assert((value >> 1) != 0, "underflow");
+    old = value;
+    value -= 2;
+    if (value <= 1) value = 0;
+    value = Atomic::cmpxchg(value, cfptr, old);
+  } while (value != old);
+}
+
+// Scale requested threshold to align with count field.  If scaling
+// overflows, just use max value.  Set process flag field to make
+// comparison in increment_count exact.
+static size_t scale_threshold(size_t value) {
+  size_t scaled_value = value << 1;
+  if ((scaled_value >> 1) != value) {
+    scaled_value = SIZE_MAX;
+  }
+  return scaled_value | 1;
+}
+
+void SATBMarkQueueSet::initialize(BufferNode::Allocator* allocator,
                                   size_t process_completed_buffers_threshold,
                                   uint buffer_enqueue_threshold_percentage) {
-  PtrQueueSet::initialize(cbl_mon, allocator);
-  set_process_completed_buffers_threshold(process_completed_buffers_threshold);
+  PtrQueueSet::initialize(allocator);
+  _process_completed_buffers_threshold =
+    scale_threshold(process_completed_buffers_threshold);
   assert(buffer_size() != 0, "buffer size not initialized");
   // Minimum threshold of 1 ensures enqueuing of completely full buffers.
   size_t size = buffer_size();
@@ -207,6 +261,38 @@
   }
 }
 
+// SATB buffer life-cycle - Per-thread queues obtain buffers from the
+// qset's buffer allocator, fill them, and push them onto the qset's
+// list.  The GC concurrently pops buffers from the qset, processes
+// them, and returns them to the buffer allocator for re-use.  Both
+// the allocator and the qset use lock-free stacks.  The ABA problem
+// is solved by having both allocation pops and GC pops performed
+// within GlobalCounter critical sections, while the return of buffers
+// to the allocator performs a GlobalCounter synchronize before
+// pushing onto the allocator's list.
+
+void SATBMarkQueueSet::enqueue_completed_buffer(BufferNode* node) {
+  assert(node != NULL, "precondition");
+  // Increment count and update flag appropriately.  Done before
+  // pushing buffer so count is always at least the actual number in
+  // the list, and decrement never underflows.
+  increment_count(&_count_and_process_flag, _process_completed_buffers_threshold);
+  _list.push(*node);
+}
+
+BufferNode* SATBMarkQueueSet::get_completed_buffer() {
+  BufferNode* node;
+  {
+    GlobalCounter::CriticalSection cs(Thread::current());
+    node = _list.pop();
+  }
+  if (node != NULL) {
+    // Got a buffer so decrement count and update flag appropriately.
+    decrement_count(&_count_and_process_flag);
+  }
+  return node;
+}
+
 #ifndef PRODUCT
 // Helpful for debugging
 
@@ -219,7 +305,7 @@
   tty->cr();
   tty->print_cr("SATB BUFFERS [%s]", msg);
 
-  BufferNode* nd = completed_buffers_head();
+  BufferNode* nd = _list.top();
   int i = 0;
   while (nd != NULL) {
     void** buf = BufferNode::make_buffer_from_node(nd);
@@ -248,6 +334,17 @@
 }
 #endif // PRODUCT
 
+void SATBMarkQueueSet::abandon_completed_buffers() {
+  Atomic::store(size_t(0), &_count_and_process_flag);
+  BufferNode* buffers_to_delete = _list.pop_all();
+  while (buffers_to_delete != NULL) {
+    BufferNode* bn = buffers_to_delete;
+    buffers_to_delete = bn->next();
+    bn->set_next(NULL);
+    deallocate_buffer(bn);
+  }
+}
+
 void SATBMarkQueueSet::abandon_partial_marking() {
   assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
   abandon_completed_buffers();
--- old/src/hotspot/share/gc/shared/satbMarkQueue.hpp	2019-06-19 14:01:22.479726243 -0400
+++ new/src/hotspot/share/gc/shared/satbMarkQueue.hpp	2019-06-19 14:01:22.039702662 -0400
@@ -27,6 +27,7 @@
 
 #include "gc/shared/ptrQueue.hpp"
 #include "memory/allocation.hpp"
+#include "memory/padded.hpp"
 
 class Thread;
 class Monitor;
@@ -93,7 +94,17 @@
 };
 
 class SATBMarkQueueSet: public PtrQueueSet {
+
+  DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, 0);
+  PaddedEnd<BufferNode::Stack> _list;
+  volatile size_t _count_and_process_flag;
+  // These are rarely (if ever) changed, so same cache line as count.
+  size_t _process_completed_buffers_threshold;
   size_t _buffer_enqueue_threshold;
+  DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, 3 * sizeof(size_t));
+
+  BufferNode* get_completed_buffer();
+  void abandon_completed_buffers();
 
 #ifdef ASSERT
   void dump_active_states(bool expected_active);
@@ -102,15 +113,14 @@
 
 protected:
   SATBMarkQueueSet();
-  ~SATBMarkQueueSet() {}
+  ~SATBMarkQueueSet();
 
   template<typename Filter>
   void apply_filter(Filter filter, SATBMarkQueue* queue) {
     queue->apply_filter(filter);
   }
 
-  void initialize(Monitor* cbl_mon,
-                  BufferNode::Allocator* allocator,
+  void initialize(BufferNode::Allocator* allocator,
                   size_t process_completed_buffers_threshold,
                   uint buffer_enqueue_threshold_percentage);
 
@@ -132,6 +142,19 @@
   // buffer; the leading entries may be excluded due to filtering.
   bool apply_closure_to_completed_buffer(SATBBufferClosure* cl);
 
+  virtual void enqueue_completed_buffer(BufferNode* node);
+
+  // The number of buffers in the list.  Racy and not updated atomically
+  // with the set of completed buffers.
+  size_t completed_buffers_num() const {
+    return _count_and_process_flag >> 1;
+  }
+
+  // Return true if completed buffers should be processed.
+  bool process_completed_buffers() const {
+    return (_count_and_process_flag & 1) != 0;
+  }
+
 #ifndef PRODUCT
   // Helpful for debugging
   void print_all(const char* msg);
--- old/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp	2019-06-19 14:01:23.611786911 -0400
+++ new/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp	2019-06-19 14:01:23.159762687 -0400
@@ -347,7 +347,6 @@
   // The call below uses stuff (the SATB* things) that are in G1, but probably
   // belong into a shared location.
   ShenandoahBarrierSet::satb_mark_queue_set().initialize(this,
-                                                         SATB_Q_CBL_mon,
                                                          20 /* G1SATBProcessCompletedThreshold */,
                                                          60 /* G1SATBBufferEnqueueingThresholdPercent */);
 
--- old/src/hotspot/share/gc/shenandoah/shenandoahSATBMarkQueueSet.cpp	2019-06-19 14:01:24.807851008 -0400
+++ new/src/hotspot/share/gc/shenandoah/shenandoahSATBMarkQueueSet.cpp	2019-06-19 14:01:24.359826999 -0400
@@ -33,11 +33,9 @@
 {}
 
 void ShenandoahSATBMarkQueueSet::initialize(ShenandoahHeap* const heap,
-                                            Monitor* cbl_mon,
                                             int process_completed_threshold,
                                             uint buffer_enqueue_threshold_percentage) {
-  SATBMarkQueueSet::initialize(cbl_mon,
-                               &_satb_mark_queue_buffer_allocator,
+  SATBMarkQueueSet::initialize(&_satb_mark_queue_buffer_allocator,
                                process_completed_threshold,
                                buffer_enqueue_threshold_percentage);
   _heap = heap;
--- old/src/hotspot/share/gc/shenandoah/shenandoahSATBMarkQueueSet.hpp	2019-06-19 14:01:25.955912533 -0400
+++ new/src/hotspot/share/gc/shenandoah/shenandoahSATBMarkQueueSet.hpp	2019-06-19 14:01:25.503888309 -0400
@@ -44,7 +44,6 @@
   ShenandoahSATBMarkQueueSet();
 
   void initialize(ShenandoahHeap* const heap,
-                  Monitor* cbl_mon,
                   int process_completed_threshold,
                   uint buffer_enqueue_threshold_percentage);
 
--- old/src/hotspot/share/runtime/mutexLocker.cpp	2019-06-19 14:01:27.095973629 -0400
+++ new/src/hotspot/share/runtime/mutexLocker.cpp	2019-06-19 14:01:26.655950048 -0400
@@ -83,7 +83,6 @@
 Monitor* CGC_lock                     = NULL;
 Monitor* STS_lock                     = NULL;
 Monitor* FullGCCount_lock             = NULL;
-Monitor* SATB_Q_CBL_mon               = NULL;
 Monitor* DirtyCardQ_CBL_mon           = NULL;
 Mutex*   Shared_DirtyCardQ_lock       = NULL;
 Mutex*   MarkStackFreeList_lock       = NULL;
@@ -229,8 +228,6 @@
 
   def(FullGCCount_lock             , PaddedMonitor, leaf,        true,  Monitor::_safepoint_check_never);      // in support of ExplicitGCInvokesConcurrent
   if (UseG1GC) {
-    def(SATB_Q_CBL_mon             , PaddedMonitor, access,      true,  Monitor::_safepoint_check_never);
-
     def(DirtyCardQ_CBL_mon         , PaddedMonitor, access,      true,  Monitor::_safepoint_check_never);
     def(Shared_DirtyCardQ_lock     , PaddedMutex  , access + 1,  true,  Monitor::_safepoint_check_never);
 
@@ -247,8 +244,6 @@
     def(MonitoringSupport_lock     , PaddedMutex  , native   ,   true,  Monitor::_safepoint_check_never);      // used for serviceability monitoring support
   }
   if (UseShenandoahGC) {
-    def(SATB_Q_CBL_mon             , PaddedMonitor, access,      true,  Monitor::_safepoint_check_never);
-
     def(StringDedupQueue_lock      , PaddedMonitor, leaf,        true,  Monitor::_safepoint_check_never);
     def(StringDedupTable_lock      , PaddedMutex  , leaf,        true,  Monitor::_safepoint_check_never);
   }
--- old/src/hotspot/share/runtime/mutexLocker.hpp	2019-06-19 14:01:28.272036654 -0400
+++ new/src/hotspot/share/runtime/mutexLocker.hpp	2019-06-19 14:01:27.816012216 -0400
@@ -78,8 +78,6 @@
                                                  // fore- & background GC threads.
 extern Monitor* STS_lock;                        // used for joining/leaving SuspendibleThreadSet.
 extern Monitor* FullGCCount_lock;                // in support of "concurrent" full gc
-extern Monitor* SATB_Q_CBL_mon;                  // Protects SATB Q
-                                                 // completed buffer queue.
 extern Monitor* DirtyCardQ_CBL_mon;              // Protects dirty card Q
                                                  // completed buffer queue.
 extern Mutex*   Shared_DirtyCardQ_lock;          // Lock protecting dirty card