/*
 * Copyright (c) 2001, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP
#define SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP

#include "gc/g1/g1BufferNodeList.hpp"
#include "gc/g1/g1FreeIdSet.hpp"
#include "gc/shared/ptrQueue.hpp"
#include "memory/allocation.hpp"
#include "memory/padded.hpp"

class G1ConcurrentRefineThread;
class G1DirtyCardQueueSet;
class G1RedirtyCardsQueueSet;
class Thread;
// A ptrQueue whose elements are pointers to card table entries (dirty cards).
class G1DirtyCardQueue: public PtrQueue {
protected:
  virtual void handle_completed_buffer();

public:
  G1DirtyCardQueue(G1DirtyCardQueueSet* qset);

  // Flush before destroying; queue may be used to capture pending work while
  // doing something else, with auto-flush on completion.
  ~G1DirtyCardQueue();

  // Process queue entries and release resources.
  void flush() { flush_impl(); }

  inline G1DirtyCardQueueSet* dirty_card_qset() const;

  // Compiler support.
  static ByteSize byte_offset_of_index() {
    return PtrQueue::byte_offset_of_index<G1DirtyCardQueue>();
  }
  using PtrQueue::byte_width_of_index;

  static ByteSize byte_offset_of_buf() {
    return PtrQueue::byte_offset_of_buf<G1DirtyCardQueue>();
  }
  using PtrQueue::byte_width_of_buf;
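
  // For example (an illustrative sketch; the composition below is assumed to
  // live in thread-local-data support code outside this file, so the helper
  // names are assumptions, not part of this header):
  //
  //   static ByteSize dirty_card_queue_index_offset() {
  //     return dirty_card_queue_offset() + G1DirtyCardQueue::byte_offset_of_index();
  //   }
  //
  // JIT-generated barrier code can then address the queue's index and buffer
  // fields at fixed offsets from the current thread.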

};
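
// A minimal usage sketch (illustrative only; how card pointers actually get
// written into the queue is PtrQueue / write-barrier machinery not shown
// here):
//
//   void capture_pending_cards(G1DirtyCardQueueSet* qset) {
//     G1DirtyCardQueue queue(qset);  // capture pending work in "queue"
//     // ... do something else; dirty card pointers accumulate ...
//   }                                // destructor flushes remaining entries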

class G1DirtyCardQueueSet: public PtrQueueSet {
  // Head and tail of a list of BufferNodes, linked through their next()
  // fields.  Similar to G1BufferNodeList, but without the _entry_count.
  struct HeadTail {
    BufferNode* _head;
    BufferNode* _tail;
    HeadTail() : _head(NULL), _tail(NULL) {}
    HeadTail(BufferNode* head, BufferNode* tail) : _head(head), _tail(tail) {}
  };

  // A lock-free FIFO of BufferNodes, linked through their next() fields.
  // This class has a restriction that pop() cannot return the last buffer
  // in the queue, or what was the last buffer for a concurrent push/append
  // operation.  It is expected that there will be a later push/append that
  // will make that buffer available to a future pop(), or there will
  // eventually be a complete transfer via take_all().
  class Queue {
    BufferNode* volatile _head;
    DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(BufferNode*));
    BufferNode* volatile _tail;
    DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(BufferNode*));

    NONCOPYABLE(Queue);

  public:
    Queue() : _head(NULL), _tail(NULL) {}
    DEBUG_ONLY(~Queue();)

    // Return the first buffer in the queue.
    // Thread-safe, but the result may change immediately.
    BufferNode* top() const;

    // Thread-safe add the buffer to the end of the queue.
    void push(BufferNode& node) { append(node, node); }

    // Thread-safe add the buffers from first to last to the end of the queue.
    void append(BufferNode& first, BufferNode& last);

    // Thread-safe attempt to remove and return the first buffer in the queue.
    // Returns NULL if the queue is empty, or if only one buffer is in the
    // queue (see the class restriction described above).
    // Uses GlobalCounter critical sections to address the ABA problem; this
    // works with the buffer allocator's use of GlobalCounter synchronization.
    BufferNode* pop();

    // Take all the buffers from the queue, leaving the queue empty.
    // Not thread-safe.
    HeadTail take_all();
  };
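
  // An illustrative sketch of the pop() restriction (single-threaded here;
  // "a" and "b" stand for allocator-managed BufferNodes):
  //
  //   Queue q;
  //   q.push(a);                    // q: a
  //   assert(q.pop() == NULL);      // a is the last buffer; pop() declines
  //   q.push(b);                    // q: a -> b
  //   assert(q.pop() == &a);        // a now has a successor, so pop succeeds
  //   HeadTail rest = q.take_all(); // drains b once concurrent use is done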

  // Concurrent refinement may stop processing in the middle of a buffer if
  // there is a pending safepoint, to avoid long delays to the safepoint.  A
  // partially processed buffer needs to be recorded for processing by the
  // safepoint if it's a GC safepoint; otherwise it needs to be recorded for
  // further concurrent refinement work after the safepoint.  But if the
  // buffer was obtained from the completed buffer queue then it can't simply
  // be added back to the queue, as that would introduce a new source of ABA
  // for the queue.
  //
  // The PausedBuffers object is used to record such buffers for the upcoming
  // safepoint, and provides access to the buffers recorded for previous
  // safepoints.  Before obtaining a buffer from the completed buffers queue,
  // we first transfer any buffers from previous safepoints to the queue.
  // This is ABA-safe because threads cannot be in the midst of a queue pop
  // across a safepoint.
  //
  // The paused buffers are conceptually an extension of the completed buffers
  // queue, and operations which need to deal with all of the queued buffers
  // (such as concatenate_logs) also need to deal with any paused buffers.  In
  // general, if a safepoint performs a GC then the paused buffers will be
  // processed as part of it, and there won't be any paused buffers after a
  // GC safepoint.
  class PausedBuffers {
    class PausedList : public CHeapObj<mtGC> {
      BufferNode* volatile _head;
      BufferNode* _tail;
      size_t _safepoint_id;

      NONCOPYABLE(PausedList);

    public:
      PausedList();
      DEBUG_ONLY(~PausedList();)

      // Return true if this list was created to hold buffers for the
      // next safepoint.
      // precondition: not at safepoint.
      bool is_next() const;

      // Thread-safe add the buffer to the list.
      // precondition: not at safepoint.
      // precondition: is_next().
      void add(BufferNode* node);

      // Take all the buffers from the list.  Not thread-safe.
      HeadTail take();
    };

    // The most recently created list, which might be for either the next or
    // a previous safepoint, or might be NULL if the next list hasn't been
    // created yet.  We only need one list because of the requirement that
    // threads calling add() must first ensure there are no paused buffers
    // from a previous safepoint.  There might be many list instances existing
    // at the same time though; there can be many threads competing to create
    // and install the next list, and meanwhile there can be a thread dealing
    // with the previous list.
    PausedList* volatile _plist;
    DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(PausedList*));

    NONCOPYABLE(PausedBuffers);

  public:
    PausedBuffers();
    DEBUG_ONLY(~PausedBuffers();)

    // Test whether there are any paused lists.
    // Thread-safe, but the answer may change immediately.
    bool is_empty() const;

    // Thread-safe add the buffer to the paused list for the next safepoint.
    // precondition: not at safepoint.
    // precondition: does not have paused buffers from a previous safepoint.
    void add(BufferNode* node);

    // Thread-safe take all paused buffers for previous safepoints.
    // precondition: not at safepoint.
    HeadTail take_previous();

    // Take all the paused buffers.
    // precondition: at safepoint.
    HeadTail take_all();
  };
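
  // An illustrative lifecycle sketch (the calls below are made by the
  // enqueue/transfer helpers declared later in this class):
  //
  //   _paused.add(node);                 // pre-safepoint: park partial buffer
  //   // ... safepoint ...
  //   HeadTail all = _paused.take_all(); // GC safepoint: process with the GC
  //   // after a non-GC safepoint, before popping completed buffers:
  //   HeadTail prev = _paused.take_previous();  // re-queue for refinement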

  // The primary refinement thread, for activation when the processing
  // threshold is reached.  NULL if there aren't any refinement threads.
  G1ConcurrentRefineThread* _primary_refinement_thread;
  DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(G1ConcurrentRefineThread*));
  // Upper bound on the number of cards in the completed and paused buffers.
  volatile size_t _num_cards;
  DEFINE_PAD_MINUS_SIZE(2, DEFAULT_CACHE_LINE_SIZE, sizeof(size_t));
  // Buffers ready for refinement.
  Queue _completed;           // Has inner padding, including trailer.
  // Buffers for which refinement is temporarily paused.
  PausedBuffers _paused;      // Has inner padding, including trailer.

  G1FreeIdSet _free_ids;

  // Activation threshold for the primary refinement thread.
  size_t _process_cards_threshold;

  // If the queue contains more cards than configured here, the
  // mutator must start doing some of the concurrent refinement work.
  size_t _max_cards;
  size_t _max_cards_padding;
  static const size_t MaxCardsUnlimited = SIZE_MAX;

  // Array of cumulative dirty cards refined by mutator threads.
  // Array has an entry per id in _free_ids.
  size_t* _mutator_refined_cards_counters;

  // Verify _num_cards == sum of cards in the completed queue.
  void verify_num_cards() const NOT_DEBUG_RETURN;

  // Thread-safe add a buffer to the paused list for the next safepoint.
  // precondition: not at safepoint.
  // precondition: does not have paused buffers from a previous safepoint.
  void record_paused_buffer(BufferNode* node);
  void enqueue_paused_buffers_aux(const HeadTail& paused);
  // Thread-safe transfer paused buffers for previous safepoints to the queue.
  // precondition: not at safepoint.
  void enqueue_previous_paused_buffers();
  // Transfer all paused buffers to the queue.
  // precondition: at safepoint.
  void enqueue_all_paused_buffers();

  void abandon_completed_buffers();

  // Refine the cards in "node" from its index to buffer_size.
  // Stops processing if SuspendibleThreadSet::should_yield() is true.
  // Returns true if the entire buffer was processed, false if there
  // is a pending yield request.  The node's index is updated to exclude
  // the processed elements, e.g. up to the element before processing
  // stopped, or one past the last element if the entire buffer was
  // processed.  Increments *total_refined_cards by the number of cards
  // processed and removed from the buffer.
  bool refine_buffer(BufferNode* node, uint worker_id, size_t* total_refined_cards);
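
  // For example (illustrative numbers): with a buffer capacity of 256 and a
  // node index of 100, entries [100, 256) are pending.  Refining 50 cards and
  // then yielding leaves the index at 150, adding 50 to *total_refined_cards;
  // refining to completion leaves the index at 256 and returns true.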

  bool mut_process_buffer(BufferNode* node);

  // If the number of completed buffers is > stop_at, then remove and
  // return a completed buffer from the list.  Otherwise, return NULL.
  BufferNode* get_completed_buffer(size_t stop_at = 0);

public:
  G1DirtyCardQueueSet(BufferNode::Allocator* allocator);
  ~G1DirtyCardQueueSet();

  void set_primary_refinement_thread(G1ConcurrentRefineThread* thread) {
    _primary_refinement_thread = thread;
  }

  // The number of parallel ids that can be claimed to allow collector or
  // mutator threads to do card-processing work.
  static uint num_par_ids();

  static void handle_zero_index_for_thread(Thread* t);

  // Either process the entire buffer and return true, or enqueue the
  // buffer and return false.  If the buffer is completely processed,
  // it can be reused in place.
  bool process_or_enqueue_completed_buffer(BufferNode* node);
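
  // A simplified sketch of the intended policy (illustrative only; the real
  // implementation also restricts mutator processing to Java threads and
  // guards against overflow of the padded limit):
  //
  //   if (num_cards() > max_cards() + max_cards_padding() &&
  //       mut_process_buffer(node)) {
  //     return true;                    // mutator refined the whole buffer
  //   }
  //   enqueue_completed_buffer(node);   // otherwise defer to refinement
  //   return false;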

  virtual void enqueue_completed_buffer(BufferNode* node);

  // Upper bound on the number of cards currently in this queue set.
  // Read without synchronization.  The value may be high because there
  // is a concurrent modification of the set of buffers.
  size_t num_cards() const { return _num_cards; }

  // Get/Set the number of cards that triggers log processing.
  // Log processing should be done when the number of cards exceeds the
  // threshold.
  void set_process_cards_threshold(size_t sz) {
    _process_cards_threshold = sz;
  }
  size_t process_cards_threshold() const {
    return _process_cards_threshold;
  }
  static const size_t ProcessCardsThresholdNever = SIZE_MAX;

  // Notify the consumer if the number of buffers crossed the threshold.
  void notify_if_necessary();
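
  // Conceptually (an assumed sketch; the real check lives in the .cpp file
  // and must also tolerate there being no refinement threads):
  //
  //   if (num_cards() > process_cards_threshold()) {
  //     // activate _primary_refinement_thread
  //   }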

  void merge_bufferlists(G1RedirtyCardsQueueSet* src);

  G1BufferNodeList take_all_completed_buffers();

  // If there are more than stop_at cards in the completed buffers, pop
  // a buffer, refine its contents, and return true.  Otherwise return
  // false.
  //
  // Stops processing a buffer if SuspendibleThreadSet::should_yield(),
  // recording the incompletely processed buffer for later processing of
  // the remainder.
  //
  // Increments *total_refined_cards by the number of cards processed and
  // removed from the buffer.
  bool refine_completed_buffer_concurrently(uint worker_id,
                                            size_t stop_at,
                                            size_t* total_refined_cards);
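
  // Typical use from a refinement thread (illustrative sketch; "goal" and
  // should_stop() are hypothetical stand-ins for the caller's policy):
  //
  //   size_t refined = 0;
  //   while (!should_stop() &&
  //          refine_completed_buffer_concurrently(worker_id, goal, &refined)) {
  //     // each true return refined (part of) one buffer; try for another
  //   }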

  // If a full collection is happening, reset partial logs, and release
  // completed ones: the full collection will make them all irrelevant.
  void abandon_logs();

  // If any threads have partial logs, add them to the global list of logs.
  void concatenate_logs();

  void set_max_cards(size_t m) {
    _max_cards = m;
  }
  size_t max_cards() const {
    return _max_cards;
  }

  void set_max_cards_padding(size_t padding) {
    _max_cards_padding = padding;
  }
  size_t max_cards_padding() const {
    return _max_cards_padding;
  }

  // Total dirty cards refined by mutator threads.
  size_t total_mutator_refined_cards() const;
};

inline G1DirtyCardQueueSet* G1DirtyCardQueue::dirty_card_qset() const {
  return static_cast<G1DirtyCardQueueSet*>(qset());
}

#endif // SHARE_GC_G1_G1DIRTYCARDQUEUE_HPP