< prev index next >

src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp

Print this page
rev 57544 : 8236485: Work-in-progress: Epoch synchronization protocol for G1 concurrent refinement
Reviewed-by:

*** 25,34 **** --- 25,35 ---- #include "precompiled.hpp" #include "gc/g1/g1BufferNodeList.hpp" #include "gc/g1/g1CardTableEntryClosure.hpp" #include "gc/g1/g1CollectedHeap.inline.hpp" #include "gc/g1/g1DirtyCardQueue.hpp" + #include "gc/g1/g1EpochSynchronizer.hpp" #include "gc/g1/g1FreeIdSet.hpp" #include "gc/g1/g1RedirtyCardsQueue.hpp" #include "gc/g1/g1RemSet.hpp" #include "gc/g1/g1ThreadLocalData.hpp" #include "gc/g1/heapRegionRemSet.hpp"
*** 233,242 **** --- 234,246 ---- CardTable::CardValue** const _node_buffer; const size_t _node_buffer_size; const uint _worker_id; size_t* _total_refined_cards; G1RemSet* const _g1rs; + // TODO: Remove _dcqs when G1TestEpochSyncInConcRefinement executes refine_cleaned_cards() + // after the try_synchronize() loop. + G1DirtyCardQueueSet* const _dcqs; static inline int compare_card(const CardTable::CardValue* p1, const CardTable::CardValue* p2) { return p2 - p1; }
*** 311,327 **** public: G1RefineBufferedCards(BufferNode* node, size_t node_buffer_size, uint worker_id, ! size_t* total_refined_cards) : _node(node), _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))), _node_buffer_size(node_buffer_size), _worker_id(worker_id), _total_refined_cards(total_refined_cards), ! _g1rs(G1CollectedHeap::heap()->rem_set()) {} bool refine() { size_t first_clean_index = clean_cards(); if (first_clean_index == _node_buffer_size) { _node->set_index(first_clean_index); --- 315,333 ---- public: G1RefineBufferedCards(BufferNode* node, size_t node_buffer_size, uint worker_id, ! size_t* total_refined_cards, ! G1DirtyCardQueueSet* dcqs) : _node(node), _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))), _node_buffer_size(node_buffer_size), _worker_id(worker_id), _total_refined_cards(total_refined_cards), ! _g1rs(G1CollectedHeap::heap()->rem_set()), ! _dcqs(dcqs) {} bool refine() { size_t first_clean_index = clean_cards(); if (first_clean_index == _node_buffer_size) { _node->set_index(first_clean_index);
*** 334,355 **** // humongous object allocation (see comment at the StoreStore fence before // setting the regions' tops in humongous allocation path). // It's okay that reading region's top and reading region's type were racy // wrto each other. We need both set, in any order, to proceed. OrderAccess::fence(); sort_cards(first_clean_index); return refine_cleaned_cards(first_clean_index); } }; bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node, uint worker_id, size_t* total_refined_cards) { G1RefineBufferedCards buffered_cards(node, buffer_size(), worker_id, ! total_refined_cards); return buffered_cards.refine(); } #ifndef ASSERT #define assert_fully_consumed(node, buffer_size) --- 340,406 ---- // humongous object allocation (see comment at the StoreStore fence before // setting the regions' tops in humongous allocation path). // It's okay that reading region's top and reading region's type were racy // wrto each other. We need both set, in any order, to proceed. OrderAccess::fence(); + + if (G1TestEpochSyncInConcRefinement && !Thread::current()->is_Java_thread()) { + // TODO: Asynchronously execute epoch synchronization for multiple + // node buffers. This could be done by calling start_synchronizing() for + // multiple node buffers, and associating each required frontier with the + // buffer. Then execute sort_cards() for these buffers. Finally call + // try_synchronize() for each of these node buffers with the recorded + // required frontier. + G1EpochSynchronizer syncer; + ResourceMark rm; // For retrieving thread names in log messages. + syncer.start_synchronizing(); + jlong start_counter = os::elapsed_counter(); + + // Spend some time doing useful work instead of blindly waiting. + sort_cards(first_clean_index); + + // TODO: refine_cleaned_cards() should be called AFTER the try_synchronize() + // loop below. However, we should redirty unrefined cards and skip refinement + // work if try_synchronize() spans across a safepoint. 
+ // See the TODO comment in try_synchronize(). + bool result = refine_cleaned_cards(first_clean_index); + if (!result) { + // We need to enqueue partially processed cards before the try_synchronize() loop, + // which could span across a safepoint. + _dcqs->enqueue_completed_buffer(_node); + } + + const jlong increase_urgency_thres_ns = 3 * NANOSECS_PER_MILLISEC; // 3 millis + const char* thread_name = Thread::current()->name(); + bool synced = false; + bool high_urgency = false; + // The first call to try_synchronize() does not need high urgency. + while (!syncer.try_synchronize()) { + jlong elapsed_counter = os::elapsed_counter() - start_counter; + if (!high_urgency && elapsed_counter > increase_urgency_thres_ns) { + high_urgency = true; + syncer.increase_urgency(); + } + }; + log_debug(gc, refine, handshake)("%s: Epoch synced after %.3f ms", + thread_name, TimeHelper::counter_to_millis(os::elapsed_counter() - start_counter)); + return result; + } else { sort_cards(first_clean_index); return refine_cleaned_cards(first_clean_index); } + } }; bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node, uint worker_id, size_t* total_refined_cards) { G1RefineBufferedCards buffered_cards(node, buffer_size(), worker_id, ! total_refined_cards, ! this); return buffered_cards.refine(); } #ifndef ASSERT #define assert_fully_consumed(node, buffer_size)
*** 405,416 **** --- 456,469 ---- assert_fully_consumed(node, buffer_size()); // Done with fully processed buffer. deallocate_buffer(node); return true; } else { + if (!G1TestEpochSyncInConcRefinement) { // Return partially processed buffer to the queue. enqueue_completed_buffer(node); + } return true; } } void G1DirtyCardQueueSet::abandon_logs() {
< prev index next >