src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp
rev 56899 : 8087198: G1 card refinement: batching, sorting
Reviewed-by: tschatzl
@@ -224,25 +224,109 @@
_completed_buffers_tail = NULL;
_num_cards = 0;
return result;
}
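+// Helper for refining the cards of a single buffer: cleans the dirty
+// cards, sorts them by decreasing address, then refines them, yielding
+// between refinements when the suspendible thread set requests it.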
+class G1RefineBufferedCards : public StackObj {
+ BufferNode* const _node;
+ CardTable::CardValue** const _node_buffer;
+ const size_t _node_buffer_size;
+ size_t* _total_refined_cards;
+ G1RemSet* const _g1rs;
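+  // Debug-only table passed to both clean_card_before_refine() and
+  // refine_card_concurrently() to cross-check per-card information
+  // between the cleaning and refinement phases.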
+ DEBUG_ONLY(KVHashtable<CardTable::CardValue* COMMA HeapWord* COMMA mtGC> _card_top_map;)
+
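+  // qsort comparator: orders the cards by decreasing address.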
+  static inline int compare_card(const void* p1,
+                                 const void* p2) {
+    // Compare the card addresses directly instead of returning the raw
+    // pointer difference, which could overflow an int for a huge card table.
+    CardTable::CardValue* c1 = *(CardTable::CardValue**)p1;
+    CardTable::CardValue* c2 = *(CardTable::CardValue**)p2;
+    return (c2 > c1) ? 1 : ((c2 < c1) ? -1 : 0);
+  }
+
+  // Sorts the cards from start_index to _node_buffer_size in *decreasing*
+  // address order. This order improves the performance of the subsequent
+  // processing of the cards, which starts at start_index.
+ void sort_cards(size_t start_index) {
+ qsort(_node_buffer + start_index,
+ _node_buffer_size - start_index,
+ sizeof(CardTable::CardValue*),
+ compare_card);
+ }
+
+  // Returns the index of the first clean card in the buffer.
+ size_t clean_cards() {
+ const size_t start = _node->index();
+ assert(start <= _node_buffer_size, "invariant");
+ size_t first_clean = _node_buffer_size;
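+    // Cards that survive cleaning are compacted toward the end of the
+    // buffer, so that [first_clean, _node_buffer_size) holds the cards
+    // that still need refinement.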
+ // We don't check for SuspendibleThreadSet::should_yield(), because
+ // cleaning and redirtying the cards is fast.
+    for (int i = static_cast<int>(_node_buffer_size) - 1; i >= static_cast<int>(start); --i) {
+ CardTable::CardValue* cp = _node_buffer[i];
+ if (_g1rs->clean_card_before_refine(cp
+ DEBUG_ONLY(COMMA _card_top_map))) {
+ first_clean--;
+ _node_buffer[first_clean] = cp;
+ }
+ }
+ assert(first_clean >= start && first_clean <= _node_buffer_size, "invariant");
+    // Skipped cards are considered refined.
+ *_total_refined_cards += first_clean - start;
+ return first_clean;
+ }
+
+ bool refine_cleaned_cards(uint worker_id, size_t start_index) {
+ for (size_t i = start_index; i < _node_buffer_size; ++i) {
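+      // A yield has been requested (e.g. for a safepoint): redirty the
+      // remaining cards and record the resume index before giving up.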
+ if (SuspendibleThreadSet::should_yield()) {
+ redirty_unrefined_cards(i);
+ _node->set_index(i);
+ return false;
+ }
+ _g1rs->refine_card_concurrently(_node_buffer[i], worker_id
+ DEBUG_ONLY(COMMA _card_top_map));
+ *_total_refined_cards += 1;
+ }
+ _node->set_index(_node_buffer_size);
+ return true;
+ }
+
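+  // Redirties the cards from start to the end of the buffer, so that a
+  // future refinement pass will process them again.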
+ void redirty_unrefined_cards(size_t start) {
+ for ( ; start < _node_buffer_size; ++start) {
+ *_node_buffer[start] = G1CardTable::dirty_card_val();
+ }
+ }
+
+public:
+ G1RefineBufferedCards(BufferNode* node,
+ size_t node_buffer_size,
+ size_t* total_refined_cards) :
+ _node(node),
+ _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
+ _node_buffer_size(node_buffer_size),
+ _total_refined_cards(total_refined_cards),
+ _g1rs(G1CollectedHeap::heap()->rem_set())
+ DEBUG_ONLY(COMMA _card_top_map(node_buffer_size)) {}
+
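+  // Cleans, sorts and refines the buffer's cards. Returns true if the
+  // whole buffer was refined, false if interrupted by a yield request.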
+ bool refine(uint worker_id) {
+ size_t first_clean_index = clean_cards();
+    // This fence serves two purposes. First, the cards must be cleaned
+    // before processing the contents. Second, we can't proceed with
+    // processing a region until after the read of the region's top in
+    // clean_cards(), for synchronization with possibly concurrent
+    // humongous object allocation (see the comment at the StoreStore fence
+    // before setting the regions' tops in the humongous allocation path).
+    // It's okay that the reads of a region's top and of its type race with
+    // each other; we need both to be set, in either order, before proceeding.
+ OrderAccess::fence();
+ sort_cards(first_clean_index);
+ return refine_cleaned_cards(worker_id, first_clean_index);
+ }
+};
+
bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
uint worker_id,
size_t* total_refined_cards) {
- G1RemSet* rem_set = G1CollectedHeap::heap()->rem_set();
- size_t size = buffer_size();
- void** buffer = BufferNode::make_buffer_from_node(node);
- size_t i = node->index();
- assert(i <= size, "invariant");
- for ( ; (i < size) && !SuspendibleThreadSet::should_yield(); ++i) {
- CardTable::CardValue* cp = static_cast<CardTable::CardValue*>(buffer[i]);
- rem_set->refine_card_concurrently(cp, worker_id);
- }
- *total_refined_cards += (i - node->index());
- node->set_index(i);
- return i == size;
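+  // Delegate the whole buffer to G1RefineBufferedCards, which batches
+  // cleaning, sorting and refinement of the cards.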
+ G1RefineBufferedCards buffered_cards(node,
+ buffer_size(),
+ total_refined_cards);
+ return buffered_cards.refine(worker_id);
}
#ifndef ASSERT
#define assert_fully_consumed(node, buffer_size)
#else