--- old/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp 2019-11-13 17:04:42.410355836 -0800
+++ new/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp 2019-11-13 17:04:42.002358370 -0800
@@ -111,7 +111,6 @@
 
   {
     SuspendibleThreadSetJoiner sts_join;
-    ResourceMark rm;
 
     while (!should_terminate()) {
       if (sts_join.should_yield()) {
--- old/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp 2019-11-13 17:04:43.382349799 -0800
+++ new/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp 2019-11-13 17:04:43.022352035 -0800
@@ -228,90 +228,82 @@
 
 class G1RefineBufferedCards : public StackObj {
   BufferNode* const _node;
-  void** const _node_buffer;
+  CardTable::CardValue** const _node_buffer;
   const size_t _node_buffer_size;
   size_t* _total_refined_cards;
-  CardTable::CardValue** const _cards;
   G1RemSet* const _g1rs;
+  DEBUG_ONLY(KVHashtable<CardTable::CardValue*, HeapWord*, mtGC> _card_top_map;)
 
   static inline int compare_card(const void* p1, const void* p2) {
-    return *(CardTable::CardValue**)p1 - *(CardTable::CardValue**)p2;
+    return *(CardTable::CardValue**)p2 - *(CardTable::CardValue**)p1;
   }
 
-  void sort_cards(size_t n) {
-    qsort(_cards, n, sizeof(CardTable::CardValue*), compare_card);
-  }
-
-  size_t collect_and_clean_cards() {
-    size_t i = _node->index();
-    size_t num_collected = 0;
-    assert(i <= _node_buffer_size, "invariant");
+  // Sorts the cards from start_index to _node_buffer_size in *decreasing*
+  // address order. This order improves performance of processing the cards
+  // later starting from start_index.
+  void sort_cards(size_t start_index) {
+    qsort(_node_buffer + start_index,
+          _node_buffer_size - start_index,
+          sizeof(CardTable::CardValue*),
+          compare_card);
+  }
+
+  // Returns the index to the first clean card in the buffer.
+  size_t clean_cards() {
+    const size_t start = _node->index();
+    assert(start <= _node_buffer_size, "invariant");
+    size_t first_clean = _node_buffer_size;
     // We don't check for SuspendibleThreadSet::should_yield(), because
-    // collecting, cleaning and abandoning the cards is fast.
-    for ( ; i < _node_buffer_size; ++i) {
-      CardTable::CardValue* cp = static_cast<CardTable::CardValue*>(_node_buffer[i]);
-      if (_g1rs->clean_card_before_refine(cp)) {
-        _cards[num_collected] = cp;
-        num_collected++;
-      } else {
-        // Skipped cards are considered as refined.
-        *_total_refined_cards += 1;
+    // cleaning and redirtying the cards is fast.
+    for (int i = _node_buffer_size - 1; i >= static_cast<int>(start); --i) {
+      CardTable::CardValue* cp = _node_buffer[i];
+      if (_g1rs->clean_card_before_refine(cp
+                                          DEBUG_ONLY(COMMA _card_top_map))) {
+        first_clean--;
+        _node_buffer[first_clean] = cp;
       }
     }
-    _node->set_index(i);
-    return num_collected;
+    assert(first_clean >= start && first_clean <= _node_buffer_size, "invariant");
+    // Skipped cards are considered as refined.
+    *_total_refined_cards += first_clean - start;
+    return first_clean;
   }
 
-  bool refine_collected_cards(uint worker_id, size_t num_collected) {
-    while (num_collected > 0) {
+  bool refine_cleaned_cards(uint worker_id, size_t start_index) {
+    for (size_t i = start_index; i < _node_buffer_size; ++i) {
       if (SuspendibleThreadSet::should_yield()) {
-        abandon_cards(num_collected);
+        redirty_unrefined_cards(i);
+        _node->set_index(i);
         return false;
       }
-      num_collected--;
+      _g1rs->refine_card_concurrently(_node_buffer[i], worker_id
+                                      DEBUG_ONLY(COMMA _card_top_map));
       *_total_refined_cards += 1;
-      _g1rs->refine_card_concurrently(_cards[num_collected], worker_id);
     }
+    _node->set_index(_node_buffer_size);
     return true;
   }
 
-  void abandon_cards(size_t num_collected) {
-    assert(num_collected <= _node_buffer_size, "sanity");
-    for (size_t i = 0; i < num_collected; ++i) {
-      *_cards[i] = G1CardTable::dirty_card_val();
+  void redirty_unrefined_cards(size_t start) {
+    for ( ; start < _node_buffer_size; ++start) {
+      *_node_buffer[start] = G1CardTable::dirty_card_val();
     }
-    size_t buffer_index = _node_buffer_size - num_collected;
-    memcpy(_node_buffer + buffer_index, _cards, num_collected * sizeof(CardTable::CardValue*));
-    _node->set_index(buffer_index);
   }
 
 public:
   G1RefineBufferedCards(BufferNode* node,
                         size_t node_buffer_size,
                         size_t* total_refined_cards) :
-    _node(node),
-    _node_buffer(BufferNode::make_buffer_from_node(node)),
-    _node_buffer_size(node_buffer_size),
-    _total_refined_cards(total_refined_cards),
-    _cards(NEW_RESOURCE_ARRAY(CardTable::CardValue*,
-                              node_buffer_size)),
-    _g1rs(G1CollectedHeap::heap()->rem_set()) {}
-
-  ~G1RefineBufferedCards() {
-    FREE_RESOURCE_ARRAY(CardTable::CardValue*, _cards, _node_buffer_size);
-  }
-
-  // Refine the cards in the BufferNode "_node" from its index to buffer_size.
-  // Stops processing if SuspendibleThreadSet::should_yield() is true.
-  // Returns true if the entire buffer was processed, false if there
-  // is a pending yield request. The node's index is updated to exclude
-  // the processed elements, e.g. up to the element before processing
-  // stopped, or one past the last element if the entire buffer was
-  // processed. Increments *_total_refined_cards by the number of cards
-  // processed and removed from the buffer.
+    _node(node),
+    _node_buffer(reinterpret_cast<CardTable::CardValue**>(BufferNode::make_buffer_from_node(node))),
+    _node_buffer_size(node_buffer_size),
+    _total_refined_cards(total_refined_cards),
+    _g1rs(G1CollectedHeap::heap()->rem_set())
+    DEBUG_ONLY(COMMA _card_top_map(node_buffer_size)) {}
+
   bool refine(uint worker_id) {
-    size_t n = collect_and_clean_cards();
+    size_t first_clean_index = clean_cards();
     // This fence serves two purposes. First, the cards must be cleaned
     // before processing the contents. Second, we can't proceed with
     // processing a region until after the read of the region's top in
@@ -321,11 +313,20 @@
     // It's okay that reading region's top and reading region's type were racy
     // wrto each other. We need both set, in any order, to proceed.
     OrderAccess::fence();
-    sort_cards(n);
-    return refine_collected_cards(worker_id, n);
+    sort_cards(first_clean_index);
+    return refine_cleaned_cards(worker_id, first_clean_index);
   }
 };
 
+bool G1DirtyCardQueueSet::refine_buffer(BufferNode* node,
+                                        uint worker_id,
+                                        size_t* total_refined_cards) {
+  G1RefineBufferedCards buffered_cards(node,
+                                       buffer_size(),
+                                       total_refined_cards);
+  return buffered_cards.refine(worker_id);
+}
+
 #ifndef ASSERT
 #define assert_fully_consumed(node, buffer_size)
 #else
@@ -361,11 +362,7 @@
   uint worker_id = _free_ids.claim_par_id(); // temporarily claim an id
   uint counter_index = worker_id - par_ids_start();
   size_t* counter = &_mutator_refined_cards_counters[counter_index];
-  ResourceMark rm;
-  G1RefineBufferedCards buffered_cards(node,
-                                       buffer_size(),
-                                       counter);
-  bool result = buffered_cards.refine(worker_id);
+  bool result = refine_buffer(node, worker_id, counter);
   _free_ids.release_par_id(worker_id); // release the id
 
   if (result) {
@@ -380,20 +377,15 @@
 
   BufferNode* node = get_completed_buffer(stop_at);
   if (node == NULL) {
     return false;
+  } else if (refine_buffer(node, worker_id, total_refined_cards)) {
+    assert_fully_consumed(node, buffer_size());
+    // Done with fully processed buffer.
+    deallocate_buffer(node);
+    return true;
   } else {
-    G1RefineBufferedCards buffered_cards(node,
-                                         buffer_size(),
-                                         total_refined_cards);
-    if (buffered_cards.refine(worker_id)) {
-      assert_fully_consumed(node, buffer_size());
-      // Done with fully processed buffer.
-      deallocate_buffer(node);
-      return true;
-    } else {
-      // Return partially processed buffer to the queue.
-      enqueue_completed_buffer(node);
-      return true;
-    }
+    // Return partially processed buffer to the queue.
+    enqueue_completed_buffer(node);
+    return true;
   }
 }
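
[Editor's note: not part of the patch. The rewritten G1RefineBufferedCards
drops the NEW_RESOURCE_ARRAY side buffer, which is why the ResourceMarks above
are removed: still-dirty cards are compacted in place into the tail of the
node buffer and then sorted by decreasing address. A minimal standalone C++
sketch of that scheme; std::vector, std::sort and all names here
(clean_and_sort, dirty_card_val, ...) are illustrative stand-ins, not the
HotSpot code:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    typedef unsigned char CardValue;            // stand-in for CardTable::CardValue
    static const CardValue dirty_card_val = 1;  // stand-ins for G1CardTable values
    static const CardValue clean_card_val = 0;

    // Walk buf[start, size) backwards, "clean" each dirty card and compact
    // the survivors into the tail (clean_cards() analogue), then sort the
    // tail in decreasing address order (sort_cards() analogue). Returns the
    // index of the first retained card.
    size_t clean_and_sort(std::vector<CardValue*>& buf, size_t start) {
      size_t first_clean = buf.size();
      for (size_t i = buf.size(); i > start; ) {
        CardValue* cp = buf[--i];
        if (*cp == dirty_card_val) {            // clean_card_before_refine() analogue
          *cp = clean_card_val;
          buf[--first_clean] = cp;              // only writes at indices >= i
        }                                       // else: card already refined, skip it
      }
      std::sort(buf.begin() + first_clean, buf.end(),
                [](CardValue* a, CardValue* b) { return a > b; }); // decreasing
      return first_clean;
    }

Because the compaction walks backwards while writing forwards from the tail,
no retained card is overwritten before it is read, which is what lets the
patch reuse the node buffer instead of a resource-area copy.]
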
--- old/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp 2019-11-13 17:04:44.366343688 -0800
+++ new/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp 2019-11-13 17:04:44.034345750 -0800
@@ -78,6 +78,16 @@
 
   void abandon_completed_buffers();
 
+  // Refine the cards in "node" from its index to buffer_size.
+  // Stops processing if SuspendibleThreadSet::should_yield() is true.
+  // Returns true if the entire buffer was processed, false if there
+  // is a pending yield request. The node's index is updated to exclude
+  // the processed elements, e.g. up to the element before processing
+  // stopped, or one past the last element if the entire buffer was
+  // processed. Increments *total_refined_cards by the number of cards
+  // processed and removed from the buffer.
+  bool refine_buffer(BufferNode* node, uint worker_id, size_t* total_refined_cards);
+
   bool mut_process_buffer(BufferNode* node);
 
   // If the queue contains more cards than configured here, the
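
[Editor's note: not part of the patch. The comment block above specifies the
refine_buffer() contract; below is a self-contained C++ skeleton of that
contract, with invented stand-ins (NodeSketch, yield_requested, refine_one) in
place of the real BufferNode and SuspendibleThreadSet:

    #include <cstddef>

    struct NodeSketch {
      size_t index;   // first unprocessed element
      size_t size;
    };

    inline bool yield_requested() { return false; }  // SuspendibleThreadSet stand-in
    inline void refine_one(size_t) {}                // per-card work stand-in

    // Returns true if the buffer was fully consumed, false on a pending
    // yield; in both cases node->index marks where processing would resume.
    bool refine_buffer_sketch(NodeSketch* node) {
      for (size_t i = node->index; i < node->size; ++i) {
        if (yield_requested()) {
          node->index = i;      // partially processed; caller re-enqueues
          return false;
        }
        refine_one(i);
      }
      node->index = node->size; // fully consumed
      return true;
    }

The two outcomes map onto the callers in g1DirtyCardQueue.cpp above: a true
result leads to deallocate_buffer(), a false result to
enqueue_completed_buffer().]
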
--- old/src/hotspot/share/gc/g1/g1RemSet.cpp 2019-11-13 17:04:45.430337080 -0800
+++ new/src/hotspot/share/gc/g1/g1RemSet.cpp 2019-11-13 17:04:45.058339390 -0800
@@ -1261,7 +1261,8 @@
 #endif
 }
 
-bool G1RemSet::clean_card_before_refine(CardValue*& card_ptr) {
+bool G1RemSet::clean_card_before_refine(CardValue*& card_ptr
+                                        DEBUG_ONLY(COMMA KVHashtable<CardValue*, HeapWord*, mtGC>& top_map)) {
   assert(!_g1h->is_gc_active(), "Only call concurrently");
 
   // Find the start address represented by the card.
@@ -1277,6 +1278,8 @@
   check_card_ptr(card_ptr, _ct);
 
   // If the card is no longer dirty, nothing to do.
+  // We cannot load the card value before the "r == NULL" check, because G1
+  // could uncommit parts of the card table covering uncommitted regions.
   if (*card_ptr != G1CardTable::dirty_card_val()) {
     return false;
   }
@@ -1359,11 +1362,21 @@
   // as iteration failure.
   *const_cast<CardValue*>(card_ptr) = G1CardTable::clean_card_val();
 
+#ifdef ASSERT
+  HeapWord** existing_top = top_map.lookup(card_ptr);
+  if (existing_top != NULL) {
+    assert(scan_limit == *existing_top, "top must be stable");
+  } else {
+    top_map.add(card_ptr, scan_limit);
+  }
+#endif
+
   return true;
 }
 
 void G1RemSet::refine_card_concurrently(CardValue* const card_ptr,
-                                        const uint worker_id) {
+                                        const uint worker_id
+                                        DEBUG_ONLY(COMMA KVHashtable<CardValue*, HeapWord*, mtGC>& top_map)) {
   assert(!_g1h->is_gc_active(), "Only call concurrently");
 
   check_card_ptr(card_ptr, _ct);
@@ -1371,9 +1384,11 @@
   HeapWord* start = _ct->addr_for(card_ptr);
   // And find the region containing it.
   HeapRegion* r = _g1h->heap_region_containing(start);
+  // This reload of the top is safe even though it happens after the full
+  // fence, because top is stable for unfiltered humongous regions, so it must
+  // return the same value as the previous load when cleaning the card.
   HeapWord* scan_limit = r->top();
-  // top() can only increase since last read of top() before cleaning the card.
-  assert(scan_limit > start, "sanity");
+  assert(scan_limit == *top_map.lookup(card_ptr), "top must be stable");
 
   // Don't use addr_for(card_ptr + 1) which can ask for
   // a card beyond the heap.
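
[Editor's note: not part of the patch. The #ifdef ASSERT block in
clean_card_before_refine() and the new assert in refine_card_concurrently()
together check that a card's region top is identical at clean time and at
refine time. A sketch of that debug-only bookkeeping, using std::map and
invented names purely as stand-ins for HotSpot's KVHashtable:

    #include <cassert>
    #include <map>

    typedef unsigned char CardValue;  // stand-ins for the HotSpot types
    typedef char*         HeapWordPtr;

    class CardTopMapSketch {
      std::map<const CardValue*, HeapWordPtr> _map;
    public:
      // Clean step: record the observed top, or assert it has not moved.
      void record(const CardValue* card, HeapWordPtr top) {
        std::map<const CardValue*, HeapWordPtr>::iterator it = _map.find(card);
        if (it == _map.end()) {
          _map[card] = top;
        } else {
          assert(it->second == top && "top must be stable");
        }
      }
      // Refine step: the reloaded top must match the recorded one.
      void verify(const CardValue* card, HeapWordPtr top) const {
        std::map<const CardValue*, HeapWordPtr>::const_iterator it = _map.find(card);
        assert(it != _map.end() && it->second == top);
      }
    };

The real map lives only in debug builds (see the DEBUG_ONLY member added to
G1RefineBufferedCards), so product builds pay nothing for the check.]
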
--- old/src/hotspot/share/gc/g1/g1RemSet.hpp 2019-11-13 17:04:46.418330944 -0800
+++ new/src/hotspot/share/gc/g1/g1RemSet.hpp 2019-11-13 17:04:46.090332981 -0800
@@ -118,12 +118,14 @@
   // Cleans the card at "card_ptr" before refinement, returns true iff the card
   // needs later refinement. Note that card_ptr could be updated to a different
   // card due to use of hot card cache.
-  bool clean_card_before_refine(CardValue*& card_ptr);
+  bool clean_card_before_refine(CardValue*& card_ptr
+                                DEBUG_ONLY(COMMA KVHashtable<CardValue*, HeapWord*, mtGC>& top_map));
 
   // Refine the region corresponding to "card_ptr". Must be called after
   // being filtered by clean_card_before_refine(), and after proper
   // fence/synchronization.
   void refine_card_concurrently(CardValue* const card_ptr,
-                                const uint worker_id);
+                                const uint worker_id
+                                DEBUG_ONLY(COMMA KVHashtable<CardValue*, HeapWord*, mtGC>& top_map));
 
   // Print accumulated summary info from the start of the VM.
   void print_summary_info();
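
[Editor's note: not part of the patch. The DEBUG_ONLY(COMMA ...) idiom in the
signatures above threads a parameter through a function only in debug builds,
without an #ifdef at every declaration and call site. HotSpot defines both
macros in utilities/macros.hpp; simplified equivalents are shown here only to
make the pattern readable:

    #ifdef ASSERT
    #define DEBUG_ONLY(code) code
    #else
    #define DEBUG_ONLY(code)
    #endif
    #define COMMA ,

    struct TopMap;  // hypothetical debug-only state

    // Debug builds see refine(int, TopMap&); product builds see refine(int),
    // because the trailing parameter and its comma expand to nothing.
    void refine(int card_index
                DEBUG_ONLY(COMMA TopMap& top_map));

COMMA is needed because a bare comma written directly inside the macro
argument list would be parsed as an argument separator.]
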