< prev index next >

src/hotspot/share/gc/g1/g1RemSet.cpp

Print this page
rev 55404 : 8226197: Reducing G1's CPU cost with simplified write post-barrier and disabling concurrent refinement
Summary: A prototype to add a mode for G1 to use a simplified write post-barrier. Guarded by new flag G1FastWriteBarrier.

*** 37,46 **** --- 37,47 ---- #include "gc/g1/g1RemSet.hpp" #include "gc/g1/g1SharedDirtyCardQueue.hpp" #include "gc/g1/heapRegion.inline.hpp" #include "gc/g1/heapRegionManager.inline.hpp" #include "gc/g1/heapRegionRemSet.hpp" + #include "gc/shared/cardTableRS.hpp" #include "gc/shared/gcTraceTime.inline.hpp" #include "gc/shared/suspendibleThreadSet.hpp" #include "jfr/jfrEvents.hpp" #include "memory/iterator.hpp" #include "memory/resourceArea.hpp"
*** 404,414 **** if (card_start >= top) { continue; } // If the card is dirty, then G1 will scan it during Update RS. ! if (_ct->is_card_claimed(card_index) || _ct->is_card_dirty(card_index)) { continue; } // We claim lazily (so races are possible but they're benign), which reduces the // number of duplicate scans (the rsets of the regions in the cset can intersect). --- 405,420 ---- if (card_start >= top) { continue; } // If the card is dirty, then G1 will scan it during Update RS. ! // Ensure it loads the card value only once, so it is atomic with ! // G1ProcessCardTableRegionClosure::do_heap_region() claiming a dirty card. ! CardTable::CardValue card_val = *(_ct->byte_for_index(card_index)); ! // if (_ct->is_card_claimed(card_index) || _ct->is_card_dirty(card_index)) { ! if ((card_val & (G1CardTable::clean_card_mask_val() | G1CardTable::claimed_card_val())) == G1CardTable::claimed_card_val() || ! card_val == G1CardTable::dirty_card_val()) { continue; } // We claim lazily (so races are possible but they're benign), which reduces the // number of duplicate scans (the rsets of the regions in the cset can intersect).
*** 587,598 **** --- 593,722 ---- p->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, refine_card_cl.cards_scanned(), G1GCPhaseTimes::UpdateRSScannedCards); p->record_thread_work_item(G1GCPhaseTimes::UpdateRS, worker_i, refine_card_cl.cards_skipped(), G1GCPhaseTimes::UpdateRSSkippedCards); } } + /* Heap-region closure that walks one region's card table entries from the card covering scan_top - 1 down to the card covering the region's bottom, marks every dirty card as dirty|claimed, and hands each contiguous run of dirty cards to _scan_card_cl. */ class G1ProcessCardTableRegionClosure : public HeapRegionClosure { + G1CollectedHeap* _g1h; + G1CardTable *_ct; + G1ScanCardClosure* _scan_card_cl; + G1RemSetScanState* _scan_state; + + /* Iterates the non-empty range [start, end) of region r through oops_on_card_seq_iterate_careful<true> with _scan_card_cl, then calls trim_queue_partially() on the closure. */ void process_dirty_range(HeapWord* start, HeapWord* end, HeapRegion* r) { + MemRegion dirty_mr(start, end); + assert(!dirty_mr.is_empty(), "sanity"); + _ct->verfiy_claimed_dirty_region(dirty_mr); + bool result = r->oops_on_card_seq_iterate_careful<true>(dirty_mr, _scan_card_cl); + assert(result, "must be"); + _scan_card_cl->trim_queue_partially(); + } + + public: + /* Caches the G1 heap singleton and its card table; scan_state supplies the per-region scan_top and records regions found to contain dirty cards. */ G1ProcessCardTableRegionClosure(G1RemSetScanState* scan_state, + G1ScanCardClosure* scan_card_cl) : + _g1h(G1CollectedHeap::heap()), + _ct(_g1h->card_table()), + _scan_card_cl(scan_card_cl), + _scan_state(scan_state) {} + + /* Processes the dirty cards of region r below its scan_top. Always returns false. NOTE(review): false presumably tells the region iterator to keep going -- confirm against HeapRegionClosure. */ virtual bool do_heap_region(HeapRegion* r) { + assert(_g1h->is_gc_active(), "Only call during GC"); + const uint region_idx = r->hrm_index(); + assert(region_idx == _g1h->addr_to_region(r->bottom()), "invariant"); + HeapWord* scan_top = _scan_state->scan_top(region_idx); + /* A NULL scan_top means there is nothing to process for this region. */ if (scan_top == NULL) { + return false; + } + assert(!r->in_collection_set() && !r->is_empty() && r->is_old_or_humongous_or_archive(), + "Wrong region type"); + assert(scan_top > r->bottom(), "region is empty"); + // _ct->print_content_for_mr(MemRegion(r->bottom(), scan_top), tty); + // The cards for the region are visited in *decreasing* address order, + // in order to avoid redundant scanning of cards. 
+ /* [start_of_dirty, end_of_dirty) is the run of dirty cards accumulated so far; it is empty while both bounds are equal. */ CardTable::CardValue* cur_card = _ct->byte_for(scan_top - 1); + CardTable::CardValue* const card_limit = _ct->byte_for(r->bottom()); + HeapWord* end_of_dirty = scan_top; + HeapWord* start_of_dirty = end_of_dirty; + bool region_contains_dirty_card = false; + while (cur_card >= card_limit) { + HeapWord* cur_hw = _ct->addr_for(cur_card); + // Skip any non-dirty card, which includes cards that were already + // scanned as parts of remembered sets. + // In G1, the card can be Claimed or Deferred. + // Claimed cards are already scanned as part of scanning remset; + // Deferred cards must be initially clean or already claimed. + // In either case, we don't need to scan these cards. + if (*cur_card == G1CardTable::dirty_card_val()) { + // We claim the card to avoid duplicate scans in scan_rem_set(). + // Races with other GC threads are possible but they're benign. + *cur_card = G1CardTable::dirty_card_val() | G1CardTable::claimed_card_val(); + // Continue the dirty range by opening the + // dirty window one card to the left. + start_of_dirty = cur_hw; + /* Register the region with the scan state only once, on its first dirty card. */ if (!region_contains_dirty_card) { + region_contains_dirty_card = true; + _scan_state->add_dirty_region(region_idx); + // _scan_card_cl->set_region(r); + } + } else { + // We hit a non-dirty card; process any non-empty dirty range accumulated + // so far. + if (start_of_dirty < end_of_dirty) { + process_dirty_range(start_of_dirty, end_of_dirty, r); + } + + // fast forward through potential continuous whole-word range of clean cards beginning at a word-boundary + /* Compares BytesPerWord card entries at a time against the all-clean row value; cur_card ends on the lowest entry of the last all-clean word that was skipped. */ if (is_aligned(cur_card, BytesPerWord)) { + CardTable::CardValue* cur_row = cur_card - BytesPerWord; + while (cur_row >= card_limit && *((intptr_t*)cur_row) == CardTableRS::clean_card_row_val()) { + cur_row -= BytesPerWord; + } + cur_card = cur_row + BytesPerWord; + cur_hw = _ct->addr_for(cur_card); + } + + // Reset the dirty window, while continuing to look + // for the next dirty card that will start a + // new dirty window. 
+ end_of_dirty = cur_hw; + start_of_dirty = cur_hw; + } + cur_card--; + } + // Process the remaining dirty window, if the first card at card_limit is dirty. + if (start_of_dirty < end_of_dirty) { + process_dirty_range(start_of_dirty, end_of_dirty, r); + } + return false; + } + }; + + /* Per-worker entry point: applies G1ProcessCardTableRegionClosure to heap regions via heap_region_par_iterate_from_worker_offset(), using card_table_hr_claimer and worker_i, with the time tracked under G1GCPhaseTimes::ProcessCardTable. */ void G1RemSet::process_card_table(G1ParScanThreadState* pss, uint worker_i, + HeapRegionClaimer* card_table_hr_claimer) { + G1GCPhaseTimes* p = _g1p->phase_times(); + + { + G1EvacPhaseTimesTracker x(p, pss, G1GCPhaseTimes::ProcessCardTable, worker_i); + + G1ScanCardClosure scan_card_cl(_g1h, pss); + G1ProcessCardTableRegionClosure cl(_scan_state, &scan_card_cl); + // TODO: We should check if we can better distribute the workload. + // It only needs to iterate regions not in cset that have dirty cards. + // OTOH, maybe it does not affect load balancing much, because it is part + // of G1ParTask and will be balanced with other collection work. + G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset( + &cl, card_table_hr_claimer, worker_i); + + // TODO: Make g1Policy consider the time of this phase, similarly to + // the update_rem_set() phase. + } + } + /* The dirty card queue logs are concatenated only when G1FastWriteBarrier is off; the scan state is reset in both modes. */ void G1RemSet::prepare_for_scan_rem_set() { + if (!G1FastWriteBarrier) { G1BarrierSet::dirty_card_queue_set().concatenate_logs(); + } _scan_state->reset(); } void G1RemSet::prepare_for_scan_rem_set(uint region_idx) { _scan_state->clear_scan_top(region_idx);
*** 620,629 **** --- 744,754 ---- } void G1RemSet::refine_card_concurrently(CardValue* card_ptr, uint worker_i) { assert(!_g1h->is_gc_active(), "Only call concurrently"); + assert(!G1FastWriteBarrier, "Concurrent refinement should be disabled"); // Construct the region representing the card. HeapWord* start = _ct->addr_for(card_ptr); // And find the region containing it. HeapRegion* r = _g1h->heap_region_containing_or_null(start);
*** 760,769 **** --- 885,895 ---- } bool G1RemSet::refine_card_during_gc(CardValue* card_ptr, G1ScanCardClosure* update_rs_cl) { assert(_g1h->is_gc_active(), "Only call during GC"); + assert(!G1FastWriteBarrier, "cards should be processed in process_card_table()"); // Construct the region representing the card. HeapWord* card_start = _ct->addr_for(card_ptr); // And find the region containing it. uint const card_region_idx = _g1h->addr_to_region(card_start);
< prev index next >