
src/share/vm/gc/g1/g1CollectedHeap.cpp

rev 8802 : G1 performance improvements: card batching, joining, sorting, prefetching, and write barrier fence elision and simplification, based on global synchronization using handshakes piggybacking on thread-local safepoints.
rev 8803 : Implementation improvements to pass JPRT
rev 8805 : Another JPRT attempt
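Note on the "card batching, joining, sorting" part of this change: the buffered refinement closure in the diff below collects dirty-card pointers, sorts them by card-table address, and joins runs of consecutive cards into one larger region so that each run is scanned once. The following is a minimal standalone sketch of that sort-and-join step only, using a simplified Card record and std::sort in place of the patch's in-place quick_sort; it is illustrative and does not use the actual HotSpot types.

#include <algorithm>
#include <cstdint>
#include <vector>

// Simplified stand-ins for a card-table entry and the heap range it covers.
// The field names and the "merge by consecutive card address" rule are
// illustrative assumptions, not the real G1 data structures.
struct Card {
  uintptr_t card_addr;  // address of the card-table byte
  uintptr_t mem_start;  // start of the covered heap range
  uintptr_t mem_end;    // end of the covered heap range
};

// Sort cards by card-table address, then merge runs of consecutive cards so
// that each merged run can be refined with a single scan.
static std::vector<Card> sort_and_join(std::vector<Card> cards) {
  std::sort(cards.begin(), cards.end(),
            [](const Card& a, const Card& b) { return a.card_addr < b.card_addr; });
  std::vector<Card> joined;
  uintptr_t prev_card = 0;
  for (const Card& c : cards) {
    if (!joined.empty() && c.card_addr == prev_card + 1) {
      joined.back().mem_end = c.mem_end;  // extend the previous run
    } else {
      joined.push_back(c);                // start a new run
    }
    prev_card = c.card_addr;
  }
  return joined;
}

The patch additionally skips cards whose card-table byte is no longer clean-able and only joins cards within the same HeapRegion; both checks are omitted here for brevity.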

*** 63,72 ****
--- 63,73 ----
  #include "gc/shared/taskqueue.inline.hpp"
  #include "memory/allocation.hpp"
  #include "memory/iterator.hpp"
  #include "oops/oop.inline.hpp"
  #include "runtime/atomic.inline.hpp"
+ #include "runtime/globalSynchronizer.hpp"
  #include "runtime/orderAccess.inline.hpp"
  #include "runtime/vmThread.hpp"
  #include "utilities/globalDefinitions.hpp"
  #include "utilities/stack.inline.hpp"
*** 87,119 ****
  // allocation code from the rest of the JVM. (Note that this does not
  // apply to TLAB allocation, which is not part of this interface: it
  // is done by clients of this interface.)
  
  // Local to this file.
! 
! class RefineCardTableEntryClosure: public CardTableEntryClosure {
!   bool _concurrent;
! public:
!   RefineCardTableEntryClosure() : _concurrent(true) { }
! 
!   bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
      bool oops_into_cset = G1CollectedHeap::heap()->g1_rem_set()->refine_card(card_ptr, worker_i, false);
      // This path is executed by the concurrent refine or mutator threads,
      // concurrently, and so we do not care if card_ptr contains references
      // that point into the collection set.
      assert(!oops_into_cset, "should be");
!     if (_concurrent && SuspendibleThreadSet::should_yield()) {
!       // Caller will actually yield.
        return false;
      }
!     // Otherwise, we finished successfully; return true.
      return true;
    }
! 
!   void set_concurrent(bool b) { _concurrent = b; }
! };
  
  class RedirtyLoggedCardTableEntryClosure : public CardTableEntryClosure {
  private:
    size_t _num_processed;
--- 88,380 ----
  // allocation code from the rest of the JVM. (Note that this does not
  // apply to TLAB allocation, which is not part of this interface: it
  // is done by clients of this interface.)
  
  // Local to this file.
! bool RefineCardTableEntryClosure::do_card_ptr(jbyte* card_ptr, uint worker_i) {
    bool oops_into_cset = G1CollectedHeap::heap()->g1_rem_set()->refine_card(card_ptr, worker_i, false);
    // This path is executed by the concurrent refine or mutator threads,
    // concurrently, and so we do not care if card_ptr contains references
    // that point into the collection set.
    assert(!oops_into_cset, "should be");
!   // return false if caller should yield
!   return !(G1CollectedHeap::heap()->refine_cte_cl_concurrency() && SuspendibleThreadSet::should_yield());
! }
! 
! CardBuffer::CardBuffer()
!   : _next(NULL) {
!   int size = BufferedRefineCardTableEntryClosure::buffer_size();
!   _card_buffer = NEW_C_HEAP_ARRAY(jbyte*, size, mtGC);
!   _mr_buffer = NEW_C_HEAP_ARRAY(MemRegion, size, mtGC);
!   _gs = new SynchronizerObj<mtGC>();
!   _misses = 0;
! }
! 
! CardBuffer::~CardBuffer() {
!   FREE_C_HEAP_ARRAY(jbyte*, _card_buffer);
!   FREE_C_HEAP_ARRAY(MemRegion, _mr_buffer);
!   delete _gs;
! }
! 
! BufferedRefineCardTableEntryClosure::BufferedRefineCardTableEntryClosure()
!   : _index(0), _g1h(G1CollectedHeap::heap()), _head_buffer(NULL), _tail_buffer(NULL),
!     _current_buffer(NULL), _async_buffers(0) {
! }
! 
! BufferedRefineCardTableEntryClosure::~BufferedRefineCardTableEntryClosure() {
!   assert(_index == 0, "must flush refine card buffer");
!   assert(_head_buffer == NULL && _tail_buffer == NULL, "must flush all async cards first");
!   assert(_async_buffers == 0, "must flush all async cards first");
!   if (_current_buffer) delete _current_buffer;
! }
! 
! bool BufferedRefineCardTableEntryClosure::do_card_ptr(jbyte *card_ptr, uint worker_i) {
!   _worker_i = worker_i;
!   if (_index == buffer_size()) soft_flush();
!   if (_current_buffer == NULL) _current_buffer = new CardBuffer();
!   _current_buffer->_card_buffer[_index++] = card_ptr;
! 
!   bool should_yield = _g1h->refine_cte_cl_concurrency() && SuspendibleThreadSet::should_yield();
!   if (should_yield) flush_buffer();
! 
!   // return false if caller should yield
!   return !should_yield;
! }
! 
! void BufferedRefineCardTableEntryClosure::soft_flush() {
!   general_flush(false);
! }
! 
! // Procedures used to sort and join G1 cards during refinement
! static void quick_sort(jbyte **card_array, MemRegion *region_array, int left, int right);
! static int partition(jbyte **card_array, MemRegion *region_array, int left, int right);
! static int join_cards(jbyte **card_array, MemRegion *region_array, int length);
! 
! static void quick_sort(jbyte **card_array, MemRegion *region_array, int left, int right) {
!   int middle;
!   if (left < right)
!   {
!     middle = partition(card_array, region_array, left, right);
!     quick_sort(card_array, region_array, left, middle);
!     quick_sort(card_array, region_array, middle + 1, right);
!   }
! }
! 
! static int partition(jbyte **card_array, MemRegion *region_array, int left, int right) {
!   jbyte *card = card_array[left];
!   int i = left;
!   int j;
! 
!   for (j = left + 1; j < right; j++)
!   {
!     if (card_array[j] <= card)
!     {
!       i = i + 1;
!       swap(card_array[i], card_array[j]);
!       swap(region_array[i], region_array[j]);
!     }
!   }
! 
!   swap(card_array[i], card_array[left]);
!   swap(region_array[i], region_array[left]);
!   return i;
! }
! 
! static int join_cards(jbyte **card_array, MemRegion *region_array, int length) {
!   G1CollectedHeap *g1h = G1CollectedHeap::heap();
!   jbyte *prev_card = NULL;
!   HeapRegion *prev_hr = NULL;
!   int insert_head = 0;
!   for (int i = 0; i < length; i++) {
!     jbyte *card = card_array[i];
! 
!     if (*card == CardTableModRefBS::clean_card_val()) {
!       HeapRegion *hr = g1h->heap_region_containing_raw(region_array[i].start());
!       if (card == prev_card + 1 && hr == prev_hr) {
!         MemRegion insert_region = region_array[insert_head - 1];
!         region_array[insert_head - 1] = MemRegion(insert_region.start(), region_array[i].end());
!       } else {
!         card_array[insert_head] = card;
!         region_array[insert_head] = region_array[i];
!         insert_head++;
!       }
!       prev_hr = hr;
!     }
! 
!     prev_card = card;
!   }
! 
!   return insert_head;
! }
! 
! int BufferedRefineCardTableEntryClosure::buffer_size() {
!   return (int)G1UpdateBufferSize;
! }
! 
! void BufferedRefineCardTableEntryClosure::flush_buffer() {
!   general_flush(true);
! }
! 
! // Returns true if it needs post sync
! bool BufferedRefineCardTableEntryClosure::pre_sync(CardBuffer *buffer, bool hard) {
!   // 1. Clean all cards in the batch.
!   G1RemSet *g1rs = G1CollectedHeap::heap()->g1_rem_set();
!   int needs_processing = 0;
! 
!   jbyte **const card_buffer = buffer->_card_buffer;
!   MemRegion *const mr_buffer = buffer->_mr_buffer;
!   const int length = buffer->_length;
! 
!   for (int i = 0; i < length; i++) {
!     if (g1rs->clean_card(card_buffer[i], _worker_i, mr_buffer[i])) {
!       card_buffer[needs_processing] = card_buffer[i];
!       mr_buffer[needs_processing] = mr_buffer[i];
!       needs_processing++;
!     }
!   }
!   buffer->_length = needs_processing;
! 
!   if (needs_processing == 0) {
!     if (hard) {
!       // If we are forced to finish scanning, we must serialize stores anyway.
!       OrderAccess::storeload();
!       if (G1ElideMembar) {
!         buffer->_gs->start_synchronizing();
!       }
!     }
      return false;
    }
! 
!   OrderAccess::storeload();
!   if (G1ElideMembar) {
!     buffer->_gs->start_synchronizing();
!   }
! 
!   // 2. Sort the cards
!   quick_sort(buffer->_card_buffer, buffer->_mr_buffer, 0, buffer->_length);
!   return true;
+ }
+ 
+ bool BufferedRefineCardTableEntryClosure::sync(CardBuffer *buffer, bool hard) {
+   if (!G1ElideMembar) return true;
+ 
+   bool success = buffer->_gs->try_synchronize();
+   if (hard) {
+     if (!success) {
+       buffer->_gs->maximize_urgency();
+       buffer->_gs->synchronize();
+     }
+     return true;
+   } else {
+     return success;
    }
+ }
! void BufferedRefineCardTableEntryClosure::post_sync(CardBuffer *buffer) {
!   const int length = buffer->_length;
! 
!   const int card_batch_size = 16;
!   jbyte **current_card = buffer->_card_buffer;
!   MemRegion *current_region = buffer->_mr_buffer;
! 
!   const uintx interval = PrefetchScanIntervalInBytes * 2;
! 
!   G1RemSet *g1rs = G1CollectedHeap::heap()->g1_rem_set();
! 
!   // 3. Batch 16 cards at a time
! 
!   for (int j = 0; j < length; j += card_batch_size) {
!     // 4. Join consecutive cards together and prefetch next card
!     int batch = MIN2((length - j), card_batch_size);
!     batch = join_cards(current_card, current_region, batch);
! 
!     jbyte dirty_card_val = CardTableModRefBS::dirty_card_val();
!     jbyte *end_card;
!     HeapWord *end_prefetch;
! 
!     if (j + card_batch_size < length) {
!       end_prefetch = current_region[card_batch_size].start();
!       end_card = current_card[card_batch_size];
!     } else {
!       end_card = &dirty_card_val;
!     }
! 
!     MemRegion *region_end = current_region + batch;
!     jbyte** batch_card;
!     MemRegion* batch_region;
! 
!     for (batch_card = current_card, batch_region = current_region; batch_region != region_end; batch_card++) {
!       jbyte *card = *batch_card;
!       MemRegion mr = *batch_region;
!       MemRegion *next_region = batch_region + 1;
! 
!       if (next_region != region_end) {
!         MemRegion next_region_val = *next_region;
!         // Prefetch interval in batch
!         Prefetch::read(next_region_val.start(), next_region_val.byte_size());
!       } else if (*end_card == CardTableModRefBS::clean_card_val()) {
!         // Prefetch broken interval to next batch
!         Prefetch::read(end_prefetch, interval);
!       }
! 
!       g1rs->refine_card_buffered(card, _worker_i, /*check_for_cset_refs*/ false, mr);
! 
!       batch_region = next_region;
!     }
! 
!     current_region += card_batch_size;
!     current_card += card_batch_size;
!   }
! }
! 
! void BufferedRefineCardTableEntryClosure::general_flush(bool hard) {
!   if (_index == 0) {
!     assert(hard, "invariant");
!     if (_async_buffers == 0) return;
!   }
! 
!   // 1. Start asynchronous synchronization for the current buffer
!   if (_current_buffer == NULL) _current_buffer = new CardBuffer();
!   _current_buffer->_length = _index;
!   if (pre_sync(_current_buffer, hard) || hard) {
!     // append async buffer
!     CardBuffer *tail = _tail_buffer;
!     if (tail != NULL) tail->_next = _current_buffer;
!     _tail_buffer = _current_buffer;
!     if (_head_buffer == NULL) _head_buffer = _current_buffer;
!     if (hard) sync(_current_buffer, hard);
!     _current_buffer = NULL;
!     _async_buffers++;
!   }
! 
!   _index = 0;
! 
!   // 2. Process old batches that have been cleaned but couldn't synchronize (async completion)
!   CardBuffer *current = _head_buffer;
!   bool check_sync = true;
!   while (current != NULL) {
!     if (hard || sync(current, hard)) {
!       post_sync(current);
!       CardBuffer *next = current->_next;
!       _head_buffer = next;
!       if (next == NULL) _tail_buffer = NULL;
!       delete current;
!       current = next;
!       _async_buffers--;
!     } else {
!       current->_misses++;
!       if (_async_buffers > 4 && current->_misses > 2
!           || _async_buffers > 8 && current->_misses > 4
!           || _async_buffers > 16 && current->_misses > 6) {
!         current->_gs->increase_urgency();
!       }
!       break;
!     }
!   }
! }
  
  class RedirtyLoggedCardTableEntryClosure : public CardTableEntryClosure {
  private:
    size_t _num_processed;
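The post_sync loop above overlaps memory latency with work: while one joined region is being refined, the start of the next region in the batch is prefetched. Below is a rough standalone sketch of that access pattern, using the GCC/Clang __builtin_prefetch intrinsic as a stand-in for HotSpot's Prefetch::read; the process() helper is a placeholder for the per-region refinement work, not G1 code.

#include <cstddef>

// Placeholder for per-region refinement work; here it just reads every byte.
static unsigned process(const char* start, size_t len) {
  unsigned sum = 0;
  for (size_t i = 0; i < len; i++) {
    sum += (unsigned char)start[i];
  }
  return sum;
}

// Scan a batch of regions, issuing a read prefetch for the next region before
// processing the current one so the fetch overlaps with useful work.
unsigned scan_with_prefetch(const char* const* starts, const size_t* lens, size_t n) {
  unsigned total = 0;
  for (size_t i = 0; i < n; i++) {
    if (i + 1 < n) {
      __builtin_prefetch(starts[i + 1]);  // hint: this region is read next
    }
    total += process(starts[i], lens[i]);
  }
  return total;
}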
*** 1917,1927 ****
    _ref_processor_cm(NULL),
    _ref_processor_stw(NULL),
    _bot_shared(NULL),
    _cg1r(NULL),
    _g1mm(NULL),
!   _refine_cte_cl(NULL),
    _secondary_free_list("Secondary Free List", new SecondaryFreeRegionListMtSafeChecker()),
    _old_set("Old Set", false /* humongous */, new OldRegionSetMtSafeChecker()),
    _humongous_set("Master Humongous Set", true /* humongous */, new HumongousRegionSetMtSafeChecker()),
    _humongous_reclaim_candidates(),
    _has_humongous_reclaim_candidates(false),
--- 2178,2188 ----
    _ref_processor_cm(NULL),
    _ref_processor_stw(NULL),
    _bot_shared(NULL),
    _cg1r(NULL),
    _g1mm(NULL),
!   _refine_cte_cl_concurrency(true),
    _secondary_free_list("Secondary Free List", new SecondaryFreeRegionListMtSafeChecker()),
    _old_set("Old Set", false /* humongous */, new OldRegionSetMtSafeChecker()),
    _humongous_set("Master Humongous Set", true /* humongous */, new HumongousRegionSetMtSafeChecker()),
    _humongous_reclaim_candidates(),
    _has_humongous_reclaim_candidates(false),
*** 2030,2042 ****
    // Ensure that the sizes are properly aligned.
    Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap");
    Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap");
    Universe::check_alignment(max_byte_size, heap_alignment, "g1 heap");
  
!   _refine_cte_cl = new RefineCardTableEntryClosure();
! 
!   _cg1r = new ConcurrentG1Refine(this, _refine_cte_cl);
  
    // Reserve the maximum.
  
    // When compressed oops are enabled, the preferred heap base
    // is calculated by subtracting the requested size from the
--- 2291,2301 ----
    // Ensure that the sizes are properly aligned.
    Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap");
    Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap");
    Universe::check_alignment(max_byte_size, heap_alignment, "g1 heap");
  
!   _cg1r = new ConcurrentG1Refine(this);
  
    // Reserve the maximum.
  
    // When compressed oops are enabled, the preferred heap base
    // is calculated by subtracting the requested size from the
*** 2156,2183 ****
    JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon,
                                                 SATB_Q_FL_lock,
                                                 G1SATBProcessCompletedThreshold,
                                                 Shared_SATB_Q_lock);
  
!   JavaThread::dirty_card_queue_set().initialize(_refine_cte_cl,
                                                  DirtyCardQ_CBL_mon,
                                                  DirtyCardQ_FL_lock,
                                                  concurrent_g1_refine()->yellow_zone(),
                                                  concurrent_g1_refine()->red_zone(),
                                                  Shared_DirtyCardQ_lock);
  
!   dirty_card_queue_set().initialize(NULL, // Should never be called by the Java code
                                      DirtyCardQ_CBL_mon,
                                      DirtyCardQ_FL_lock,
                                      -1, // never trigger processing
                                      -1, // no limit on length
                                      Shared_DirtyCardQ_lock,
                                      &JavaThread::dirty_card_queue_set());
  
    // Initialize the card queue set used to hold cards containing
    // references into the collection set.
!   _into_cset_dirty_card_queue_set.initialize(NULL, // Should never be called by the Java code
                                               DirtyCardQ_CBL_mon,
                                               DirtyCardQ_FL_lock,
                                               -1, // never trigger processing
                                               -1, // no limit on length
                                               Shared_DirtyCardQ_lock,
--- 2415,2442 ----
    JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon,
                                                 SATB_Q_FL_lock,
                                                 G1SATBProcessCompletedThreshold,
                                                 Shared_SATB_Q_lock);
  
!   JavaThread::dirty_card_queue_set().initialize(true,
                                                  DirtyCardQ_CBL_mon,
                                                  DirtyCardQ_FL_lock,
                                                  concurrent_g1_refine()->yellow_zone(),
                                                  concurrent_g1_refine()->red_zone(),
                                                  Shared_DirtyCardQ_lock);
  
!   dirty_card_queue_set().initialize(false, // Should never be called by the Java code
                                      DirtyCardQ_CBL_mon,
                                      DirtyCardQ_FL_lock,
                                      -1, // never trigger processing
                                      -1, // no limit on length
                                      Shared_DirtyCardQ_lock,
                                      &JavaThread::dirty_card_queue_set());
  
    // Initialize the card queue set used to hold cards containing
    // references into the collection set.
!   _into_cset_dirty_card_queue_set.initialize(false, // Should never be called by the Java code
                                               DirtyCardQ_CBL_mon,
                                               DirtyCardQ_FL_lock,
                                               -1, // never trigger processing
                                               -1, // no limit on length
                                               Shared_DirtyCardQ_lock,
*** 6379,6389 ****
           "value: " SIZE_FORMAT " recalculated: " SIZE_FORMAT,
           used_unlocked(), recalculate_used()));
  }
  
  void G1CollectedHeap::set_refine_cte_cl_concurrency(bool concurrent) {
!   _refine_cte_cl->set_concurrent(concurrent);
  }
  
  bool G1CollectedHeap::is_in_closed_subset(const void* p) const {
    HeapRegion* hr = heap_region_containing(p);
    return hr->is_in(p);
--- 6638,6652 ----
           "value: " SIZE_FORMAT " recalculated: " SIZE_FORMAT,
           used_unlocked(), recalculate_used()));
  }
  
  void G1CollectedHeap::set_refine_cte_cl_concurrency(bool concurrent) {
!   _refine_cte_cl_concurrency = concurrent;
! }
! 
! bool G1CollectedHeap::refine_cte_cl_concurrency() {
!   return _refine_cte_cl_concurrency;
  }
  
  bool G1CollectedHeap::is_in_closed_subset(const void* p) const {
    HeapRegion* hr = heap_region_containing(p);
    return hr->is_in(p);