< prev index next >
src/share/vm/gc/g1/g1CollectedHeap.cpp
Print this page
rev 8802 : G1 performance improvements: card batching, joining, sorting, prefetching and write barrier fence elision and simplification based on a global synchronization using handshakes piggybacking on thread-local safepoints.
rev 8803 : Implementation improvements to pass JPRT
rev 8805 : Another JPRT attempt
*** 63,72 ****
--- 63,73 ----
#include "gc/shared/taskqueue.inline.hpp"
#include "memory/allocation.hpp"
#include "memory/iterator.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"
+ #include "runtime/globalSynchronizer.hpp"
#include "runtime/orderAccess.inline.hpp"
#include "runtime/vmThread.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/stack.inline.hpp"
*** 87,119 ****
// allocation code from the rest of the JVM. (Note that this does not
// apply to TLAB allocation, which is not part of this interface: it
// is done by clients of this interface.)
// Local to this file.
!
! class RefineCardTableEntryClosure: public CardTableEntryClosure {
! bool _concurrent;
! public:
! RefineCardTableEntryClosure() : _concurrent(true) { }
!
! bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
bool oops_into_cset = G1CollectedHeap::heap()->g1_rem_set()->refine_card(card_ptr, worker_i, false);
// This path is executed by the concurrent refine or mutator threads,
// concurrently, and so we do not care if card_ptr contains references
// that point into the collection set.
assert(!oops_into_cset, "should be");
! if (_concurrent && SuspendibleThreadSet::should_yield()) {
! // Caller will actually yield.
return false;
}
! // Otherwise, we finished successfully; return true.
return true;
}
! void set_concurrent(bool b) { _concurrent = b; }
! };
class RedirtyLoggedCardTableEntryClosure : public CardTableEntryClosure {
private:
size_t _num_processed;
--- 88,380 ----
// allocation code from the rest of the JVM. (Note that this does not
// apply to TLAB allocation, which is not part of this interface: it
// is done by clients of this interface.)
// Local to this file.
! bool RefineCardTableEntryClosure::do_card_ptr(jbyte* card_ptr, uint worker_i) {
! // Refine a single card immediately (unbuffered path). Returns false when the
! // caller should yield. The per-closure _concurrent flag was replaced by the
! // heap-global refine_cte_cl_concurrency() query (see set_refine_cte_cl_concurrency).
bool oops_into_cset = G1CollectedHeap::heap()->g1_rem_set()->refine_card(card_ptr, worker_i, false);
// This path is executed by the concurrent refine or mutator threads,
// concurrently, and so we do not care if card_ptr contains references
// that point into the collection set.
assert(!oops_into_cset, "should be");
! // return false if caller should yield
! return !(G1CollectedHeap::heap()->refine_cte_cl_concurrency() && SuspendibleThreadSet::should_yield());
! }
!
! // A CardBuffer holds one batch of cards queued for refinement: the card table
! // entries (_card_buffer), the memory regions they cover (_mr_buffer), and a
! // global-synchronizer handle (_gs) used for membar-elision handshakes.
! // Buffers are chained through _next into the closure's async list.
! CardBuffer::CardBuffer()
! : _next(NULL) {
! int size = BufferedRefineCardTableEntryClosure::buffer_size();
! _card_buffer = NEW_C_HEAP_ARRAY(jbyte*, size, mtGC);
! _mr_buffer = NEW_C_HEAP_ARRAY(MemRegion, size, mtGC);
! _gs = new SynchronizerObj<mtGC>();
! // NOTE(review): _length is not initialized here; general_flush() assigns it
! // before the buffer is used — confirm no other path reads it first.
! _misses = 0;
! }
!
! // Releases the C-heap arrays and the synchronizer owned by this buffer.
! CardBuffer::~CardBuffer() {
! FREE_C_HEAP_ARRAY(jbyte*, _card_buffer);
! FREE_C_HEAP_ARRAY(MemRegion, _mr_buffer);
! delete _gs;
! }
!
! // Buffered refinement closure: accumulates cards into a CardBuffer and flushes
! // them in batches (see general_flush) instead of refining one card at a time.
! BufferedRefineCardTableEntryClosure::BufferedRefineCardTableEntryClosure()
! : _index(0), _g1h(G1CollectedHeap::heap()), _head_buffer(NULL), _tail_buffer(NULL),
! _current_buffer(NULL), _async_buffers(0) {
! }
!
! // All buffered and asynchronously-pending cards must have been flushed before
! // the closure is destroyed; only an empty _current_buffer may remain.
! BufferedRefineCardTableEntryClosure::~BufferedRefineCardTableEntryClosure() {
! assert(_index == 0, "must flush refine card buffer");
! assert(_head_buffer == NULL && _tail_buffer == NULL, "must flush all async cards first");
! assert(_async_buffers == 0, "must flush all async cards first");
! if (_current_buffer) delete _current_buffer;
! }
!
! // Queue one card for batched refinement. Flushes softly when the buffer fills,
! // and hard-flushes if the caller must yield to a suspension request.
! // NOTE(review): only _card_buffer[_index] is recorded here; the matching
! // _mr_buffer entry appears to be filled in by G1RemSet::clean_card during
! // pre_sync — confirm against g1RemSet.cpp.
! bool BufferedRefineCardTableEntryClosure::do_card_ptr(jbyte *card_ptr, uint worker_i) {
! _worker_i = worker_i;
! if (_index == buffer_size()) soft_flush();
! if (_current_buffer == NULL) _current_buffer = new CardBuffer();
! _current_buffer->_card_buffer[_index++] = card_ptr;
!
! bool should_yield = _g1h->refine_cte_cl_concurrency() && SuspendibleThreadSet::should_yield();
! if (should_yield) flush_buffer();
!
! // return false if caller should yield
! return !should_yield;
! }
!
! // Soft flush: best-effort flush that does not force synchronization to
! // complete (contrast with flush_buffer, which passes hard == true).
! void BufferedRefineCardTableEntryClosure::soft_flush() {
! general_flush(false);
! }
!
! // Procedures used to sort and join G1 cards during refinement
! static void quick_sort(jbyte **card_array, MemRegion *region_array, int left, int right);
! static int partition(jbyte **card_array, MemRegion *region_array, int left, int right);
! static int join_cards(jbyte **card_array, MemRegion *region_array, int length);
!
! // Sorts card_array by card address, permuting region_array in tandem so that
! // entry i of both arrays stays paired. `right` is exclusive. The pivot lands
! // at `middle`, so recursion on [left, middle) and [middle + 1, right) excludes it.
! // NOTE(review): first-element pivot degrades to O(n^2) on already-sorted
! // input; likely acceptable for buffer_size()-bounded batches, but worth a look.
! static void quick_sort(jbyte **card_array, MemRegion *region_array, int left, int right) {
! int middle;
! if (left < right)
! {
! middle = partition(card_array, region_array, left, right);
! quick_sort(card_array, region_array, left, middle);
! quick_sort(card_array, region_array, middle + 1, right);
! }
! }
!
! // Lomuto-style partition over [left, right) using card_array[left] as pivot.
! // Elements <= pivot are moved to the front; the pivot is swapped into its
! // final slot i, which is returned. region_array mirrors every swap so the
! // card/region pairing is preserved.
! static int partition(jbyte **card_array, MemRegion *region_array, int left, int right) {
! jbyte *card = card_array[left];
! int i = left;
! int j;
!
! for (j = left + 1; j < right; j++)
! {
! if (card_array[j] <= card)
! {
! i = i + 1;
! swap(card_array[i], card_array[j]);
! swap(region_array[i], region_array[j]);
! }
! }
!
! swap(card_array[i], card_array[left]);
! swap(region_array[i], region_array[left]);
! return i;
! }
!
! // Compacts a sorted batch: drops cards that are no longer clean (re-dirtied
! // concurrently, so they will be refined by a later pass) and merges a card
! // that is address-adjacent to the previous card, within the same heap region,
! // into the previously inserted MemRegion. Returns the compacted length.
! static int join_cards(jbyte **card_array, MemRegion *region_array, int length) {
! G1CollectedHeap *g1h = G1CollectedHeap::heap();
! jbyte *prev_card = NULL;
! HeapRegion *prev_hr = NULL;
! int insert_head = 0;
! for (int i = 0; i < length; i++) {
! jbyte *card = card_array[i];
!
! if (*card == CardTableModRefBS::clean_card_val()) {
! HeapRegion *hr = g1h->heap_region_containing_raw(region_array[i].start());
! // Adjacency is checked against the immediately preceding card (clean or
! // not), while prev_hr only tracks the last *clean* card.
! // NOTE(review): if a non-clean card sits between two clean neighbours,
! // the merged region can span the skipped card's words — presumably
! // benign (extra scanning is safe), but confirm this is intended.
! if (card == prev_card + 1 && hr == prev_hr) {
! MemRegion insert_region = region_array[insert_head - 1];
! region_array[insert_head - 1] = MemRegion(insert_region.start(), region_array[i].end());
! } else {
! card_array[insert_head] = card;
! region_array[insert_head] = region_array[i];
! insert_head++;
! }
! prev_hr = hr;
! }
!
! prev_card = card;
! }
!
! return insert_head;
! }
!
! // Capacity of one CardBuffer batch, taken from the G1UpdateBufferSize flag.
! int BufferedRefineCardTableEntryClosure::buffer_size() {
! return (int)G1UpdateBufferSize;
! }
!
! // Hard flush: forces all queued and async-pending batches to completion.
! void BufferedRefineCardTableEntryClosure::flush_buffer() {
! general_flush(true);
! }
!
! // Returns true if it needs post sync
! // Phase 1 of a flush: clean every card in the batch, compacting the buffer so
! // only cards that still need processing remain; then publish the cleans with a
! // StoreLoad fence (or start an asynchronous global synchronization when
! // G1ElideMembar elides per-card fences) and sort the survivors by address.
! // `hard` == true means the caller cannot defer, so stores are serialized even
! // when nothing is left to process.
! bool BufferedRefineCardTableEntryClosure::pre_sync(CardBuffer *buffer, bool hard) {
! // 1. Clean all cards in the batch.
! G1RemSet *g1rs = G1CollectedHeap::heap()->g1_rem_set();
! int needs_processing = 0;
!
! jbyte **const card_buffer = buffer->_card_buffer;
! MemRegion *const mr_buffer = buffer->_mr_buffer;
! const int length = buffer->_length;
!
! for (int i = 0; i < length; i++) {
! if (g1rs->clean_card(card_buffer[i], _worker_i, mr_buffer[i])) {
! card_buffer[needs_processing] = card_buffer[i];
! mr_buffer[needs_processing] = mr_buffer[i];
! needs_processing++;
! }
! }
! buffer->_length = needs_processing;
!
! if (needs_processing == 0) {
! if (hard) {
! // If we are forced to finish scanning, we must serialize stores anyway.
! OrderAccess::storeload();
! if (G1ElideMembar) {
! buffer->_gs->start_synchronizing();
! }
! }
return false;
}
!
! OrderAccess::storeload();
! if (G1ElideMembar) {
! buffer->_gs->start_synchronizing();
! }
!
! // 2. Sort the cards
! quick_sort(buffer->_card_buffer, buffer->_mr_buffer, 0, buffer->_length);
!
return true;
+ }
+
+ // Phase 2 of a flush: wait for the buffer's global synchronization (started in
+ // pre_sync) to complete. Without G1ElideMembar the StoreLoad fence already
+ // sufficed, so this is trivially true. A soft sync just probes; a hard sync
+ // escalates urgency and blocks until the handshake finishes.
+ bool BufferedRefineCardTableEntryClosure::sync(CardBuffer *buffer, bool hard) {
+ if (!G1ElideMembar) return true;
+
+ bool success = buffer->_gs->try_synchronize();
+ if (hard) {
+ if (!success) {
+ buffer->_gs->maximize_urgency();
+ buffer->_gs->synchronize();
+ }
+ return true;
+ } else {
+ return success;
}
+ }
! // Phase 3 of a flush: actually refine the (cleaned, sorted) cards. Cards are
! // processed 16 at a time; within each batch, address-adjacent cards are joined
! // into single MemRegions and the next region is prefetched ahead of use.
! void BufferedRefineCardTableEntryClosure::post_sync(CardBuffer *buffer) {
! const int length = buffer->_length;
!
! const int card_batch_size = 16;
! jbyte **current_card = buffer->_card_buffer;
! MemRegion *current_region = buffer->_mr_buffer;
!
! const uintx interval = PrefetchScanIntervalInBytes * 2;
!
! G1RemSet *g1rs = G1CollectedHeap::heap()->g1_rem_set();
!
! // 3. Batch 16 cards at a time
!
! for (int j = 0; j < length; j += card_batch_size) {
! // 4. Join consecutive cards together and prefetch next card
! int batch = MIN2((length - j), card_batch_size);
! batch = join_cards(current_card, current_region, batch);
!
! // Sentinel: on the last batch end_card points at a local dirty value, which
! // can never compare equal to clean_card_val(), so the cross-batch prefetch
! // below is skipped. end_prefetch is deliberately left unset in that case —
! // it is only read under the *end_card == clean_card_val() guard.
! jbyte dirty_card_val = CardTableModRefBS::dirty_card_val();
! jbyte *end_card;
! HeapWord *end_prefetch;
!
! if (j + card_batch_size < length) {
! // Peek at the first card of the (not-yet-joined) next batch.
! end_prefetch = current_region[card_batch_size].start();
! end_card = current_card[card_batch_size];
! } else {
! end_card = &dirty_card_val;
! }
!
! // Only the first `batch` entries survived join_cards; entries beyond that
! // are skipped because iteration stops at region_end.
! MemRegion *region_end = current_region + batch;
! jbyte** batch_card;
! MemRegion* batch_region;
!
! for (batch_card = current_card, batch_region = current_region; batch_region != region_end; batch_card++) {
! jbyte *card = *batch_card;
! MemRegion mr = *batch_region;
! MemRegion *next_region = batch_region + 1;
!
! if (next_region != region_end) {
! // Prefetch interval in batch
! MemRegion next_region_val = *next_region;
! Prefetch::read(next_region_val.start(), next_region_val.byte_size());
! } else if (*end_card == CardTableModRefBS::clean_card_val()) {
! // Prefetch broken interval to next batch
! Prefetch::read(end_prefetch, interval);
! }
!
! g1rs->refine_card_buffered(card, _worker_i, /*check_for_cset_refs*/ false, mr);
!
! batch_region = next_region;
! }
!
! // Advance by the full (un-joined) batch size; join_cards compacted in place.
! current_region += card_batch_size;
! current_card += card_batch_size;
! }
! }
!
! // Flush driver: runs pre_sync on the current buffer, queues it on the async
! // list if its synchronization could not complete immediately (or if `hard`),
! // then drains as many queued buffers as have finished synchronizing. With
! // hard == true everything is forced to completion before returning.
! void BufferedRefineCardTableEntryClosure::general_flush(bool hard) {
! if (_index == 0) {
! // Nothing buffered: only a hard flush may land here, and only to drain
! // previously queued async buffers.
! assert(hard, "invariant");
! if (_async_buffers == 0) return;
! }
!
! // 1. Start asynchronous synchronization for the current buffer
! if (_current_buffer == NULL) _current_buffer = new CardBuffer();
! _current_buffer->_length = _index;
! if (pre_sync(_current_buffer, hard) || hard) {
! // append async buffer
! CardBuffer *tail = _tail_buffer;
! if (tail != NULL) tail->_next = _current_buffer;
! _tail_buffer = _current_buffer;
! if (_head_buffer == NULL) _head_buffer = _current_buffer;
! if (hard) sync(_current_buffer, hard);
! _current_buffer = NULL;
! _async_buffers++;
! }
!
! _index = 0;
!
! // 2. Process old batches that have been cleaned but couldn't synchronize (async completion)
! CardBuffer *current = _head_buffer;
! // NOTE(review): check_sync is never read — dead local, candidate for removal.
! bool check_sync = true;
! while (current != NULL) {
! // NOTE(review): when hard == true the short-circuit skips sync(current, ...)
! // for the older queued buffers — presumably the blocking sync() on the
! // newest buffer above subsumes earlier synchronization epochs; confirm
! // against the SynchronizerObj/handshake semantics.
! if (hard || sync(current, hard)) {
! post_sync(current);
! CardBuffer *next = current->_next;
! _head_buffer = next;
! if (next == NULL) _tail_buffer = NULL;
! delete current;
! current = next;
! _async_buffers--;
! } else {
! current->_misses++;
! // Escalate urgency when the backlog grows and a buffer keeps missing.
! // (&& binds tighter than ||, so each line is one backlog/miss threshold.)
! if (_async_buffers > 4 && current->_misses > 2
! || _async_buffers > 8 && current->_misses > 4
! || _async_buffers > 16 && current->_misses > 6) {
! current->_gs->increase_urgency();
! }
! break;
! }
! }
! }
class RedirtyLoggedCardTableEntryClosure : public CardTableEntryClosure {
private:
size_t _num_processed;
*** 1917,1927 ****
_ref_processor_cm(NULL),
_ref_processor_stw(NULL),
_bot_shared(NULL),
_cg1r(NULL),
_g1mm(NULL),
! _refine_cte_cl(NULL),
_secondary_free_list("Secondary Free List", new SecondaryFreeRegionListMtSafeChecker()),
_old_set("Old Set", false /* humongous */, new OldRegionSetMtSafeChecker()),
_humongous_set("Master Humongous Set", true /* humongous */, new HumongousRegionSetMtSafeChecker()),
_humongous_reclaim_candidates(),
_has_humongous_reclaim_candidates(false),
--- 2178,2188 ----
_ref_processor_cm(NULL),
_ref_processor_stw(NULL),
_bot_shared(NULL),
_cg1r(NULL),
_g1mm(NULL),
! _refine_cte_cl_concurrency(true),
_secondary_free_list("Secondary Free List", new SecondaryFreeRegionListMtSafeChecker()),
_old_set("Old Set", false /* humongous */, new OldRegionSetMtSafeChecker()),
_humongous_set("Master Humongous Set", true /* humongous */, new HumongousRegionSetMtSafeChecker()),
_humongous_reclaim_candidates(),
_has_humongous_reclaim_candidates(false),
*** 2030,2042 ****
// Ensure that the sizes are properly aligned.
Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap");
Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap");
Universe::check_alignment(max_byte_size, heap_alignment, "g1 heap");
! _refine_cte_cl = new RefineCardTableEntryClosure();
!
! _cg1r = new ConcurrentG1Refine(this, _refine_cte_cl);
// Reserve the maximum.
// When compressed oops are enabled, the preferred heap base
// is calculated by subtracting the requested size from the
--- 2291,2301 ----
// Ensure that the sizes are properly aligned.
Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap");
Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap");
Universe::check_alignment(max_byte_size, heap_alignment, "g1 heap");
! _cg1r = new ConcurrentG1Refine(this);
// Reserve the maximum.
// When compressed oops are enabled, the preferred heap base
// is calculated by subtracting the requested size from the
*** 2156,2183 ****
JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon,
SATB_Q_FL_lock,
G1SATBProcessCompletedThreshold,
Shared_SATB_Q_lock);
! JavaThread::dirty_card_queue_set().initialize(_refine_cte_cl,
DirtyCardQ_CBL_mon,
DirtyCardQ_FL_lock,
concurrent_g1_refine()->yellow_zone(),
concurrent_g1_refine()->red_zone(),
Shared_DirtyCardQ_lock);
! dirty_card_queue_set().initialize(NULL, // Should never be called by the Java code
DirtyCardQ_CBL_mon,
DirtyCardQ_FL_lock,
-1, // never trigger processing
-1, // no limit on length
Shared_DirtyCardQ_lock,
&JavaThread::dirty_card_queue_set());
// Initialize the card queue set used to hold cards containing
// references into the collection set.
! _into_cset_dirty_card_queue_set.initialize(NULL, // Should never be called by the Java code
DirtyCardQ_CBL_mon,
DirtyCardQ_FL_lock,
-1, // never trigger processing
-1, // no limit on length
Shared_DirtyCardQ_lock,
--- 2415,2442 ----
JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon,
SATB_Q_FL_lock,
G1SATBProcessCompletedThreshold,
Shared_SATB_Q_lock);
! JavaThread::dirty_card_queue_set().initialize(true,
DirtyCardQ_CBL_mon,
DirtyCardQ_FL_lock,
concurrent_g1_refine()->yellow_zone(),
concurrent_g1_refine()->red_zone(),
Shared_DirtyCardQ_lock);
! dirty_card_queue_set().initialize(false, // Should never be called by the Java code
DirtyCardQ_CBL_mon,
DirtyCardQ_FL_lock,
-1, // never trigger processing
-1, // no limit on length
Shared_DirtyCardQ_lock,
&JavaThread::dirty_card_queue_set());
// Initialize the card queue set used to hold cards containing
// references into the collection set.
! _into_cset_dirty_card_queue_set.initialize(false, // Should never be called by the Java code
DirtyCardQ_CBL_mon,
DirtyCardQ_FL_lock,
-1, // never trigger processing
-1, // no limit on length
Shared_DirtyCardQ_lock,
*** 6379,6389 ****
"value: " SIZE_FORMAT " recalculated: " SIZE_FORMAT,
used_unlocked(), recalculate_used()));
}
void G1CollectedHeap::set_refine_cte_cl_concurrency(bool concurrent) {
! _refine_cte_cl->set_concurrent(concurrent);
}
bool G1CollectedHeap::is_in_closed_subset(const void* p) const {
HeapRegion* hr = heap_region_containing(p);
return hr->is_in(p);
--- 6638,6652 ----
"value: " SIZE_FORMAT " recalculated: " SIZE_FORMAT,
used_unlocked(), recalculate_used()));
}
void G1CollectedHeap::set_refine_cte_cl_concurrency(bool concurrent) {
! _refine_cte_cl_concurrency = concurrent;
! }
!
! bool G1CollectedHeap::refine_cte_cl_concurrency() {
! return _refine_cte_cl_concurrency;
}
bool G1CollectedHeap::is_in_closed_subset(const void* p) const {
HeapRegion* hr = heap_region_containing(p);
return hr->is_in(p);
< prev index next >