src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp
rev 4535 : 6725714: par compact - add a table to speed up bitmap searches
Reviewed-by: jmasa, tschatzl
*** 62,79 ****
#include "utilities/stack.inline.hpp"
#include <math.h>
// All sizes are in HeapWords.
! const size_t ParallelCompactData::Log2RegionSize = 9; // 512 words
const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize;
const size_t ParallelCompactData::RegionSizeBytes =
RegionSize << LogHeapWordSize;
const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1;
const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1;
const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_shift = 27;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_mask = ~0U << dc_shift;
--- 62,91 ----
#include "utilities/stack.inline.hpp"
#include <math.h>
// All sizes are in HeapWords.
! const size_t ParallelCompactData::Log2RegionSize = 16; // 64K words
const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize;
const size_t ParallelCompactData::RegionSizeBytes =
RegionSize << LogHeapWordSize;
const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1;
const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1;
const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask;
+ const size_t ParallelCompactData::Log2BlockSize = 7; // 128 words
+ const size_t ParallelCompactData::BlockSize = (size_t)1 << Log2BlockSize;
+ const size_t ParallelCompactData::BlockSizeBytes =
+ BlockSize << LogHeapWordSize;
+ const size_t ParallelCompactData::BlockSizeOffsetMask = BlockSize - 1;
+ const size_t ParallelCompactData::BlockAddrOffsetMask = BlockSizeBytes - 1;
+ const size_t ParallelCompactData::BlockAddrMask = ~BlockAddrOffsetMask;
+
+ const size_t ParallelCompactData::BlocksPerRegion = RegionSize / BlockSize;
+ const size_t ParallelCompactData::Log2BlocksPerRegion =
+ Log2RegionSize - Log2BlockSize;
+
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_shift = 27;
const ParallelCompactData::RegionData::region_sz_t
ParallelCompactData::RegionData::dc_mask = ~0U << dc_shift;
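With these values a region is 64K (65,536) words and a block is 128 words, so each region spans 512 blocks (Log2BlocksPerRegion == 9). A minimal standalone sketch of the shift arithmetic these constants imply, assuming 8-byte HeapWords and a heap-relative word offset; the real inline helpers (addr_to_region_idx, addr_to_block_ptr, block_align_down) are declared in psParallelCompact.hpp and only used in this file:

  // Standalone sketch (assumptions: 64-bit HeapWords, word offsets measured
  // from the start of the covered region) of the constants defined above.
  #include <cstddef>
  #include <cstdio>

  int main() {
    const size_t Log2RegionSize = 16;                                  // 64K words
    const size_t Log2BlockSize  = 7;                                   // 128 words
    const size_t Log2BlocksPerRegion = Log2RegionSize - Log2BlockSize; // 9

    const size_t word_offset = 200000;                    // arbitrary example offset
    const size_t region_idx  = word_offset >> Log2RegionSize;          // 3
    const size_t block_idx   = word_offset >> Log2BlockSize;           // 1562
    printf("region %zu, block %zu, blocks/region %zu\n",
           region_idx, block_idx, (size_t)1 << Log2BlocksPerRegion);
    return 0;
  }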
*** 377,386 ****
--- 389,402 ----
_region_start = 0;
_region_vspace = 0;
_region_data = 0;
_region_count = 0;
+
+ _block_vspace = 0;
+ _block_data = 0;
+ _block_count = 0;
}
bool ParallelCompactData::initialize(MemRegion covered_region)
{
_region_start = covered_region.start();
*** 390,401 ****
assert(region_align_down(_region_start) == _region_start,
"region start not aligned");
assert((region_size & RegionSizeOffsetMask) == 0,
"region size not a multiple of RegionSize");
! bool result = initialize_region_data(region_size);
!
return result;
}
PSVirtualSpace*
ParallelCompactData::create_vspace(size_t count, size_t element_size)
--- 406,416 ----
assert(region_align_down(_region_start) == _region_start,
"region start not aligned");
assert((region_size & RegionSizeOffsetMask) == 0,
"region size not a multiple of RegionSize");
! bool result = initialize_region_data(region_size) && initialize_block_data();
return result;
}
PSVirtualSpace*
ParallelCompactData::create_vspace(size_t count, size_t element_size)
*** 436,456 ****
--- 451,490 ----
return true;
}
return false;
}
+ bool ParallelCompactData::initialize_block_data()
+ {
+ assert(_region_count != 0, "region data must be initialized first");
+ const size_t count = _region_count << Log2BlocksPerRegion;
+ _block_vspace = create_vspace(count, sizeof(BlockData));
+ if (_block_vspace != 0) {
+ _block_data = (BlockData*)_block_vspace->reserved_low_addr();
+ _block_count = count;
+ return true;
+ }
+ return false;
+ }
+
void ParallelCompactData::clear()
{
memset(_region_data, 0, _region_vspace->committed_size());
+ memset(_block_data, 0, _block_vspace->committed_size());
}
void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) {
assert(beg_region <= _region_count, "beg_region out of range");
assert(end_region <= _region_count, "end_region out of range");
+ assert(RegionSize % BlockSize == 0, "RegionSize not a multiple of BlockSize");
const size_t region_cnt = end_region - beg_region;
memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData));
+
+ const size_t beg_block = beg_region * BlocksPerRegion;
+ const size_t block_cnt = region_cnt * BlocksPerRegion;
+ memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData));
}
HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const
{
const RegionData* cur_cp = region(region_idx);
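Since initialize_block_data() sizes the new table at _region_count << Log2BlocksPerRegion entries (one BlockData per 128-word block), the extra footprint scales with the covered heap. A rough sizing sketch, assuming a 64-bit build with 8-byte HeapWords and a hypothetical 2-byte BlockData entry (the real layout is defined in psParallelCompact.hpp):

  // Back-of-the-envelope block table footprint for an example 8 GB old gen.
  #include <cstddef>
  #include <cstdio>

  int main() {
    const size_t heap_bytes  = (size_t)8 << 30;     // example: 8 GB covered region
    const size_t heap_words  = heap_bytes >> 3;     // LogHeapWordSize == 3
    const size_t regions     = heap_words >> 16;    // Log2RegionSize
    const size_t blocks      = regions << 9;        // Log2BlocksPerRegion
    const size_t table_bytes = blocks * 2;          // assumed sizeof(BlockData)
    printf("regions=%zu blocks=%zu table=%zu KB\n",
           regions, blocks, table_bytes >> 10);
    return 0;
  }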
*** 725,773 ****
return true;
}
HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) {
assert(addr != NULL, "Should detect NULL oop earlier");
! assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
! #ifdef ASSERT
! if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) {
! gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr);
! }
! #endif
! assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
// Region covering the object.
! size_t region_index = addr_to_region_idx(addr);
! const RegionData* const region_ptr = region(region_index);
! HeapWord* const region_addr = region_align_down(addr);
!
! assert(addr < region_addr + RegionSize, "Region does not cover object");
! assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check");
!
HeapWord* result = region_ptr->destination();
! // If all the data in the region is live, then the new location of the object
! // can be calculated from the destination of the region plus the offset of the
! // object in the region.
if (region_ptr->data_size() == RegionSize) {
! result += pointer_delta(addr, region_addr);
! DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
return result;
}
! // The new location of the object is
! // region destination +
! // size of the partial object extending onto the region +
! // sizes of the live objects in the Region that are to the left of addr
! const size_t partial_obj_size = region_ptr->partial_obj_size();
! HeapWord* const search_start = region_addr + partial_obj_size;
! const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
! size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
! result += partial_obj_size + live_to_left;
! DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
return result;
}
klassOop ParallelCompactData::calc_new_klass(klassOop old_klass) {
klassOop updated_klass;
--- 759,806 ----
return true;
}
HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) {
assert(addr != NULL, "Should detect NULL oop earlier");
! assert(PSParallelCompact::gc_heap()->is_in(addr), "not in heap");
! assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "not marked");
// Region covering the object.
! RegionData* const region_ptr = addr_to_region_ptr(addr);
HeapWord* result = region_ptr->destination();
! // If the entire Region is live, the new location is region->destination + the
! // offset of the object within the Region.
!
! // Run some performance tests to determine if this special case pays off. It
! // is worth it for pointers into the dense prefix. If the optimization to
! // avoid pointer updates in regions that only point to the dense prefix is
! // ever implemented, this should be revisited.
if (region_ptr->data_size() == RegionSize) {
! result += region_offset(addr);
return result;
}
! // Otherwise, the new location is region->destination + block offset + the
! // number of live words in the Block that are (a) to the left of addr and (b)
! // due to objects that start in the Block.
! // Fill in the block table if necessary. This is unsynchronized, so multiple
! // threads may fill the block table for a region (harmless, since it is
! // idempotent).
! if (!region_ptr->blocks_filled()) {
! PSParallelCompact::fill_blocks(addr_to_region_idx(addr));
! region_ptr->set_blocks_filled();
! }
! HeapWord* const search_start = block_align_down(addr);
! const size_t block_offset = addr_to_block_ptr(addr)->offset();
!
! const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
! const size_t live = bitmap->live_words_in_range(search_start, oop(addr));
! result += block_offset + live;
! DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result));
return result;
}
klassOop ParallelCompactData::calc_new_klass(klassOop old_klass) {
klassOop updated_klass;
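The net effect of the rewritten calc_new_pointer(): for a fully live region the result is pure arithmetic, and otherwise the bitmap scan is bounded by the single 128-word block containing addr rather than reaching back to the region start. A self-contained sketch of that shape, using hypothetical stand-in types rather than the real RegionData, BlockData and ParMarkBitMap:

  // Sketch only; word-based arithmetic with simplified stand-in types.
  #include <cstddef>

  static const size_t RegionWords = (size_t)1 << 16;
  static const size_t BlockWords  = (size_t)1 << 7;

  struct Region { size_t destination; size_t data_size; };
  struct Block  { size_t offset; };   // live words in the region before this block

  // Stand-in for ParMarkBitMap::live_words_in_range(); the point is that it is
  // now asked about at most one block's worth of bitmap.
  static size_t live_words_in_range(size_t from_word, size_t to_word) {
    (void)from_word; (void)to_word;
    return 0;  // placeholder
  }

  size_t calc_new_word(size_t addr_word, const Region& r, const Block& b) {
    if (r.data_size == RegionWords) {
      // Fully live region: destination plus offset within the region, no scan.
      return r.destination + (addr_word & (RegionWords - 1));
    }
    // Partially live region: destination + block offset + live words between
    // the start of addr's block and addr itself.
    const size_t block_start = addr_word & ~(BlockWords - 1);
    return r.destination + b.offset + live_words_in_range(block_start, addr_word);
  }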
*** 791,810 ****
}
void ParallelCompactData::verify_clear()
{
verify_clear(_region_vspace);
}
#endif // #ifdef ASSERT
- #ifdef NOT_PRODUCT
- ParallelCompactData::RegionData* debug_region(size_t region_index) {
- ParallelCompactData& sd = PSParallelCompact::summary_data();
- return sd.region(region_index);
- }
- #endif
-
STWGCTimer PSParallelCompact::_gc_timer;
ParallelOldTracer PSParallelCompact::_gc_tracer;
elapsedTimer PSParallelCompact::_accumulated_time;
unsigned int PSParallelCompact::_total_invocations = 0;
unsigned int PSParallelCompact::_maximum_compaction_gc_num = 0;
--- 824,837 ----
}
void ParallelCompactData::verify_clear()
{
verify_clear(_region_vspace);
+ verify_clear(_block_vspace);
}
#endif // #ifdef ASSERT
STWGCTimer PSParallelCompact::_gc_timer;
ParallelOldTracer PSParallelCompact::_gc_tracer;
elapsedTimer PSParallelCompact::_accumulated_time;
unsigned int PSParallelCompact::_total_invocations = 0;
unsigned int PSParallelCompact::_maximum_compaction_gc_num = 0;
*** 1995,2009 ****
PSParallelCompact::invoke_no_policy(clear_all_soft_refs ||
maximum_heap_compaction);
}
- bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) {
- size_t addr_region_index = addr_to_region_idx(addr);
- return region_index == addr_region_index;
- }
-
// This method contains no policy. You should probably
// be calling invoke() instead.
bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
assert(ref_processor() != NULL, "Sanity");
--- 2022,2031 ----
*** 2659,2668 ****
--- 2681,2725 ----
q->enqueue(new StealRegionCompactionTask(terminator_ptr));
}
}
}
+ #ifdef ASSERT
+ // Write a histogram of the number of times the block table was filled for a
+ // region.
+ void PSParallelCompact::write_block_fill_histogram(outputStream* const out)
+ {
+ if (!TraceParallelOldGCCompactionPhase) return;
+
+ typedef ParallelCompactData::RegionData rd_t;
+ ParallelCompactData& sd = summary_data();
+
+ for (unsigned int id = old_space_id; id < last_space_id; ++id) {
+ MutableSpace* const spc = _space_info[id].space();
+ if (spc->bottom() != spc->top()) {
+ const rd_t* const beg = sd.addr_to_region_ptr(spc->bottom());
+ HeapWord* const top_aligned_up = sd.region_align_up(spc->top());
+ const rd_t* const end = sd.addr_to_region_ptr(top_aligned_up);
+
+ size_t histo[5] = { 0, 0, 0, 0, 0 };
+ const size_t histo_len = sizeof(histo) / sizeof(size_t);
+ const size_t region_cnt = pointer_delta(end, beg, sizeof(rd_t));
+
+ for (const rd_t* cur = beg; cur < end; ++cur) {
+ ++histo[MIN2(cur->blocks_filled_count(), histo_len - 1)];
+ }
+ out->print("%u %-4s" SIZE_FORMAT_W(5), id, space_names[id], region_cnt);
+ for (size_t i = 0; i < histo_len; ++i) {
+ out->print(" " SIZE_FORMAT_W(5) " %5.1f%%",
+ histo[i], 100.0 * histo[i] / region_cnt);
+ }
+ out->cr();
+ }
+ }
+ }
+ #endif // #ifdef ASSERT
+
void PSParallelCompact::compact() {
// trace("5");
GCTraceTime tm("compaction phase", print_phases(), true, &_gc_timer);
ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
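The histogram above buckets per-region fill counts with MIN2(count, histo_len - 1), so counts 0 through 3 each get their own bucket and 4 or more fall into the last one. A tiny standalone sketch of that bucketing, with made-up fill counts:

  #include <algorithm>
  #include <cstddef>
  #include <cstdio>

  int main() {
    size_t histo[5] = {0, 0, 0, 0, 0};
    const size_t histo_len = sizeof(histo) / sizeof(histo[0]);
    const size_t fill_counts[] = {0, 1, 1, 2, 7, 3, 9};  // made-up per-region counts
    for (size_t c : fill_counts) {
      ++histo[std::min(c, histo_len - 1)];               // same idea as MIN2 above
    }
    for (size_t i = 0; i < histo_len; ++i) {
      printf("bucket %zu: %zu\n", i, histo[i]);
    }
    return 0;
  }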
*** 2700,2709 ****
--- 2757,2768 ----
ParCompactionManager* cm = ParCompactionManager::manager_array(0);
for (unsigned int id = old_space_id; id < last_space_id; ++id) {
update_deferred_objects(cm, SpaceId(id));
}
}
+
+ DEBUG_ONLY(write_block_fill_histogram(gclog_or_tty));
}
#ifdef ASSERT
void PSParallelCompact::verify_complete(SpaceId space_id) {
// All Regions between space bottom() to new_top() should be marked as filled
*** 3369,3378 ****
--- 3428,3488 ----
src_region_idx = next_src_region(closure, src_space_id, src_space_top,
end_addr);
} while (true);
}
+ void PSParallelCompact::fill_blocks(size_t region_idx)
+ {
+ // Fill in the block table elements for the specified region. Each block
+ // table element holds the number of live words in the region that are to the
+ // left of the first object that starts in the block. Thus only blocks in
+ // which an object starts need to be filled.
+ //
+ // The algorithm scans the section of the bitmap that corresponds to the
+ // region, keeping a running total of the live words. When an object start is
+ // found, if it's the first to start in the block that contains it, the
+ // current total is written to the block table element.
+ const size_t Log2BlockSize = ParallelCompactData::Log2BlockSize;
+ const size_t Log2RegionSize = ParallelCompactData::Log2RegionSize;
+ const size_t RegionSize = ParallelCompactData::RegionSize;
+
+ ParallelCompactData& sd = summary_data();
+ const size_t partial_obj_size = sd.region(region_idx)->partial_obj_size();
+ if (partial_obj_size >= RegionSize) {
+ return; // No objects start in this region.
+ }
+
+ // Ensure the first loop iteration decides that the block has changed.
+ size_t cur_block = sd.block_count();
+
+ const ParMarkBitMap* const bitmap = mark_bitmap();
+
+ const size_t Log2BitsPerBlock = Log2BlockSize - LogMinObjAlignment;
+ assert((size_t)1 << Log2BitsPerBlock ==
+ bitmap->words_to_bits(ParallelCompactData::BlockSize), "sanity");
+
+ size_t beg_bit = bitmap->words_to_bits(region_idx << Log2RegionSize);
+ const size_t range_end = beg_bit + bitmap->words_to_bits(RegionSize);
+ size_t live_bits = bitmap->words_to_bits(partial_obj_size);
+ beg_bit = bitmap->find_obj_beg(beg_bit + live_bits, range_end);
+ while (beg_bit < range_end) {
+ const size_t new_block = beg_bit >> Log2BitsPerBlock;
+ if (new_block != cur_block) {
+ cur_block = new_block;
+ sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits));
+ }
+
+ const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end);
+ if (end_bit < range_end - 1) {
+ live_bits += end_bit - beg_bit + 1;
+ beg_bit = bitmap->find_obj_beg(end_bit + 1, range_end);
+ } else {
+ return;
+ }
+ }
+ }
+
void
PSParallelCompact::move_and_update(ParCompactionManager* cm, SpaceId space_id) {
const MutableSpace* sp = space(space_id);
if (sp->is_empty()) {
return;
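To make the fill_blocks() loop above concrete, here is a toy model that replaces the mark bitmap with a precomputed list of (object start, size) pairs and shrinks the block to 8 words; it records the running live-word total only at the first object that starts in each block, mirroring the algorithm described in the comment at the top of fill_blocks():

  // Toy model only; not HotSpot code. Blocks that never get an object start
  // are left at -1 to show they are never written.
  #include <cstddef>
  #include <cstdio>
  #include <utility>
  #include <vector>

  int main() {
    const size_t BlockWords = 8;
    const std::vector<std::pair<size_t, size_t> > objs =
        { {1, 3}, {6, 2}, {17, 5}, {30, 1} };   // (start word, size in words)

    std::vector<long> block_offset(5, -1);
    size_t cur_block  = block_offset.size();    // force "block changed" on first object
    size_t live_words = 0;                      // running total, as in fill_blocks()

    for (size_t i = 0; i < objs.size(); ++i) {
      const size_t new_block = objs[i].first / BlockWords;
      if (new_block != cur_block) {
        cur_block = new_block;
        block_offset[cur_block] = (long)live_words;  // first object start in block
      }
      live_words += objs[i].second;
    }
    for (size_t b = 0; b < block_offset.size(); ++b) {
      printf("block %zu: offset %ld\n", b, block_offset[b]);
    }
    return 0;
  }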