src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp

rev 4535 : 6725714: par compact - add a table to speed up bitmap searches
Reviewed-by: jmasa, tschatzl

*** 62,79 ****
  #include "utilities/stack.inline.hpp"

  #include <math.h>

  // All sizes are in HeapWords.
! const size_t ParallelCompactData::Log2RegionSize = 9; // 512 words
  const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize;
  const size_t ParallelCompactData::RegionSizeBytes =
    RegionSize << LogHeapWordSize;
  const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1;
  const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1;
  const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask;

  const ParallelCompactData::RegionData::region_sz_t
  ParallelCompactData::RegionData::dc_shift = 27;

  const ParallelCompactData::RegionData::region_sz_t
  ParallelCompactData::RegionData::dc_mask = ~0U << dc_shift;
--- 62,91 ----
  #include "utilities/stack.inline.hpp"

  #include <math.h>

  // All sizes are in HeapWords.
! const size_t ParallelCompactData::Log2RegionSize = 16; // 64K words
  const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize;
  const size_t ParallelCompactData::RegionSizeBytes =
    RegionSize << LogHeapWordSize;
  const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1;
  const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1;
  const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask;

+ const size_t ParallelCompactData::Log2BlockSize = 7; // 128 words
+ const size_t ParallelCompactData::BlockSize = (size_t)1 << Log2BlockSize;
+ const size_t ParallelCompactData::BlockSizeBytes =
+   BlockSize << LogHeapWordSize;
+ const size_t ParallelCompactData::BlockSizeOffsetMask = BlockSize - 1;
+ const size_t ParallelCompactData::BlockAddrOffsetMask = BlockSizeBytes - 1;
+ const size_t ParallelCompactData::BlockAddrMask = ~BlockAddrOffsetMask;
+
+ const size_t ParallelCompactData::BlocksPerRegion = RegionSize / BlockSize;
+ const size_t ParallelCompactData::Log2BlocksPerRegion =
+   Log2RegionSize - Log2BlockSize;
+
  const ParallelCompactData::RegionData::region_sz_t
  ParallelCompactData::RegionData::dc_shift = 27;

  const ParallelCompactData::RegionData::region_sz_t
  ParallelCompactData::RegionData::dc_mask = ~0U << dc_shift;
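The new constants split each 64K-word region into 512 blocks of 128 words (1 KB with 8-byte heap words). Below is a minimal standalone sketch of the mask/shift arithmetic these constants imply; it is plain C++, independent of the HotSpot types, with LogHeapWordSize assumed to be 3 (64-bit heap words) and a made-up heap address.

    #include <cstdio>
    #include <cstddef>

    // Assumed: 64-bit heap words, so LogHeapWordSize == 3.
    const size_t LogHeapWordSize = 3;
    const size_t Log2RegionSize  = 16;                      // 64K words
    const size_t RegionSize      = (size_t)1 << Log2RegionSize;
    const size_t Log2BlockSize   = 7;                       // 128 words
    const size_t BlockSize       = (size_t)1 << Log2BlockSize;
    const size_t BlockSizeBytes  = BlockSize << LogHeapWordSize;
    const size_t BlockAddrOffsetMask = BlockSizeBytes - 1;
    const size_t BlockAddrMask   = ~BlockAddrOffsetMask;
    const size_t BlocksPerRegion = RegionSize / BlockSize;  // 512

    int main() {
      // A hypothetical heap base and byte address, block-unaligned on purpose.
      const size_t heap_start = (size_t)1 << 32;
      const size_t addr       = heap_start + 0x12345;

      // block_align_down: clear the low-order offset bits.
      const size_t block_base = addr & BlockAddrMask;
      // Index of the block within its region.
      const size_t word_off   = (addr - heap_start) >> LogHeapWordSize;
      const size_t block_in_region = (word_off & (RegionSize - 1)) >> Log2BlockSize;

      printf("blocks/region = %zu\n", BlocksPerRegion);     // 512
      printf("block_align_down(addr) = 0x%zx\n", block_base);
      printf("block index in region  = %zu\n", block_in_region);
      return 0;
    }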
*** 377,386 ****
--- 389,402 ----
    _region_start = 0;

    _region_vspace = 0;
    _region_data = 0;
    _region_count = 0;
+
+   _block_vspace = 0;
+   _block_data = 0;
+   _block_count = 0;
  }

  bool ParallelCompactData::initialize(MemRegion covered_region)
  {
    _region_start = covered_region.start();
*** 390,401 ****
    assert(region_align_down(_region_start) == _region_start,
           "region start not aligned");
    assert((region_size & RegionSizeOffsetMask) == 0,
           "region size not a multiple of RegionSize");

! bool result = initialize_region_data(region_size);
!
! return result;
  }

  PSVirtualSpace*
  ParallelCompactData::create_vspace(size_t count, size_t element_size)
--- 406,416 ----
    assert(region_align_down(_region_start) == _region_start,
           "region start not aligned");
    assert((region_size & RegionSizeOffsetMask) == 0,
           "region size not a multiple of RegionSize");

! bool result = initialize_region_data(region_size) && initialize_block_data();
! return result;
  }

  PSVirtualSpace*
  ParallelCompactData::create_vspace(size_t count, size_t element_size)
*** 436,456 ****
--- 451,490 ----
      return true;
    }
    return false;
  }

+ bool ParallelCompactData::initialize_block_data()
+ {
+   assert(_region_count != 0, "region data must be initialized first");
+   const size_t count = _region_count << Log2BlocksPerRegion;
+   _block_vspace = create_vspace(count, sizeof(BlockData));
+   if (_block_vspace != 0) {
+     _block_data = (BlockData*)_block_vspace->reserved_low_addr();
+     _block_count = count;
+     return true;
+   }
+   return false;
+ }
+
  void ParallelCompactData::clear()
  {
    memset(_region_data, 0, _region_vspace->committed_size());
+   memset(_block_data, 0, _block_vspace->committed_size());
  }

  void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) {
    assert(beg_region <= _region_count, "beg_region out of range");
    assert(end_region <= _region_count, "end_region out of range");
+   assert(RegionSize % BlockSize == 0, "RegionSize not a multiple of BlockSize");

    const size_t region_cnt = end_region - beg_region;
    memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData));
+
+   const size_t beg_block = beg_region * BlocksPerRegion;
+   const size_t block_cnt = region_cnt * BlocksPerRegion;
+   memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData));
  }

  HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const
  {
    const RegionData* cur_cp = region(region_idx);
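As a quick check on the sizes this initialization implies, here is a back-of-the-envelope sketch; the 1 GB heap and 8-byte word size are assumptions, and sizeof(BlockData) is defined in the header, which is not part of this diff.

    #include <cstdio>
    #include <cstddef>

    int main() {
      // Hypothetical 1 GB heap with 8-byte HeapWords (both assumptions).
      const size_t heap_words   = ((size_t)1 << 30) >> 3;
      const size_t region_count = heap_words >> 16;   // Log2RegionSize == 16
      const size_t block_count  = region_count << 9;  // Log2BlocksPerRegion == 16 - 7

      printf("regions: %zu\n", region_count);         // 2048
      printf("blocks per region: %d\n", 1 << 9);      // 512
      printf("blocks total: %zu\n", block_count);     // 1048576
      return 0;
    }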
*** 725,773 ****
    return true;
  }

  HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) {
    assert(addr != NULL, "Should detect NULL oop earlier");
!   assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
! #ifdef ASSERT
!   if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) {
!     gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr);
!   }
! #endif
!   assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");

    // Region covering the object.
!   size_t region_index = addr_to_region_idx(addr);
!   const RegionData* const region_ptr = region(region_index);
!   HeapWord* const region_addr = region_align_down(addr);
!
!   assert(addr < region_addr + RegionSize, "Region does not cover object");
!   assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check");
!
    HeapWord* result = region_ptr->destination();

!   // If all the data in the region is live, then the new location of the object
!   // can be calculated from the destination of the region plus the offset of the
!   // object in the region.
    if (region_ptr->data_size() == RegionSize) {
!     result += pointer_delta(addr, region_addr);
!     DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
      return result;
    }

!   // The new location of the object is
!   //    region destination +
!   //    size of the partial object extending onto the region +
!   //    sizes of the live objects in the Region that are to the left of addr
!   const size_t partial_obj_size = region_ptr->partial_obj_size();
!   HeapWord* const search_start = region_addr + partial_obj_size;
!
!   const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
!   size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
!
!   result += partial_obj_size + live_to_left;
!   DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
    return result;
  }

  klassOop ParallelCompactData::calc_new_klass(klassOop old_klass) {
    klassOop updated_klass;
--- 759,806 ----
    return true;
  }

  HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) {
    assert(addr != NULL, "Should detect NULL oop earlier");
!   assert(PSParallelCompact::gc_heap()->is_in(addr), "not in heap");
!   assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "not marked");

    // Region covering the object.
!   RegionData* const region_ptr = addr_to_region_ptr(addr);
    HeapWord* result = region_ptr->destination();

!   // If the entire Region is live, the new location is region->destination + the
!   // offset of the object within the Region.
!
!   // Run some performance tests to determine if this special case pays off.  It
!   // is worth it for pointers into the dense prefix.  If the optimization to
!   // avoid pointer updates in regions that only point to the dense prefix is
!   // ever implemented, this should be revisited.
    if (region_ptr->data_size() == RegionSize) {
!     result += region_offset(addr);
      return result;
    }

!   // Otherwise, the new location is region->destination + block offset + the
!   // number of live words in the Block that are (a) to the left of addr and (b)
!   // due to objects that start in the Block.
!
!   // Fill in the block table if necessary.  This is unsynchronized, so multiple
!   // threads may fill the block table for a region (harmless, since it is
!   // idempotent).
!   if (!region_ptr->blocks_filled()) {
!     PSParallelCompact::fill_blocks(addr_to_region_idx(addr));
!     region_ptr->set_blocks_filled();
!   }
!
!   HeapWord* const search_start = block_align_down(addr);
!   const size_t block_offset = addr_to_block_ptr(addr)->offset();
!
    const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
!   const size_t live = bitmap->live_words_in_range(search_start, oop(addr));
!   result += block_offset + live;
!   DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result));
    return result;
  }

  klassOop ParallelCompactData::calc_new_klass(klassOop old_klass) {
    klassOop updated_klass;
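The arithmetic of the two cases, with invented numbers; this is a sketch of the address computation only, not the HotSpot types, and all quantities are in HeapWords.

    #include <cstdio>
    #include <cstddef>

    // Hypothetical numbers illustrating calc_new_pointer()'s fast path vs.
    // block-table path (all quantities in HeapWords).
    int main() {
      const size_t destination   = 0x5000;  // region_ptr->destination()
      const size_t region_offset = 0x123;   // offset of addr within its region

      // Case 1: fully live region -- the new location is a simple offset add.
      size_t result = destination + region_offset;
      printf("dense case: 0x%zx\n", result);

      // Case 2: partially live region -- consult the block table.
      const size_t block_offset  = 300;  // live words left of the block's first object
      const size_t live_in_block = 17;   // live words in-block, left of addr
      result = destination + block_offset + live_in_block;
      printf("block case: 0x%zx\n", result);
      return 0;
    }

The payoff over the old code is the starting point of the bitmap search: live_words_in_range() now begins at block_align_down(addr) rather than at the region start plus the partial object size, so it scans at most one 128-word block of the bitmap instead of up to a whole 64K-word region.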
*** 791,810 ****
  }

  void ParallelCompactData::verify_clear()
  {
    verify_clear(_region_vspace);
  }
  #endif  // #ifdef ASSERT

- #ifdef NOT_PRODUCT
- ParallelCompactData::RegionData* debug_region(size_t region_index) {
-   ParallelCompactData& sd = PSParallelCompact::summary_data();
-   return sd.region(region_index);
- }
- #endif
-
  STWGCTimer          PSParallelCompact::_gc_timer;
  ParallelOldTracer   PSParallelCompact::_gc_tracer;
  elapsedTimer        PSParallelCompact::_accumulated_time;
  unsigned int        PSParallelCompact::_total_invocations = 0;
  unsigned int        PSParallelCompact::_maximum_compaction_gc_num = 0;
--- 824,837 ----
  }

  void ParallelCompactData::verify_clear()
  {
    verify_clear(_region_vspace);
+   verify_clear(_block_vspace);
  }
  #endif  // #ifdef ASSERT

  STWGCTimer          PSParallelCompact::_gc_timer;
  ParallelOldTracer   PSParallelCompact::_gc_tracer;
  elapsedTimer        PSParallelCompact::_accumulated_time;
  unsigned int        PSParallelCompact::_total_invocations = 0;
  unsigned int        PSParallelCompact::_maximum_compaction_gc_num = 0;
*** 1995,2009 ****
    PSParallelCompact::invoke_no_policy(clear_all_soft_refs ||
                                        maximum_heap_compaction);
  }

- bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) {
-   size_t addr_region_index = addr_to_region_idx(addr);
-   return region_index == addr_region_index;
- }
-
  // This method contains no policy. You should probably
  // be calling invoke() instead.
  bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
    assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
    assert(ref_processor() != NULL, "Sanity");
--- 2022,2031 ----
*** 2659,2668 ****
--- 2681,2725 ----
        q->enqueue(new StealRegionCompactionTask(terminator_ptr));
      }
    }
  }

+ #ifdef ASSERT
+ // Write a histogram of the number of times the block table was filled for a
+ // region.
+ void PSParallelCompact::write_block_fill_histogram(outputStream* const out)
+ {
+   if (!TraceParallelOldGCCompactionPhase) return;
+
+   typedef ParallelCompactData::RegionData rd_t;
+   ParallelCompactData& sd = summary_data();
+
+   for (unsigned int id = old_space_id; id < last_space_id; ++id) {
+     MutableSpace* const spc = _space_info[id].space();
+     if (spc->bottom() != spc->top()) {
+       const rd_t* const beg = sd.addr_to_region_ptr(spc->bottom());
+       HeapWord* const top_aligned_up = sd.region_align_up(spc->top());
+       const rd_t* const end = sd.addr_to_region_ptr(top_aligned_up);
+
+       size_t histo[5] = { 0, 0, 0, 0, 0 };
+       const size_t histo_len = sizeof(histo) / sizeof(size_t);
+       const size_t region_cnt = pointer_delta(end, beg, sizeof(rd_t));
+
+       for (const rd_t* cur = beg; cur < end; ++cur) {
+         ++histo[MIN2(cur->blocks_filled_count(), histo_len - 1)];
+       }
+       out->print("%u %-4s" SIZE_FORMAT_W(5), id, space_names[id], region_cnt);
+       for (size_t i = 0; i < histo_len; ++i) {
+         out->print(" " SIZE_FORMAT_W(5) " %5.1f%%",
+                    histo[i], 100.0 * histo[i] / region_cnt);
+       }
+       out->cr();
+     }
+   }
+ }
+ #endif // #ifdef ASSERT
+
  void PSParallelCompact::compact() {
    // trace("5");
    GCTraceTime tm("compaction phase", print_phases(), true, &_gc_timer);

    ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
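Each space produces one row: space id and name, the number of regions, then five count/percentage pairs for regions whose block table was filled 0, 1, 2, 3, or 4+ times. A hypothetical row for a 2048-region old space might look like the following (the numbers are invented; the layout follows the print calls above):

    0 old  2048  1843  90.0%   164   8.0%    25   1.2%    10   0.5%     6   0.3%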
*** 2700,2709 ****
--- 2757,2768 ----
      ParCompactionManager* cm = ParCompactionManager::manager_array(0);
      for (unsigned int id = old_space_id; id < last_space_id; ++id) {
        update_deferred_objects(cm, SpaceId(id));
      }
    }
+
+   DEBUG_ONLY(write_block_fill_histogram(gclog_or_tty));
  }

  #ifdef ASSERT
  void PSParallelCompact::verify_complete(SpaceId space_id) {
    // All Regions between space bottom() and new_top() should be marked as filled
*** 3369,3378 ****
--- 3428,3488 ----
      src_region_idx = next_src_region(closure, src_space_id, src_space_top,
                                       end_addr);
    } while (true);
  }

+ void PSParallelCompact::fill_blocks(size_t region_idx)
+ {
+   // Fill in the block table elements for the specified region.  Each block
+   // table element holds the number of live words in the region that are to the
+   // left of the first object that starts in the block.  Thus only blocks in
+   // which an object starts need to be filled.
+   //
+   // The algorithm scans the section of the bitmap that corresponds to the
+   // region, keeping a running total of the live words.  When an object start is
+   // found, if it's the first to start in the block that contains it, the
+   // current total is written to the block table element.
+   const size_t Log2BlockSize = ParallelCompactData::Log2BlockSize;
+   const size_t Log2RegionSize = ParallelCompactData::Log2RegionSize;
+   const size_t RegionSize = ParallelCompactData::RegionSize;
+
+   ParallelCompactData& sd = summary_data();
+   const size_t partial_obj_size = sd.region(region_idx)->partial_obj_size();
+   if (partial_obj_size >= RegionSize) {
+     return; // No objects start in this region.
+   }
+
+   // Ensure the first loop iteration decides that the block has changed.
+   size_t cur_block = sd.block_count();
+
+   const ParMarkBitMap* const bitmap = mark_bitmap();
+
+   const size_t Log2BitsPerBlock = Log2BlockSize - LogMinObjAlignment;
+   assert((size_t)1 << Log2BitsPerBlock ==
+          bitmap->words_to_bits(ParallelCompactData::BlockSize), "sanity");
+
+   size_t beg_bit = bitmap->words_to_bits(region_idx << Log2RegionSize);
+   const size_t range_end = beg_bit + bitmap->words_to_bits(RegionSize);
+   size_t live_bits = bitmap->words_to_bits(partial_obj_size);
+   beg_bit = bitmap->find_obj_beg(beg_bit + live_bits, range_end);
+   while (beg_bit < range_end) {
+     const size_t new_block = beg_bit >> Log2BitsPerBlock;
+     if (new_block != cur_block) {
+       cur_block = new_block;
+       sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits));
+     }
+
+     const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end);
+     if (end_bit < range_end - 1) {
+       live_bits += end_bit - beg_bit + 1;
+       beg_bit = bitmap->find_obj_beg(end_bit + 1, range_end);
+     } else {
+       return;
+     }
+   }
+ }
+
  void PSParallelCompact::move_and_update(ParCompactionManager* cm, SpaceId space_id) {
    const MutableSpace* sp = space(space_id);
    if (sp->is_empty()) {
      return;
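To see the bookkeeping of fill_blocks() in isolation, here is a toy re-implementation of the scan. This is an illustrative sketch, not HotSpot code: one "bit" per word (LogMinObjAlignment taken as 0), a 32-word region, 8-word blocks, an invented object layout, and no partial object extending in from the previous region.

    #include <cstdio>
    #include <cstddef>
    #include <utility>
    #include <vector>

    int main() {
      const size_t RegionSize = 32, BlockSize = 8;      // toy sizes, in words
      const size_t num_blocks = RegionSize / BlockSize; // 4

      // (first word, last word) of each object starting in the region, in the
      // order find_obj_beg()/find_obj_end() would visit them. Invented layout.
      const std::pair<size_t, size_t> objs[] = { {2, 5}, {10, 13}, {26, 30} };

      std::vector<long> offset(num_blocks, -1); // -1: no object starts here
      size_t live_words = 0;          // running total; partial_obj_size is 0 here
      size_t cur_block = num_blocks;  // out of range, so the first block "changes"

      for (const std::pair<size_t, size_t>& obj : objs) {
        const size_t new_block = obj.first / BlockSize;
        if (new_block != cur_block) {
          // First object starting in this block: record live words to its left.
          cur_block = new_block;
          offset[cur_block] = (long)live_words;
        }
        live_words += obj.second - obj.first + 1;  // object size in words
      }

      for (size_t b = 0; b < num_blocks; ++b) {
        printf("block %zu: offset %ld\n", b, offset[b]);
      }
      // Prints offsets 0, 4, -1 (no object starts in block 2), 8.
      // With the table filled, calc_new_pointer() only has to scan the bitmap
      // within a single 128-word block instead of from the region start.
      return 0;
    }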