--- old/src/share/vm/gc/g1/concurrentMarkThread.cpp 2016-03-18 13:05:55.005275890 +0100 +++ new/src/share/vm/gc/g1/concurrentMarkThread.cpp 2016-03-18 13:05:54.909273024 +0100 @@ -183,6 +183,11 @@ } } while (cm()->restart_for_overflow()); + if (!cm()->has_aborted()) { + G1ConcPhaseTimer t(_cm, "Concurrent Create Live Data"); + cm()->create_live_data(); + } + double end_time = os::elapsedVTime(); // Update the total virtual time before doing this, since it will try // to measure it to get the vtime for this marking. We purposely --- old/src/share/vm/gc/g1/g1ConcurrentMark.cpp 2016-03-18 13:05:55.521291297 +0100 +++ new/src/share/vm/gc/g1/g1ConcurrentMark.cpp 2016-03-18 13:05:55.424288401 +0100 @@ -48,6 +48,7 @@ #include "gc/shared/taskqueue.inline.hpp" #include "gc/shared/vmGCOperations.hpp" #include "logging/log.hpp" +#include "logging/logTag.hpp" #include "memory/allocation.hpp" #include "memory/resourceArea.hpp" #include "oops/oop.inline.hpp" @@ -355,10 +356,8 @@ _sleep_factor(0.0), _marking_task_overhead(1.0), _cleanup_list("Cleanup List"), - _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/), - _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >> - CardTableModRefBS::card_shift, - false /* in_resource_area*/), + _region_live_bm(), + _card_live_bm(), _prevMarkBitMap(&_markBitMap1), _nextMarkBitMap(&_markBitMap2), @@ -390,8 +389,6 @@ _parallel_workers(NULL), - _count_card_bitmaps(NULL), - _count_marked_bytes(NULL), _completed_initialization(false) { _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage); @@ -502,43 +499,28 @@ return; } - _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC); - _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC); + allocate_internal_bitmaps(); - _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC); - _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC); + if (G1PretouchAuxiliaryMemory) { + 
pretouch_internal_bitmaps(); + } - BitMap::idx_t card_bm_size = _card_bm.size(); + _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC); + _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC); // so that the assertion in MarkingTaskQueue::task_queue doesn't fail _active_tasks = _max_worker_id; - uint max_regions = _g1h->max_regions(); for (uint i = 0; i < _max_worker_id; ++i) { G1CMTaskQueue* task_queue = new G1CMTaskQueue(); task_queue->initialize(); _task_queues->register_queue(i, task_queue); - _count_card_bitmaps[i] = BitMap(card_bm_size, false); - _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC); - - _tasks[i] = new G1CMTask(i, this, - _count_marked_bytes[i], - &_count_card_bitmaps[i], - task_queue, _task_queues); + _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues); _accum_task_vtime[i] = 0.0; } - // Calculate the card number for the bottom of the heap. Used - // in biasing indexes into the accounting card bitmaps. - _heap_bottom_card_num = - intptr_t(uintptr_t(_g1h->reserved_region().start()) >> - CardTableModRefBS::card_shift); - - // Clear all the liveness counting data - clear_all_count_data(); - // so that the call below can read a sensible value _heap_start = g1h->reserved_region().start(); set_non_marking_state(); @@ -716,10 +698,11 @@ clear_bitmap(_nextMarkBitMap, _parallel_workers, true); - // Clear the liveness counting data. If the marking has been aborted, the abort() + // Clear the live count data. If the marking has been aborted, the abort() // call already did that. if (!has_aborted()) { - clear_all_count_data(); + clear_all_live_data(_parallel_workers); + DEBUG_ONLY(verify_all_live_data()); } // Repeat the asserts from above. @@ -1107,14 +1090,6 @@ // marking due to overflowing the global mark stack. reset_marking_state(); } else { - { - GCTraceTime(Debug, gc) trace("Aggregate Data", _gc_timer_cm); - - // Aggregate the per-task counting data that we have accumulated - // while marking. 
- aggregate_count_data(); - } - SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); // We're done with marking. // This is the end of the marking cycle, we're expected all @@ -1150,17 +1125,81 @@ _gc_tracer_cm->report_object_count_after_gc(&is_alive); } -// Base class of the closures that finalize and verify the -// liveness counting data. -class G1CMCountDataClosureBase: public HeapRegionClosure { -protected: - G1CollectedHeap* _g1h; - G1ConcurrentMark* _cm; - CardTableModRefBS* _ct_bs; - +// Helper class that provides functionality to generate the Live Data Count +// information. +class G1LiveDataHelper VALUE_OBJ_CLASS_SPEC { +private: BitMap* _region_bm; BitMap* _card_bm; + // The card number of the bottom of the G1 heap. Used for converting addresses + // to bitmap indices quickly. + BitMap::idx_t _heap_card_bias; + + // Utility routine to set an exclusive range of bits on the given + // bitmap, optimized for very small ranges. + // There must be at least one bit to set. + inline void set_card_bitmap_range(BitMap* bm, + BitMap::idx_t start_idx, + BitMap::idx_t end_idx) { + + // Set the exclusive bit range [start_idx, end_idx). + assert((end_idx - start_idx) > 0, "at least one bit"); + assert(end_idx <= bm->size(), "sanity"); + + // For small ranges use a simple loop; otherwise use set_range or + // use par_at_put_range (if parallel). The range is made up of the + // cards that are spanned by an object/mem region so 8 cards will + // allow up to object sizes up to 4K to be handled using the loop. + if ((end_idx - start_idx) <= 8) { + for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) { + bm->set_bit(i); + } + } else { + bm->set_range(start_idx, end_idx); + } + } + + // We cache the last mark set. This avoids setting the same bit multiple times. + // This is particularly interesting for dense bitmaps, as this avoids doing + // lots of work most of the time. 
+ BitMap::idx_t _last_marked_bit_idx; + + // Mark the card liveness bitmap for the object spanning from start to end. + void mark_card_bitmap_range(HeapWord* start, HeapWord* end) { + BitMap::idx_t start_idx = card_live_bitmap_index_for(start); + BitMap::idx_t end_idx = card_live_bitmap_index_for((HeapWord*)align_ptr_up(end, CardTableModRefBS::card_size)); + + assert((end_idx - start_idx) > 0, "Trying to mark zero sized range."); + + if (start_idx == _last_marked_bit_idx) { + start_idx++; + } + if (start_idx == end_idx) { + return; + } + + // Set the bits in the card bitmap for the cards spanned by this object. + set_card_bitmap_range(_card_bm, start_idx, end_idx); + _last_marked_bit_idx = end_idx - 1; + } + + void reset_mark_cache() { + _last_marked_bit_idx = (BitMap::idx_t)-1; + } + +public: + // Returns the index in the per-card liveness count bitmap + // for the given address + inline BitMap::idx_t card_live_bitmap_index_for(HeapWord* addr) { + // Below, the term "card num" means the result of shifting an address + // by the card shift -- address 0 corresponds to card number 0. One + // must subtract the card num of the bottom of the heap to obtain a + // card table index. + BitMap::idx_t card_num = (BitMap::idx_t)(uintptr_t(addr) >> CardTableModRefBS::card_shift); + return card_num - _heap_card_bias; + } + // Takes a region that's not empty (i.e., it has at least one // live object in it and sets its corresponding bit on the region // bitmap to 1. @@ -1169,136 +1208,128 @@ _region_bm->par_at_put(index, true); } -public: - G1CMCountDataClosureBase(G1CollectedHeap* g1h, - BitMap* region_bm, BitMap* card_bm): - _g1h(g1h), _cm(g1h->concurrent_mark()), - _ct_bs(barrier_set_cast(g1h->barrier_set())), - _region_bm(region_bm), _card_bm(card_bm) { } -}; + // Mark the range of bits covered by allocations done since the last marking + // in the given heap region, i.e. from NTAMS to top of the given region. 
+  // Returns whether there has been some allocation in this region since the last marking.
+  bool mark_allocated_since_marking(HeapRegion* hr) {
+    reset_mark_cache();

-// Closure that calculates the # live objects per region. Used
-// for verification purposes during the cleanup pause.
-class CalcLiveObjectsClosure: public G1CMCountDataClosureBase {
-  G1CMBitMapRO* _bm;
-  size_t _region_marked_bytes;
+    HeapWord* ntams = hr->next_top_at_mark_start();
+    HeapWord* top = hr->top();

-public:
-  CalcLiveObjectsClosure(G1CMBitMapRO *bm, G1CollectedHeap* g1h,
-                         BitMap* region_bm, BitMap* card_bm) :
-    G1CMCountDataClosureBase(g1h, region_bm, card_bm),
-    _bm(bm), _region_marked_bytes(0) { }
+    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
+
+    // Mark the allocated-since-marking portion...
+    if (ntams < top) {
+      mark_card_bitmap_range(ntams, top);
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  // Mark the range of bits covered by live objects on the mark bitmap between
+  // bottom and NTAMS of the given region.
+  // Returns the number of live bytes marked within that area for the given
+  // heap region (zero if it belongs to an unmarked, i.e. dead, humongous object).
+  size_t mark_marked_during_marking(G1CMBitMap* mark_bitmap, HeapRegion* hr) {
+    reset_mark_cache();
+
+    size_t marked_bytes = 0;

-  bool doHeapRegion(HeapRegion* hr) {
     HeapWord* ntams = hr->next_top_at_mark_start();
     HeapWord* start = hr->bottom();
+    if (ntams <= start) {
+      // Skip empty regions.
+      return 0;
+    } else if (hr->is_humongous()) {
+      // A humongous object is live only if the bottom of its start region is marked.
+      if (!mark_bitmap->isMarked(hr->humongous_start_region()->bottom())) { return 0; }
+      mark_card_bitmap_range(start, hr->top());
+      return pointer_delta(hr->top(), start, 1);
+    }
+
     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
            "Preconditions not met - "
            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
            p2i(start), p2i(ntams), p2i(hr->end()));
     // Find the first marked object at or after "start".
-    start = _bm->getNextMarkedWordAddress(start, ntams);
-
-    size_t marked_bytes = 0;
-
+    start = mark_bitmap->getNextMarkedWordAddress(start, ntams);
     while (start < ntams) {
       oop obj = oop(start);
       int obj_sz = obj->size();
       HeapWord* obj_end = start + obj_sz;
-      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
-      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
-
-      // Note: if we're looking at the last region in heap - obj_end
-      // could be actually just beyond the end of the heap; end_idx
-      // will then correspond to a (non-existent) card that is also
-      // just beyond the heap.
-      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
-        // end of object is not card aligned - increment to cover
-        // all the cards spanned by the object
-        end_idx += 1;
-      }
-
-      // Set the bits in the card BM for the cards spanned by this object.
-      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
+      assert(obj_end <= hr->end(), "Humongous objects must have been handled elsewhere.");
+
+      mark_card_bitmap_range(start, obj_end);
       // Add the size of this object to the number of marked bytes.
       marked_bytes += (size_t)obj_sz * HeapWordSize;
-      // This will happen if we are handling a humongous object that spans
-      // several heap regions.
-      if (obj_end > hr->end()) {
-        break;
-      }
       // Find the next marked object after this one.
-      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
-    }
-
-    // Mark the allocated-since-marking portion...
-    HeapWord* top = hr->top();
-    if (ntams < top) {
-      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
-      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
-
-      // Note: if we're looking at the last region in heap - top
-      // could be actually just beyond the end of the heap; end_idx
-      // will then correspond to a (non-existent) card that is also
-      // just beyond the heap.
-      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
-        // end of object is not card aligned - increment to cover
-        // all the cards spanned by the object
-        end_idx += 1;
-      }
-      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
-
-      // This definitely means the region has live objects.
-      set_bit_for_region(hr);
-    }
-
-    // Update the live region bitmap.
-    if (marked_bytes > 0) {
-      set_bit_for_region(hr);
+      start = mark_bitmap->getNextMarkedWordAddress(obj_end, ntams);
     }
-    // Set the marked bytes for the current region so that
-    // it can be queried by a calling verification routine
-    _region_marked_bytes = marked_bytes;
-
-    return false;
+    return marked_bytes;
   }
-  size_t region_marked_bytes() const { return _region_marked_bytes; }
+  G1LiveDataHelper(BitMap* region_bm,
+                   BitMap* card_bm):
+    _region_bm(region_bm),
+    _card_bm(card_bm), _last_marked_bit_idx((BitMap::idx_t)-1) {
+    assert(card_bm != NULL, ""); // region_bm may be NULL for users that only fill the card bitmap.
+    // Calculate the card number for the bottom of the heap. Used
+    // in biasing indexes into the accounting card bitmaps.
+    _heap_card_bias =
+      (BitMap::idx_t)(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> CardTableModRefBS::card_shift);
+  }
 };
-// Heap region closure used for verifying the counting data
-// that was accumulated concurrently and aggregated during
+// Heap region closure used for verifying the live count data
+// that was created concurrently and finalized during
 // the remark pause. This closure is applied to the heap
 // regions during the STW cleanup pause.
- -class VerifyLiveObjectDataHRClosure: public HeapRegionClosure { +class G1VerifyLiveDataHRClosure: public HeapRegionClosure { +private: G1CollectedHeap* _g1h; - G1ConcurrentMark* _cm; - CalcLiveObjectsClosure _calc_cl; - BitMap* _region_bm; // Region BM to be verified - BitMap* _card_bm; // Card BM to be verified + G1CMBitMap* _mark_bitmap; + G1LiveDataHelper _calc_helper; + + BitMap* _act_region_bm; // Region BM to be verified + BitMap* _act_card_bm; // Card BM to be verified BitMap* _exp_region_bm; // Expected Region BM values BitMap* _exp_card_bm; // Expected card BM values int _failures; + // Updates the live data count for the given heap region and returns the number + // of bytes marked. + size_t create_live_data_count(HeapRegion* hr) { + size_t bytes_marked = _calc_helper.mark_marked_during_marking(_mark_bitmap, hr); + bool allocated_since_marking = _calc_helper.mark_allocated_since_marking(hr); + if (allocated_since_marking || bytes_marked > 0) { + _calc_helper.set_bit_for_region(hr); + } + return bytes_marked; + } + public: - VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h, - BitMap* region_bm, - BitMap* card_bm, - BitMap* exp_region_bm, - BitMap* exp_card_bm) : - _g1h(g1h), _cm(g1h->concurrent_mark()), - _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm), - _region_bm(region_bm), _card_bm(card_bm), - _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm), + G1VerifyLiveDataHRClosure(G1CollectedHeap* g1h, + G1CMBitMap* mark_bitmap, + BitMap* act_region_bm, + BitMap* act_card_bm, + BitMap* exp_region_bm, + BitMap* exp_card_bm) : + _g1h(g1h), + _mark_bitmap(mark_bitmap), + _calc_helper(exp_region_bm, exp_card_bm), + _act_region_bm(act_region_bm), + _act_card_bm(act_card_bm), + _exp_region_bm(exp_region_bm), + _exp_card_bm(exp_card_bm), _failures(0) { } int failures() const { return _failures; } @@ -1306,35 +1337,16 @@ bool doHeapRegion(HeapRegion* hr) { int failures = 0; - // Call the CalcLiveObjectsClosure to walk the marking bitmap for 
- // this region and set the corresponding bits in the expected region - // and card bitmaps. - bool res = _calc_cl.doHeapRegion(hr); - assert(res == false, "should be continuing"); - - // Verify the marked bytes for this region. - size_t exp_marked_bytes = _calc_cl.region_marked_bytes(); + // Walk the marking bitmap for this region and set the corresponding bits + // in the expected region and card bitmaps. + size_t exp_marked_bytes = create_live_data_count(hr); size_t act_marked_bytes = hr->next_marked_bytes(); + // Verify the marked bytes for this region. - if (exp_marked_bytes > act_marked_bytes) { - if (hr->is_starts_humongous()) { - // For start_humongous regions, the size of the whole object will be - // in exp_marked_bytes. - HeapRegion* region = hr; - int num_regions; - for (num_regions = 0; region != NULL; num_regions++) { - region = _g1h->next_region_in_humongous(region); - } - if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) { - failures += 1; - } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) { - failures += 1; - } - } else { - // We're not OK if expected marked bytes > actual marked bytes. It means - // we have missed accounting some objects during the actual marking. - failures += 1; - } + if (exp_marked_bytes != act_marked_bytes) { + failures += 1; + } else if (exp_marked_bytes > HeapRegion::GrainBytes) { + failures += 1; } // Verify the bit, for this region, in the actual and expected @@ -1344,7 +1356,7 @@ BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index(); bool expected = _exp_region_bm->at(index); - bool actual = _region_bm->at(index); + bool actual = _act_region_bm->at(index); if (expected && !actual) { failures += 1; } @@ -1353,12 +1365,12 @@ // region match. We have an error if we have a set bit in the expected // bit map and the corresponding bit in the actual bitmap is not set. 
- BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); - BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); + BitMap::idx_t start_idx = _calc_helper.card_live_bitmap_index_for(hr->bottom()); + BitMap::idx_t end_idx = _calc_helper.card_live_bitmap_index_for(hr->top()); for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { expected = _exp_card_bm->at(i); - actual = _card_bm->at(i); + actual = _act_card_bm->at(i); if (expected && !actual) { failures += 1; @@ -1373,137 +1385,100 @@ } }; -class G1ParVerifyFinalCountTask: public AbstractGangTask { +class G1VerifyLiveDataTask: public AbstractGangTask { protected: G1CollectedHeap* _g1h; - G1ConcurrentMark* _cm; + G1CMBitMap* _mark_bitmap; BitMap* _actual_region_bm; BitMap* _actual_card_bm; - uint _n_workers; - - BitMap* _expected_region_bm; - BitMap* _expected_card_bm; + BitMap _expected_region_bm; + BitMap _expected_card_bm; int _failures; - HeapRegionClaimer _hrclaimer; + HeapRegionClaimer _hr_claimer; public: - G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, - BitMap* region_bm, BitMap* card_bm, - BitMap* expected_region_bm, BitMap* expected_card_bm) - : AbstractGangTask("G1 verify final counting"), - _g1h(g1h), _cm(_g1h->concurrent_mark()), - _actual_region_bm(region_bm), _actual_card_bm(card_bm), - _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), - _failures(0), - _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { + G1VerifyLiveDataTask(G1CollectedHeap* g1h, + G1CMBitMap* bitmap, + BitMap* region_bm, + BitMap* card_bm, + uint n_workers) + : AbstractGangTask("G1 verify final counting"), + _g1h(g1h), + _mark_bitmap(bitmap), + _actual_region_bm(region_bm), + _actual_card_bm(card_bm), + _expected_region_bm(region_bm->size(), true /* in_resource_area */), + _expected_card_bm(card_bm->size(), true /* in_resource_area */), + _failures(0), + _hr_claimer(n_workers) { assert(VerifyDuringGC, "don't call this otherwise"); - 
assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); - assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); } void work(uint worker_id) { - assert(worker_id < _n_workers, "invariant"); - - VerifyLiveObjectDataHRClosure verify_cl(_g1h, - _actual_region_bm, _actual_card_bm, - _expected_region_bm, - _expected_card_bm); + G1VerifyLiveDataHRClosure cl(_g1h, + _mark_bitmap, + _actual_region_bm, + _actual_card_bm, + &_expected_region_bm, + &_expected_card_bm); + _g1h->heap_region_par_iterate(&cl, worker_id, &_hr_claimer); - _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer); - - Atomic::add(verify_cl.failures(), &_failures); + Atomic::add(cl.failures(), &_failures); } int failures() const { return _failures; } }; -// Closure that finalizes the liveness counting data. -// Used during the cleanup pause. -// Sets the bits corresponding to the interval [NTAMS, top] -// (which contains the implicitly live objects) in the -// card liveness bitmap. Also sets the bit for each region, -// containing live data, in the region liveness bitmap. - -class FinalCountDataUpdateClosure: public G1CMCountDataClosureBase { - public: - FinalCountDataUpdateClosure(G1CollectedHeap* g1h, - BitMap* region_bm, - BitMap* card_bm) : - G1CMCountDataClosureBase(g1h, region_bm, card_bm) { } - - bool doHeapRegion(HeapRegion* hr) { - HeapWord* ntams = hr->next_top_at_mark_start(); - HeapWord* top = hr->top(); - - assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); - - // Mark the allocated-since-marking portion... - if (ntams < top) { - // This definitely means the region has live objects. 
- set_bit_for_region(hr); - - // Now set the bits in the card bitmap for [ntams, top) - BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); - BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); - - // Note: if we're looking at the last region in heap - top - // could be actually just beyond the end of the heap; end_idx - // will then correspond to a (non-existent) card that is also - // just beyond the heap. - if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { - // end of object is not card aligned - increment to cover - // all the cards spanned by the object - end_idx += 1; - } - - assert(end_idx <= _card_bm->size(), - "oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, - end_idx, _card_bm->size()); - assert(start_idx < _card_bm->size(), - "oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT, - start_idx, _card_bm->size()); - - _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); - } - - // Set the bit for the region if it contains live data - if (hr->next_marked_bytes() > 0) { - set_bit_for_region(hr); +class G1FinalizeLiveDataTask: public AbstractGangTask { + // Finalizes the liveness counting data. + // Sets the bits corresponding to the interval [NTAMS, top] + // (which contains the implicitly live objects) in the + // card liveness bitmap. Also sets the bit for each region + // containing live data, in the region liveness bitmap. 
+ class G1FinalizeCountDataClosure: public HeapRegionClosure { + private: + G1LiveDataHelper _helper; + public: + G1FinalizeCountDataClosure(G1CMBitMap* bitmap, + BitMap* region_bm, + BitMap* card_bm) : + HeapRegionClosure(), + _helper(region_bm, card_bm) { } + + bool doHeapRegion(HeapRegion* hr) { + bool allocated_since_marking = _helper.mark_allocated_since_marking(hr); + if (allocated_since_marking || hr->next_marked_bytes() > 0) { + _helper.set_bit_for_region(hr); + } + return false; } + }; - return false; - } -}; + G1CMBitMap* _bitmap; -class G1ParFinalCountTask: public AbstractGangTask { -protected: - G1CollectedHeap* _g1h; - G1ConcurrentMark* _cm; BitMap* _actual_region_bm; BitMap* _actual_card_bm; - uint _n_workers; - HeapRegionClaimer _hrclaimer; + HeapRegionClaimer _hr_claimer; public: - G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) - : AbstractGangTask("G1 final counting"), - _g1h(g1h), _cm(_g1h->concurrent_mark()), - _actual_region_bm(region_bm), _actual_card_bm(card_bm), - _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) { + G1FinalizeLiveDataTask(G1CMBitMap* bitmap, BitMap* region_bm, BitMap* card_bm, uint n_workers) : + AbstractGangTask("G1 final counting"), + _bitmap(bitmap), + _actual_region_bm(region_bm), + _actual_card_bm(card_bm), + _hr_claimer(n_workers) { } void work(uint worker_id) { - assert(worker_id < _n_workers, "invariant"); - - FinalCountDataUpdateClosure final_update_cl(_g1h, - _actual_region_bm, - _actual_card_bm); + G1FinalizeCountDataClosure cl(_bitmap, + _actual_region_bm, + _actual_card_bm); - _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer); + G1CollectedHeap::heap()->heap_region_par_iterate(&cl, worker_id, &_hr_claimer); } }; @@ -1637,31 +1612,29 @@ HeapRegionRemSet::reset_for_cleanup_tasks(); - // Do counting once more with the world stopped for good measure. 
- G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); - - g1h->workers()->run_task(&g1_par_count_task); + { + // Finalize the live data. + G1FinalizeLiveDataTask cl(_nextMarkBitMap, + &_region_live_bm, + &_card_live_bm, + g1h->workers()->active_workers()); + g1h->workers()->run_task(&cl); + } if (VerifyDuringGC) { - // Verify that the counting data accumulated during marking matches - // that calculated by walking the marking bitmap. - - // Bitmaps to hold expected values - BitMap expected_region_bm(_region_bm.size(), true); - BitMap expected_card_bm(_card_bm.size(), true); - - G1ParVerifyFinalCountTask g1_par_verify_task(g1h, - &_region_bm, - &_card_bm, - &expected_region_bm, - &expected_card_bm); - - g1h->workers()->run_task(&g1_par_verify_task); + // Verify that the liveness count data created concurrently matches one created + // during this safepoint. + ResourceMark rm; + G1VerifyLiveDataTask cl(G1CollectedHeap::heap(), + _nextMarkBitMap, + &_region_live_bm, + &_card_live_bm, + g1h->workers()->active_workers()); + g1h->workers()->run_task(&cl); - guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); + guarantee(cl.failures() == 0, "Unexpected accounting failures"); } - size_t start_used_bytes = g1h->used(); g1h->collector_state()->set_mark_in_progress(false); double count_end = os::elapsedTime(); @@ -1696,7 +1669,7 @@ // regions. 
if (G1ScrubRemSets) { double rs_scrub_start = os::elapsedTime(); - g1h->scrub_rem_set(&_region_bm, &_card_bm); + g1h->scrub_rem_set(&_region_live_bm, &_card_live_bm); _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start); } @@ -2146,6 +2119,35 @@ _nextMarkBitMap = (G1CMBitMap*) temp; } +BitMap G1ConcurrentMark::allocate_large_bitmap(BitMap::idx_t size_in_bits) { + size_t size_in_words = BitMap::size_in_words(size_in_bits); + + BitMap::bm_word_t* map = MmapArrayAllocator::allocate(size_in_words); + + return BitMap(map, size_in_bits); +} + +void G1ConcurrentMark::allocate_internal_bitmaps() { + double start_time = os::elapsedTime(); + + _region_live_bm = allocate_large_bitmap(_g1h->max_regions()); + + guarantee(_g1h->max_capacity() % CardTableModRefBS::card_size == 0, + "Heap capacity must be aligned to card size."); + _card_live_bm = allocate_large_bitmap(_g1h->max_capacity() / CardTableModRefBS::card_size); + + log_debug(gc, marking)("Allocating internal bitmaps took %1.2f seconds.", os::elapsedTime() - start_time); +} + +void G1ConcurrentMark::pretouch_internal_bitmaps() { + double start_time = os::elapsedTime(); + + _region_live_bm.pretouch(); + _card_live_bm.pretouch(); + + log_debug(gc, marking)("Pre-touching internal bitmaps took %1.2f seconds.", os::elapsedTime() - start_time); +} + // Closure for marking entries in SATB buffers. class G1CMSATBBufferClosure : public SATBBufferClosure { private: @@ -2164,7 +2166,7 @@ oop obj = static_cast(entry); assert(obj->is_oop(true /* ignore mark word */), "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)); - _task->make_reference_grey(obj, hr); + _task->make_reference_grey(obj); } } @@ -2406,165 +2408,117 @@ } #endif // PRODUCT -// Aggregate the counting data that was constructed concurrently -// with marking. 
-class AggregateCountDataHRClosure: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  G1ConcurrentMark* _cm;
-  CardTableModRefBS* _ct_bs;
-  BitMap* _cm_card_bm;
-  uint _max_worker_id;
-
- public:
-  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
-                              BitMap* cm_card_bm,
-                              uint max_worker_id) :
-    _g1h(g1h), _cm(g1h->concurrent_mark()),
-    _ct_bs(barrier_set_cast(g1h->barrier_set())),
-    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    HeapWord* start = hr->bottom();
-    HeapWord* limit = hr->next_top_at_mark_start();
-    HeapWord* end = hr->end();
-
-    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
-           "Preconditions not met - "
-           "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
-           "top: " PTR_FORMAT ", end: " PTR_FORMAT,
-           p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
-
-    assert(hr->next_marked_bytes() == 0, "Precondition");
-
-    if (start == limit) {
-      // NTAMS of this region has not been set so nothing to do.
-      return false;
-    }
-
-    // 'start' should be in the heap.
-    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
-    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
-    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
-
-    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
-    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
-    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
-
-    // If ntams is not card aligned then we bump card bitmap index
-    // for limit so that we get the all the cards spanned by
-    // the object ending at ntams.
-    // Note: if this is the last region in the heap then ntams
-    // could be actually just beyond the end of the the heap;
-    // limit_idx will then correspond to a (non-existent) card
-    // that is also outside the heap.
-    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
-      limit_idx += 1;
-    }
-
-    assert(limit_idx <= end_idx, "or else use atomics");
-
-    // Aggregate the "stripe" in the count data associated with hr.
-    uint hrm_index = hr->hrm_index();
-    size_t marked_bytes = 0;
-
-    for (uint i = 0; i < _max_worker_id; i += 1) {
-      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
-      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
-
-      // Fetch the marked_bytes in this region for task i and
-      // add it to the running total for this region.
-      marked_bytes += marked_bytes_array[hrm_index];
+class G1CreateLiveDataTask: public AbstractGangTask {
+  // Creates the live data of a region by walking the objects marked on the mark
+  // bitmap; stops the iteration if marking is found aborted after a yield check.
+  class G1CreateLiveDataHRClosure: public HeapRegionClosure {
+    G1LiveDataHelper _helper;

-      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
-      // into the global card bitmap.
-      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
+    G1CMBitMap* _mark_bitmap;

-      while (scan_idx < limit_idx) {
-        assert(task_card_bm->at(scan_idx) == true, "should be");
-        _cm_card_bm->set_bit(scan_idx);
-        assert(_cm_card_bm->at(scan_idx) == true, "should be");
+    G1ConcurrentMark* _cm;
+  public:
+    G1CreateLiveDataHRClosure(G1ConcurrentMark* cm,
+                              G1CMBitMap* mark_bitmap,
+                              BitMap* cm_card_bm) :
+      HeapRegionClosure(),
+      _helper(NULL, cm_card_bm),
+      _mark_bitmap(mark_bitmap),
+      _cm(cm) { }
+
+    bool doHeapRegion(HeapRegion* hr) {
+      size_t marked_bytes = _helper.mark_marked_during_marking(_mark_bitmap, hr);
+      if (marked_bytes > 0) {
+        hr->add_to_marked_bytes(marked_bytes);
+      }

-        // BitMap::get_next_one_offset() can handle the case when
-        // its left_offset parameter is greater than its right_offset
-        // parameter. It does, however, have an early exit if
-        // left_offset == right_offset. So let's limit the value
-        // passed in for left offset here.
-        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
-        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
+      if (_cm->do_yield_check() && _cm->has_aborted()) {
+        return true;
       }
+      return false;
     }
+  };

-    // Update the marked bytes for this region.
-    hr->add_to_marked_bytes(marked_bytes);
-
-    // Next heap region
-    return false;
-  }
-};
-
-class G1AggregateCountDataTask: public AbstractGangTask {
-protected:
   G1CollectedHeap* _g1h;
   G1ConcurrentMark* _cm;
   BitMap* _cm_card_bm;
-  uint _max_worker_id;
-  uint _active_workers;
-  HeapRegionClaimer _hrclaimer;
+  HeapRegionClaimer _hr_claimer;
 public:
-  G1AggregateCountDataTask(G1CollectedHeap* g1h,
-                           G1ConcurrentMark* cm,
-                           BitMap* cm_card_bm,
-                           uint max_worker_id,
-                           uint n_workers) :
-    AbstractGangTask("Count Aggregation"),
-    _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
-    _max_worker_id(max_worker_id),
-    _active_workers(n_workers),
-    _hrclaimer(_active_workers) {
+  G1CreateLiveDataTask(G1CollectedHeap* g1h,
+                       BitMap* cm_card_bm,
+                       uint n_workers) :
+      AbstractGangTask("Create Live Data"),
+      _g1h(g1h), _cm(g1h->concurrent_mark()),
+      _cm_card_bm(cm_card_bm),
+      _hr_claimer(n_workers) {
   }
   void work(uint worker_id) {
-    AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
+    SuspendibleThreadSetJoiner sts_join;

-    _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
+    G1CreateLiveDataHRClosure cl(_cm, _cm->nextMarkBitMap(), _cm_card_bm);
+    _g1h->heap_region_par_iterate(&cl, worker_id, &_hr_claimer);
   }
 };
-void G1ConcurrentMark::aggregate_count_data() {
-  uint n_workers = _g1h->workers()->active_workers();
-
-  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
-                                           _max_worker_id, n_workers);
+void G1ConcurrentMark::create_live_data() {
+  uint n_workers = _parallel_workers->active_workers();

-  _g1h->workers()->run_task(&g1_par_agg_task);
+  G1CreateLiveDataTask cl(_g1h,
+                          &_card_live_bm,
+                          n_workers);
+  _parallel_workers->run_task(&cl);
 }
-// Clear the per-worker arrays used
to store the per-region counting data -void G1ConcurrentMark::clear_all_count_data() { - // Clear the global card bitmap - it will be filled during - // liveness count aggregation (during remark) and the - // final counting task. - _card_bm.clear(); +class G1ClearAllLiveDataTask : public AbstractGangTask { + BitMap* _bitmap; + size_t _num_tasks; + size_t _cur_task; +public: + G1ClearAllLiveDataTask(BitMap* bitmap, size_t num_tasks) : + AbstractGangTask("Clear All Live Data"), + _bitmap(bitmap), + _num_tasks(num_tasks), + _cur_task(0) { + } - // Clear the global region bitmap - it will be filled as part - // of the final counting task. - _region_bm.clear(); + virtual void work(uint worker_id) { + while (true) { + size_t to_process = Atomic::add(1, &_cur_task) - 1; + if (to_process >= _num_tasks) { + break; + } - uint max_regions = _g1h->max_regions(); - assert(_max_worker_id > 0, "uninitialized"); + BitMap::idx_t start = M * BitsPerByte * to_process; + BitMap::idx_t end = MIN2(start + M * BitsPerByte, _bitmap->size()); + _bitmap->clear_range(start, end); + } + } +}; - for (uint i = 0; i < _max_worker_id; i += 1) { - BitMap* task_card_bm = count_card_bitmap_for(i); - size_t* marked_bytes_array = count_marked_bytes_array_for(i); +void G1ConcurrentMark::clear_all_live_data(WorkGang* workers) { + double start_time = os::elapsedTime(); + + guarantee(Universe::is_fully_initialized(), "Should not call this during initialization."); + + size_t const num_chunks = align_size_up(_card_live_bm.size_in_words() * HeapWordSize, M) / M; - assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); - assert(marked_bytes_array != NULL, "uninitialized"); + G1ClearAllLiveDataTask cl(&_card_live_bm, num_chunks); + workers->run_task(&cl); - memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); - task_card_bm->clear(); - } + // The region live bitmap is always very small, even for huge heaps. Clear + // directly. 
+ _region_live_bm.clear(); + + + log_debug(gc, marking)("Clear Live Data took %.3fms", (os::elapsedTime() - start_time) * 1000.0); +} + +void G1ConcurrentMark::verify_all_live_data() { + assert(_card_live_bm.count_one_bits() == 0, "Master card bitmap not clear"); + assert(_region_live_bm.count_one_bits() == 0, "Master region bitmap not clear"); } void G1ConcurrentMark::print_stats() { @@ -2578,7 +2532,6 @@ } } -// abandon current marking iteration due to a Full GC void G1ConcurrentMark::abort() { if (!cmThread()->during_cycle() || _has_aborted) { // We haven't started a concurrent cycle or we have already aborted it. No need to do anything. @@ -2593,8 +2546,8 @@ // since VerifyDuringGC verifies the objects marked during // a full GC against the previous bitmap. - // Clear the liveness counting data - clear_all_count_data(); + clear_all_live_data(_g1h->workers()); + DEBUG_ONLY(verify_all_live_data()); // Empty mark stack reset_marking_state(); for (uint i = 0; i < _max_worker_id; ++i) { @@ -2638,7 +2591,7 @@ } print_ms_time_info(" ", "cleanups", _cleanup_times); - log.trace(" Final counting total time = %8.2f s (avg = %8.2f ms).", + log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).", _total_counting_time, (_cleanup_times.num() > 0 ? 
_total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0)); if (G1ScrubRemSets) { log.trace(" RS scrub total time = %8.2f s (avg = %8.2f ms).", @@ -3477,8 +3430,6 @@ G1CMTask::G1CMTask(uint worker_id, G1ConcurrentMark* cm, - size_t* marked_bytes, - BitMap* card_bm, G1CMTaskQueue* task_queue, G1CMTaskQueueSet* task_queues) : _g1h(G1CollectedHeap::heap()), @@ -3487,9 +3438,7 @@ _nextMarkBitMap(NULL), _hash_seed(17), _task_queue(task_queue), _task_queues(task_queues), - _cm_oop_closure(NULL), - _marked_bytes_array(marked_bytes), - _card_bm(card_bm) { + _cm_oop_closure(NULL) { guarantee(task_queue != NULL, "invariant"); guarantee(task_queues != NULL, "invariant"); --- old/src/share/vm/gc/g1/g1ConcurrentMark.hpp 2016-03-18 13:05:56.205311720 +0100 +++ new/src/share/vm/gc/g1/g1ConcurrentMark.hpp 2016-03-18 13:05:56.108308824 +0100 @@ -266,7 +266,7 @@ class G1ConcurrentMark: public CHeapObj { friend class ConcurrentMarkThread; friend class G1ParNoteEndTask; - friend class CalcLiveObjectsClosure; + friend class G1VerifyLiveDataClosure; friend class G1CMRefProcTaskProxy; friend class G1CMRefProcTaskExecutor; friend class G1CMKeepAliveAndDrainClosure; @@ -298,8 +298,14 @@ G1CMBitMapRO* _prevMarkBitMap; // Completed mark bitmap G1CMBitMap* _nextMarkBitMap; // Under-construction mark bitmap - BitMap _region_bm; - BitMap _card_bm; + // Liveness count data. After marking G1 iterates over the recently gathered mark + // bitmap and records rough information about liveness on card and region basis. + // This information can be used for e.g. remembered set scrubbing. + + // A set bit indicates whether the given region contains any live object. + BitMap _region_live_bm; + // A set bit indicates that the given card contains a live object. + BitMap _card_live_bm; // Heap bounds HeapWord* _heap_start; @@ -373,6 +379,14 @@ void swapMarkBitMaps(); + // Allocates and returns a zero-ed out "large" bitmap of the given size in bits. + // It is always allocated using virtual memory. 
+ BitMap allocate_large_bitmap(BitMap::idx_t size_in_bits); + // Allocates the memory for all bitmaps used by the concurrent marking. + void allocate_internal_bitmaps(); + // Pre-touches the internal bitmaps. + void pretouch_internal_bitmaps(); + // It resets the global marking data structures, as well as the // task local ones; should be called during initial mark. void reset(); @@ -461,23 +475,6 @@ void enter_first_sync_barrier(uint worker_id); void enter_second_sync_barrier(uint worker_id); - // Live Data Counting data structures... - // These data structures are initialized at the start of - // marking. They are written to while marking is active. - // They are aggregated during remark; the aggregated values - // are then used to populate the _region_bm, _card_bm, and - // the total live bytes, which are then subsequently updated - // during cleanup. - - // An array of bitmaps (one bit map per task). Each bitmap - // is used to record the cards spanned by the live objects - // marked by that task/worker. - BitMap* _count_card_bitmaps; - - // Used to record the number of marked live bytes - // (for each region, by worker thread). - size_t** _count_marked_bytes; - // Card index of the bottom of the G1 heap. Used for biasing indices into // the card bitmaps. intptr_t _heap_bottom_card_num; @@ -563,18 +560,10 @@ // G1CollectedHeap // This notifies CM that a root during initial-mark needs to be - // grayed. It is MT-safe. word_size is the size of the object in - // words. It is passed explicitly as sometimes we cannot calculate - // it from the given object because it might be in an inconsistent - // state (e.g., in to-space and being copied). So the caller is - // responsible for dealing with this issue (e.g., get the size from - // the from-space image when the to-space image might be - // inconsistent) and always passing the size. hr is the region that + // grayed. It is MT-safe. 
hr is the region that // contains the object and it's passed optionally from callers who // might already have it (no point in recalculating it). inline void grayRoot(oop obj, - size_t word_size, - uint worker_id, HeapRegion* hr = NULL); // Prepare internal data structures for the next mark cycle. This includes clearing @@ -641,7 +630,7 @@ inline bool do_yield_check(uint worker_i = 0); - // Called to abort the marking cycle after a Full GC takes place. + // Abandon current marking iteration due to a Full GC. void abort(); bool has_aborted() { return _has_aborted; } @@ -652,75 +641,8 @@ void print_on_error(outputStream* st) const; - // Liveness counting - - // Utility routine to set an exclusive range of cards on the given - // card liveness bitmap - inline void set_card_bitmap_range(BitMap* card_bm, - BitMap::idx_t start_idx, - BitMap::idx_t end_idx, - bool is_par); - - // Returns the card number of the bottom of the G1 heap. - // Used in biasing indices into accounting card bitmaps. - intptr_t heap_bottom_card_num() const { - return _heap_bottom_card_num; - } - - // Returns the card bitmap for a given task or worker id. - BitMap* count_card_bitmap_for(uint worker_id) { - assert(worker_id < _max_worker_id, "oob"); - assert(_count_card_bitmaps != NULL, "uninitialized"); - BitMap* task_card_bm = &_count_card_bitmaps[worker_id]; - assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); - return task_card_bm; - } - - // Returns the array containing the marked bytes for each region, - // for the given worker or task id. 
- size_t* count_marked_bytes_array_for(uint worker_id) { - assert(worker_id < _max_worker_id, "oob"); - assert(_count_marked_bytes != NULL, "uninitialized"); - size_t* marked_bytes_array = _count_marked_bytes[worker_id]; - assert(marked_bytes_array != NULL, "uninitialized"); - return marked_bytes_array; - } - - // Returns the index in the liveness accounting card table bitmap - // for the given address - inline BitMap::idx_t card_bitmap_index_for(HeapWord* addr); - - // Counts the size of the given memory region in the the given - // marked_bytes array slot for the given HeapRegion. - // Sets the bits in the given card bitmap that are associated with the - // cards that are spanned by the memory region. - inline void count_region(MemRegion mr, - HeapRegion* hr, - size_t* marked_bytes_array, - BitMap* task_card_bm); - - // Counts the given object in the given task/worker counting - // data structures. - inline void count_object(oop obj, - HeapRegion* hr, - size_t* marked_bytes_array, - BitMap* task_card_bm, - size_t word_size); - - // Attempts to mark the given object and, if successful, counts - // the object in the given task/worker counting structures. - inline bool par_mark_and_count(oop obj, - HeapRegion* hr, - size_t* marked_bytes_array, - BitMap* task_card_bm); - - // Attempts to mark the given object and, if successful, counts - // the object in the task/worker counting structures for the - // given worker id. - inline bool par_mark_and_count(oop obj, - size_t word_size, - HeapRegion* hr, - uint worker_id); + // Attempts to mark the given object on the next mark bitmap. + inline bool par_mark(oop obj); // Returns true if initialization was successfully completed. bool completed_initialization() const { @@ -730,19 +652,19 @@ ConcurrentGCTimer* gc_timer_cm() const { return _gc_timer_cm; } G1OldTracer* gc_tracer_cm() const { return _gc_tracer_cm; } -protected: - // Clear all the per-task bitmaps and arrays used to store the - // counting data. 
- void clear_all_count_data(); - - // Aggregates the counting data for each worker/task - // that was constructed while marking. Also sets - // the amount of marked bytes for each region and - // the top at concurrent mark count. - void aggregate_count_data(); - - // Verification routine - void verify_count_data(); +private: + // Clear (Reset) all liveness count data. + void clear_all_live_data(WorkGang* workers); + + // Verify all of the above data structures that they are in initial state. + void verify_all_live_data(); + + // Aggregates the per-card liveness data based on the current marking. Also sets + // the amount of marked bytes for each region. + void create_live_data(); + + // Verification routine + void verify_live_data(); }; // A class representing a marking task. @@ -844,12 +766,6 @@ TruncatedSeq _marking_step_diffs_ms; - // Counting data structures. Embedding the task's marked_bytes_array - // and card bitmap into the actual task saves having to go through - // the ConcurrentMark object. - size_t* _marked_bytes_array; - BitMap* _card_bm; - // it updates the local fields after this task has claimed // a new region to scan void setup_for_region(HeapRegion* hr); @@ -936,9 +852,8 @@ // Grey the object by marking it. If not already marked, push it on // the local queue if below the finger. - // Precondition: obj is in region. // Precondition: obj is below region's NTAMS. - inline void make_reference_grey(oop obj, HeapRegion* region); + inline void make_reference_grey(oop obj); // Grey the object (by calling make_grey_reference) if required, // e.g. obj is below its containing region's NTAMS. 
@@ -976,8 +891,6 @@ G1CMTask(uint worker_id, G1ConcurrentMark *cm, - size_t* marked_bytes, - BitMap* card_bm, G1CMTaskQueue* task_queue, G1CMTaskQueueSet* task_queues); --- old/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp 2016-03-18 13:05:56.745327843 +0100 +++ new/src/share/vm/gc/g1/g1ConcurrentMark.inline.hpp 2016-03-18 13:05:56.646324887 +0100 @@ -29,138 +29,8 @@ #include "gc/g1/g1ConcurrentMark.hpp" #include "gc/shared/taskqueue.inline.hpp" -// Utility routine to set an exclusive range of cards on the given -// card liveness bitmap -inline void G1ConcurrentMark::set_card_bitmap_range(BitMap* card_bm, - BitMap::idx_t start_idx, - BitMap::idx_t end_idx, - bool is_par) { - - // Set the exclusive bit range [start_idx, end_idx). - assert((end_idx - start_idx) > 0, "at least one card"); - assert(end_idx <= card_bm->size(), "sanity"); - - // Silently clip the end index - end_idx = MIN2(end_idx, card_bm->size()); - - // For small ranges use a simple loop; otherwise use set_range or - // use par_at_put_range (if parallel). The range is made up of the - // cards that are spanned by an object/mem region so 8 cards will - // allow up to object sizes up to 4K to be handled using the loop. - if ((end_idx - start_idx) <= 8) { - for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) { - if (is_par) { - card_bm->par_set_bit(i); - } else { - card_bm->set_bit(i); - } - } - } else { - // Note BitMap::par_at_put_range() and BitMap::set_range() are exclusive. - if (is_par) { - card_bm->par_at_put_range(start_idx, end_idx, true); - } else { - card_bm->set_range(start_idx, end_idx); - } - } -} - -// Returns the index in the liveness accounting card bitmap -// for the given address -inline BitMap::idx_t G1ConcurrentMark::card_bitmap_index_for(HeapWord* addr) { - // Below, the term "card num" means the result of shifting an address - // by the card shift -- address 0 corresponds to card number 0. 
One - // must subtract the card num of the bottom of the heap to obtain a - // card table index. - intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift); - return card_num - heap_bottom_card_num(); -} - -// Counts the given memory region in the given task/worker -// counting data structures. -inline void G1ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr, - size_t* marked_bytes_array, - BitMap* task_card_bm) { - G1CollectedHeap* g1h = _g1h; - CardTableModRefBS* ct_bs = g1h->g1_barrier_set(); - - HeapWord* start = mr.start(); - HeapWord* end = mr.end(); - size_t region_size_bytes = mr.byte_size(); - uint index = hr->hrm_index(); - - assert(hr == g1h->heap_region_containing(start), "sanity"); - assert(marked_bytes_array != NULL, "pre-condition"); - assert(task_card_bm != NULL, "pre-condition"); - - // Add to the task local marked bytes for this region. - marked_bytes_array[index] += region_size_bytes; - - BitMap::idx_t start_idx = card_bitmap_index_for(start); - BitMap::idx_t end_idx = card_bitmap_index_for(end); - - // Note: if we're looking at the last region in heap - end - // could be actually just beyond the end of the heap; end_idx - // will then correspond to a (non-existent) card that is also - // just beyond the heap. - if (g1h->is_in_g1_reserved(end) && !ct_bs->is_card_aligned(end)) { - // end of region is not card aligned - increment to cover - // all the cards spanned by the region. - end_idx += 1; - } - // The card bitmap is task/worker specific => no need to use - // the 'par' BitMap routines. - // Set bits in the exclusive bit range [start_idx, end_idx). - set_card_bitmap_range(task_card_bm, start_idx, end_idx, false /* is_par */); -} - -// Counts the given object in the given task/worker counting data structures. 
-inline void G1ConcurrentMark::count_object(oop obj, - HeapRegion* hr, - size_t* marked_bytes_array, - BitMap* task_card_bm, - size_t word_size) { - assert(!hr->is_continues_humongous(), "Cannot enter count_object with continues humongous"); - if (!hr->is_starts_humongous()) { - MemRegion mr((HeapWord*)obj, word_size); - count_region(mr, hr, marked_bytes_array, task_card_bm); - } else { - do { - MemRegion mr(hr->bottom(), hr->top()); - count_region(mr, hr, marked_bytes_array, task_card_bm); - hr = _g1h->next_region_in_humongous(hr); - } while (hr != NULL); - } -} - -// Attempts to mark the given object and, if successful, counts -// the object in the given task/worker counting structures. -inline bool G1ConcurrentMark::par_mark_and_count(oop obj, - HeapRegion* hr, - size_t* marked_bytes_array, - BitMap* task_card_bm) { - if (_nextMarkBitMap->parMark((HeapWord*)obj)) { - // Update the task specific count data for the object. - count_object(obj, hr, marked_bytes_array, task_card_bm, obj->size()); - return true; - } - return false; -} - -// Attempts to mark the given object and, if successful, counts -// the object in the task/worker counting structures for the -// given worker id. 
-inline bool G1ConcurrentMark::par_mark_and_count(oop obj, - size_t word_size, - HeapRegion* hr, - uint worker_id) { - if (_nextMarkBitMap->parMark((HeapWord*)obj)) { - size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id); - BitMap* task_card_bm = count_card_bitmap_for(worker_id); - count_object(obj, hr, marked_bytes_array, task_card_bm, word_size); - return true; - } - return false; +inline bool G1ConcurrentMark::par_mark(oop obj) { + return _nextMarkBitMap->parMark((HeapWord*)obj); } inline bool G1CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) { @@ -294,10 +164,8 @@ check_limits(); } - - -inline void G1CMTask::make_reference_grey(oop obj, HeapRegion* hr) { - if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) { +inline void G1CMTask::make_reference_grey(oop obj) { + if (_cm->par_mark(obj)) { // No OrderAccess:store_load() is needed. It is implicit in the // CAS done in G1CMBitMap::parMark() call in the routine above. HeapWord* global_finger = _cm->finger(); @@ -348,7 +216,7 @@ // anything with it). 
HeapRegion* hr = _g1h->heap_region_containing(obj); if (!hr->obj_allocated_since_next_marking(obj)) { - make_reference_grey(obj, hr); + make_reference_grey(obj); } } } @@ -370,8 +238,7 @@ return _prevMarkBitMap->isMarked(addr); } -inline void G1ConcurrentMark::grayRoot(oop obj, size_t word_size, - uint worker_id, HeapRegion* hr) { +inline void G1ConcurrentMark::grayRoot(oop obj, HeapRegion* hr) { assert(obj != NULL, "pre-condition"); HeapWord* addr = (HeapWord*) obj; if (hr == NULL) { @@ -386,7 +253,7 @@ if (addr < hr->next_top_at_mark_start()) { if (!_nextMarkBitMap->isMarked(addr)) { - par_mark_and_count(obj, word_size, hr, worker_id); + par_mark(obj); } } } --- old/src/share/vm/gc/g1/g1EvacFailure.cpp 2016-03-18 13:05:57.266343400 +0100 +++ new/src/share/vm/gc/g1/g1EvacFailure.cpp 2016-03-18 13:05:57.168340473 +0100 @@ -95,8 +95,6 @@ void do_object(oop obj) { HeapWord* obj_addr = (HeapWord*) obj; assert(_hr->is_in(obj_addr), "sanity"); - size_t obj_size = obj->size(); - HeapWord* obj_end = obj_addr + obj_size; if (obj->is_forwarded() && obj->forwardee() == obj) { // The object failed to move. @@ -119,8 +117,10 @@ // explicitly and all objects in the CSet are considered // (implicitly) live. So, we won't mark them explicitly and // we'll leave them over NTAMS. - _cm->grayRoot(obj, obj_size, _worker_id, _hr); + _cm->grayRoot(obj, _hr); } + size_t obj_size = obj->size(); + _marked_bytes += (obj_size * HeapWordSize); obj->set_mark(markOopDesc::prototype()); @@ -138,6 +138,7 @@ // the collection set. So, we'll recreate such entries now. 
obj->oop_iterate(_update_rset_cl); + HeapWord* obj_end = obj_addr + obj_size; _last_forwarded_object_end = obj_end; _hr->cross_threshold(obj_addr, obj_end); } --- old/src/share/vm/gc/g1/g1OopClosures.inline.hpp 2016-03-18 13:05:57.775358597 +0100 +++ new/src/share/vm/gc/g1/g1OopClosures.inline.hpp 2016-03-18 13:05:57.679355731 +0100 @@ -131,7 +131,7 @@ if (!oopDesc::is_null(heap_oop)) { oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); HeapRegion* hr = _g1h->heap_region_containing((HeapWord*) obj); - _cm->grayRoot(obj, obj->size(), _worker_id, hr); + _cm->grayRoot(obj, hr); } } @@ -237,7 +237,7 @@ assert(!_g1->heap_region_containing(obj)->in_collection_set(), "should not mark objects in the CSet"); // We know that the object is not moving so it's safe to read its size. - _cm->grayRoot(obj, (size_t) obj->size(), _worker_id); + _cm->grayRoot(obj); } void G1ParCopyHelper::mark_forwarded_object(oop from_obj, oop to_obj) { @@ -252,7 +252,7 @@ // worker so we cannot trust that its to-space image is // well-formed. So we have to read its size from its from-space // image which we know should not be changing. 
- _cm->grayRoot(to_obj, (size_t) from_obj->size(), _worker_id); + _cm->grayRoot(to_obj); } template --- old/src/share/vm/gc/g1/g1_globals.hpp 2016-03-18 13:05:58.281373706 +0100 +++ new/src/share/vm/gc/g1/g1_globals.hpp 2016-03-18 13:05:58.186370869 +0100 @@ -260,6 +260,9 @@ "The target number of mixed GCs after a marking cycle.") \ range(0, max_uintx) \ \ + experimental(bool, G1PretouchAuxiliaryMemory, false, \ + "Pre-touch large auxiliary data structures used by the GC.") \ + \ experimental(bool, G1EagerReclaimHumongousObjects, true, \ "Try to reclaim dead large objects at every young GC.") \ \ --- old/src/share/vm/utilities/bitMap.cpp 2016-03-18 13:05:58.790388903 +0100 +++ new/src/share/vm/utilities/bitMap.cpp 2016-03-18 13:05:58.693386007 +0100 @@ -68,6 +68,10 @@ } } +void BitMap::pretouch() { + os::pretouch_memory((char*)word_addr(0), (char*)word_addr(size())); +} + void BitMap::set_range_within_word(idx_t beg, idx_t end) { // With a valid range (beg <= end), this test ensures that end != 0, as // required by inverted_bit_mask_for_range. Also avoids an unnecessary write. --- old/src/share/vm/utilities/bitMap.hpp 2016-03-18 13:05:59.308404370 +0100 +++ new/src/share/vm/utilities/bitMap.hpp 2016-03-18 13:05:59.209401414 +0100 @@ -135,12 +135,19 @@ // use the same value for "in_resource_area".) void resize(idx_t size_in_bits, bool in_resource_area = true); + // Pretouch the entire range of memory this BitMap covers. 
+ void pretouch(); + // Accessing idx_t size() const { return _size; } idx_t size_in_words() const { return word_index(size() + BitsPerWord - 1); } + static idx_t size_in_words(size_t size_in_bits) { + return word_index(size_in_bits + BitsPerWord - 1); + } + bool at(idx_t index) const { verify_index(index); return (*word_addr(index) & bit_mask(index)) != 0; --- old/test/gc/g1/Test2GbHeap.java 2016-03-18 13:05:59.820419657 +0100 +++ new/test/gc/g1/Test2GbHeap.java 2016-03-18 13:05:59.719416642 +0100 @@ -25,6 +25,9 @@ * @test Test2GbHeap * @bug 8031686 * @summary Regression test to ensure we can start G1 with 2gb heap. + * Skip test on 32 bit Windows: it typically does not support the many and large virtual memory reservations needed. + * @requires (vm.gc == "G1" | vm.gc == "null") + * @requires !((sun.arch.data.model == "32") & (os.family == "windows")) * @key gc * @key regression * @library /testlibrary @@ -48,17 +51,6 @@ ProcessBuilder pb = ProcessTools.createJavaProcessBuilder(testArguments.toArray(new String[0])); OutputAnalyzer output = new OutputAnalyzer(pb.start()); - - // Avoid failing test for setups not supported. - if (output.getOutput().contains("Could not reserve enough space for 2097152KB object heap")) { - // Will fail on machines with too little memory (and Windows 32-bit VM), ignore such failures. - output.shouldHaveExitValue(1); - } else if (output.getOutput().contains("G1 GC is disabled in this release")) { - // G1 is not supported on embedded, ignore such failures. - output.shouldHaveExitValue(1); - } else { - // Normally everything should be fine. - output.shouldHaveExitValue(0); - } + output.shouldHaveExitValue(0); } }