src/share/vm/gc_implementation/g1/concurrentMark.cpp

Print this page
rev 2896 : 6484965: G1: piggy-back liveness accounting phase on marking
Summary: Remove the separate counting phase of concurrent marking by tracking the amount of marked bytes and the cards spanned by marked objects in marking task/worker thread local data structures, which are updated as individual objects are marked.
Reviewed-by: brutisso

*** 476,485 **** --- 476,486 ---- _cleanup_list("Cleanup List"), _region_bm(max_regions, false /* in_resource_area*/), _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >> CardTableModRefBS::card_shift, false /* in_resource_area*/), + _prevMarkBitMap(&_markBitMap1), _nextMarkBitMap(&_markBitMap2), _at_least_one_mark_complete(false), _markStack(this),
*** 505,515 **** _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), _cleanup_times(), _total_counting_time(0.0), _total_rs_scrub_time(0.0), ! _parallel_workers(NULL) { CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel; if (verbose_level < no_verbose) { verbose_level = no_verbose; } if (verbose_level > high_verbose) { --- 506,520 ---- _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), _cleanup_times(), _total_counting_time(0.0), _total_rs_scrub_time(0.0), ! _parallel_workers(NULL), ! ! _count_card_bitmaps(NULL), ! _count_marked_bytes(NULL) ! { CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel; if (verbose_level < no_verbose) { verbose_level = no_verbose; } if (verbose_level > high_verbose) {
*** 539,559 **** satb_qs.set_buffer_size(G1SATBBufferSize); _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num); _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num); // so that the assertion in MarkingTaskQueue::task_queue doesn't fail _active_tasks = _max_task_num; for (int i = 0; i < (int) _max_task_num; ++i) { CMTaskQueue* task_queue = new CMTaskQueue(); task_queue->initialize(); _task_queues->register_queue(i, task_queue); ! _tasks[i] = new CMTask(i, this, task_queue, _task_queues); _accum_task_vtime[i] = 0.0; } if (ConcGCThreads > ParallelGCThreads) { vm_exit_during_initialization("Can't have more ConcGCThreads " "than ParallelGCThreads."); } if (ParallelGCThreads == 0) { --- 544,583 ---- satb_qs.set_buffer_size(G1SATBBufferSize); _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num); _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num); + _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num); + _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num); + + BitMap::idx_t card_bm_size = _card_bm.size(); + // so that the assertion in MarkingTaskQueue::task_queue doesn't fail _active_tasks = _max_task_num; for (int i = 0; i < (int) _max_task_num; ++i) { CMTaskQueue* task_queue = new CMTaskQueue(); task_queue->initialize(); _task_queues->register_queue(i, task_queue); ! _count_card_bitmaps[i] = BitMap(card_bm_size, false); ! _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions); ! ! _tasks[i] = new CMTask(i, this, ! _count_marked_bytes[i], ! &_count_card_bitmaps[i], ! task_queue, _task_queues); ! _accum_task_vtime[i] = 0.0; } + // Calculate the card number for the bottom of the heap. Used + // in biasing indexes into the accounting card bitmaps. + _heap_bottom_card_num = + intptr_t(uintptr_t(_g1h->reserved_region().start()) >> + CardTableModRefBS::card_shift); + + if (ConcGCThreads > ParallelGCThreads) { vm_exit_during_initialization("Can't have more ConcGCThreads " "than ParallelGCThreads."); } if (ParallelGCThreads == 0) {
*** 673,682 **** --- 697,708 ---- assert(_heap_start < _heap_end, "heap bounds should look ok"); // reset all the marking data structures and any necessary flags clear_marking_state(); + clear_all_count_data(); + if (verbose_low()) { gclog_or_tty->print_cr("[global] resetting"); } // We do reset all of them, since different phases will use
*** 724,742 **** clear_marking_state(); _active_tasks = 0; clear_concurrent_marking_in_progress(); } - ConcurrentMark::~ConcurrentMark() { - for (int i = 0; i < (int) _max_task_num; ++i) { - delete _task_queues->queue(i); - delete _tasks[i]; - } - delete _task_queues; - FREE_C_HEAP_ARRAY(CMTask*, _max_task_num); - } - // This closure is used to mark refs into the g1 generation // from external roots in the CMS bit map. // Called at the first checkpoint. // --- 750,759 ----
*** 949,970 **** return false; } } #endif // !PRODUCT ! void ConcurrentMark::grayRoot(oop p) { HeapWord* addr = (HeapWord*) p; // We can't really check against _heap_start and _heap_end, since it // is possible during an evacuation pause with piggy-backed // initial-mark that the committed space is expanded during the // pause without CM observing this change. So the assertions below // is a bit conservative; but better than nothing. assert(_g1h->g1_committed().contains(addr), "address should be within the heap bounds"); if (!_nextMarkBitMap->isMarked(addr)) { ! _nextMarkBitMap->parMark(addr); } } void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) { // The objects on the region have already been marked "in bulk" by --- 966,987 ---- return false; } } #endif // !PRODUCT ! void ConcurrentMark::grayRoot(oop p, int worker_i) { HeapWord* addr = (HeapWord*) p; // We can't really check against _heap_start and _heap_end, since it // is possible during an evacuation pause with piggy-backed // initial-mark that the committed space is expanded during the // pause without CM observing this change. So the assertions below // is a bit conservative; but better than nothing. assert(_g1h->g1_committed().contains(addr), "address should be within the heap bounds"); if (!_nextMarkBitMap->isMarked(addr)) { ! par_mark_and_count(p, worker_i); } } void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) { // The objects on the region have already been marked "in bulk" by
*** 1009,1027 **** } } } } ! void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) { // The object is not marked by the caller. We need to at least mark // it and maybe push in on the stack. HeapWord* addr = (HeapWord*)p; if (!_nextMarkBitMap->isMarked(addr)) { // We definitely need to mark it, irrespective whether we bail out // because we're done with marking. ! if (_nextMarkBitMap->parMark(addr)) { if (!concurrent_marking_in_progress() || !_should_gray_objects) { // If we're done with concurrent marking and we're waiting for // remark, then we're not pushing anything on the stack. return; } --- 1026,1045 ---- } } } } ! void ConcurrentMark::markAndGrayObjectIfNecessary(oop p, int worker_i) { // The object is not marked by the caller. We need to at least mark // it and maybe push in on the stack. HeapWord* addr = (HeapWord*)p; if (!_nextMarkBitMap->isMarked(addr)) { // We definitely need to mark it, irrespective whether we bail out // because we're done with marking. ! ! if (par_mark_and_count(p, worker_i)) { if (!concurrent_marking_in_progress() || !_should_gray_objects) { // If we're done with concurrent marking and we're waiting for // remark, then we're not pushing anything on the stack. return; }
*** 1219,1228 **** --- 1237,1250 ---- clear_has_overflown(); if (G1TraceMarkStackOverflow) { gclog_or_tty->print_cr("\nRemark led to restart for overflow."); } } else { + // Aggregate the per-task counting data that we have accumulated + // while marking. + aggregate_and_clear_count_data(); + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); // We're done with marking. // This is the end of the marking cycle, we're expected all // threads to have SATB queues with active set to true. satb_mq_set.set_active_all_threads(false, /* new active value */
*** 1259,1307 **** g1p->record_concurrent_mark_remark_end(); } #define CARD_BM_TEST_MODE 0 class CalcLiveObjectsClosure: public HeapRegionClosure { CMBitMapRO* _bm; ConcurrentMark* _cm; ! bool _changed; ! bool _yield; ! size_t _words_done; size_t _tot_live; size_t _tot_used; - size_t _regions_done; - double _start_vtime_sec; ! BitMap* _region_bm; ! BitMap* _card_bm; intptr_t _bottom_card_num; - bool _final; void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) { ! for (intptr_t i = start_card_num; i <= last_card_num; i++) { #if CARD_BM_TEST_MODE ! guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set."); #else ! _card_bm->par_at_put(i - _bottom_card_num, 1); #endif } } public: ! CalcLiveObjectsClosure(bool final, ! CMBitMapRO *bm, ConcurrentMark *cm, BitMap* region_bm, BitMap* card_bm) : ! _bm(bm), _cm(cm), _changed(false), _yield(true), ! _words_done(0), _tot_live(0), _tot_used(0), ! _region_bm(region_bm), _card_bm(card_bm),_final(final), ! _regions_done(0), _start_vtime_sec(0.0) { ! _bottom_card_num = ! intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> ! CardTableModRefBS::card_shift); } // It takes a region that's not empty (i.e., it has at least one // live object in it and sets its corresponding bit on the region // bitmap to 1. If the region is "starts humongous" it will also set --- 1281,1328 ---- g1p->record_concurrent_mark_remark_end(); } #define CARD_BM_TEST_MODE 0 + // Used to calculate the # live objects per region + // for verification purposes class CalcLiveObjectsClosure: public HeapRegionClosure { CMBitMapRO* _bm; ConcurrentMark* _cm; ! BitMap* _region_bm; ! BitMap* _card_bm; ! ! size_t _tot_words_done; size_t _tot_live; size_t _tot_used; ! size_t _region_marked_bytes; ! intptr_t _bottom_card_num; void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) { ! BitMap::idx_t start_idx = start_card_num - _bottom_card_num; ! 
BitMap::idx_t last_idx = last_card_num - _bottom_card_num; ! ! for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) { #if CARD_BM_TEST_MODE ! guarantee(_card_bm->at(i), "Should already be set."); #else ! _card_bm->par_at_put(i, 1); #endif } } public: ! CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm, BitMap* region_bm, BitMap* card_bm) : ! _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm), ! _region_marked_bytes(0), _tot_words_done(0), ! _tot_live(0), _tot_used(0) { ! _bottom_card_num = cm->heap_bottom_card_num(); } // It takes a region that's not empty (i.e., it has at least one // live object in it and sets its corresponding bit on the region // bitmap to 1. If the region is "starts humongous" it will also set
*** 1314,1346 **** if (!hr->startsHumongous()) { // Normal (non-humongous) case: just set the bit. _region_bm->par_at_put((BitMap::idx_t) index, true); } else { // Starts humongous case: calculate how many regions are part of ! // this humongous region and then set the bit range. It might ! // have been a bit more efficient to look at the object that ! // spans these humongous regions to calculate their number from ! // the object's size. However, it's a good idea to calculate ! // this based on the metadata itself, and not the region ! // contents, so that this code is not aware of what goes into ! // the humongous regions (in case this changes in the future). G1CollectedHeap* g1h = G1CollectedHeap::heap(); ! size_t end_index = index + 1; ! while (end_index < g1h->n_regions()) { ! HeapRegion* chr = g1h->region_at(end_index); ! if (!chr->continuesHumongous()) break; ! end_index += 1; ! } _region_bm->par_at_put_range((BitMap::idx_t) index, (BitMap::idx_t) end_index, true); } } bool doHeapRegion(HeapRegion* hr) { - if (!_final && _regions_done == 0) { - _start_vtime_sec = os::elapsedVTime(); - } if (hr->continuesHumongous()) { // We will ignore these here and process them when their // associated "starts humongous" region is processed (see // set_bit_for_heap_region()). Note that we cannot rely on their --- 1335,1354 ---- if (!hr->startsHumongous()) { // Normal (non-humongous) case: just set the bit. _region_bm->par_at_put((BitMap::idx_t) index, true); } else { // Starts humongous case: calculate how many regions are part of ! // this humongous region and then set the bit range. G1CollectedHeap* g1h = G1CollectedHeap::heap(); ! HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1); ! 
size_t end_index = last_hr->hrs_index() + 1; _region_bm->par_at_put_range((BitMap::idx_t) index, (BitMap::idx_t) end_index, true); } } bool doHeapRegion(HeapRegion* hr) { if (hr->continuesHumongous()) { // We will ignore these here and process them when their // associated "starts humongous" region is processed (see // set_bit_for_heap_region()). Note that we cannot rely on their
*** 1350,1401 **** // before its associated "starts humongous". return false; } HeapWord* nextTop = hr->next_top_at_mark_start(); ! HeapWord* start = hr->top_at_conc_mark_count(); ! assert(hr->bottom() <= start && start <= hr->end() && ! hr->bottom() <= nextTop && nextTop <= hr->end() && ! start <= nextTop, "Preconditions."); ! // Otherwise, record the number of word's we'll examine. size_t words_done = (nextTop - start); // Find the first marked object at or after "start". start = _bm->getNextMarkedWordAddress(start, nextTop); size_t marked_bytes = 0; // Below, the term "card num" means the result of shifting an address // by the card shift -- address 0 corresponds to card number 0. One // must subtract the card num of the bottom of the heap to obtain a // card table index. // The first card num of the sequence of live cards currently being // constructed. -1 ==> no sequence. intptr_t start_card_num = -1; // The last card num of the sequence of live cards currently being // constructed. -1 ==> no sequence. intptr_t last_card_num = -1; while (start < nextTop) { - if (_yield && _cm->do_yield_check()) { - // We yielded. It might be for a full collection, in which case - // all bets are off; terminate the traversal. - if (_cm->has_aborted()) { - _changed = false; - return true; - } else { - // Otherwise, it might be a collection pause, and the region - // we're looking at might be in the collection set. We'll - // abandon this region. - return false; - } - } oop obj = oop(start); int obj_sz = obj->size(); // The card num of the start of the current object. intptr_t obj_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); - HeapWord* obj_last = start + obj_sz - 1; intptr_t obj_last_card_num = intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift); if (obj_card_num != last_card_num) { --- 1358,1401 ---- // before its associated "starts humongous". return false; } HeapWord* nextTop = hr->next_top_at_mark_start(); ! 
HeapWord* start = hr->bottom(); ! ! assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(), "Preconditions."); ! ! // Record the number of word's we'll examine. size_t words_done = (nextTop - start); + // Find the first marked object at or after "start". start = _bm->getNextMarkedWordAddress(start, nextTop); + size_t marked_bytes = 0; + _region_marked_bytes = 0; // Below, the term "card num" means the result of shifting an address // by the card shift -- address 0 corresponds to card number 0. One // must subtract the card num of the bottom of the heap to obtain a // card table index. + // The first card num of the sequence of live cards currently being // constructed. -1 ==> no sequence. intptr_t start_card_num = -1; + // The last card num of the sequence of live cards currently being // constructed. -1 ==> no sequence. intptr_t last_card_num = -1; while (start < nextTop) { oop obj = oop(start); int obj_sz = obj->size(); + // The card num of the start of the current object. intptr_t obj_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); HeapWord* obj_last = start + obj_sz - 1; intptr_t obj_last_card_num = intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift); if (obj_card_num != last_card_num) {
*** 1410,1520 **** mark_card_num_range(start_card_num, last_card_num); start_card_num = obj_card_num; } } #if CARD_BM_TEST_MODE - /* - gclog_or_tty->print_cr("Setting bits from %d/%d.", - obj_card_num - _bottom_card_num, - obj_last_card_num - _bottom_card_num); - */ for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) { _card_bm->par_at_put(j - _bottom_card_num, 1); } ! #endif } // In any case, we set the last card num. last_card_num = obj_last_card_num; marked_bytes += (size_t)obj_sz * HeapWordSize; // Find the next marked object after this one. start = _bm->getNextMarkedWordAddress(start + 1, nextTop); - _changed = true; } // Handle the last range, if any. if (start_card_num != -1) { mark_card_num_range(start_card_num, last_card_num); } ! if (_final) { // Mark the allocated-since-marking portion... ! HeapWord* tp = hr->top(); ! if (nextTop < tp) { ! start_card_num = ! intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift); ! last_card_num = ! intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift); mark_card_num_range(start_card_num, last_card_num); // This definitely means the region has live objects. set_bit_for_region(hr); } - } - hr->add_to_marked_bytes(marked_bytes); // Update the live region bitmap. if (marked_bytes > 0) { set_bit_for_region(hr); } ! hr->set_top_at_conc_mark_count(nextTop); _tot_live += hr->next_live_bytes(); _tot_used += hr->used(); ! _words_done = words_done; ! if (!_final) { ! ++_regions_done; ! if (_regions_done % 10 == 0) { ! double end_vtime_sec = os::elapsedVTime(); ! double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec; ! if (elapsed_vtime_sec > (10.0 / 1000.0)) { ! jlong sleep_time_ms = ! (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0); ! os::sleep(Thread::current(), sleep_time_ms, false); ! _start_vtime_sec = end_vtime_sec; } } } return false; } ! bool changed() { return _changed; } ! void reset() { _changed = false; _words_done = 0; } ! void no_yield() { _yield = false; } ! 
size_t words_done() { return _words_done; } ! size_t tot_live() { return _tot_live; } ! size_t tot_used() { return _tot_used; } }; ! void ConcurrentMark::calcDesiredRegions() { ! _region_bm.clear(); ! _card_bm.clear(); ! CalcLiveObjectsClosure calccl(false /*final*/, ! nextMarkBitMap(), this, ! &_region_bm, &_card_bm); ! G1CollectedHeap *g1h = G1CollectedHeap::heap(); ! g1h->heap_region_iterate(&calccl); ! do { ! calccl.reset(); ! g1h->heap_region_iterate(&calccl); ! } while (calccl.changed()); ! } class G1ParFinalCountTask: public AbstractGangTask { protected: G1CollectedHeap* _g1h; ! CMBitMap* _bm; size_t _n_workers; size_t *_live_bytes; size_t *_used_bytes; ! BitMap* _region_bm; ! BitMap* _card_bm; public: ! G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm, ! BitMap* region_bm, BitMap* card_bm) ! : AbstractGangTask("G1 final counting"), _g1h(g1h), ! _bm(bm), _region_bm(region_bm), _card_bm(card_bm), _n_workers(0) { // Use the value already set as the number of active threads // in the call to run_task(). Needed for the allocation of // _live_bytes and _used_bytes. --- 1410,1838 ---- mark_card_num_range(start_card_num, last_card_num); start_card_num = obj_card_num; } } #if CARD_BM_TEST_MODE for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) { _card_bm->par_at_put(j - _bottom_card_num, 1); } ! #endif // CARD_BM_TEST_MODE } // In any case, we set the last card num. last_card_num = obj_last_card_num; marked_bytes += (size_t)obj_sz * HeapWordSize; + // Find the next marked object after this one. start = _bm->getNextMarkedWordAddress(start + 1, nextTop); } + // Handle the last range, if any. if (start_card_num != -1) { mark_card_num_range(start_card_num, last_card_num); } ! // Mark the allocated-since-marking portion... ! HeapWord* top = hr->top(); ! if (nextTop < top) { ! start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift); ! last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift); ! 
mark_card_num_range(start_card_num, last_card_num); + // This definitely means the region has live objects. set_bit_for_region(hr); } // Update the live region bitmap. if (marked_bytes > 0) { set_bit_for_region(hr); } ! ! // Set the marked bytes for the current region so that ! // it can be queried by a calling verificiation routine ! _region_marked_bytes = marked_bytes; ! _tot_live += hr->next_live_bytes(); _tot_used += hr->used(); ! _tot_words_done = words_done; ! return false; ! } ! ! size_t region_marked_bytes() const { return _region_marked_bytes; } ! size_t tot_words_done() const { return _tot_words_done; } ! size_t tot_live() const { return _tot_live; } ! size_t tot_used() const { return _tot_used; } ! }; ! ! // Heap region closure used for verifying the counting data ! // that was accumulated concurrently and aggregated during ! // the remark pause. This closure is applied to the heap ! // regions during the STW cleanup pause. ! ! class VerifyLiveObjectDataHRClosure: public HeapRegionClosure { ! ConcurrentMark* _cm; ! CalcLiveObjectsClosure _calc_cl; ! BitMap* _region_bm; // Region BM to be verified ! BitMap* _card_bm; // Card BM to be verified ! bool _verbose; // verbose output? ! ! BitMap* _exp_region_bm; // Expected Region BM values ! BitMap* _exp_card_bm; // Expected card BM values ! ! intptr_t _bottom_card_num; // Used for calculatint bitmap indices ! ! int _failures; ! ! public: ! VerifyLiveObjectDataHRClosure(ConcurrentMark* cm, ! BitMap* region_bm, ! BitMap* card_bm, ! BitMap* exp_region_bm, ! BitMap* exp_card_bm, ! bool verbose) : ! _cm(cm), ! _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm), ! _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose), ! _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm), ! _failures(0) ! { ! _bottom_card_num = cm->heap_bottom_card_num(); ! } ! ! int failures() const { return _failures; } ! ! bool doHeapRegion(HeapRegion* hr) { ! if (hr->continuesHumongous()) { ! 
// We will ignore these here and process them when their ! // associated "starts humongous" region is processed (see ! // set_bit_for_heap_region()). Note that we cannot rely on their ! // associated "starts humongous" region to have their bit set to ! // 1 since, due to the region chunking in the parallel region ! // iteration, a "continues humongous" region might be visited ! // before its associated "starts humongous". ! return false; } + + int failures = 0; + + // Call the CalcLiveObjectsClosure to walk the marking bitmap for + // this region and set the corresponding bits in the expected region + // and card bitmaps. + bool res = _calc_cl.doHeapRegion(hr); + assert(res == false, "should be continuing"); + + // Note that the calculated count data could be a subset of the + // count data that was accumlated during marking. See the comment + // in G1ParCopyHelper::copy_to_survivor space for an explanation + // why. + + // Verify that _top_at_conc_count == ntams + if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) { + if (_verbose) { + gclog_or_tty->print_cr("Region %d: top at conc count incorrect: expected " + PTR_FORMAT", actual: "PTR_FORMAT, + hr->hrs_index(), hr->next_top_at_mark_start(), + hr->top_at_conc_mark_count()); } + failures += 1; } + // Verify the marked bytes for this region. + size_t exp_marked_bytes = _calc_cl.region_marked_bytes(); + size_t act_marked_bytes = hr->next_marked_bytes(); + + // We're not OK if expected marked bytes > actual marked bytes. It means + // we have missed accounting some objects during the actual marking. + if (exp_marked_bytes > act_marked_bytes) { + if (_verbose) { + gclog_or_tty->print_cr("Region %d: marked bytes mismatch: expected: " + SIZE_FORMAT", actual: "SIZE_FORMAT, + hr->hrs_index(), exp_marked_bytes, act_marked_bytes); + } + failures += 1; + } + + // Verify the bit, for this region, in the actual and expected + // (which was just calculated) region bit maps. 
+ // We're not OK if the expected bit is set and the actual is not set. + BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index(); + + bool expected = _exp_region_bm->at(index); + bool actual = _region_bm->at(index); + if (expected && !actual) { + if (_verbose) { + gclog_or_tty->print_cr("Region %d: region bitmap mismatch: expected: %d, actual: %d", + hr->hrs_index(), expected, actual); + } + failures += 1; + } + + // Verify that the card bit maps for the cards spanned by the current + // region match. The set of offsets that have set bits in the expected + // bitmap should be a subset of the offsets with set bits from the actual + // calculated card bitmap. + // Again it's more important that if the expected bit is set then the + // actual bit be set. + intptr_t start_card_num = + intptr_t(uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift); + intptr_t top_card_num = + intptr_t(uintptr_t(hr->top()) >> CardTableModRefBS::card_shift); + + BitMap::idx_t start_idx = start_card_num - _bottom_card_num; + BitMap::idx_t end_idx = top_card_num - _bottom_card_num; + + for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { + expected = _exp_card_bm->at(i); + actual = _card_bm->at(i); + + if (expected && !actual) { + if (_verbose) { + gclog_or_tty->print_cr("Region %d: card bitmap mismatch at idx %d: expected: %d, actual: %d", + hr->hrs_index(), i, expected, actual); + } + failures += 1; + } + } + + if (failures > 0 && _verbose) { + gclog_or_tty->print("Region %d: bottom: "PTR_FORMAT", ntams: " + PTR_FORMAT", top: "PTR_FORMAT", end: "PTR_FORMAT, + hr->hrs_index(), hr->bottom(), hr->next_top_at_mark_start(), + hr->top(), hr->end()); + gclog_or_tty->print_cr(", marked_bytes: calc/actual "SIZE_FORMAT"/"SIZE_FORMAT, + _calc_cl.region_marked_bytes(), + hr->next_marked_bytes()); + } + + _failures += failures; + + // We could stop iteration over the heap when we + // find the first voilating region by returning true. 
return false; } + }; + + + class G1ParVerifyFinalCountTask: public AbstractGangTask { + protected: + G1CollectedHeap* _g1h; + ConcurrentMark* _cm; + BitMap* _actual_region_bm; + BitMap* _actual_card_bm; + + size_t _n_workers; + + BitMap* _expected_region_bm; + BitMap* _expected_card_bm; + + int _failures; + bool _verbose; + + public: + G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, + BitMap* region_bm, BitMap* card_bm, + BitMap* expected_region_bm, BitMap* expected_card_bm) + : AbstractGangTask("G1 verify final counting"), + _g1h(g1h), _cm(_g1h->concurrent_mark()), + _actual_region_bm(region_bm), _actual_card_bm(card_bm), + _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), + _failures(0), _verbose(false), + _n_workers(0) + { + assert(VerifyDuringGC, "don't call this otherwise"); + + // Use the value already set as the number of active threads + // in the call to run_task(). + if (G1CollectedHeap::use_parallel_gc_threads()) { + assert( _g1h->workers()->active_workers() > 0, + "Should have been previously set"); + _n_workers = _g1h->workers()->active_workers(); + } else { + _n_workers = 1; + } + + assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); + assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); + + _verbose = _cm->verbose_medium(); + } + + void work(int worker_i) { + assert((size_t) worker_i < _n_workers, "invariant"); + + VerifyLiveObjectDataHRClosure verify_cl(_cm, + _actual_region_bm, _actual_card_bm, + _expected_region_bm, + _expected_card_bm, + _verbose); + + if (G1CollectedHeap::use_parallel_gc_threads()) { + _g1h->heap_region_par_iterate_chunked(&verify_cl, + worker_i, + (int) _n_workers, + HeapRegion::VerifyCountClaimValue); + } else { + _g1h->heap_region_iterate(&verify_cl); + } ! Atomic::add(verify_cl.failures(), &_failures); ! } ! ! int failures() const { return _failures; } }; + // Final update of count data (during cleanup). 
+ // Adds [top_at_count, NTAMS) to the marked bytes for each + // region. Sets the bits in the card bitmap corresponding + // to the interval [top_at_count, top], and sets the + // liveness bit for each region containing live data + // in the region bitmap. ! class FinalCountDataUpdateClosure: public HeapRegionClosure { ! ConcurrentMark* _cm; ! BitMap* _region_bm; ! BitMap* _card_bm; ! intptr_t _bottom_card_num; ! size_t _total_live_bytes; ! size_t _total_used_bytes; ! size_t _total_words_done; ! ! void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) { ! BitMap::idx_t start_idx = start_card_num - _bottom_card_num; ! BitMap::idx_t last_idx = last_card_num - _bottom_card_num; ! ! // Inclusive bit range [start_idx, last_idx]. par_at_put_range ! // is exclusive so we have to also set the bit for last_idx. ! // Passing last_idx+1 to the clear_range would work in ! // most cases but could trip an OOB assertion. ! ! if ((last_idx - start_idx) > 0) { ! _card_bm->par_at_put_range(start_idx, last_idx, true); ! } ! _card_bm->par_set_bit(last_idx); ! } ! ! // It takes a region that's not empty (i.e., it has at least one ! // live object in it and sets its corresponding bit on the region ! // bitmap to 1. If the region is "starts humongous" it will also set ! // to 1 the bits on the region bitmap that correspond to its ! // associated "continues humongous" regions. ! void set_bit_for_region(HeapRegion* hr) { ! assert(!hr->continuesHumongous(), "should have filtered those out"); ! ! size_t index = hr->hrs_index(); ! if (!hr->startsHumongous()) { ! // Normal (non-humongous) case: just set the bit. ! _region_bm->par_set_bit((BitMap::idx_t) index); ! } else { ! // Starts humongous case: calculate how many regions are part of ! // this humongous region and then set the bit range. ! G1CollectedHeap* g1h = G1CollectedHeap::heap(); ! HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1); ! size_t end_index = last_hr->hrs_index() + 1; ! 
_region_bm->par_at_put_range((BitMap::idx_t) index, ! (BitMap::idx_t) end_index, true); ! } ! } ! ! public: ! FinalCountDataUpdateClosure(ConcurrentMark* cm, ! BitMap* region_bm, ! BitMap* card_bm) : ! _cm(cm), _region_bm(region_bm), _card_bm(card_bm), ! _total_words_done(0), _total_live_bytes(0), _total_used_bytes(0) ! { ! _bottom_card_num = cm->heap_bottom_card_num(); ! } ! ! bool doHeapRegion(HeapRegion* hr) { ! ! if (hr->continuesHumongous()) { ! // We will ignore these here and process them when their ! // associated "starts humongous" region is processed (see ! // set_bit_for_heap_region()). Note that we cannot rely on their ! // associated "starts humongous" region to have their bit set to ! // 1 since, due to the region chunking in the parallel region ! // iteration, a "continues humongous" region might be visited ! // before its associated "starts humongous". ! return false; ! } ! ! HeapWord* start = hr->top_at_conc_mark_count(); ! HeapWord* ntams = hr->next_top_at_mark_start(); ! HeapWord* top = hr->top(); ! ! assert(hr->bottom() <= start && start <= hr->end() && ! hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); ! ! size_t words_done = ntams - hr->bottom(); ! ! intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); ! intptr_t last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift); ! ! ! if (start < ntams) { ! // Region was changed between remark and cleanup pauses ! // We need to add (ntams - start) to the marked bytes ! // for this region, and set bits for the range ! // [ card_num(start), card_num(ntams) ) in the ! // card bitmap. ! size_t live_bytes = (ntams - start) * HeapWordSize; ! hr->add_to_marked_bytes(live_bytes); ! ! // Record the new top at conc count ! hr->set_top_at_conc_mark_count(ntams); ! ! // The setting of the bits card bitmap takes place below ! } ! ! // Mark the allocated-since-marking portion... ! if (ntams < top) { ! 
// This definitely means the region has live objects. ! set_bit_for_region(hr); ! } ! ! // Now set the bits for [start, top] ! mark_card_num_range(start_card_num, last_card_num); ! ! // Set the bit for the region if it contains live data ! if (hr->next_marked_bytes() > 0) { ! set_bit_for_region(hr); ! } ! ! _total_words_done += words_done; ! _total_used_bytes += hr->used(); ! _total_live_bytes += hr->next_marked_bytes(); ! ! return false; ! } ! ! size_t total_words_done() const { return _total_words_done; } ! size_t total_live_bytes() const { return _total_live_bytes; } ! size_t total_used_bytes() const { return _total_used_bytes; } ! }; class G1ParFinalCountTask: public AbstractGangTask { protected: G1CollectedHeap* _g1h; ! ConcurrentMark* _cm; ! BitMap* _actual_region_bm; ! BitMap* _actual_card_bm; ! size_t _n_workers; + size_t *_live_bytes; size_t *_used_bytes; ! public: ! G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) ! : AbstractGangTask("G1 final counting"), ! _g1h(g1h), _cm(_g1h->concurrent_mark()), ! _actual_region_bm(region_bm), _actual_card_bm(card_bm), _n_workers(0) { // Use the value already set as the number of active threads // in the call to run_task(). Needed for the allocation of // _live_bytes and _used_bytes.
*** 1533,1566 **** ~G1ParFinalCountTask() { FREE_C_HEAP_ARRAY(size_t, _live_bytes); FREE_C_HEAP_ARRAY(size_t, _used_bytes); } ! void work(int i) { ! CalcLiveObjectsClosure calccl(true /*final*/, ! _bm, _g1h->concurrent_mark(), ! _region_bm, _card_bm); ! calccl.no_yield(); if (G1CollectedHeap::use_parallel_gc_threads()) { ! _g1h->heap_region_par_iterate_chunked(&calccl, i, (int) _n_workers, HeapRegion::FinalCountClaimValue); } else { ! _g1h->heap_region_iterate(&calccl); } - assert(calccl.complete(), "Shouldn't have yielded!"); ! assert((size_t) i < _n_workers, "invariant"); ! _live_bytes[i] = calccl.tot_live(); ! _used_bytes[i] = calccl.tot_used(); } size_t live_bytes() { size_t live_bytes = 0; for (size_t i = 0; i < _n_workers; ++i) live_bytes += _live_bytes[i]; return live_bytes; } size_t used_bytes() { size_t used_bytes = 0; for (size_t i = 0; i < _n_workers; ++i) used_bytes += _used_bytes[i]; return used_bytes; --- 1851,1887 ---- ~G1ParFinalCountTask() { FREE_C_HEAP_ARRAY(size_t, _live_bytes); FREE_C_HEAP_ARRAY(size_t, _used_bytes); } ! void work(int worker_i) { ! assert((size_t) worker_i < _n_workers, "invariant"); ! ! FinalCountDataUpdateClosure final_update_cl(_cm, ! _actual_region_bm, ! _actual_card_bm); ! if (G1CollectedHeap::use_parallel_gc_threads()) { ! _g1h->heap_region_par_iterate_chunked(&final_update_cl, ! worker_i, (int) _n_workers, HeapRegion::FinalCountClaimValue); } else { ! _g1h->heap_region_iterate(&final_update_cl); } ! _live_bytes[worker_i] = final_update_cl.total_live_bytes(); ! _used_bytes[worker_i] = final_update_cl.total_used_bytes(); } + size_t live_bytes() { size_t live_bytes = 0; for (size_t i = 0; i < _n_workers; ++i) live_bytes += _live_bytes[i]; return live_bytes; } + size_t used_bytes() { size_t used_bytes = 0; for (size_t i = 0; i < _n_workers; ++i) used_bytes += _used_bytes[i]; return used_bytes;
*** 1764,1799 **** double start = os::elapsedTime(); HeapRegionRemSet::reset_for_cleanup_tasks(); size_t n_workers; // Do counting once more with the world stopped for good measure. ! G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), ! &_region_bm, &_card_bm); if (G1CollectedHeap::use_parallel_gc_threads()) { ! assert(g1h->check_heap_region_claim_values( ! HeapRegion::InitialClaimValue), "sanity check"); g1h->set_par_threads(); n_workers = g1h->n_par_threads(); assert(g1h->n_par_threads() == (int) n_workers, "Should not have been reset"); g1h->workers()->run_task(&g1_par_count_task); // Done with the parallel phase so reset to 0. g1h->set_par_threads(0); ! assert(g1h->check_heap_region_claim_values( ! HeapRegion::FinalCountClaimValue), "sanity check"); } else { n_workers = 1; g1_par_count_task.work(0); } size_t known_garbage_bytes = g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); g1p->set_known_garbage_bytes(known_garbage_bytes); size_t start_used_bytes = g1h->used(); --- 2085,2151 ---- double start = os::elapsedTime(); HeapRegionRemSet::reset_for_cleanup_tasks(); + // Clear the global region bitmap - it will be filled as part + // of the final counting task. + _region_bm.clear(); + size_t n_workers; // Do counting once more with the world stopped for good measure. ! G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); ! if (G1CollectedHeap::use_parallel_gc_threads()) { ! assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity check"); g1h->set_par_threads(); n_workers = g1h->n_par_threads(); assert(g1h->n_par_threads() == (int) n_workers, "Should not have been reset"); g1h->workers()->run_task(&g1_par_count_task); // Done with the parallel phase so reset to 0. g1h->set_par_threads(0); ! 
assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), "sanity check"); } else { n_workers = 1; g1_par_count_task.work(0); } + if (VerifyDuringGC) { + // Verify that the counting data accumulated during marking matches + // that calculated by walking the marking bitmap. + + // Bitmaps to hold expected values + BitMap expected_region_bm(_region_bm.size(), false); + BitMap expected_card_bm(_card_bm.size(), false); + + G1ParVerifyFinalCountTask g1_par_verify_task(g1h, + &_region_bm, + &_card_bm, + &expected_region_bm, + &expected_card_bm); + + if (G1CollectedHeap::use_parallel_gc_threads()) { + g1h->set_par_threads((int)n_workers); + g1h->workers()->run_task(&g1_par_verify_task); + // Done with the parallel phase so reset to 0. + g1h->set_par_threads(0); + + assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), + "sanity check"); + } else { + g1_par_verify_task.work(0); + } + + guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); + } + size_t known_garbage_bytes = g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); g1p->set_known_garbage_bytes(known_garbage_bytes); size_t start_used_bytes = g1h->used();
*** 1982,1997 **** } class G1CMKeepAliveClosure: public OopClosure { G1CollectedHeap* _g1; ConcurrentMark* _cm; - CMBitMap* _bitMap; public: ! G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm, ! CMBitMap* bitMap) : ! _g1(g1), _cm(cm), ! _bitMap(bitMap) {} virtual void do_oop(narrowOop* p) { do_oop_work(p); } virtual void do_oop( oop* p) { do_oop_work(p); } template <class T> void do_oop_work(T* p) { --- 2334,2349 ---- } class G1CMKeepAliveClosure: public OopClosure { G1CollectedHeap* _g1; ConcurrentMark* _cm; public: ! G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) : ! _g1(g1), _cm(cm) ! { ! assert(Thread::current()->is_VM_thread(), "otherwise fix worker id"); ! } virtual void do_oop(narrowOop* p) { do_oop_work(p); } virtual void do_oop( oop* p) { do_oop_work(p); } template <class T> void do_oop_work(T* p) {
*** 2003,2032 **** "*"PTR_FORMAT" = "PTR_FORMAT, p, (void*) obj); } if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) { ! _bitMap->mark(addr); _cm->mark_stack_push(obj); } } }; class G1CMDrainMarkingStackClosure: public VoidClosure { CMMarkStack* _markStack; - CMBitMap* _bitMap; G1CMKeepAliveClosure* _oopClosure; public: ! G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack, G1CMKeepAliveClosure* oopClosure) : ! _bitMap(bitMap), _markStack(markStack), _oopClosure(oopClosure) {} void do_void() { ! _markStack->drain((OopClosure*)_oopClosure, _bitMap, false); } }; // 'Keep Alive' closure used by parallel reference processing. // An instance of this closure is used in the parallel reference processing --- 2355,2384 ---- "*"PTR_FORMAT" = "PTR_FORMAT, p, (void*) obj); } if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) { ! _cm->mark_and_count(obj); _cm->mark_stack_push(obj); } } }; class G1CMDrainMarkingStackClosure: public VoidClosure { + ConcurrentMark* _cm; CMMarkStack* _markStack; G1CMKeepAliveClosure* _oopClosure; public: ! G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack, G1CMKeepAliveClosure* oopClosure) : ! _cm(cm), _markStack(markStack), _oopClosure(oopClosure) {} void do_void() { ! _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false); } }; // 'Keep Alive' closure used by parallel reference processing. // An instance of this closure is used in the parallel reference processing
*** 2241,2253 **** // Process weak references. rp->setup_policy(clear_all_soft_refs); assert(_markStack.isEmpty(), "mark stack should be empty"); ! G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap()); G1CMDrainMarkingStackClosure ! g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive); // We use the work gang from the G1CollectedHeap and we utilize all // the worker threads. int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1; active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1); --- 2593,2605 ---- // Process weak references. rp->setup_policy(clear_all_soft_refs); assert(_markStack.isEmpty(), "mark stack should be empty"); ! G1CMKeepAliveClosure g1_keep_alive(g1h, this); G1CMDrainMarkingStackClosure ! g1_drain_mark_stack(this, &_markStack, &g1_keep_alive); // We use the work gang from the G1CollectedHeap and we utilize all // the worker threads. int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1; active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
*** 2621,2637 **** private: ConcurrentMark* _cm; public: void do_object(oop obj) { ! _cm->deal_with_reference(obj); } CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { } }; ! void ConcurrentMark::deal_with_reference(oop obj) { if (verbose_high()) { gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, (void*) obj); } --- 2973,2989 ---- private: ConcurrentMark* _cm; public: void do_object(oop obj) { ! _cm->deal_with_reference(obj, 0); } CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { } }; ! void ConcurrentMark::deal_with_reference(oop obj, int worker_i) { if (verbose_high()) { gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, (void*) obj); }
*** 2649,2661 **** gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered " "marked", (void*) obj); } // we need to mark it first ! if (_nextMarkBitMap->parMark(objAddr)) { // No OrderAccess:store_load() is needed. It is implicit in the ! // CAS done in parMark(objAddr) above HeapWord* finger = _finger; if (objAddr < finger) { if (verbose_high()) { gclog_or_tty->print_cr("[global] below the global finger " "("PTR_FORMAT"), pushing it", finger); --- 3001,3014 ---- gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered " "marked", (void*) obj); } // we need to mark it first ! if (par_mark_and_count(obj, hr, worker_i)) { // No OrderAccess:store_load() is needed. It is implicit in the ! // CAS done in the call to CMBitMap::parMark() in the above ! // routine. HeapWord* finger = _finger; if (objAddr < finger) { if (verbose_high()) { gclog_or_tty->print_cr("[global] below the global finger " "("PTR_FORMAT"), pushing it", finger);
*** 2696,2706 **** // Note we are overriding the read-only view of the prev map here, via // the cast. ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p); } ! void ConcurrentMark::clear(oop p) { assert(p != NULL && p->is_oop(), "expected an oop"); HeapWord* addr = (HeapWord*)p; assert(addr >= _nextMarkBitMap->startWord() || addr < _nextMarkBitMap->endWord(), "in a region"); --- 3049,3059 ---- // Note we are overriding the read-only view of the prev map here, via // the cast. ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p); } ! void ConcurrentMark::clear_mark(oop p) { assert(p != NULL && p->is_oop(), "expected an oop"); HeapWord* addr = (HeapWord*)p; assert(addr >= _nextMarkBitMap->startWord() || addr < _nextMarkBitMap->endWord(), "in a region");
*** 2896,2905 **** --- 3249,3497 ---- // Clear any partial regions from the CMTasks _tasks[i]->clear_aborted_region(); } } + // Aggregate the counting data that was constructed concurrently + // with marking. + class AggregateCountDataHRClosure: public HeapRegionClosure { + ConcurrentMark* _cm; + BitMap* _cm_card_bm; + intptr_t _bottom_card_num; + size_t _max_task_num; + + public: + AggregateCountDataHRClosure(ConcurrentMark *cm, + BitMap* cm_card_bm, + intptr_t bottom_card_num, + size_t max_task_num) : + _cm(cm), + _cm_card_bm(cm_card_bm), + _bottom_card_num(bottom_card_num), + _max_task_num(max_task_num) + { } + + bool is_card_aligned(HeapWord* p) { + return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0); + } + + bool doHeapRegion(HeapRegion* hr) { + if (hr->continuesHumongous()) { + // We will ignore these here and process them when their + // associated "starts humongous" region is processed. + // Note that we cannot rely on their associated + // "starts humongous" region to have their bit set to 1 + // since, due to the region chunking in the parallel region + // iteration, a "continues humongous" region might be visited + // before its associated "starts humongous". + return false; + } + + HeapWord* start = hr->bottom(); + HeapWord* limit = hr->next_top_at_mark_start(); + HeapWord* end = hr->end(); + + assert(start <= limit && limit <= hr->top() && + hr->top() <= hr->end(), "Preconditions"); + + assert(hr->next_marked_bytes() == 0, "Precondition"); + + if (start == limit) { + // NTAMS of this region has not been set so nothing to do. 
+ return false;
+ }
+
+ intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
+ intptr_t limit_card_num = intptr_t(uintptr_t(limit) >> CardTableModRefBS::card_shift);
+ intptr_t end_card_num = intptr_t(uintptr_t(end) >> CardTableModRefBS::card_shift);
+
+ assert(is_card_aligned(start), "sanity");
+ assert(is_card_aligned(end), "sanity");
+
+ // If ntams is not card aligned then we bump the index for
+ // limit so that we get the card spanning ntams.
+ if (!is_card_aligned(limit)) {
+ limit_card_num += 1;
+ }
+
+ assert(limit_card_num <= end_card_num, "or else use atomics");
+
+ BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
+ BitMap::idx_t limit_idx = limit_card_num - _bottom_card_num;
+
+ // Aggregate the "stripe" in the count data associated with hr.
+ size_t hrs_index = hr->hrs_index();
+ size_t marked_bytes = 0;
+
+ for (int i = 0; (size_t)i < _max_task_num; i += 1) {
+ size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
+ BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
+
+ // Fetch the marked_bytes in this region for task i and
+ // add it to the running total for this region.
+ marked_bytes += marked_bytes_array[hrs_index];
+
+ // Now clear the value in the task's marked bytes array
+ // for this region.
+ marked_bytes_array[hrs_index] = 0;
+
+ // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
+ // into the global card bitmap.
+ BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
+
+ while (scan_idx < limit_idx) {
+ assert(task_card_bm->at(scan_idx) == true, "should be");
+ _cm_card_bm->set_bit(scan_idx);
+ task_card_bm->clear_bit(scan_idx);
+ assert(_cm_card_bm->at(scan_idx) == true, "should be");
+ // Find the next marked bit after the one just processed;
+ // restarting from start_idx + 1 would rescan the already
+ // cleared prefix of the stripe on every iteration.
+ scan_idx = task_card_bm->get_next_one_offset(scan_idx + 1, limit_idx);
+ }
+ }
+
+ // Update the marked bytes for this region.
+ hr->add_to_marked_bytes(marked_bytes);
+
+ // Now set the top at count to NTAMS. 
+ hr->set_top_at_conc_mark_count(limit); + + // Next heap region + return false; + } + }; + + class G1AggregateCountDataTask: public AbstractGangTask { + protected: + G1CollectedHeap* _g1h; + ConcurrentMark* _cm; + BitMap* _cm_card_bm; + intptr_t _heap_bottom_card_num; + size_t _max_task_num; + int _active_workers; + + public: + G1AggregateCountDataTask(G1CollectedHeap* g1h, + ConcurrentMark* cm, + BitMap* cm_card_bm, + intptr_t bottom_card_num, + size_t max_task_num, + int n_workers) : + AbstractGangTask("Count Aggregation"), + _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), + _heap_bottom_card_num(bottom_card_num), + _max_task_num(max_task_num), + _active_workers(n_workers) + { } + + void work(int worker_i) { + AggregateCountDataHRClosure cl(_cm, _cm_card_bm, + _heap_bottom_card_num, _max_task_num); + + if (G1CollectedHeap::use_parallel_gc_threads()) { + _g1h->heap_region_par_iterate_chunked(&cl, worker_i, + _active_workers, + HeapRegion::AggregateCountClaimValue); + } else { + _g1h->heap_region_iterate(&cl); + } + } + }; + + + void ConcurrentMark::aggregate_and_clear_count_data() { + // Clear the global card bitmap + _card_bm.clear(); + + int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 
+ _g1h->workers()->active_workers() : + 1); + + G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, + _heap_bottom_card_num, _max_task_num, + n_workers); + + if (G1CollectedHeap::use_parallel_gc_threads()) { + assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), + "sanity check"); + _g1h->set_par_threads(n_workers); + _g1h->workers()->run_task(&g1_par_agg_task); + _g1h->set_par_threads(0); + + assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), + "sanity check"); + _g1h->reset_heap_region_claim_values(); + } else { + g1_par_agg_task.work(0); + } + } + + // Clear the per-worker arrays used to store the per-region counting data + void ConcurrentMark::clear_all_count_data() { + assert(SafepointSynchronize::is_at_safepoint() || + !Universe::is_fully_initialized(), "must be"); + + size_t max_regions = _g1h->max_regions(); + + assert(_max_task_num != 0, "unitialized"); + assert(_count_card_bitmaps != NULL, "uninitialized"); + assert(_count_marked_bytes != NULL, "uninitialized"); + + for (int i = 0; (size_t) i < _max_task_num; i += 1) { + BitMap* task_card_bm = count_card_bitmap_for(i); + size_t* marked_bytes_array = count_marked_bytes_array_for(i); + + assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); + assert(marked_bytes_array != NULL, "uninitialized"); + + for (int j = 0; (size_t) j < max_regions; j++) { + marked_bytes_array[j] = 0; + } + task_card_bm->clear(); + } + } + + void ConcurrentMark::clear_count_data_for_heap_region(HeapRegion* hr) { + // Clears the count data for the given region from _all_ of + // the per-task counting data structures. 
+ + MemRegion used_region = hr->used_region(); + HeapWord* start = used_region.start(); + HeapWord* last = used_region.last(); + size_t hr_index = hr->hrs_index(); + + intptr_t start_card_num = + intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); + intptr_t last_card_num = + intptr_t(uintptr_t(last) >> CardTableModRefBS::card_shift); + + BitMap::idx_t start_idx = start_card_num - heap_bottom_card_num(); + BitMap::idx_t last_idx = last_card_num - heap_bottom_card_num(); + + size_t used_region_bytes = used_region.byte_size(); + size_t marked_bytes = 0; + + for (int i=0; (size_t)i < _max_task_num; i += 1) { + BitMap* task_card_bm = count_card_bitmap_for(i); + size_t* marked_bytes_array = count_marked_bytes_array_for(i); + + marked_bytes += marked_bytes_array[hr_index]; + // clear the amount of marked bytes in the task array for this + // region + marked_bytes_array[hr_index] = 0; + + // Clear the inclusive range [start_idx, last_idx] from the + // card bitmap. The clear_range routine is exclusive so we + // need to also explicitly clear the bit at last_idx. + // Passing last_idx+1 to the clear_range would work in + // most cases but could trip an OOB assertion. + + if ((last_idx - start_idx) > 0) { + task_card_bm->clear_range(start_idx, last_idx); + } + task_card_bm->clear_bit(last_idx); + } + } + void ConcurrentMark::print_stats() { if (verbose_stats()) { gclog_or_tty->print_cr("---------------------------------------------------------------------"); for (size_t i = 0; i < _active_tasks; ++i) { _tasks[i]->print_stats();
*** 2912,2922 **** class CSetMarkOopClosure: public OopClosure { friend class CSetMarkBitMapClosure; G1CollectedHeap* _g1h; - CMBitMap* _bm; ConcurrentMark* _cm; oop* _ms; jint* _array_ind_stack; int _ms_size; int _ms_ind; --- 3504,3513 ----
*** 2972,2982 **** public: CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) : _g1h(G1CollectedHeap::heap()), _cm(cm), - _bm(cm->nextMarkBitMap()), _ms_size(ms_size), _ms_ind(0), _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), _array_increment(MAX2(ms_size/8, 16)), _worker_i(worker_i) { } --- 3563,3572 ----
*** 3002,3021 **** } HeapRegion* hr = _g1h->heap_region_containing(obj); if (hr != NULL) { if (hr->in_collection_set()) { if (_g1h->is_obj_ill(obj)) { ! if (_bm->parMark((HeapWord*)obj)) { if (!push(obj)) { gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed."); set_abort(); } } } } else { // Outside the collection set; we need to gray it ! _cm->deal_with_reference(obj); } } } }; --- 3592,3611 ---- } HeapRegion* hr = _g1h->heap_region_containing(obj); if (hr != NULL) { if (hr->in_collection_set()) { if (_g1h->is_obj_ill(obj)) { ! if (_cm->par_mark_and_count(obj, hr, _worker_i)) { if (!push(obj)) { gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed."); set_abort(); } } } } else { // Outside the collection set; we need to gray it ! _cm->deal_with_reference(obj, _worker_i); } } } };
*** 3287,3300 **** } gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " ! "(%8.2f s marking, %8.2f s counting).", cmThread()->vtime_accum(), ! cmThread()->vtime_mark_accum(), ! cmThread()->vtime_count_accum()); } void ConcurrentMark::print_worker_threads_on(outputStream* st) const { _parallel_workers->print_worker_threads_on(st); } --- 3877,3889 ---- } gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0); gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " ! "(%8.2f s marking).", cmThread()->vtime_accum(), ! cmThread()->vtime_mark_accum()); } void ConcurrentMark::print_worker_threads_on(outputStream* st) const { _parallel_workers->print_worker_threads_on(st); }
*** 4585,4604 **** _claimed = false; } CMTask::CMTask(int task_id, ConcurrentMark* cm, CMTaskQueue* task_queue, CMTaskQueueSet* task_queues) : _g1h(G1CollectedHeap::heap()), _task_id(task_id), _cm(cm), _claimed(false), _nextMarkBitMap(NULL), _hash_seed(17), _task_queue(task_queue), _task_queues(task_queues), _cm_oop_closure(NULL), ! _aborted_region(MemRegion()) { guarantee(task_queue != NULL, "invariant"); guarantee(task_queues != NULL, "invariant"); statsOnly( _clock_due_to_scanning = 0; _clock_due_to_marking = 0 ); --- 5174,5197 ---- _claimed = false; } CMTask::CMTask(int task_id, ConcurrentMark* cm, + size_t* marked_bytes, + BitMap* card_bm, CMTaskQueue* task_queue, CMTaskQueueSet* task_queues) : _g1h(G1CollectedHeap::heap()), _task_id(task_id), _cm(cm), _claimed(false), _nextMarkBitMap(NULL), _hash_seed(17), _task_queue(task_queue), _task_queues(task_queues), _cm_oop_closure(NULL), ! _aborted_region(MemRegion()), ! _marked_bytes_array(marked_bytes), ! _card_bm(card_bm) { guarantee(task_queue != NULL, "invariant"); guarantee(task_queues != NULL, "invariant"); statsOnly( _clock_due_to_scanning = 0; _clock_due_to_marking = 0 );