src/share/vm/gc_implementation/g1/concurrentMark.cpp

Print this page
rev 3640 : 7200261: G1: Liveness counting inconsistencies during marking verification
Summary: The clipping code in the routine that sets the bits for a range of cards, in the liveness accounting verification code was incorrect. It set all the bits in the card bitmap from the given starting index which would lead to spurious marking verification failures.
Reviewed-by:
* * *
[mq]: code-review-comments


1171   }
1172 
1173 #if VERIFY_OBJS_PROCESSED
1174   _scan_obj_cl.objs_processed = 0;
1175   ThreadLocalObjQueue::objs_enqueued = 0;
1176 #endif
1177 
1178   // Statistics
1179   double now = os::elapsedTime();
1180   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1181   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1182   _remark_times.add((now - start) * 1000.0);
1183 
1184   g1p->record_concurrent_mark_remark_end();
1185 }
1186 
1187 // Base class of the closures that finalize and verify the
1188 // liveness counting data.
1189 class CMCountDataClosureBase: public HeapRegionClosure {
1190 protected:

1191   ConcurrentMark* _cm;


1192   BitMap* _region_bm;
1193   BitMap* _card_bm;
1194 
1195   void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
1196     assert(start_idx <= last_idx, "sanity");
1197 
1198     // Set the inclusive bit range [start_idx, last_idx].
1199     // For small ranges (up to 8 cards) use a simple loop; otherwise
1200     // use par_at_put_range.
1201     if ((last_idx - start_idx) < 8) {
1202       for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1203         _card_bm->par_set_bit(i);
1204       }
1205     } else {
1206       assert(last_idx < _card_bm->size(), "sanity");
1207       // Note BitMap::par_at_put_range() is exclusive.
1208       BitMap::idx_t max_idx = MAX2(last_idx+1, _card_bm->size());
1209       _card_bm->par_at_put_range(start_idx, max_idx, true);
1210     }
1211   }
1212 
1213   // It takes a region that's not empty (i.e., it has at least one
1214   // live object in it and sets its corresponding bit on the region
1215   // bitmap to 1. If the region is "starts humongous" it will also set
1216   // to 1 the bits on the region bitmap that correspond to its
1217   // associated "continues humongous" regions.
1218   void set_bit_for_region(HeapRegion* hr) {
1219     assert(!hr->continuesHumongous(), "should have filtered those out");
1220 
1221     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1222     if (!hr->startsHumongous()) {
1223       // Normal (non-humongous) case: just set the bit.
1224       _region_bm->par_at_put(index, true);
1225     } else {
1226       // Starts humongous case: calculate how many regions are part of
1227       // this humongous region and then set the bit range.
1228       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1229       _region_bm->par_at_put_range(index, end_index, true);
1230     }
1231   }
1232 
1233 public:
1234   CMCountDataClosureBase(ConcurrentMark *cm,
1235                          BitMap* region_bm, BitMap* card_bm):
1236     _cm(cm), _region_bm(region_bm), _card_bm(card_bm) { }


1237 };
1238 
1239 // Closure that calculates the # live objects per region. Used
1240 // for verification purposes during the cleanup pause.
1241 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1242   CMBitMapRO* _bm;
1243   size_t _region_marked_bytes;
1244 
1245 public:
1246   CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
1247                          BitMap* region_bm, BitMap* card_bm) :
1248     CMCountDataClosureBase(cm, region_bm, card_bm),
1249     _bm(bm), _region_marked_bytes(0) { }
1250 
1251   bool doHeapRegion(HeapRegion* hr) {
1252 
1253     if (hr->continuesHumongous()) {
1254       // We will ignore these here and process them when their
1255       // associated "starts humongous" region is processed (see
1256       // set_bit_for_heap_region()). Note that we cannot rely on their
1257       // associated "starts humongous" region to have their bit set to
1258       // 1 since, due to the region chunking in the parallel region
1259       // iteration, a "continues humongous" region might be visited
1260       // before its associated "starts humongous".
1261       return false;
1262     }
1263 
1264     HeapWord* nextTop = hr->next_top_at_mark_start();
1265     HeapWord* start   = hr->bottom();
1266 
1267     assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
1268            err_msg("Preconditions not met - "
1269                    "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
1270                    start, nextTop, hr->end()));
1271 
1272     // Find the first marked object at or after "start".
1273     start = _bm->getNextMarkedWordAddress(start, nextTop);
1274 
1275     size_t marked_bytes = 0;
1276 
1277     while (start < nextTop) {
1278       oop obj = oop(start);
1279       int obj_sz = obj->size();
1280       HeapWord* obj_last = start + obj_sz - 1;
1281 
1282       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1283       BitMap::idx_t last_idx = _cm->card_bitmap_index_for(obj_last);









1284 
1285       // Set the bits in the card BM for this object (inclusive).
1286       set_card_bitmap_range(start_idx, last_idx);
1287 
1288       // Add the size of this object to the number of marked bytes.
1289       marked_bytes += (size_t)obj_sz * HeapWordSize;
1290 
1291       // Find the next marked object after this one.
1292       start = _bm->getNextMarkedWordAddress(obj_last + 1, nextTop);
1293     }
1294 
1295     // Mark the allocated-since-marking portion...
1296     HeapWord* top = hr->top();
1297     if (nextTop < top) {
1298       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(nextTop);
1299       BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top - 1);
1300 
1301       set_card_bitmap_range(start_idx, last_idx);








1302 
1303       // This definitely means the region has live objects.
1304       set_bit_for_region(hr);
1305     }
1306 
1307     // Update the live region bitmap.
1308     if (marked_bytes > 0) {
1309       set_bit_for_region(hr);
1310     }
1311 
1312     // Set the marked bytes for the current region so that
1313     // it can be queried by a calling verificiation routine
1314     _region_marked_bytes = marked_bytes;
1315 
1316     return false;
1317   }
1318 
1319   size_t region_marked_bytes() const { return _region_marked_bytes; }
1320 };
1321 
1322 // Heap region closure used for verifying the counting data
1323 // that was accumulated concurrently and aggregated during
1324 // the remark pause. This closure is applied to the heap
1325 // regions during the STW cleanup pause.
1326 
1327 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {

1328   ConcurrentMark* _cm;
1329   CalcLiveObjectsClosure _calc_cl;
1330   BitMap* _region_bm;   // Region BM to be verified
1331   BitMap* _card_bm;     // Card BM to be verified
1332   bool _verbose;        // verbose output?
1333 
1334   BitMap* _exp_region_bm; // Expected Region BM values
1335   BitMap* _exp_card_bm;   // Expected card BM values
1336 
1337   int _failures;
1338 
1339 public:
1340   VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
1341                                 BitMap* region_bm,
1342                                 BitMap* card_bm,
1343                                 BitMap* exp_region_bm,
1344                                 BitMap* exp_card_bm,
1345                                 bool verbose) :
1346     _cm(cm),
1347     _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
1348     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1349     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1350     _failures(0) { }
1351 
1352   int failures() const { return _failures; }
1353 
1354   bool doHeapRegion(HeapRegion* hr) {
1355     if (hr->continuesHumongous()) {
1356       // We will ignore these here and process them when their
1357       // associated "starts humongous" region is processed (see
1358       // set_bit_for_heap_region()). Note that we cannot rely on their
1359       // associated "starts humongous" region to have their bit set to
1360       // 1 since, due to the region chunking in the parallel region
1361       // iteration, a "continues humongous" region might be visited
1362       // before its associated "starts humongous".
1363       return false;
1364     }
1365 
1366     int failures = 0;
1367 


1474 
1475     // Use the value already set as the number of active threads
1476     // in the call to run_task().
1477     if (G1CollectedHeap::use_parallel_gc_threads()) {
1478       assert( _g1h->workers()->active_workers() > 0,
1479         "Should have been previously set");
1480       _n_workers = _g1h->workers()->active_workers();
1481     } else {
1482       _n_workers = 1;
1483     }
1484 
1485     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1486     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1487 
1488     _verbose = _cm->verbose_medium();
1489   }
1490 
1491   void work(uint worker_id) {
1492     assert(worker_id < _n_workers, "invariant");
1493 
1494     VerifyLiveObjectDataHRClosure verify_cl(_cm,
1495                                             _actual_region_bm, _actual_card_bm,
1496                                             _expected_region_bm,
1497                                             _expected_card_bm,
1498                                             _verbose);
1499 
1500     if (G1CollectedHeap::use_parallel_gc_threads()) {
1501       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1502                                             worker_id,
1503                                             _n_workers,
1504                                             HeapRegion::VerifyCountClaimValue);
1505     } else {
1506       _g1h->heap_region_iterate(&verify_cl);
1507     }
1508 
1509     Atomic::add(verify_cl.failures(), &_failures);
1510   }
1511 
1512   int failures() const { return _failures; }
1513 };
1514 
1515 // Closure that finalizes the liveness counting data.
1516 // Used during the cleanup pause.
1517 // Sets the bits corresponding to the interval [NTAMS, top]
1518 // (which contains the implicitly live objects) in the
1519 // card liveness bitmap. Also sets the bit for each region,
1520 // containing live data, in the region liveness bitmap.
1521 
1522 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1523  public:
1524   FinalCountDataUpdateClosure(ConcurrentMark* cm,
1525                               BitMap* region_bm,
1526                               BitMap* card_bm) :
1527     CMCountDataClosureBase(cm, region_bm, card_bm) { }
1528 
1529   bool doHeapRegion(HeapRegion* hr) {
1530 
1531     if (hr->continuesHumongous()) {
1532       // We will ignore these here and process them when their
1533       // associated "starts humongous" region is processed (see
1534       // set_bit_for_heap_region()). Note that we cannot rely on their
1535       // associated "starts humongous" region to have their bit set to
1536       // 1 since, due to the region chunking in the parallel region
1537       // iteration, a "continues humongous" region might be visited
1538       // before its associated "starts humongous".
1539       return false;
1540     }
1541 
1542     HeapWord* ntams = hr->next_top_at_mark_start();
1543     HeapWord* top   = hr->top();
1544 
1545     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1546 
1547     // Mark the allocated-since-marking portion...
1548     if (ntams < top) {
1549       // This definitely means the region has live objects.
1550       set_bit_for_region(hr);
1551     }
1552 
1553     // Now set the bits for [ntams, top]
1554     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1555     // set_card_bitmap_range() expects the last_idx to be with
1556     // the range of the bit map (see assertion in set_card_bitmap_range()),
1557     // so limit it to that range with this application of MIN2.
1558     BitMap::idx_t last_idx = MIN2(_cm->card_bitmap_index_for(top),
1559                                   _card_bm->size()-1);
1560     if (start_idx < _card_bm->size()) {
1561     set_card_bitmap_range(start_idx, last_idx);
1562     } else {
1563       // To reach here start_idx must be beyond the end of
1564       // the bit map and last_idx must have been limited by
1565       // the MIN2().
1566       assert(start_idx == last_idx + 1,
1567         err_msg("Not beyond end start_idx " SIZE_FORMAT " last_idx "
1568                 SIZE_FORMAT, start_idx, last_idx));





1569     }
1570 
1571     // Set the bit for the region if it contains live data
1572     if (hr->next_marked_bytes() > 0) {
1573       set_bit_for_region(hr);
1574     }
1575 
1576     return false;
1577   }
1578 };
1579 
1580 class G1ParFinalCountTask: public AbstractGangTask {
1581 protected:
1582   G1CollectedHeap* _g1h;
1583   ConcurrentMark* _cm;
1584   BitMap* _actual_region_bm;
1585   BitMap* _actual_card_bm;
1586 
1587   uint    _n_workers;
1588 
1589 public:
1590   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1591     : AbstractGangTask("G1 final counting"),
1592       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1593       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1594       _n_workers(0) {
1595     // Use the value already set as the number of active threads
1596     // in the call to run_task().
1597     if (G1CollectedHeap::use_parallel_gc_threads()) {
1598       assert( _g1h->workers()->active_workers() > 0,
1599         "Should have been previously set");
1600       _n_workers = _g1h->workers()->active_workers();
1601     } else {
1602       _n_workers = 1;
1603     }
1604   }
1605 
1606   void work(uint worker_id) {
1607     assert(worker_id < _n_workers, "invariant");
1608 
1609     FinalCountDataUpdateClosure final_update_cl(_cm,
1610                                                 _actual_region_bm,
1611                                                 _actual_card_bm);
1612 
1613     if (G1CollectedHeap::use_parallel_gc_threads()) {
1614       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1615                                             worker_id,
1616                                             _n_workers,
1617                                             HeapRegion::FinalCountClaimValue);
1618     } else {
1619       _g1h->heap_region_iterate(&final_update_cl);
1620     }
1621   }
1622 };
1623 
1624 class G1ParNoteEndTask;
1625 
1626 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1627   G1CollectedHeap* _g1;
1628   int _worker_num;
1629   size_t _max_live_bytes;


2829 
2830 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2831   _markStack.setEmpty();
2832   _markStack.clear_overflow();
2833   if (clear_overflow) {
2834     clear_has_overflown();
2835   } else {
2836     assert(has_overflown(), "pre-condition");
2837   }
2838   _finger = _heap_start;
2839 
2840   for (int i = 0; i < (int)_max_task_num; ++i) {
2841     OopTaskQueue* queue = _task_queues->queue(i);
2842     queue->set_empty();
2843   }
2844 }
2845 
2846 // Aggregate the counting data that was constructed concurrently
2847 // with marking.
2848 class AggregateCountDataHRClosure: public HeapRegionClosure {

2849   ConcurrentMark* _cm;

2850   BitMap* _cm_card_bm;
2851   size_t _max_task_num;
2852 
2853  public:
2854   AggregateCountDataHRClosure(ConcurrentMark *cm,
2855                               BitMap* cm_card_bm,
2856                               size_t max_task_num) :
2857     _cm(cm), _cm_card_bm(cm_card_bm),
2858     _max_task_num(max_task_num) { }
2859 
2860   bool is_card_aligned(HeapWord* p) {
2861     return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
2862   }
2863 
2864   bool doHeapRegion(HeapRegion* hr) {
2865     if (hr->continuesHumongous()) {
2866       // We will ignore these here and process them when their
2867       // associated "starts humongous" region is processed.
2868       // Note that we cannot rely on their associated
2869       // "starts humongous" region to have their bit set to 1
2870       // since, due to the region chunking in the parallel region
2871       // iteration, a "continues humongous" region might be visited
2872       // before its associated "starts humongous".
2873       return false;
2874     }
2875 
2876     HeapWord* start = hr->bottom();
2877     HeapWord* limit = hr->next_top_at_mark_start();
2878     HeapWord* end = hr->end();
2879 
2880     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2881            err_msg("Preconditions not met - "
2882                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2883                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
2884                    start, limit, hr->top(), hr->end()));
2885 
2886     assert(hr->next_marked_bytes() == 0, "Precondition");
2887 
2888     if (start == limit) {
2889       // NTAMS of this region has not been set so nothing to do.
2890       return false;
2891     }
2892 
2893     assert(is_card_aligned(start), "sanity");
2894     assert(is_card_aligned(end), "sanity");


2895 
2896     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2897     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2898     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2899 
2900     // If ntams is not card aligned then we bump the index for
2901     // limit so that we get the card spanning ntams.
2902     if (!is_card_aligned(limit)) {




2903       limit_idx += 1;
2904     }
2905 
2906     assert(limit_idx <= end_idx, "or else use atomics");
2907 
2908     // Aggregate the "stripe" in the count data associated with hr.
2909     uint hrs_index = hr->hrs_index();
2910     size_t marked_bytes = 0;
2911 
2912     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2913       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2914       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2915 
2916       // Fetch the marked_bytes in this region for task i and
2917       // add it to the running total for this region.
2918       marked_bytes += marked_bytes_array[hrs_index];
2919 
2920       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2921       // into the global card bitmap.
2922       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2923 
2924       while (scan_idx < limit_idx) {
2925         assert(task_card_bm->at(scan_idx) == true, "should be");
2926         _cm_card_bm->set_bit(scan_idx);
2927         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2928 
2929         // BitMap::get_next_one_offset() can handle the case when
2930         // its left_offset parameter is greater than its right_offset
2931         // parameter. If does, however, have an early exit if
2932         // left_offset == right_offset. So let's limit the value
2933         // passed in for left offset here.
2934         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2935         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2936       }
2937     }
2938 
2939     // Update the marked bytes for this region.
2940     hr->add_to_marked_bytes(marked_bytes);
2941 
2942     // Next heap region
2943     return false;
2944   }
2945 };
2946 
2947 class G1AggregateCountDataTask: public AbstractGangTask {
2948 protected:
2949   G1CollectedHeap* _g1h;
2950   ConcurrentMark* _cm;
2951   BitMap* _cm_card_bm;
2952   size_t _max_task_num;
2953   int _active_workers;
2954 
2955 public:
2956   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2957                            ConcurrentMark* cm,
2958                            BitMap* cm_card_bm,
2959                            size_t max_task_num,
2960                            int n_workers) :
2961     AbstractGangTask("Count Aggregation"),
2962     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2963     _max_task_num(max_task_num),
2964     _active_workers(n_workers) { }
2965 
2966   void work(uint worker_id) {
2967     AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);
2968 
2969     if (G1CollectedHeap::use_parallel_gc_threads()) {
2970       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
2971                                             _active_workers,
2972                                             HeapRegion::AggregateCountClaimValue);
2973     } else {
2974       _g1h->heap_region_iterate(&cl);
2975     }
2976   }
2977 };
2978 
2979 
2980 void ConcurrentMark::aggregate_count_data() {
2981   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
2982                         _g1h->workers()->active_workers() :
2983                         1);
2984 
2985   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2986                                            _max_task_num, n_workers);
2987 




1171   }
1172 
1173 #if VERIFY_OBJS_PROCESSED
1174   _scan_obj_cl.objs_processed = 0;
1175   ThreadLocalObjQueue::objs_enqueued = 0;
1176 #endif
1177 
1178   // Statistics
1179   double now = os::elapsedTime();
1180   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1181   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1182   _remark_times.add((now - start) * 1000.0);
1183 
1184   g1p->record_concurrent_mark_remark_end();
1185 }
1186 
1187 // Base class of the closures that finalize and verify the
1188 // liveness counting data.
1189 class CMCountDataClosureBase: public HeapRegionClosure {
1190 protected:
1191   G1CollectedHeap* _g1h;
1192   ConcurrentMark* _cm;
1193   CardTableModRefBS* _ct_bs;
1194 
1195   BitMap* _region_bm;
1196   BitMap* _card_bm;
1197 
1198   // Takes a region that's not empty (i.e., it has at least one


















1199   // live object in it and sets its corresponding bit on the region
1200   // bitmap to 1. If the region is "starts humongous" it will also set
1201   // to 1 the bits on the region bitmap that correspond to its
1202   // associated "continues humongous" regions.
1203   void set_bit_for_region(HeapRegion* hr) {
1204     assert(!hr->continuesHumongous(), "should have filtered those out");
1205 
1206     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1207     if (!hr->startsHumongous()) {
1208       // Normal (non-humongous) case: just set the bit.
1209       _region_bm->par_at_put(index, true);
1210     } else {
1211       // Starts humongous case: calculate how many regions are part of
1212       // this humongous region and then set the bit range.
1213       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1214       _region_bm->par_at_put_range(index, end_index, true);
1215     }
1216   }
1217 
1218 public:
1219   CMCountDataClosureBase(G1CollectedHeap* g1h,
1220                          BitMap* region_bm, BitMap* card_bm):
1221     _g1h(g1h), _cm(g1h->concurrent_mark()),
1222     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1223     _region_bm(region_bm), _card_bm(card_bm) { }
1224 };
1225 
1226 // Closure that calculates the # live objects per region. Used
1227 // for verification purposes during the cleanup pause.
1228 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1229   CMBitMapRO* _bm;
1230   size_t _region_marked_bytes;
1231 
1232 public:
1233   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1234                          BitMap* region_bm, BitMap* card_bm) :
1235     CMCountDataClosureBase(g1h, region_bm, card_bm),
1236     _bm(bm), _region_marked_bytes(0) { }
1237 
1238   bool doHeapRegion(HeapRegion* hr) {
1239 
1240     if (hr->continuesHumongous()) {
1241       // We will ignore these here and process them when their
1242       // associated "starts humongous" region is processed (see
1243       // set_bit_for_heap_region()). Note that we cannot rely on their
1244       // associated "starts humongous" region to have their bit set to
1245       // 1 since, due to the region chunking in the parallel region
1246       // iteration, a "continues humongous" region might be visited
1247       // before its associated "starts humongous".
1248       return false;
1249     }
1250 
1251     HeapWord* ntams = hr->next_top_at_mark_start();
1252     HeapWord* start = hr->bottom();
1253 
1254     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1255            err_msg("Preconditions not met - "
1256                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1257                    start, ntams, hr->end()));
1258 
1259     // Find the first marked object at or after "start".
1260     start = _bm->getNextMarkedWordAddress(start, ntams);
1261 
1262     size_t marked_bytes = 0;
1263 
1264     while (start < ntams) {
1265       oop obj = oop(start);
1266       int obj_sz = obj->size();
1267       HeapWord* obj_end = start + obj_sz;
1268 
1269       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1270       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1271       
1272       // Note: if we're looking at the last region in heap - obj_end
1273       // could be actually outside the heap and end_idx correspond
1274       // to a card that is also outside the heap.
1275       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1276         // end of object is not card aligned - increment to cover
1277         // all the cards spanned by the object
1278         end_idx += 1;
1279       }
1280 
1281       // Set the bits in the card BM for the cards spanned by this object.
1282       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1283 
1284       // Add the size of this object to the number of marked bytes.
1285       marked_bytes += (size_t)obj_sz * HeapWordSize;
1286 
1287       // Find the next marked object after this one.
1288       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1289     }
1290 
1291     // Mark the allocated-since-marking portion...
1292     HeapWord* top = hr->top();
1293     if (ntams < top) {
1294       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1295       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1296 
1297       // Note: if we're looking at the last region in heap - top
1298       // could actually be outside the heap and end_idx correspond
1299       // to a card that is also outside the heap.
1300       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1301         // end of object is not card aligned - increment to cover
1302         // all the cards spanned by the object
1303         end_idx += 1;
1304       }
1305       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1306 
1307       // This definitely means the region has live objects.
1308       set_bit_for_region(hr);
1309     }
1310 
1311     // Update the live region bitmap.
1312     if (marked_bytes > 0) {
1313       set_bit_for_region(hr);
1314     }
1315 
1316     // Set the marked bytes for the current region so that
1317     // it can be queried by a calling verificiation routine
1318     _region_marked_bytes = marked_bytes;
1319 
1320     return false;
1321   }
1322 
1323   size_t region_marked_bytes() const { return _region_marked_bytes; }
1324 };
1325 
1326 // Heap region closure used for verifying the counting data
1327 // that was accumulated concurrently and aggregated during
1328 // the remark pause. This closure is applied to the heap
1329 // regions during the STW cleanup pause.
1330 
1331 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1332   G1CollectedHeap* _g1h;
1333   ConcurrentMark* _cm;
1334   CalcLiveObjectsClosure _calc_cl;
1335   BitMap* _region_bm;   // Region BM to be verified
1336   BitMap* _card_bm;     // Card BM to be verified
1337   bool _verbose;        // verbose output?
1338 
1339   BitMap* _exp_region_bm; // Expected Region BM values
1340   BitMap* _exp_card_bm;   // Expected card BM values
1341 
1342   int _failures;
1343 
1344 public:
1345   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1346                                 BitMap* region_bm,
1347                                 BitMap* card_bm,
1348                                 BitMap* exp_region_bm,
1349                                 BitMap* exp_card_bm,
1350                                 bool verbose) :
1351     _g1h(g1h), _cm(g1h->concurrent_mark()), 
1352     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1353     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1354     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1355     _failures(0) { }
1356 
1357   int failures() const { return _failures; }
1358 
1359   bool doHeapRegion(HeapRegion* hr) {
1360     if (hr->continuesHumongous()) {
1361       // We will ignore these here and process them when their
1362       // associated "starts humongous" region is processed (see
1363       // set_bit_for_heap_region()). Note that we cannot rely on their
1364       // associated "starts humongous" region to have their bit set to
1365       // 1 since, due to the region chunking in the parallel region
1366       // iteration, a "continues humongous" region might be visited
1367       // before its associated "starts humongous".
1368       return false;
1369     }
1370 
1371     int failures = 0;
1372 


1479 
1480     // Use the value already set as the number of active threads
1481     // in the call to run_task().
1482     if (G1CollectedHeap::use_parallel_gc_threads()) {
1483       assert( _g1h->workers()->active_workers() > 0,
1484         "Should have been previously set");
1485       _n_workers = _g1h->workers()->active_workers();
1486     } else {
1487       _n_workers = 1;
1488     }
1489 
1490     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1491     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1492 
1493     _verbose = _cm->verbose_medium();
1494   }
1495 
1496   void work(uint worker_id) {
1497     assert(worker_id < _n_workers, "invariant");
1498 
1499     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1500                                             _actual_region_bm, _actual_card_bm,
1501                                             _expected_region_bm,
1502                                             _expected_card_bm,
1503                                             _verbose);
1504 
1505     if (G1CollectedHeap::use_parallel_gc_threads()) {
1506       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1507                                             worker_id,
1508                                             _n_workers,
1509                                             HeapRegion::VerifyCountClaimValue);
1510     } else {
1511       _g1h->heap_region_iterate(&verify_cl);
1512     }
1513 
1514     Atomic::add(verify_cl.failures(), &_failures);
1515   }
1516 
1517   int failures() const { return _failures; }
1518 };
1519 
1520 // Closure that finalizes the liveness counting data.
1521 // Used during the cleanup pause.
1522 // Sets the bits corresponding to the interval [NTAMS, top]
1523 // (which contains the implicitly live objects) in the
1524 // card liveness bitmap. Also sets the bit for each region,
1525 // containing live data, in the region liveness bitmap.
1526 
1527 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1528  public:
1529   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1530                               BitMap* region_bm,
1531                               BitMap* card_bm) :
1532     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1533 
1534   bool doHeapRegion(HeapRegion* hr) {
1535 
1536     if (hr->continuesHumongous()) {
1537       // We will ignore these here and process them when their
1538       // associated "starts humongous" region is processed (see
1539       // set_bit_for_heap_region()). Note that we cannot rely on their
1540       // associated "starts humongous" region to have their bit set to
1541       // 1 since, due to the region chunking in the parallel region
1542       // iteration, a "continues humongous" region might be visited
1543       // before its associated "starts humongous".
1544       return false;
1545     }
1546 
1547     HeapWord* ntams = hr->next_top_at_mark_start();
1548     HeapWord* top   = hr->top();
1549 
1550     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1551 
1552     // Mark the allocated-since-marking portion...
1553     if (ntams < top) {
1554       // This definitely means the region has live objects.
1555       set_bit_for_region(hr);

1556       
1557       // Now set the bits in the card bitmap for [ntams, top)
1558       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1559       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1560       
1561       // Note: if we're looking at the last region in heap - top
1562       // could actually be outside the heap and end_idx correspond
1563       // to a card that is also outside the heap.
1564       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1565         // end of object is not card aligned - increment to cover
1566         // all the cards spanned by the object
1567         end_idx += 1;
1568       }
1569 
1570       assert(end_idx <= _card_bm->size(),
1571              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1572                      end_idx, _card_bm->size()));
1573       assert(start_idx < _card_bm->size(),
1574              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1575                      start_idx, _card_bm->size()));
1576       
1577       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1578     }
1579 
1580     // Set the bit for the region if it contains live data
1581     if (hr->next_marked_bytes() > 0) {
1582       set_bit_for_region(hr);
1583     }
1584 
1585     return false;
1586   }
1587 };
1588 
1589 class G1ParFinalCountTask: public AbstractGangTask {
1590 protected:
1591   G1CollectedHeap* _g1h;
1592   ConcurrentMark* _cm;
1593   BitMap* _actual_region_bm;
1594   BitMap* _actual_card_bm;
1595 
1596   uint    _n_workers;
1597 
1598 public:
1599   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1600     : AbstractGangTask("G1 final counting"),
1601       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1602       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1603       _n_workers(0) {
1604     // Use the value already set as the number of active threads
1605     // in the call to run_task().
1606     if (G1CollectedHeap::use_parallel_gc_threads()) {
1607       assert( _g1h->workers()->active_workers() > 0,
1608         "Should have been previously set");
1609       _n_workers = _g1h->workers()->active_workers();
1610     } else {
1611       _n_workers = 1;
1612     }
1613   }
1614 
1615   void work(uint worker_id) {
1616     assert(worker_id < _n_workers, "invariant");
1617 
1618     FinalCountDataUpdateClosure final_update_cl(_g1h,
1619                                                 _actual_region_bm,
1620                                                 _actual_card_bm);
1621 
1622     if (G1CollectedHeap::use_parallel_gc_threads()) {
1623       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1624                                             worker_id,
1625                                             _n_workers,
1626                                             HeapRegion::FinalCountClaimValue);
1627     } else {
1628       _g1h->heap_region_iterate(&final_update_cl);
1629     }
1630   }
1631 };
1632 
1633 class G1ParNoteEndTask;
1634 
1635 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1636   G1CollectedHeap* _g1;
1637   int _worker_num;
1638   size_t _max_live_bytes;


2838 
2839 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2840   _markStack.setEmpty();
2841   _markStack.clear_overflow();
2842   if (clear_overflow) {
2843     clear_has_overflown();
2844   } else {
2845     assert(has_overflown(), "pre-condition");
2846   }
2847   _finger = _heap_start;
2848 
2849   for (int i = 0; i < (int)_max_task_num; ++i) {
2850     OopTaskQueue* queue = _task_queues->queue(i);
2851     queue->set_empty();
2852   }
2853 }
2854 
2855 // Aggregate the counting data that was constructed concurrently
2856 // with marking.
2857 class AggregateCountDataHRClosure: public HeapRegionClosure {
2858   G1CollectedHeap* _g1h;
2859   ConcurrentMark* _cm;
2860   CardTableModRefBS* _ct_bs;
2861   BitMap* _cm_card_bm;
2862   size_t _max_task_num;
2863 
2864  public:
2865   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2866                               BitMap* cm_card_bm,
2867                               size_t max_task_num) :
2868     _g1h(g1h), _cm(g1h->concurrent_mark()),
2869     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
2870     _cm_card_bm(cm_card_bm), _max_task_num(max_task_num) { }



2871 
2872   bool doHeapRegion(HeapRegion* hr) {
2873     if (hr->continuesHumongous()) {
2874       // We will ignore these here and process them when their
2875       // associated "starts humongous" region is processed.
2876       // Note that we cannot rely on their associated
2877       // "starts humongous" region to have their bit set to 1
2878       // since, due to the region chunking in the parallel region
2879       // iteration, a "continues humongous" region might be visited
2880       // before its associated "starts humongous".
2881       return false;
2882     }
2883 
2884     HeapWord* start = hr->bottom();
2885     HeapWord* limit = hr->next_top_at_mark_start();
2886     HeapWord* end = hr->end();
2887 
2888     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2889            err_msg("Preconditions not met - "
2890                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2891                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
2892                    start, limit, hr->top(), hr->end()));
2893 
2894     assert(hr->next_marked_bytes() == 0, "Precondition");
2895 
2896     if (start == limit) {
2897       // NTAMS of this region has not been set so nothing to do.
2898       return false;
2899     }
2900 
2901     // 'start' should be in the heap.
2902     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2903     // 'end' *may* be outside the heap (if hr is the last region)
2904     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2905 
2906     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2907     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2908     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2909 
2910     // If ntams is not card aligned then we bump card bitmap index
2911     // for limit so that we get the all the cards spanned by
2912     // the object ending at ntams.
2913     // Note: if this is the last region in the heap then ntams
2914     // *may* be outside the heap and limit_idx will correspond
2915     // to a card that is also outside the heap.
2916     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2917       limit_idx += 1;
2918     }
2919 
2920     assert(limit_idx <= end_idx, "or else use atomics");
2921 
2922     // Aggregate the "stripe" in the count data associated with hr.
2923     uint hrs_index = hr->hrs_index();
2924     size_t marked_bytes = 0;
2925 
2926     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2927       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2928       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2929 
2930       // Fetch the marked_bytes in this region for task i and
2931       // add it to the running total for this region.
2932       marked_bytes += marked_bytes_array[hrs_index];
2933 
2934       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2935       // into the global card bitmap.
2936       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2937 
2938       while (scan_idx < limit_idx) {
2939         assert(task_card_bm->at(scan_idx) == true, "should be");
2940         _cm_card_bm->set_bit(scan_idx);
2941         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2942 
2943         // BitMap::get_next_one_offset() can handle the case when
2944         // its left_offset parameter is greater than its right_offset
2945         // parameter. It does, however, have an early exit if
2946         // left_offset == right_offset. So let's limit the value
2947         // passed in for left offset here.
2948         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2949         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2950       }
2951     }
2952 
2953     // Update the marked bytes for this region.
2954     hr->add_to_marked_bytes(marked_bytes);
2955 
2956     // Next heap region
2957     return false;
2958   }
2959 };
2960 
2961 class G1AggregateCountDataTask: public AbstractGangTask {
2962 protected:
2963   G1CollectedHeap* _g1h;
2964   ConcurrentMark* _cm;
2965   BitMap* _cm_card_bm;
2966   size_t _max_task_num;
2967   int _active_workers;
2968 
2969 public:
2970   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2971                            ConcurrentMark* cm,
2972                            BitMap* cm_card_bm,
2973                            size_t max_task_num,
2974                            int n_workers) :
2975     AbstractGangTask("Count Aggregation"),
2976     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2977     _max_task_num(max_task_num),
2978     _active_workers(n_workers) { }
2979 
2980   void work(uint worker_id) {
2981     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_task_num);
2982 
2983     if (G1CollectedHeap::use_parallel_gc_threads()) {
2984       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
2985                                             _active_workers,
2986                                             HeapRegion::AggregateCountClaimValue);
2987     } else {
2988       _g1h->heap_region_iterate(&cl);
2989     }
2990   }
2991 };
2992 
2993 
2994 void ConcurrentMark::aggregate_count_data() {
2995   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
2996                         _g1h->workers()->active_workers() :
2997                         1);
2998 
2999   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3000                                            _max_task_num, n_workers);
3001