< prev index next >

src/share/vm/gc/g1/g1ConcurrentMark.cpp

Print this page
rev 10490 : imported patch 8151126-clean-up-duplicate-code-for-clearing-bitmaps
rev 10491 : imported patch 8151126-jon-review
rev 10492 : [mq]: 8151614-improve-concurrent-mark-logging
rev 10493 : [mq]: 8077144-concurrent-mark-thread-init-fix

@@ -46,10 +46,11 @@
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/strongRootsScope.hpp"
 #include "gc/shared/taskqueue.inline.hpp"
 #include "gc/shared/vmGCOperations.hpp"
 #include "logging/log.hpp"
+#include "logging/logTag.hpp"
 #include "memory/allocation.hpp"
 #include "memory/resourceArea.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/atomic.inline.hpp"
 #include "runtime/handles.inline.hpp"

@@ -353,14 +354,12 @@
   _parallel_marking_threads(0),
   _max_parallel_marking_threads(0),
   _sleep_factor(0.0),
   _marking_task_overhead(1.0),
   _cleanup_list("Cleanup List"),
-  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
-  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
-            CardTableModRefBS::card_shift,
-            false /* in_resource_area*/),
+  _region_live_bm(),
+  _card_live_bm(),
 
   _prevMarkBitMap(&_markBitMap1),
   _nextMarkBitMap(&_markBitMap2),
 
   _markStack(this),

@@ -387,12 +386,10 @@
   _total_counting_time(0.0),
   _total_rs_scrub_time(0.0),
 
   _parallel_workers(NULL),
 
-  _count_card_bitmaps(NULL),
-  _count_marked_bytes(NULL),
   _completed_initialization(false) {
 
   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 

@@ -499,47 +496,38 @@
   if (!_markStack.allocate(MarkStackSize)) {
     log_warning(gc)("Failed to allocate CM marking stack");
     return;
   }
 
-  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
-  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
+  allocate_internal_bitmaps();
 
-  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
-  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
+  if (G1PretouchAuxiliaryMemory) {
+    pretouch_internal_bitmaps();
+  }
 
-  BitMap::idx_t card_bm_size = _card_bm.size();
+  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
+  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 
   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
   _active_tasks = _max_worker_id;
 
-  uint max_regions = _g1h->max_regions();
   for (uint i = 0; i < _max_worker_id; ++i) {
     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
     task_queue->initialize();
     _task_queues->register_queue(i, task_queue);
 
-    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
-    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
-
-    _tasks[i] = new G1CMTask(i, this,
-                             _count_marked_bytes[i],
-                             &_count_card_bitmaps[i],
-                             task_queue, _task_queues);
+    _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);
 
     _accum_task_vtime[i] = 0.0;
   }
 
   // Calculate the card number for the bottom of the heap. Used
   // in biasing indexes into the accounting card bitmaps.
   _heap_bottom_card_num =
     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                                 CardTableModRefBS::card_shift);
 
-  // Clear all the liveness counting data
-  clear_all_count_data();
-
   // so that the call below can read a sensible value
   _heap_start = g1h->reserved_region().start();
   set_non_marking_state();
   _completed_initialization = true;
 }

@@ -713,14 +701,15 @@
   // is the case.
   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 
   clear_bitmap(_nextMarkBitMap, _parallel_workers, true);
 
-  // Clear the liveness counting data. If the marking has been aborted, the abort()
+  // Clear the live count data. If the marking has been aborted, the abort()
   // call already did that.
   if (!has_aborted()) {
-    clear_all_count_data();
+    clear_all_live_data(_parallel_workers);
+    DEBUG_ONLY(verify_all_live_data());
   }
 
   // Repeat the asserts from above.
   guarantee(cmThread()->during_cycle(), "invariant");
   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");

@@ -1124,18 +1113,10 @@
 
     // Clear the marking state because we will be restarting
     // marking due to overflowing the global mark stack.
     reset_marking_state();
   } else {
-    {
-      GCTraceTime(Debug, gc) trace("Aggregate Data", g1h->gc_timer_cm());
-
-      // Aggregate the per-task counting data that we have accumulated
-      // while marking.
-      aggregate_count_data();
-    }
-
     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
     // We're done with marking.
     // This is the end of  the marking cycle, we're expected all
     // threads to have SATB queues with active set to true.
     satb_mq_set.set_active_all_threads(false, /* new active value */

@@ -1168,16 +1149,14 @@
   G1CMIsAliveClosure is_alive(g1h);
   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
 }
 
 // Base class of the closures that finalize and verify the
-// liveness counting data.
-class G1CMCountDataClosureBase: public HeapRegionClosure {
+// liveness count data.
+class G1LiveDataClosureBase: public HeapRegionClosure {
 protected:
-  G1CollectedHeap* _g1h;
   G1ConcurrentMark* _cm;
-  CardTableModRefBS* _ct_bs;
 
   BitMap* _region_bm;
   BitMap* _card_bm;
 
   // Takes a region that's not empty (i.e., it has at least one

@@ -1186,152 +1165,213 @@
   void set_bit_for_region(HeapRegion* hr) {
     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
     _region_bm->par_at_put(index, true);
   }
 
-public:
-  G1CMCountDataClosureBase(G1CollectedHeap* g1h,
-                           BitMap* region_bm, BitMap* card_bm):
-    _g1h(g1h), _cm(g1h->concurrent_mark()),
-    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
-    _region_bm(region_bm), _card_bm(card_bm) { }
-};
+  // Utility routine to set the exclusive range of cards
+  // [start_idx, end_idx) on the given bitmap.
+  inline void set_card_bitmap_range(BitMap* card_bm,
+                                    BitMap::idx_t start_idx,
+                                    BitMap::idx_t end_idx) {
 
-// Closure that calculates the # live objects per region. Used
-// for verification purposes during the cleanup pause.
-class CalcLiveObjectsClosure: public G1CMCountDataClosureBase {
-  G1CMBitMapRO* _bm;
-  size_t _region_marked_bytes;
+    // The range [start_idx, end_idx) must be non-empty and within the bitmap.
+    assert((end_idx - start_idx) > 0, "at least one card");
+    assert(end_idx <= card_bm->size(), "sanity");
 
-public:
-  CalcLiveObjectsClosure(G1CMBitMapRO *bm, G1CollectedHeap* g1h,
-                         BitMap* region_bm, BitMap* card_bm) :
-    G1CMCountDataClosureBase(g1h, region_bm, card_bm),
-    _bm(bm), _region_marked_bytes(0) { }
+    // For small ranges use a simple loop; otherwise use set_range.
+    // The range is made up of the cards that are spanned by an
+    // object/mem region, so 8 cards allow object sizes of up to 4K
+    // to be handled using the loop.
+    if ((end_idx - start_idx) <= 8) {
+      for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
+        card_bm->set_bit(i);
+      }
+    } else {
+      card_bm->set_range(start_idx, end_idx);
+    }    
+  }
+
+  void mark_card_bitmap_range(HeapWord* start, HeapWord* end) {
+    BitMap::idx_t start_idx = _cm->card_live_bitmap_index_for(start);
+    BitMap::idx_t end_idx = _cm->card_live_bitmap_index_for((HeapWord*)align_ptr_up(end, CardTableModRefBS::card_size));
+
+    assert((end_idx - start_idx) > 0, "Trying to mark zero sized range.");
+    // The first card may already have been set by the previous call; skip it.
+    if (start_idx == _last_marked_bit_idx) {
+      start_idx++;
+    }
+    if (start_idx == end_idx) {
+      return;
+    }
+    // Set the bits in the card bitmap for the cards spanned by [start, end)
+    // and remember the last card set so the next call can skip it.
+    set_card_bitmap_range(_card_bm, start_idx, end_idx);
+    _last_marked_bit_idx = end_idx - 1;
+  }
+
+  // Index of the last card bit set by mark_card_bitmap_range(). Caching it avoids
+  // setting the same bit multiple times when consecutive ranges share a card,
+  // which for dense bitmaps avoids doing any work most of the time.
+  BitMap::idx_t _last_marked_bit_idx;
+  // Invalidates the cached index; the mark_* helpers call this before each region.
+  void reset_mark_cache() {
+    _last_marked_bit_idx = (BitMap::idx_t)-1;
+  }
+
+  void mark_allocated_since_marking(HeapRegion* hr) {
+    reset_mark_cache();
+
+    HeapWord* ntams = hr->next_top_at_mark_start();
+    HeapWord* top   = hr->top();
+
+    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
+    // Mark the allocated-since-marking portion [ntams, top): set its cards
+    // in the card bitmap and flag the region in the region bitmap.
+    if (ntams < top) {
+      mark_card_bitmap_range(ntams, top);
+      // This definitely means the region has live objects.
+      set_bit_for_region(hr);
+    }
+  }
+
+  bool mark_marked_during_marking(HeapRegion* hr, bool may_suspend, size_t* total_bytes_marked) {
+    reset_mark_cache();
+    // Bytes found live by this call; reported through total_bytes_marked.
+    size_t marked_bytes = 0;
 
-  bool doHeapRegion(HeapRegion* hr) {
     HeapWord* ntams = hr->next_top_at_mark_start();
     HeapWord* start = hr->bottom();
 
+    if (ntams <= start) {
+      // Empty region (at the start of marking). Nothing to do.
+      // No marked bytes are recorded for it.
+      *total_bytes_marked = marked_bytes;
+      return false;
+    } else if (hr->is_starts_humongous()) {
+      // Humongous object: account each region's own [bottom, top) span to that region.
+      do {
+        HeapWord* bottom = hr->bottom();
+        mark_card_bitmap_range(bottom, hr->top());
+        size_t region_bytes = pointer_delta(hr->top(), bottom, 1);
+        marked_bytes += region_bytes;
+        hr->add_to_marked_bytes(region_bytes);
+        hr = G1CollectedHeap::heap()->next_region_in_humongous(hr);
+      } while (hr != NULL);
+      *total_bytes_marked = marked_bytes;
+      return false;
+    } else if (hr->is_continues_humongous()) {
+      // Humongous continues regions were handled during processing of the start region.
+      *total_bytes_marked = marked_bytes;
+      return false;
+    }
+
     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
            "Preconditions not met - "
            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
            p2i(start), p2i(ntams), p2i(hr->end()));
 
+    G1CMBitMap* bitmap = _cm->nextMarkBitMap();
     // Find the first marked object at or after "start".
-    start = _bm->getNextMarkedWordAddress(start, ntams);
-
-    size_t marked_bytes = 0;
-
+    start = bitmap->getNextMarkedWordAddress(start, ntams);
     while (start < ntams) {
       oop obj = oop(start);
       int obj_sz = obj->size();
       HeapWord* obj_end = start + obj_sz;
 
-      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
-      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
-
-      // Note: if we're looking at the last region in heap - obj_end
-      // could be actually just beyond the end of the heap; end_idx
-      // will then correspond to a (non-existent) card that is also
-      // just beyond the heap.
-      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
-        // end of object is not card aligned - increment to cover
-        // all the cards spanned by the object
-        end_idx += 1;
-      }
+      assert(obj_end <= hr->end(), "Humongous objects must have been handled elsewhere.");
 
-      // Set the bits in the card BM for the cards spanned by this object.
-      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
+      mark_card_bitmap_range(start, obj_end);
 
       // Add the size of this object to the number of marked bytes.
       marked_bytes += (size_t)obj_sz * HeapWordSize;
 
-      // This will happen if we are handling a humongous object that spans
-      // several heap regions.
-      if (obj_end > hr->end()) {
-        break;
-      }
       // Find the next marked object after this one.
-      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
+      start = bitmap->getNextMarkedWordAddress(obj_end, ntams);
     }
 
-    // Mark the allocated-since-marking portion...
-    HeapWord* top = hr->top();
-    if (ntams < top) {
-      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
-      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
+    // Update the marked bytes for this region.
+    hr->add_to_marked_bytes(marked_bytes);
+    *total_bytes_marked = marked_bytes;
 
-      // Note: if we're looking at the last region in heap - top
-      // could be actually just beyond the end of the heap; end_idx
-      // will then correspond to a (non-existent) card that is also
-      // just beyond the heap.
-      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
-        // end of object is not card aligned - increment to cover
-        // all the cards spanned by the object
-        end_idx += 1;
+    // Abort iteration if after yielding the marking has been aborted.
+    if (may_suspend && _cm->do_yield_check() && _cm->has_aborted()) {
+       return true;
       }
-      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
-
-      // This definitely means the region has live objects.
-      set_bit_for_region(hr);
+    // Next heap region
+    return false;
     }
 
-    // Update the live region bitmap.
-    if (marked_bytes > 0) {
-      set_bit_for_region(hr);
-    }
+public:
+  G1LiveDataClosureBase(G1CollectedHeap* g1h,
+                        BitMap* region_bm,
+                        BitMap* card_bm):
+    _cm(g1h->concurrent_mark()),
+    _region_bm(region_bm),
+    _card_bm(card_bm) { }
+};
 
-    // Set the marked bytes for the current region so that
-    // it can be queried by a calling verification routine
-    _region_marked_bytes = marked_bytes;
+// Heap region closure used for verifying the live count data
+// that was created concurrently and finalized during
+// the remark pause. This closure is applied to the heap
+// regions during the STW cleanup pause.
+class G1VerifyLiveDataHRClosure: public HeapRegionClosure {
+  // Calculates the # live objects per region.
+  class G1VerifyLiveDataClosure: public G1LiveDataClosureBase {
+    size_t _region_marked_bytes;
+
+  public:
+    G1VerifyLiveDataClosure(G1CollectedHeap* g1h,
+                            BitMap* region_bm,
+                            BitMap* card_bm) :
+      G1LiveDataClosureBase(g1h, region_bm, card_bm),
+      _region_marked_bytes(0) { }
 
+    bool doHeapRegion(HeapRegion* hr) {
+      mark_marked_during_marking(hr, false, &_region_marked_bytes);
+      mark_allocated_since_marking(hr);
     return false;
   }
 
   size_t region_marked_bytes() const { return _region_marked_bytes; }
-};
-
-// Heap region closure used for verifying the counting data
-// that was accumulated concurrently and aggregated during
-// the remark pause. This closure is applied to the heap
-// regions during the STW cleanup pause.
+  };
 
-class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
   G1CollectedHeap* _g1h;
   G1ConcurrentMark* _cm;
-  CalcLiveObjectsClosure _calc_cl;
-  BitMap* _region_bm;   // Region BM to be verified
-  BitMap* _card_bm;     // Card BM to be verified
+  G1VerifyLiveDataClosure _calc_cl;
+  BitMap* _act_region_bm;   // Region BM to be verified
+  BitMap* _act_card_bm;     // Card BM to be verified
 
   BitMap* _exp_region_bm; // Expected Region BM values
   BitMap* _exp_card_bm;   // Expected card BM values
 
   int _failures;
 
 public:
-  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
-                                BitMap* region_bm,
-                                BitMap* card_bm,
+  G1VerifyLiveDataHRClosure(G1CollectedHeap* g1h,
+                            BitMap* act_region_bm,
+                            BitMap* act_card_bm,
                                 BitMap* exp_region_bm,
                                 BitMap* exp_card_bm) :
-    _g1h(g1h), _cm(g1h->concurrent_mark()),
-    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
-    _region_bm(region_bm), _card_bm(card_bm),
-    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
+    _g1h(g1h),
+    _cm(g1h->concurrent_mark()),
+    _calc_cl(g1h, exp_region_bm, exp_card_bm),
+    _act_region_bm(act_region_bm),
+    _act_card_bm(act_card_bm),
+    _exp_region_bm(exp_region_bm),
+    _exp_card_bm(exp_card_bm),
     _failures(0) { }
 
   int failures() const { return _failures; }
 
   bool doHeapRegion(HeapRegion* hr) {
     int failures = 0;
 
-    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
+    // Call the G1VerifyLiveDataClosure to walk the marking bitmap for
     // this region and set the corresponding bits in the expected region
     // and card bitmaps.
     bool res = _calc_cl.doHeapRegion(hr);
-    assert(res == false, "should be continuing");
+    assert(!res, "Should be completed.");
 
     // Verify the marked bytes for this region.
     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
     size_t act_marked_bytes = hr->next_marked_bytes();
 

@@ -1361,25 +1401,25 @@
     // We're not OK if the bit in the calculated expected region
     // bitmap is set and the bit in the actual region bitmap is not.
     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
 
     bool expected = _exp_region_bm->at(index);
-    bool actual = _region_bm->at(index);
+    bool actual = _act_region_bm->at(index);
     if (expected && !actual) {
       failures += 1;
     }
 
     // Verify that the card bit maps for the cards spanned by the current
     // region match. We have an error if we have a set bit in the expected
     // bit map and the corresponding bit in the actual bitmap is not set.
 
-    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
-    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
+    BitMap::idx_t start_idx = _cm->card_live_bitmap_index_for(hr->bottom());
+    BitMap::idx_t end_idx = _cm->card_live_bitmap_index_for(hr->top());
 
     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
       expected = _exp_card_bm->at(i);
-      actual = _card_bm->at(i);
+      actual = _act_card_bm->at(i);
 
       if (expected && !actual) {
         failures += 1;
       }
     }

@@ -1390,141 +1430,98 @@
     // find the first violating region by returning true.
     return false;
   }
 };
 
-class G1ParVerifyFinalCountTask: public AbstractGangTask {
+class G1VerifyLiveDataTask: public AbstractGangTask {
 protected:
   G1CollectedHeap* _g1h;
-  G1ConcurrentMark* _cm;
   BitMap* _actual_region_bm;
   BitMap* _actual_card_bm;
 
-  uint    _n_workers;
-
-  BitMap* _expected_region_bm;
-  BitMap* _expected_card_bm;
+  BitMap _expected_region_bm;
+  BitMap _expected_card_bm;
 
   int  _failures;
 
-  HeapRegionClaimer _hrclaimer;
+  HeapRegionClaimer _hr_claimer;
 
 public:
-  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
-                            BitMap* region_bm, BitMap* card_bm,
-                            BitMap* expected_region_bm, BitMap* expected_card_bm)
+  G1VerifyLiveDataTask(G1CollectedHeap* g1h,
+                       BitMap* region_bm,
+                       BitMap* card_bm,
+                       uint n_workers)
     : AbstractGangTask("G1 verify final counting"),
-      _g1h(g1h), _cm(_g1h->concurrent_mark()),
-      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
-      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
+    _g1h(g1h),
+    _actual_region_bm(region_bm),
+    _actual_card_bm(card_bm),
+    _expected_region_bm(region_bm->size(), true /* in_resource_area */),
+    _expected_card_bm(card_bm->size(), true /* in_resource_area */),
       _failures(0),
-      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
+    _hr_claimer(n_workers) {
     assert(VerifyDuringGC, "don't call this otherwise");
-    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
-    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
   }
 
   void work(uint worker_id) {
-    assert(worker_id < _n_workers, "invariant");
-
-    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
-                                            _actual_region_bm, _actual_card_bm,
-                                            _expected_region_bm,
-                                            _expected_card_bm);
-
-    _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
+    G1VerifyLiveDataHRClosure cl(_g1h,
+                                 _actual_region_bm,
+                                 _actual_card_bm,
+                                 &_expected_region_bm,
+                                 &_expected_card_bm);
+    _g1h->heap_region_par_iterate(&cl, worker_id, &_hr_claimer);
 
-    Atomic::add(verify_cl.failures(), &_failures);
+    Atomic::add(cl.failures(), &_failures);
   }
 
   int failures() const { return _failures; }
 };
 
-// Closure that finalizes the liveness counting data.
-// Used during the cleanup pause.
-// Sets the bits corresponding to the interval [NTAMS, top]
-// (which contains the implicitly live objects) in the
-// card liveness bitmap. Also sets the bit for each region,
-// containing live data, in the region liveness bitmap.
-
-class FinalCountDataUpdateClosure: public G1CMCountDataClosureBase {
+class G1FinalizeLiveDataTask: public AbstractGangTask {
+  // Finalizes the liveness counting data.
+  // Sets the bits corresponding to the interval [NTAMS, top]
+  // (which contains the implicitly live objects) in the
+  // card liveness bitmap. Also sets the bit for each region
+  // containing live data, in the region liveness bitmap.
+  class G1FinalizeCountDataClosure: public G1LiveDataClosureBase {
  public:
-  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
+    G1FinalizeCountDataClosure(G1CollectedHeap* g1h,
                               BitMap* region_bm,
                               BitMap* card_bm) :
-    G1CMCountDataClosureBase(g1h, region_bm, card_bm) { }
+      G1LiveDataClosureBase(g1h, region_bm, card_bm) { }
 
   bool doHeapRegion(HeapRegion* hr) {
-    HeapWord* ntams = hr->next_top_at_mark_start();
-    HeapWord* top   = hr->top();
-
-    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
-
-    // Mark the allocated-since-marking portion...
-    if (ntams < top) {
-      // This definitely means the region has live objects.
-      set_bit_for_region(hr);
-
-      // Now set the bits in the card bitmap for [ntams, top)
-      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
-      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
-
-      // Note: if we're looking at the last region in heap - top
-      // could be actually just beyond the end of the heap; end_idx
-      // will then correspond to a (non-existent) card that is also
-      // just beyond the heap.
-      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
-        // end of object is not card aligned - increment to cover
-        // all the cards spanned by the object
-        end_idx += 1;
-      }
-
-      assert(end_idx <= _card_bm->size(),
-             "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
-             end_idx, _card_bm->size());
-      assert(start_idx < _card_bm->size(),
-             "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
-             start_idx, _card_bm->size());
-
-      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
-    }
-
+      mark_allocated_since_marking(hr);
     // Set the bit for the region if it contains live data
     if (hr->next_marked_bytes() > 0) {
       set_bit_for_region(hr);
     }
 
     return false;
   }
-};
+  };
 
-class G1ParFinalCountTask: public AbstractGangTask {
-protected:
   G1CollectedHeap* _g1h;
-  G1ConcurrentMark* _cm;
   BitMap* _actual_region_bm;
   BitMap* _actual_card_bm;
 
-  uint    _n_workers;
-  HeapRegionClaimer _hrclaimer;
+  HeapRegionClaimer _hr_claimer;
 
 public:
-  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
-    : AbstractGangTask("G1 final counting"),
-      _g1h(g1h), _cm(_g1h->concurrent_mark()),
-      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
-      _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
+  G1FinalizeLiveDataTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
+    AbstractGangTask("G1 final counting"),
+    _g1h(g1h),
+    _actual_region_bm(region_bm),
+    _actual_card_bm(card_bm),
+    _hr_claimer(n_workers) {
   }
 
   void work(uint worker_id) {
-    assert(worker_id < _n_workers, "invariant");
-
-    FinalCountDataUpdateClosure final_update_cl(_g1h,
+    G1FinalizeCountDataClosure cl(_g1h,
                                                 _actual_region_bm,
                                                 _actual_card_bm);
 
-    _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
+    _g1h->heap_region_par_iterate(&cl, worker_id, &_hr_claimer);
   }
 };
 
 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
   G1CollectedHeap* _g1;

@@ -1654,35 +1651,32 @@
 
   double start = os::elapsedTime();
 
   HeapRegionRemSet::reset_for_cleanup_tasks();
 
-  // Do counting once more with the world stopped for good measure.
-  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
-
-  g1h->workers()->run_task(&g1_par_count_task);
+  {
+    // Finalize the live data.
+    G1FinalizeLiveDataTask cl(g1h,
+                              &_region_live_bm,
+                              &_card_live_bm,
+                              g1h->workers()->active_workers());
+    g1h->workers()->run_task(&cl);
+  }
 
   if (VerifyDuringGC) {
-    // Verify that the counting data accumulated during marking matches
-    // that calculated by walking the marking bitmap.
-
-    // Bitmaps to hold expected values
-    BitMap expected_region_bm(_region_bm.size(), true);
-    BitMap expected_card_bm(_card_bm.size(), true);
-
-    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
-                                                 &_region_bm,
-                                                 &_card_bm,
-                                                 &expected_region_bm,
-                                                 &expected_card_bm);
-
-    g1h->workers()->run_task(&g1_par_verify_task);
+    // Verify that the liveness count data created concurrently matches one created
+    // during this safepoint.
+    ResourceMark rm;
+    G1VerifyLiveDataTask cl(g1h,
+                            &_region_live_bm,
+                            &_card_live_bm,
+                            g1h->workers()->active_workers());
+    g1h->workers()->run_task(&cl);
 
-    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
+    guarantee(cl.failures() == 0, "Unexpected accounting failures");
   }
 
-  size_t start_used_bytes = g1h->used();
   g1h->collector_state()->set_mark_in_progress(false);
 
   double count_end = os::elapsedTime();
   double this_final_counting_time = (count_end - start);
   _total_counting_time += this_final_counting_time;

@@ -1713,11 +1707,11 @@
 
   // call below, since it affects the metric by which we sort the heap
   // regions.
   if (G1ScrubRemSets) {
     double rs_scrub_start = os::elapsedTime();
-    g1h->scrub_rem_set(&_region_bm, &_card_bm);
+    g1h->scrub_rem_set(&_region_live_bm, &_card_live_bm);
     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
   }
 
   // this will also free any regions totally full of garbage objects,
   // and sort the regions.

@@ -2165,10 +2159,39 @@
   G1CMBitMapRO* temp = _prevMarkBitMap;
   _prevMarkBitMap    = (G1CMBitMapRO*)_nextMarkBitMap;
   _nextMarkBitMap    = (G1CMBitMap*)  temp;
 }
 
+BitMap G1ConcurrentMark::allocate_large_bitmap(BitMap::idx_t size_in_bits) {
+  size_t size_in_words = BitMap::size_in_words(size_in_bits);
+  // Allocate the backing words via MmapArrayAllocator and wrap them in a BitMap.
+  BitMap::bm_word_t* map = MmapArrayAllocator<BitMap::bm_word_t, mtGC>::allocate(size_in_words);
+  
+  return BitMap(map, size_in_bits);
+}
+
+void G1ConcurrentMark::allocate_internal_bitmaps() {
+  double start_time = os::elapsedTime();
+  // Region liveness bitmap: one bit per region, sized by max_regions().
+  _region_live_bm = allocate_large_bitmap(_g1h->max_regions());
+  // Card liveness bitmap: one bit per card of the maximum heap capacity.
+  guarantee(_g1h->max_capacity() % CardTableModRefBS::card_size == 0,
+            "Heap capacity must be aligned to card size.");
+  _card_live_bm = allocate_large_bitmap(_g1h->max_capacity() / CardTableModRefBS::card_size);
+
+  log_debug(gc, marking)("Allocating internal bitmaps took %1.2f seconds.", os::elapsedTime() - start_time);
+}
+
+void G1ConcurrentMark::pretouch_internal_bitmaps() {
+  double start_time = os::elapsedTime();
+  // Pre-touch both liveness bitmaps and log how long that took.
+  _region_live_bm.pretouch();
+  _card_live_bm.pretouch();
+  
+  log_debug(gc, marking)("Pre-touching internal bitmaps took %1.2f seconds.", os::elapsedTime() - start_time);
+}
+
 // Closure for marking entries in SATB buffers.
 class G1CMSATBBufferClosure : public SATBBufferClosure {
 private:
   G1CMTask* _task;
   G1CollectedHeap* _g1h;

@@ -2183,11 +2206,11 @@
       // Until we get here, we don't know whether entry refers to a valid
       // object; it could instead have been a stale reference.
       oop obj = static_cast<oop>(entry);
       assert(obj->is_oop(true /* ignore mark word */),
              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
-      _task->make_reference_grey(obj, hr);
+      _task->make_reference_grey(obj);
     }
   }
 
 public:
   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)

@@ -2425,169 +2448,105 @@
     }
   }
 }
 #endif // PRODUCT
 
-// Aggregate the counting data that was constructed concurrently
-// with marking.
-class AggregateCountDataHRClosure: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  G1ConcurrentMark* _cm;
-  CardTableModRefBS* _ct_bs;
-  BitMap* _cm_card_bm;
-  uint _max_worker_id;
-
+class G1CreateLiveDataTask: public AbstractGangTask {
+  // Heap region closure that creates the live data for a region by calling
+  // mark_marked_during_marking() with the card bitmap handed to the task.
+  class G1CreateLiveDataHRClosure: public G1LiveDataClosureBase {
  public:
-  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
-                              BitMap* cm_card_bm,
-                              uint max_worker_id) :
-    _g1h(g1h), _cm(g1h->concurrent_mark()),
-    _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
-    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
+    G1CreateLiveDataHRClosure(G1CollectedHeap* g1h,
+                              BitMap* cm_card_bm)
+    : G1LiveDataClosureBase(g1h, NULL, cm_card_bm) { }
 
   bool doHeapRegion(HeapRegion* hr) {
-    HeapWord* start = hr->bottom();
-    HeapWord* limit = hr->next_top_at_mark_start();
-    HeapWord* end = hr->end();
-
-    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
-           "Preconditions not met - "
-           "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
-           "top: " PTR_FORMAT ", end: " PTR_FORMAT,
-           p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
-
-    assert(hr->next_marked_bytes() == 0, "Precondition");
-
-    if (start == limit) {
-      // NTAMS of this region has not been set so nothing to do.
-      return false;
-    }
-
-    // 'start' should be in the heap.
-    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
-    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
-    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
-
-    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
-    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
-    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
-
-    // If ntams is not card aligned then we bump card bitmap index
-    // for limit so that we get the all the cards spanned by
-    // the object ending at ntams.
-    // Note: if this is the last region in the heap then ntams
-    // could be actually just beyond the end of the the heap;
-    // limit_idx will then  correspond to a (non-existent) card
-    // that is also outside the heap.
-    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
-      limit_idx += 1;
-    }
-
-    assert(limit_idx <= end_idx, "or else use atomics");
-
-    // Aggregate the "stripe" in the count data associated with hr.
-    uint hrm_index = hr->hrm_index();
-    size_t marked_bytes = 0;
-
-    for (uint i = 0; i < _max_worker_id; i += 1) {
-      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
-      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
-
-      // Fetch the marked_bytes in this region for task i and
-      // add it to the running total for this region.
-      marked_bytes += marked_bytes_array[hrm_index];
-
-      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
-      // into the global card bitmap.
-      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
-
-      while (scan_idx < limit_idx) {
-        assert(task_card_bm->at(scan_idx) == true, "should be");
-        _cm_card_bm->set_bit(scan_idx);
-        assert(_cm_card_bm->at(scan_idx) == true, "should be");
-
-        // BitMap::get_next_one_offset() can handle the case when
-        // its left_offset parameter is greater than its right_offset
-        // parameter. It does, however, have an early exit if
-        // left_offset == right_offset. So let's limit the value
-        // passed in for left offset here.
-        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
-        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
-      }
-    }
-
-    // Update the marked bytes for this region.
-    hr->add_to_marked_bytes(marked_bytes);
-
-    // Next heap region
-    return false;
+      size_t temp;
+      return mark_marked_during_marking(hr, true, &temp);
   }
-};
+  };
 
-class G1AggregateCountDataTask: public AbstractGangTask {
-protected:
   G1CollectedHeap* _g1h;
   G1ConcurrentMark* _cm;
   BitMap* _cm_card_bm;
-  uint _max_worker_id;
-  uint _active_workers;
   HeapRegionClaimer _hrclaimer;
 
 public:
-  G1AggregateCountDataTask(G1CollectedHeap* g1h,
-                           G1ConcurrentMark* cm,
-                           BitMap* cm_card_bm,
-                           uint max_worker_id,
-                           uint n_workers) :
-      AbstractGangTask("Count Aggregation"),
-      _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
-      _max_worker_id(max_worker_id),
-      _active_workers(n_workers),
-      _hrclaimer(_active_workers) {
+  // cm_card_bm is handed to each worker's closure; n_workers sizes the region
+  // claimer. _cm is looked up from g1h so the member is never left uninitialized.
+  G1CreateLiveDataTask(G1CollectedHeap* g1h, BitMap* cm_card_bm, uint n_workers) :
+      AbstractGangTask("Create Live Data"),
+      _g1h(g1h), _cm(g1h->concurrent_mark()),
+      _cm_card_bm(cm_card_bm),
+      _hrclaimer(n_workers) {
   }
 
   void work(uint worker_id) {
-    AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
+    SuspendibleThreadSetJoiner sts_join;  // NOTE(review): presumably joins the suspendible thread set for the scope of this call - confirm
 
+    G1CreateLiveDataHRClosure cl(_g1h, _cm_card_bm);  // closure is local to this work() call
     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
   }
 };
 
 
-void G1ConcurrentMark::aggregate_count_data() {
-  uint n_workers = _g1h->workers()->active_workers();
-
-  G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
-                                           _max_worker_id, n_workers);
+// Runs a G1CreateLiveDataTask on _parallel_workers, handing it the card live
+// bitmap and the gang's current number of active workers.
+void G1ConcurrentMark::create_live_data() {
+  const uint active = _parallel_workers->active_workers();
 
-  _g1h->workers()->run_task(&g1_par_agg_task);
+  G1CreateLiveDataTask task(_g1h, &_card_live_bm, active);
+  _parallel_workers->run_task(&task);
 }
 
-// Clear the per-worker arrays used to store the per-region counting data
-void G1ConcurrentMark::clear_all_count_data() {
-  // Clear the global card bitmap - it will be filled during
-  // liveness count aggregation (during remark) and the
-  // final counting task.
-  _card_bm.clear();
+// Gang task that clears a bitmap in chunks of M * BitsPerByte bits. Workers
+// claim chunk indices from a shared counter until num_tasks have been handed out.
+class G1ClearAllLiveDataTask : public AbstractGangTask {
+  BitMap* _bitmap;
+  size_t _num_tasks;
+  size_t _cur_task;
+public:
+  G1ClearAllLiveDataTask(BitMap* bitmap, size_t num_tasks) :
+    AbstractGangTask("Clear All Live Data"),
+    _bitmap(bitmap), _num_tasks(num_tasks), _cur_task(0) { }
 
-  // Clear the global region bitmap - it will be filled as part
-  // of the final counting task.
-  _region_bm.clear();
+  virtual void work(uint worker_id) {
+    const BitMap::idx_t bits_per_chunk = M * BitsPerByte;
+    for (;;) {
+      // Atomic::add() yields the incremented value, so minus one is our chunk.
+      const size_t chunk = Atomic::add(1, &_cur_task) - 1;
+      if (chunk >= _num_tasks) {
+        return;
+      }
+      const BitMap::idx_t first = bits_per_chunk * chunk;
+      const BitMap::idx_t limit = MIN2(first + bits_per_chunk, _bitmap->size());
+      _bitmap->clear_range(first, limit);
+    }
+  }
+};
 
-  uint max_regions = _g1h->max_regions();
-  assert(_max_worker_id > 0, "uninitialized");
+// Clears the card live bitmap using the given gang, then the region live bitmap directly.
+void G1ConcurrentMark::clear_all_live_data(WorkGang* workers) {
+  const double begin = os::elapsedTime();
 
-  for (uint i = 0; i < _max_worker_id; i += 1) {
-    BitMap* task_card_bm = count_card_bitmap_for(i);
-    size_t* marked_bytes_array = count_marked_bytes_array_for(i);
+  guarantee(Universe::is_fully_initialized(), "Should not call this during initialization.");
 
-    assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
-    assert(marked_bytes_array != NULL, "uninitialized");
+  // The card live bitmap is split into M-byte chunks for the gang to clear.
+  const size_t bitmap_bytes = _card_live_bm.size_in_words() * HeapWordSize;
 
-    memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
-    task_card_bm->clear();
-  }
+  G1ClearAllLiveDataTask task(&_card_live_bm, align_size_up(bitmap_bytes, M) / M);
+  workers->run_task(&task);
+
+  // The region live bitmap is always very small, even for huge heaps. Clear
+  // directly.
+  _region_live_bm.clear();
+  log_debug(gc, marking)("Clear Live Data took %.3fms", (os::elapsedTime() - begin) * 1000.0);
+}
+
+void G1ConcurrentMark::verify_all_live_data() {  // Asserts that neither live data bitmap has any bit set.
+  assert(_card_live_bm.count_one_bits() == 0, "Master card bitmap not clear");
+  assert(_region_live_bm.count_one_bits() == 0, "Master region bitmap not clear");
 }
 
 void G1ConcurrentMark::print_stats() {
   if (!log_is_enabled(Debug, gc, stats)) {
     return;

@@ -2597,11 +2556,10 @@
     _tasks[i]->print_stats();
     log_debug(gc, stats)("---------------------------------------------------------------------");
   }
 }
 
-// abandon current marking iteration due to a Full GC
 void G1ConcurrentMark::abort() {
   if (!cmThread()->during_cycle() || _has_aborted) {
     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
     return;
   }

@@ -2612,12 +2570,12 @@
 
   // Note we cannot clear the previous marking bitmap here
   // since VerifyDuringGC verifies the objects marked during
   // a full GC against the previous bitmap.
 
-  // Clear the liveness counting data
-  clear_all_count_data();
+  clear_all_live_data(_g1h->workers());
+  DEBUG_ONLY(verify_all_live_data());
   // Empty mark stack
   reset_marking_state();
   for (uint i = 0; i < _max_worker_id; ++i) {
     _tasks[i]->clear_region_fields();
   }

@@ -2661,11 +2619,11 @@
     print_ms_time_info("     ", "final marks", _remark_mark_times);
     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
 
   }
   print_ms_time_info("  ", "cleanups", _cleanup_times);
-  log.trace("    Final counting total time = %8.2f s (avg = %8.2f ms).",
+  log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
   if (G1ScrubRemSets) {
     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
   }

@@ -3500,23 +3458,19 @@
   _claimed = false;
 }
 
 G1CMTask::G1CMTask(uint worker_id,
                    G1ConcurrentMark* cm,
-                   size_t* marked_bytes,
-                   BitMap* card_bm,
                    G1CMTaskQueue* task_queue,
                    G1CMTaskQueueSet* task_queues)
   : _g1h(G1CollectedHeap::heap()),
     _worker_id(worker_id), _cm(cm),
     _claimed(false),
     _nextMarkBitMap(NULL), _hash_seed(17),
     _task_queue(task_queue),
     _task_queues(task_queues),
-    _cm_oop_closure(NULL),
-    _marked_bytes_array(marked_bytes),
-    _card_bm(card_bm) {
+    _cm_oop_closure(NULL) {
   guarantee(task_queue != NULL, "invariant");
   guarantee(task_queues != NULL, "invariant");
 
   _marking_step_diffs_ms.add(0.5);
 }
< prev index next >