# HG changeset patch
# User kbarrett
# Date 1430516292 14400
#      Fri May 01 17:38:12 2015 -0400
# Node ID 0d5b2d7c32587dae044ed0e6416d64d224707002
# Parent  ff6facebf36f7a8baaf0fe3425067797b09e7a1c
8075215: SATB buffer processing found reclaimed humongous object
Summary: Don't assume SATB buffer entries are valid objects
Reviewed-by: brutisso, ecaspole

diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.cpp b/src/share/vm/gc_implementation/g1/concurrentMark.cpp
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp
@@ -2640,24 +2640,41 @@
   _nextMarkBitMap = (CMBitMap*) temp;
 }
 
-class CMObjectClosure;
-
-// Closure for iterating over objects, currently only used for
-// processing SATB buffers.
-class CMObjectClosure : public ObjectClosure {
+// Closure for marking entries in SATB buffers.
+class CMSATBBufferClosure : public SATBBufferClosure {
 private:
   CMTask* _task;
+  G1CollectedHeap* _g1h;
+
+  // This is very similar to CMTask::deal_with_reference, but with
+  // more relaxed requirements for the argument, so this must be more
+  // circumspect about treating the argument as an object.
+  void do_entry(void* entry) const {
+    _task->increment_refs_reached();
+    HeapRegion* hr = _g1h->heap_region_containing_raw(entry);
+    if (entry < hr->next_top_at_mark_start()) {
+      // Until we get here, we don't know whether entry refers to a valid
+      // object; it could instead have been a stale reference.
+      oop obj = static_cast<oop>(entry);
+      assert(obj->is_oop(true /* ignore mark word */),
+             err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)));
+      _task->make_reference_grey(obj, hr);
+    }
+  }
 
 public:
-  void do_object(oop obj) {
-    _task->deal_with_reference(obj);
+  CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
+    : _task(task), _g1h(g1h) { }
+
+  virtual void do_buffer(void** buffer, size_t size) {
+    for (size_t i = 0; i < size; ++i) {
+      do_entry(buffer[i]);
+    }
   }
-
-  CMObjectClosure(CMTask* task) : _task(task) { }
 };
 
 class G1RemarkThreadsClosure : public ThreadClosure {
-  CMObjectClosure _cm_obj;
+  CMSATBBufferClosure _cm_satb_cl;
   G1CMOopClosure _cm_cl;
   MarkingCodeBlobClosure _code_cl;
   int _thread_parity;
@@ -2665,7 +2682,9 @@
 
  public:
   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
-    _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
+    _cm_satb_cl(task, g1h),
+    _cm_cl(g1h, g1h->concurrent_mark(), task),
+    _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
     _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
 
   void do_thread(Thread* thread) {
@@ -2681,11 +2700,11 @@
         // live by the SATB invariant but other oops recorded in nmethods may behave differently.
         jt->nmethods_do(&_code_cl);
 
-        jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
+        jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
       }
     } else if (thread->is_VM_thread()) {
       if (thread->claim_oops_do(_is_par, _thread_parity)) {
-        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
+        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
       }
     }
   }
@@ -3976,13 +3995,13 @@
   // very counter productive if it did that. :-)
   _draining_satb_buffers = true;
 
-  CMObjectClosure oc(this);
+  CMSATBBufferClosure satb_cl(this, _g1h);
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 
   // This keeps claiming and applying the closure to completed buffers
   // until we run out of buffers or we need to abort.
   while (!has_aborted() &&
-         satb_mq_set.apply_closure_to_completed_buffer(&oc)) {
+         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
     if (_cm->verbose_medium()) {
       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
     }
diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.hpp b/src/share/vm/gc_implementation/g1/concurrentMark.hpp
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp
@@ -1103,9 +1103,9 @@
   void regular_clock_call();
   bool concurrent() { return _concurrent; }
 
-  // Test whether objAddr might have already been passed over by the
+  // Test whether obj might have already been passed over by the
   // mark bitmap scan, and so needs to be pushed onto the mark stack.
-  bool is_below_finger(HeapWord* objAddr, HeapWord* global_finger) const;
+  bool is_below_finger(oop obj, HeapWord* global_finger) const;
 
   template<bool scan> void process_grey_object(oop obj);
 
@@ -1156,8 +1156,18 @@
 
   void set_cm_oop_closure(G1CMOopClosure* cm_oop_closure);
 
-  // It grays the object by marking it and, if necessary, pushing it
-  // on the local queue
+  // Increment the number of references this task has visited.
+  void increment_refs_reached() { ++_refs_reached; }
+
+  // Grey the object by marking it. If not already marked, push it on
+  // the local queue if below the finger.
+  // Precondition: obj is in region.
+  // Precondition: obj is below region's NTAMS.
+  inline void make_reference_grey(oop obj, HeapRegion* region);
+
+  // Grey the object (by calling make_reference_grey) if required,
+  // e.g. obj is below its containing region's NTAMS.
+  // Precondition: obj is a valid heap object.
   inline void deal_with_reference(oop obj);
 
   // It scans an object and visits its children.
diff --git a/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp b/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp
--- a/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp
@@ -259,15 +259,15 @@
              ++_local_pushes );
 }
 
-inline bool CMTask::is_below_finger(HeapWord* objAddr,
-                                    HeapWord* global_finger) const {
-  // If objAddr is above the global finger, then the mark bitmap scan
+inline bool CMTask::is_below_finger(oop obj, HeapWord* global_finger) const {
+  // If obj is above the global finger, then the mark bitmap scan
   // will find it later, and no push is needed. Similarly, if we have
-  // a current region and objAddr is between the local finger and the
+  // a current region and obj is between the local finger and the
   // end of the current region, then no push is needed. The tradeoff
   // of checking both vs only checking the global finger is that the
   // local check will be more accurate and so result in fewer pushes,
   // but may also be a little slower.
+  HeapWord* objAddr = (HeapWord*)obj;
   if (_finger != NULL) {
     // We have a current region.
 
@@ -277,7 +277,7 @@
     assert(_region_limit != NULL, "invariant");
     assert(_region_limit <= global_finger, "invariant");
 
-    // True if objAddr is less than the local finger, or is between
+    // True if obj is less than the local finger, or is between
     // the region limit and the global finger.
     if (objAddr < _finger) {
       return true;
@@ -289,13 +289,65 @@
   return objAddr < global_finger;
 }
 
+inline void CMTask::make_reference_grey(oop obj, HeapRegion* hr) {
+  if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
+
+    if (_cm->verbose_high()) {
+      gclog_or_tty->print_cr("[%u] marked object " PTR_FORMAT,
+                             _worker_id, p2i(obj));
+    }
+
+    // No OrderAccess:store_load() is needed. It is implicit in the
+    // CAS done in CMBitMap::parMark() call in the routine above.
+    HeapWord* global_finger = _cm->finger();
+
+    // We only need to push a newly grey object on the mark
+    // stack if it is in a section of memory the mark bitmap
+    // scan has already examined. Mark bitmap scanning
+    // maintains progress "fingers" for determining that.
+    //
+    // Notice that the global finger might be moving forward
+    // concurrently. This is not a problem. In the worst case, we
+    // mark the object while it is above the global finger and, by
+    // the time we read the global finger, it has moved forward
+    // past this object. In this case, the object will probably
+    // be visited when a task is scanning the region and will also
+    // be pushed on the stack. So, some duplicate work, but no
+    // correctness problems.
+    if (is_below_finger(obj, global_finger)) {
+      if (obj->is_typeArray()) {
+        // Immediately process arrays of primitive types, rather
+        // than pushing on the mark stack. This keeps us from
+        // adding humongous objects to the mark stack that might
+        // be reclaimed before the entry is processed - see
+        // selection of candidates for eager reclaim of humongous
+        // objects. The cost of the additional type test is
+        // mitigated by avoiding a trip through the mark stack,
+        // by only doing a bookkeeping update and avoiding the
+        // actual scan of the object - a typeArray contains no
+        // references, and the metadata is built-in.
+        process_grey_object<false>(obj);
+      } else {
+        if (_cm->verbose_high()) {
+          gclog_or_tty->print_cr("[%u] below a finger (local: " PTR_FORMAT
+                                 ", global: " PTR_FORMAT ") pushing "
+                                 PTR_FORMAT " on mark stack",
+                                 _worker_id, p2i(_finger),
+                                 p2i(global_finger), p2i(obj));
+        }
+        push(obj);
+      }
+    }
+  }
+}
+
 inline void CMTask::deal_with_reference(oop obj) {
   if (_cm->verbose_high()) {
     gclog_or_tty->print_cr("[%u] we're dealing with reference = "PTR_FORMAT,
                            _worker_id, p2i((void*) obj));
   }
 
-  ++_refs_reached;
+  increment_refs_reached();
 
   HeapWord* objAddr = (HeapWord*) obj;
   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
@@ -307,55 +359,7 @@
       // anything with it).
       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
       if (!hr->obj_allocated_since_next_marking(obj)) {
-        if (_cm->verbose_high()) {
-          gclog_or_tty->print_cr("[%u] "PTR_FORMAT" is not considered marked",
-                                 _worker_id, p2i((void*) obj));
-        }
-
-        // we need to mark it first
-        if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
-          // No OrderAccess:store_load() is needed. It is implicit in the
-          // CAS done in CMBitMap::parMark() call in the routine above.
-          HeapWord* global_finger = _cm->finger();
-
-          // We only need to push a newly grey object on the mark
-          // stack if it is in a section of memory the mark bitmap
-          // scan has already examined. Mark bitmap scanning
-          // maintains progress "fingers" for determining that.
-          //
-          // Notice that the global finger might be moving forward
-          // concurrently. This is not a problem. In the worst case, we
-          // mark the object while it is above the global finger and, by
-          // the time we read the global finger, it has moved forward
-          // past this object. In this case, the object will probably
-          // be visited when a task is scanning the region and will also
-          // be pushed on the stack. So, some duplicate work, but no
-          // correctness problems.
-          if (is_below_finger(objAddr, global_finger)) {
-            if (obj->is_typeArray()) {
-              // Immediately process arrays of primitive types, rather
-              // than pushing on the mark stack. This keeps us from
-              // adding humongous objects to the mark stack that might
-              // be reclaimed before the entry is processed - see
-              // selection of candidates for eager reclaim of humongous
-              // objects. The cost of the additional type test is
-              // mitigated by avoiding a trip through the mark stack,
-              // by only doing a bookkeeping update and avoiding the
-              // actual scan of the object - a typeArray contains no
-              // references, and the metadata is built-in.
-              process_grey_object<false>(obj);
-            } else {
-              if (_cm->verbose_high()) {
-                gclog_or_tty->print_cr("[%u] below a finger (local: " PTR_FORMAT
-                                       ", global: " PTR_FORMAT ") pushing "
-                                       PTR_FORMAT " on mark stack",
-                                       _worker_id, p2i(_finger),
-                                       p2i(global_finger), p2i(objAddr));
-              }
-              push(obj);
-            }
-          }
-        }
+        make_reference_grey(obj, hr);
       }
     }
   }
diff --git a/src/share/vm/gc_implementation/g1/satbQueue.cpp b/src/share/vm/gc_implementation/g1/satbQueue.cpp
--- a/src/share/vm/gc_implementation/g1/satbQueue.cpp
+++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp
@@ -29,6 +29,7 @@
 #include "memory/sharedHeap.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/mutexLocker.hpp"
+#include "runtime/safepoint.hpp"
 #include "runtime/thread.hpp"
 #include "runtime/vmThread.hpp"
 
@@ -162,10 +163,7 @@
   assert(_lock == NULL || _lock->owned_by_self(),
          "we should have taken the lock before calling this");
 
-  // Even if G1SATBBufferEnqueueingThresholdPercent == 0 we have to
-  // filter the buffer given that this will remove any references into
-  // the CSet as we currently assume that no such refs will appear in
-  // enqueued buffers.
+  // If G1SATBBufferEnqueueingThresholdPercent == 0 we could skip filtering.
 
   // This method should only be called if there is a non-NULL buffer
   // that is full.
@@ -182,25 +180,19 @@
   return should_enqueue;
 }
 
-void ObjPtrQueue::apply_closure_and_empty(ObjectClosure* cl) {
+void ObjPtrQueue::apply_closure_and_empty(SATBBufferClosure* cl) {
+  assert(SafepointSynchronize::is_at_safepoint(),
+         "SATB queues must only be processed at safepoints");
   if (_buf != NULL) {
-    apply_closure_to_buffer(cl, _buf, _index, _sz);
+    assert(_index % sizeof(void*) == 0, "invariant");
+    assert(_sz % sizeof(void*) == 0, "invariant");
+    assert(_index <= _sz, "invariant");
+    cl->do_buffer(_buf + byte_index_to_index((int)_index),
+                  byte_index_to_index((int)(_sz - _index)));
     _index = _sz;
   }
 }
 
-void ObjPtrQueue::apply_closure_to_buffer(ObjectClosure* cl,
-                                          void** buf, size_t index, size_t sz) {
-  if (cl == NULL) return;
-  for (size_t i = index; i < sz; i += oopSize) {
-    oop obj = (oop)buf[byte_index_to_index((int)i)];
-    // There can be NULL entries because of destructors.
-    if (obj != NULL) {
-      cl->do_object(obj);
-    }
-  }
-}
-
 #ifndef PRODUCT
 // Helpful for debugging
 
@@ -291,7 +283,7 @@
   shared_satb_queue()->filter();
 }
 
-bool SATBMarkQueueSet::apply_closure_to_completed_buffer(ObjectClosure* cl) {
+bool SATBMarkQueueSet::apply_closure_to_completed_buffer(SATBBufferClosure* cl) {
   BufferNode* nd = NULL;
   {
     MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
@@ -305,7 +297,18 @@
   }
   if (nd != NULL) {
     void **buf = BufferNode::make_buffer_from_node(nd);
-    ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz);
+    // Skip over NULL entries at beginning (e.g. push end) of buffer.
+    // Filtering can result in non-full completed buffers; see
+    // should_enqueue_buffer.
+    assert(_sz % sizeof(void*) == 0, "invariant");
+    size_t limit = ObjPtrQueue::byte_index_to_index((int)_sz);
+    for (size_t i = 0; i < limit; ++i) {
+      if (buf[i] != NULL) {
+        // Found the end of the block of NULLs; process the remainder.
+        cl->do_buffer(buf + i, limit - i);
+        break;
+      }
+    }
     deallocate_buffer(buf);
     return true;
   } else {
diff --git a/src/share/vm/gc_implementation/g1/satbQueue.hpp b/src/share/vm/gc_implementation/g1/satbQueue.hpp
--- a/src/share/vm/gc_implementation/g1/satbQueue.hpp
+++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp
@@ -25,29 +25,30 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_SATBQUEUE_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_SATBQUEUE_HPP
 
+#include "memory/allocation.hpp"
 #include "gc_implementation/g1/ptrQueue.hpp"
 
-class ObjectClosure;
 class JavaThread;
 class SATBMarkQueueSet;
 
+// Base class for processing the contents of a SATB buffer.
+class SATBBufferClosure : public StackObj {
+protected:
+  ~SATBBufferClosure() { }
+
+public:
+  // Process the SATB entries in the designated buffer range.
+  virtual void do_buffer(void** buffer, size_t size) = 0;
+};
+
 // A ptrQueue whose elements are "oops", pointers to object heads.
 class ObjPtrQueue: public PtrQueue {
-  friend class Threads;
   friend class SATBMarkQueueSet;
-  friend class G1RemarkThreadsClosure;
 
 private:
   // Filter out unwanted entries from the buffer.
   void filter();
 
-  // Apply the closure to all elements and empty the buffer;
-  void apply_closure_and_empty(ObjectClosure* cl);
-
-  // Apply the closure to all elements of "buf", down to "index" (inclusive.)
-  static void apply_closure_to_buffer(ObjectClosure* cl,
-                                      void** buf, size_t index, size_t sz);
-
 public:
   ObjPtrQueue(PtrQueueSet* qset, bool perm = false) :
     // SATB queues are only active during marking cycles. We create
@@ -60,6 +61,10 @@
   // Process queue entries and free resources.
   void flush();
 
+  // Apply cl to the active part of the buffer.
+  // Prerequisite: Must be at a safepoint.
+  void apply_closure_and_empty(SATBBufferClosure* cl);
+
   // Overrides PtrQueue::should_enqueue_buffer(). See the method's
   // definition for more information.
   virtual bool should_enqueue_buffer();
@@ -97,10 +102,12 @@
   // Filter all the currently-active SATB buffers.
   void filter_thread_buffers();
 
-  // If there exists some completed buffer, pop it, then apply the
-  // closure to all its elements, and return true. If no
-  // completed buffers exist, return false.
-  bool apply_closure_to_completed_buffer(ObjectClosure* closure);
+  // If there exists some completed buffer, pop and process it, and
+  // return true. Otherwise return false. Processing a buffer
+  // consists of applying the closure to the buffer range starting
+  // with the first non-NULL entry to the end of the buffer; the
+  // leading entries may be NULL due to filtering.
+  bool apply_closure_to_completed_buffer(SATBBufferClosure* cl);
 
 #ifndef PRODUCT
 // Helpful for debugging