src/share/vm/gc_implementation/g1/concurrentMark.cpp


*** 667,686 ****
      CMTaskQueue* queue = _task_queues->queue(i);
      queue->set_empty();
    }
  }
  
! void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
    assert(active_tasks <= _max_task_num, "we should not have more");
  
    _active_tasks = active_tasks;
    // Need to update the three data structures below according to the
    // number of active threads for this phase.
    _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
    _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
    _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
  
    _concurrent = concurrent;
    // We propagate this to all tasks, not just the active ones.
    for (int i = 0; i < (int) _max_task_num; ++i)
      _tasks[i]->set_concurrent(concurrent);
--- 667,690 ----
      CMTaskQueue* queue = _task_queues->queue(i);
      queue->set_empty();
    }
  }
  
! void ConcurrentMark::set_concurrency(uint active_tasks) {
    assert(active_tasks <= _max_task_num, "we should not have more");
  
    _active_tasks = active_tasks;
    // Need to update the three data structures below according to the
    // number of active threads for this phase.
    _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
    _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
    _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
+ }
+ 
+ void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
+   set_concurrency(active_tasks);
  
    _concurrent = concurrent;
    // We propagate this to all tasks, not just the active ones.
    for (int i = 0; i < (int) _max_task_num; ++i)
      _tasks[i]->set_concurrent(concurrent);

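The hunk above splits the old set_phase() into two layers. As a quick orientation aid, here is a minimal standalone sketch of that layering, assuming the call pattern shown in the later hunks of this change (remark and concurrent start call set_concurrency_and_phase(), while the MT reference-processing proxy tasks call set_concurrency() alone so the STW phase flag set before remark is left untouched). MockMark and its members are illustrative stand-ins, not HotSpot code.

  // Illustrative sketch only (not ConcurrentMark): models the split between
  // set_concurrency() and set_concurrency_and_phase().
  #include <cstdio>

  class MockMark {
    unsigned _active_tasks;
    bool     _concurrent;
  public:
    MockMark() : _active_tasks(0), _concurrent(false) { }

    // Counterpart of set_concurrency(): only records the number of workers
    // the termination protocol and barriers should expect.
    void set_concurrency(unsigned active_tasks) {
      _active_tasks = active_tasks;
    }

    // Counterpart of set_concurrency_and_phase(): also records whether the
    // upcoming phase is concurrent or a STW pause.
    void set_concurrency_and_phase(unsigned active_tasks, bool concurrent) {
      set_concurrency(active_tasks);
      _concurrent = concurrent;
    }

    void print() const {
      std::printf("tasks=%u concurrent=%d\n", _active_tasks, _concurrent);
    }
  };

  int main() {
    MockMark cm;
    cm.set_concurrency_and_phase(4, false); // before the remark pause (STW)
    cm.set_concurrency(2);                  // before an MT ref-proc proxy task
    cm.print();                             // phase flag unchanged: tasks=2 concurrent=0
    return 0;
  }
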
*** 689,699 ****
    } else {
      // We currently assume that the concurrent flag has been set to
      // false before we start remark. At this point we should also be
      // in a STW phase.
      assert(!concurrent_marking_in_progress(), "invariant");
!     assert(_finger == _heap_end, "only way to get here");
      update_g1_committed(true);
    }
  }
  
  void ConcurrentMark::set_non_marking_state() {
--- 693,705 ----
    } else {
      // We currently assume that the concurrent flag has been set to
      // false before we start remark. At this point we should also be
      // in a STW phase.
      assert(!concurrent_marking_in_progress(), "invariant");
!     assert(_finger == _heap_end,
!            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
!                    _finger, _heap_end));
      update_g1_committed(true);
    }
  }
  
  void ConcurrentMark::set_non_marking_state() {

*** 857,882 ****
  
    if (verbose_low()) {
      gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
    }
  
!   // let task 0 do this
    if (task_num == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurent
      // marking.
!     reset_marking_state(concurrent() /* clear_overflow */);
      force_overflow()->update();
  
      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  
    // after this, each task should reset its own data structures then
    // then go into the second barrier
  }
--- 863,896 ----
  
    if (verbose_low()) {
      gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
    }
  
!   // If we're executing the concurrent phase of marking, reset the marking
!   // state; otherwise the marking state is reset after reference processing,
!   // during the remark pause.
!   // If we reset here as a result of an overflow during the remark we will
!   // see assertion failures from any subsequent set_concurrency_and_phase()
!   // calls.
!   if (concurrent()) {
!     // let the task 0 do this
    if (task_num == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurent
      // marking.
!       reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();
  
      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
+   }
  
    // after this, each task should reset its own data structures then
    // then go into the second barrier
  }

*** 890,900 ****
    }
  
    _second_overflow_barrier_sync.enter();
    if (concurrent()) {
      ConcurrentGCThread::stsJoin();
    }
!   // at this point everything should be re-initialised and ready to go
  
    if (verbose_low()) {
      gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
    }
  }
--- 904,914 ----
    }
  
    _second_overflow_barrier_sync.enter();
    if (concurrent()) {
      ConcurrentGCThread::stsJoin();
    }
!   // at this point everything should be re-initialized and ready to go
  
    if (verbose_low()) {
      gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
    }
  }

*** 948,959 ****
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
  
        the_task->do_marking_step(mark_step_duration_ms,
!                                 true /* do_stealing */,
!                                 true /* do_termination */);
  
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
--- 962,973 ----
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
  
        the_task->do_marking_step(mark_step_duration_ms,
!                                 true /* do_termination */,
!                                 false /* is_serial*/);
  
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;

*** 1105,1116 ****
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
  
    uint active_workers = MAX2(1U, parallel_marking_threads());
  
!   // Parallel task terminator is set in "set_phase()"
!   set_phase(active_workers, true /* concurrent */);
  
    CMConcurrentMarkingTask markingTask(this, cmThread());
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int)active_workers);
      // Don't set _n_par_threads because it affects MT in proceess_strong_roots()
--- 1119,1130 ----
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
  
    uint active_workers = MAX2(1U, parallel_marking_threads());
  
!   // Parallel task terminator is set in "set_concurrency_and_phase()"
!   set_concurrency_and_phase(active_workers, true /* concurrent */);
  
    CMConcurrentMarkingTask markingTask(this, cmThread());
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int)active_workers);
      // Don't set _n_par_threads because it affects MT in proceess_strong_roots()

*** 1158,1173 ****
      weakRefsWork(clear_all_soft_refs);
  
      if (has_overflown()) {
        // Oops. We overflowed. Restart concurrent marking.
        _restart_for_overflow = true;
-       // Clear the marking state because we will be restarting
-       // marking due to overflowing the global mark stack.
-       reset_marking_state();
        if (G1TraceMarkStackOverflow) {
          gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
        }
      } else {
        // Aggregate the per-task counting data that we have accumulated
        // while marking.
        aggregate_count_data();
--- 1172,1197 ----
      weakRefsWork(clear_all_soft_refs);
  
      if (has_overflown()) {
        // Oops. We overflowed. Restart concurrent marking.
        _restart_for_overflow = true;
        if (G1TraceMarkStackOverflow) {
          gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
        }
+ 
+       // Verify the heap w.r.t. the previous marking bitmap.
+       if (VerifyDuringGC) {
+         HandleMark hm;  // handle scope
+         gclog_or_tty->print(" VerifyDuringGC:(overflow)");
+         Universe::heap()->prepare_for_verify();
+         Universe::verify(/* silent */ false,
+                          /* option */ VerifyOption_G1UsePrevMarking);
+       }
+ 
+       // Clear the marking state because we will be restarting
+       // marking due to overflowing the global mark stack.
+       reset_marking_state();
      } else {
        // Aggregate the per-task counting data that we have accumulated
        // while marking.
        aggregate_count_data();

*** 2049,2134 ****
      }
    }
  
    assert(tmp_free_list.is_empty(), "post-condition");
  }
  
! // Support closures for reference procssing in G1
  
  bool G1CMIsAliveClosure::do_object_b(oop obj) {
    HeapWord* addr = (HeapWord*)obj;
    return addr != NULL &&
           (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  }
  
! class G1CMKeepAliveClosure: public OopClosure {
!   G1CollectedHeap* _g1;
!   ConcurrentMark*  _cm;
! public:
!   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
!     _g1(g1), _cm(cm) {
!     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
!   }
! 
!   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
!   virtual void do_oop(      oop* p) { do_oop_work(p); }
! 
!   template <class T> void do_oop_work(T* p) {
!     oop obj = oopDesc::load_decode_heap_oop(p);
!     HeapWord* addr = (HeapWord*)obj;
! 
!     if (_cm->verbose_high()) {
!       gclog_or_tty->print_cr("\t[0] we're looking at location "
!                              "*"PTR_FORMAT" = "PTR_FORMAT,
!                              p, (void*) obj);
!     }
! 
!     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
!       _cm->mark_and_count(obj);
!       _cm->mark_stack_push(obj);
!     }
!   }
! };
! 
! class G1CMDrainMarkingStackClosure: public VoidClosure {
    ConcurrentMark*        _cm;
-   CMMarkStack*           _markStack;
-   G1CMKeepAliveClosure*  _oopClosure;
- public:
-   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
-                                G1CMKeepAliveClosure* oopClosure) :
-     _cm(cm),
-     _markStack(markStack),
-     _oopClosure(oopClosure) { }
- 
-   void do_void() {
-     _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
-   }
- };
- 
- // 'Keep Alive' closure used by parallel reference processing.
- // An instance of this closure is used in the parallel reference processing
- // code rather than an instance of G1CMKeepAliveClosure. We could have used
- // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
- // placed on to discovered ref lists once so we can mark and push with no
- // need to check whether the object has already been marked. Using the
- // G1CMKeepAliveClosure would mean, however, having all the worker threads
- // operating on the global mark stack. This means that an individual
- // worker would be doing lock-free pushes while it processes its own
- // discovered ref list followed by drain call. If the discovered ref lists
- // are unbalanced then this could cause interference with the other
- // workers. Using a CMTask (and its embedded local data structures)
- // avoids that potential interference.
- class G1CMParKeepAliveAndDrainClosure: public OopClosure {
-   ConcurrentMark*  _cm;
    CMTask*          _task;
    int              _ref_counter_limit;
    int              _ref_counter;
  public:
!   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
!     _cm(cm), _task(task),
      _ref_counter_limit(G1RefProcDrainInterval) {
      assert(_ref_counter_limit > 0, "sanity");
      _ref_counter = _ref_counter_limit;
    }
  
    virtual void do_oop(narrowOop* p) { do_oop_work(p); }
    virtual void do_oop(      oop* p) { do_oop_work(p); }
--- 2073,2115 ----
      }
    }
  
    assert(tmp_free_list.is_empty(), "post-condition");
  }
  
! // Supporting Object and Oop closures for reference discovery
! // and processing in during marking
  
  bool G1CMIsAliveClosure::do_object_b(oop obj) {
    HeapWord* addr = (HeapWord*)obj;
    return addr != NULL &&
           (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  }
  
! // 'Keep Alive' oop closure used by both serial parallel reference processing.
! // Uses the CMTask associated with a worker thread (for serial reference
! // processing the CMTask for worker 0 is used) to preserve (mark) and
! // trace referent objects.
! //
! // Using the CMTask and embedded local queues avoids having the worker
! // threads operating on the global mark stack. This reduces the risk
! // of overflowing the stack - which we would rather avoid at this late
! // state. Also using the tasks' local queues removes the potential
! // of the workers interfering with each other that could occur if
! // operating on the global stack.
! class G1CMKeepAliveAndDrainClosure: public OopClosure {
    ConcurrentMark*  _cm;
    CMTask*          _task;
    int              _ref_counter_limit;
    int              _ref_counter;
+   bool             _is_serial;
  public:
!   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
!     _cm(cm), _task(task), _is_serial(is_serial),
      _ref_counter_limit(G1RefProcDrainInterval) {
      assert(_ref_counter_limit > 0, "sanity");
+     assert(!_is_serial || _task->task_id() == 0, "only task 0 for serial code");
      _ref_counter = _ref_counter_limit;
    }
  
    virtual void do_oop(narrowOop* p) { do_oop_work(p); }
    virtual void do_oop(      oop* p) { do_oop_work(p); }

*** 2144,2170 ****
  
      _task->deal_with_reference(obj);
      _ref_counter--;
  
      if (_ref_counter == 0) {
!       // We have dealt with _ref_counter_limit references, pushing them and objects
!       // reachable from them on to the local stack (and possibly the global stack).
!       // Call do_marking_step() to process these entries. We call the routine in a
!       // loop, which we'll exit if there's nothing more to do (i.e. we're done
!       // with the entries that we've pushed as a result of the deal_with_reference
!       // calls above) or we overflow.
!       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
!       // while there may still be some work to do. (See the comment at the
!       // beginning of CMTask::do_marking_step() for those conditions - one of which
!       // is reaching the specified time target.) It is only when
!       // CMTask::do_marking_step() returns without setting the has_aborted() flag
!       // that the marking has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
!                                false /* do_stealing */,
!                                false /* do_termination */);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {
--- 2125,2155 ----
  
      _task->deal_with_reference(obj);
      _ref_counter--;
  
      if (_ref_counter == 0) {
!       // We have dealt with _ref_counter_limit references, pushing them
!       // and objects reachable from them on to the local stack (and
!       // possibly the global stack). Call CMTask::do_marking_step() to
!       // process these entries.
!       //
!       // We call CMTask::do_marking_step() in a loop, which we'll exit if
!       // there's nothing more to do (i.e. we're done with the entries that
!       // were pushed as a result of the CMTask::deal_with_reference() calls
!       // above) or we overflow.
!       //
!       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
!       // flag while there may still be some work to do. (See the comment at
!       // the beginning of CMTask::do_marking_step() for those conditions -
!       // one of which is reaching the specified time target.) It is only
!       // when CMTask::do_marking_step() returns without setting the
!       // has_aborted() flag that the marking step has completed.
        do {
          double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
          _task->do_marking_step(mark_step_duration_ms,
!                                false /* do_termination */,
!                                _is_serial);
        } while (_task->has_aborted() && !_cm->has_overflown());
        _ref_counter = _ref_counter_limit;
      }
    } else {
      if (_cm->verbose_high()) {

*** 2172,2211 ****
        }
      }
    }
  };
  
! class G1CMParDrainMarkingStackClosure: public VoidClosure {
    ConcurrentMark*  _cm;
    CMTask*          _task;
  public:
!   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
!     _cm(cm), _task(task) { }
  
    void do_void() {
      do {
        if (_cm->verbose_high()) {
!         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
!                                _task->task_id());
        }
  
!       // We call CMTask::do_marking_step() to completely drain the local and
!       // global marking stacks. The routine is called in a loop, which we'll
!       // exit if there's nothing more to do (i.e. we'completely drained the
!       // entries that were pushed as a result of applying the
!       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
!       // lists above) or we overflow the global marking stack.
!       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
!       // while there may still be some work to do. (See the comment at the
!       // beginning of CMTask::do_marking_step() for those conditions - one of which
!       // is reaching the specified time target.) It is only when
!       // CMTask::do_marking_step() returns without setting the has_aborted() flag
!       // that the marking has completed.
        _task->do_marking_step(1000000000.0 /* something very large */,
!                              true /* do_stealing */,
!                              true /* do_termination */);
      } while (_task->has_aborted() && !_cm->has_overflown());
    }
  };
  
  // Implementation of AbstractRefProcTaskExecutor for parallel
--- 2157,2210 ----
        }
      }
    }
  };
  
! // 'Drain' oop closure used by both serial and parallel reference processing.
! // Uses the CMTask associated with a given worker thread (for serial
! // reference processing the CMtask for worker 0 is used). Calls the
! // do_marking_step routine, with an unbelievably large timeout value,
! // to drain the marking data structures of the remaining entries
! // added by the 'keep alive' oop closure above.
! 
! class G1CMDrainMarkingStackClosure: public VoidClosure {
    ConcurrentMark*  _cm;
    CMTask*          _task;
+   bool             _is_serial;
  public:
!   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
!     _cm(cm), _task(task), _is_serial(is_serial) {
!     assert(!_is_serial || _task->task_id() == 0, "only task 0 for serial code");
!   }
  
    void do_void() {
      do {
        if (_cm->verbose_high()) {
!         gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step - serial: %s",
!                                _task->task_id(), BOOL_TO_STR(_is_serial));
        }
  
!       // We call CMTask::do_marking_step() to completely drain the local
!       // and global marking stacks of entries pushed by the 'keep alive'
!       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
!       //
!       // CMTask::do_marking_step() is called in a loop, which we'll exit
!       // if there's nothing more to do (i.e. we'completely drained the
!       // entries that were pushed as a a result of applying the 'keep alive'
!       // closure to the entries on the discovered ref lists) or we overflow
!       // the global marking stack.
!       //
!       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
!       // flag while there may still be some work to do. (See the comment at
!       // the beginning of CMTask::do_marking_step() for those conditions -
!       // one of which is reaching the specified time target.) It is only
!       // when CMTask::do_marking_step() returns without setting the
!       // has_aborted() flag that the marking step has completed.
        _task->do_marking_step(1000000000.0 /* something very large */,
!                              true /* do_termination */,
!                              _is_serial);
      } while (_task->has_aborted() && !_cm->has_overflown());
    }
  };
  
  // Implementation of AbstractRefProcTaskExecutor for parallel

*** 2240,2269 ****
  public:
    G1CMRefProcTaskProxy(ProcessTask& proc_task,
                         G1CollectedHeap* g1h,
                         ConcurrentMark* cm) :
      AbstractGangTask("Process reference objects in parallel"),
!     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
  
    virtual void work(uint worker_id) {
!     CMTask* marking_task = _cm->task(worker_id);
      G1CMIsAliveClosure g1_is_alive(_g1h);
!     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
!     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
  
      _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
    }
  };
  
  void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
    assert(_workers != NULL, "Need parallel worker threads.");
  
    G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
  
!   // We need to reset the phase for each task execution so that
!   // the termination protocol of CMTask::do_marking_step works.
!   _cm->set_phase(_active_workers, false /* concurrent */);
    _g1h->set_par_threads(_active_workers);
    _workers->run_task(&proc_task_proxy);
    _g1h->set_par_threads(0);
  }
--- 2239,2274 ----
  public:
    G1CMRefProcTaskProxy(ProcessTask& proc_task,
                         G1CollectedHeap* g1h,
                         ConcurrentMark* cm) :
      AbstractGangTask("Process reference objects in parallel"),
!     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
!     ReferenceProcessor* rp = _g1h->ref_processor_cm();
!     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
!   }
  
    virtual void work(uint worker_id) {
!     CMTask* task = _cm->task(worker_id);
      G1CMIsAliveClosure g1_is_alive(_g1h);
!     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
!     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
  
      _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
    }
  };
  
  void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
    assert(_workers != NULL, "Need parallel worker threads.");
+   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
  
    G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
  
!   // We need to reset the concurrency level before each
!   // proxy task execution, so that the termination protocol
!   // and overflow handling in CMTask::do_marking_step() knows
!   // how many workers to wait for.
!   _cm->set_concurrency(_active_workers);
    _g1h->set_par_threads(_active_workers);
    _workers->run_task(&proc_task_proxy);
    _g1h->set_par_threads(0);
  }

*** 2281,2299 ****
--- 2286,2322 ----
    }
  };
  
  void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
    assert(_workers != NULL, "Need parallel worker threads.");
+   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
  
    G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
  
+   // Not strictly necessary but...
+   //
+   // We need to reset the concurrency level before each
+   // proxy task execution, so that the termination protocol
+   // and overflow handling in CMTask::do_marking_step() knows
+   // how many workers to wait for.
+   _cm->set_concurrency(_active_workers);
    _g1h->set_par_threads(_active_workers);
    _workers->run_task(&enq_task_proxy);
    _g1h->set_par_threads(0);
  }
  
  void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
+   if (has_overflown()) {
+     // Skip processing the discovered references if we have
+     // overflown the global marking stack. Reference objects
+     // only get discovered once so it is OK to not
+     // de-populate the discovered reference lists. We could have,
+     // but the only benefit would be that, when marking restarts,
+     // less reference objects are discovered.
+     return;
+   }
+ 
    ResourceMark rm;
    HandleMark hm;
  
    G1CollectedHeap* g1h = G1CollectedHeap::heap();

*** 2311,2379 ****
    ReferenceProcessor* rp = g1h->ref_processor_cm();
  
    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.
  
!   // Process weak references.
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");
  
!   G1CMKeepAliveClosure g1_keep_alive(g1h, this);
!   G1CMDrainMarkingStackClosure
!     g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
  
!   // We use the work gang from the G1CollectedHeap and we utilize all
!   // the worker threads.
!   uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
    active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
  
    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);
  
    ReferenceProcessorStats stats;
!   if (rp->processing_is_mt()) {
!     // Set the degree of MT here. If the discovery is done MT, there
!     // may have been a different number of threads doing the discovery
!     // and a different number of discovered lists may have Ref objects.
!     // That is OK as long as the Reference lists are balanced (see
!     // balance_all_queues() and balance_queues()).
      rp->set_active_mt_degree(active_workers);
  
      stats = rp->process_discovered_references(&g1_is_alive,
                                                &g1_keep_alive,
                                                &g1_drain_mark_stack,
!                                               &par_task_executor,
                                                g1h->gc_timer_cm());
  
!     // The work routines of the parallel keep_alive and drain_marking_stack
!     // will set the has_overflown flag if we overflow the global marking
!     // stack.
!   } else {
!     stats = rp->process_discovered_references(&g1_is_alive,
!                                               &g1_keep_alive,
!                                               &g1_drain_mark_stack,
!                                               NULL,
!                                               g1h->gc_timer_cm());
!   }
  
    g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
  
    assert(_markStack.overflow() || _markStack.isEmpty(),
           "mark stack should be empty (unless it overflowed)");
    if (_markStack.overflow()) {
!     // Should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }
  
-   if (rp->processing_is_mt()) {
      assert(rp->num_q() == active_workers, "why not");
-     rp->enqueue_discovered_references(&par_task_executor);
-   } else {
-     rp->enqueue_discovered_references();
-   }
  
    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }
  
  // Now clean up stale oops in StringTable
--- 2334,2415 ----
    ReferenceProcessor* rp = g1h->ref_processor_cm();
  
    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.
  
!   // Set the soft reference policy
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");
  
!   // Instances of the 'Keep Alive' and 'Complete GC' closures used
!   // in serial reference processing. Note these closures are also
!   // used for serially processing (by the the current thread) the
!   // JNI references during parallel reference processing.
!   //
!   // These closures do not need to synchronize with the worker
!   // threads involved in parallel reference processing as these
!   // instances are executed serially by the current thread (e.g.
!   // reference processing is not multi-threaded and is thus
!   // performed by the current thread instead of a gang worker).
!   //
!   // The gang tasks involved in parallel reference procssing create
!   // their own instances of these closures, which do their own
!   // synchronization among themselves.
!   G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
!   G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
  
!   // We need at least one active thread. If reference processing
!   // is not multi-threaded we use the current (VMThread) thread,
!   // otherwise we use the work gang from the G1CollectedHeap and
!   // we utilize all the worker threads we can.
!   bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
!   uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
    active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
  
+   // Parallel processing task executor.
    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);
+   AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
  
    ReferenceProcessorStats stats;
! 
!   // Set the concurrency level. The phase was already set prior to
!   // executing the remark task.
!   set_concurrency(active_workers);
! 
!   // Set the degree of MT processing here. If the discovery was done MT,
!   // the number of threads involved during discovery could differ from
!   // the number of active workers. This is OK as long as the discovered
!   // Reference lists are balanced (see balance_all_queues() and balance_queues()).
    rp->set_active_mt_degree(active_workers);
  
+   // Process the weak references.
    stats = rp->process_discovered_references(&g1_is_alive,
                                              &g1_keep_alive,
                                              &g1_drain_mark_stack,
!                                             executor,
                                              g1h->gc_timer_cm());
  
!   // The do_oop work routines of the keep_alive and drain_marking_stack
!   // oop closures will set the has_overflown flag if we overflow the
!   // global marking stack.
  
    g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
  
    assert(_markStack.overflow() || _markStack.isEmpty(),
           "mark stack should be empty (unless it overflowed)");
+ 
    if (_markStack.overflow()) {
!     // This should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }
  
    assert(rp->num_q() == active_workers, "why not");
+ 
+   rp->enqueue_discovered_references(executor);
  
    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }
  
  // Now clean up stale oops in StringTable

*** 2388,2419 ****
    _nextMarkBitMap = (CMBitMap*) temp;
  }
  
  class CMRemarkTask: public AbstractGangTask {
  private:
!   ConcurrentMark *_cm;
! 
  public:
    void work(uint worker_id) {
      // Since all available tasks are actually started, we should
      // only proceed if we're supposed to be actived.
      if (worker_id < _cm->active_tasks()) {
        CMTask* task = _cm->task(worker_id);
        task->record_start_time();
        do {
          task->do_marking_step(1000000000.0 /* something very large */,
!                               true /* do_stealing */,
!                               true /* do_termination */);
        } while (task->has_aborted() && !_cm->has_overflown());
        // If we overflow, then we do not want to restart. We instead
        // want to abort remark and do concurrent marking again.
        task->record_end_time();
      }
    }
  
!   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
!     AbstractGangTask("Par Remark"), _cm(cm) {
      _cm->terminator()->reset_for_reuse(active_workers);
    }
  };
--- 2424,2455 ----
    _nextMarkBitMap = (CMBitMap*) temp;
  }
  
  class CMRemarkTask: public AbstractGangTask {
  private:
!   ConcurrentMark* _cm;
!   bool            _is_serial;
  public:
    void work(uint worker_id) {
      // Since all available tasks are actually started, we should
      // only proceed if we're supposed to be actived.
      if (worker_id < _cm->active_tasks()) {
        CMTask* task = _cm->task(worker_id);
        task->record_start_time();
        do {
          task->do_marking_step(1000000000.0 /* something very large */,
!                               true /* do_termination */,
!                               _is_serial);
        } while (task->has_aborted() && !_cm->has_overflown());
        // If we overflow, then we do not want to restart. We instead
        // want to abort remark and do concurrent marking again.
        task->record_end_time();
      }
    }
  
!   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
!     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
      _cm->terminator()->reset_for_reuse(active_workers);
    }
  };

*** 2430,2463 ****
      if (active_workers == 0) {
        assert(active_workers > 0, "Should have been set earlier");
        active_workers = (uint) ParallelGCThreads;
        g1h->workers()->set_active_workers(active_workers);
      }
!     set_phase(active_workers, false /* concurrent */);
      // Leave _parallel_marking_threads at it's
      // value originally calculated in the ConcurrentMark
      // constructor and pass values of the active workers
      // through the gang in the task.
  
!     CMRemarkTask remarkTask(this, active_workers);
      g1h->set_par_threads(active_workers);
      g1h->workers()->run_task(&remarkTask);
      g1h->set_par_threads(0);
    } else {
      G1CollectedHeap::StrongRootsScope srs(g1h);
-     // this is remark, so we'll use up all available threads
      uint active_workers = 1;
!     set_phase(active_workers, false /* concurrent */);
  
!     CMRemarkTask remarkTask(this, active_workers);
!     // We will start all available threads, even if we decide that the
!     // active_workers will be fewer. The extra ones will just bail out
!     // immediately.
      remarkTask.work(0);
    }
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
!   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
  
    print_stats();
  
  #if VERIFY_OBJS_PROCESSED
    if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
--- 2466,2509 ----
      if (active_workers == 0) {
        assert(active_workers > 0, "Should have been set earlier");
        active_workers = (uint) ParallelGCThreads;
        g1h->workers()->set_active_workers(active_workers);
      }
!     set_concurrency_and_phase(active_workers, false /* concurrent */);
      // Leave _parallel_marking_threads at it's
      // value originally calculated in the ConcurrentMark
      // constructor and pass values of the active workers
      // through the gang in the task.
  
!     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
!     // We will start all available threads, even if we decide that the
!     // active_workers will be fewer. The extra ones will just bail out
!     // immediately.
      g1h->set_par_threads(active_workers);
      g1h->workers()->run_task(&remarkTask);
      g1h->set_par_threads(0);
    } else {
      G1CollectedHeap::StrongRootsScope srs(g1h);
      uint active_workers = 1;
!     set_concurrency_and_phase(active_workers, false /* concurrent */);
  
!     // Note - if there's no work gang then the VMThread will be
!     // the thread to execute the remark - serially. We have
!     // to pass true for the is_serial parameter so that
!     // CMTask::do_marking_step() doesn't enter the sync
!     // barriers in the event of an overflow. Doing so will
!     // cause an assert that the current thread is not a
!     // concurrent GC thread.
!     CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
      remarkTask.work(0);
    }
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
!   guarantee(has_overflown() ||
!             satb_mq_set.completed_buffers_num() == 0,
!             err_msg("Invariant: has_overflown = %s, num buffers = %d",
!                     BOOL_TO_STR(has_overflown()),
!                     satb_mq_set.completed_buffers_num()));
  
    print_stats();
  
  #if VERIFY_OBJS_PROCESSED
    if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {

*** 3774,3795 ****
  #endif // _MARKING_STATS_
  }
  
  /*****************************************************************************
  
!     The do_marking_step(time_target_ms) method is the building block
!     of the parallel marking framework. It can be called in parallel
      with other invocations of do_marking_step() on different tasks
      (but only one per task, obviously) and concurrently with the
      mutator threads, or during remark, hence it eliminates the need
      for two versions of the code. When called during remark, it will
      pick up from where the task left off during the concurrent marking
      phase. Interestingly, tasks are also claimable during evacuation
      pauses too, since do_marking_step() ensures that it aborts before
      it needs to yield.
  
!     The data structures that is uses to do marking work are the
      following:
  
      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
--- 3820,3841 ----
  #endif // _MARKING_STATS_
  }
  
  /*****************************************************************************
  
!     The do_marking_step(time_target_ms, ...) method is the building
!     block of the parallel marking framework. It can be called in parallel
      with other invocations of do_marking_step() on different tasks
      (but only one per task, obviously) and concurrently with the
      mutator threads, or during remark, hence it eliminates the need
      for two versions of the code. When called during remark, it will
      pick up from where the task left off during the concurrent marking
      phase. Interestingly, tasks are also claimable during evacuation
      pauses too, since do_marking_step() ensures that it aborts before
      it needs to yield.
  
!     The data structures that it uses to do marking work are the
      following:
  
      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
*** 3834,3844 ****
      (1) When the marking phase has been aborted (after a Full GC).
  
      (2) When a global overflow (on the global stack) has been
      triggered. Before the task aborts, it will actually sync up with
      the other tasks to ensure that all the marking data structures
!     (local queues, stacks, fingers etc.) are re-initialised so that
      when do_marking_step() completes, the marking phase can
      immediately restart.
  
      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
--- 3880,3890 ----
      (1) When the marking phase has been aborted (after a Full GC).
  
      (2) When a global overflow (on the global stack) has been
      triggered. Before the task aborts, it will actually sync up with
      the other tasks to ensure that all the marking data structures
!     (local queues, stacks, fingers etc.) are re-initialized so that
      when do_marking_step() completes, the marking phase can
      immediately restart.
  
      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
*** 3871,3885 ****
      too. The initial reason for the clock method was to avoid calling
      vtime too regularly, as it is quite expensive. So, once it was in
      place, it was natural to piggy-back all the other conditions on it
      too and not constantly check them throughout the code.
  
   *****************************************************************************/
  
  void CMTask::do_marking_step(double time_target_ms,
!                              bool do_stealing,
!                              bool do_termination) {
    assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
    assert(concurrent() == _cm->concurrent(), "they should be the same");
  
    G1CollectorPolicy* g1_policy = _g1h->g1_policy();
    assert(_task_queues != NULL, "invariant");
--- 3917,3945 ----
      too. The initial reason for the clock method was to avoid calling
      vtime too regularly, as it is quite expensive. So, once it was in
      place, it was natural to piggy-back all the other conditions on it
      too and not constantly check them throughout the code.
  
+     If do_termination is true then do_marking_step will enter its
+     termination protocol.
+ 
+     The value of is_serial must be true when do_marking_step is being
+     called serially (i.e. by the VMThread) and do_marking_step should
+     skip any synchronization in the termination and overflow code.
+     Examples include the serial remark code and the serial reference
+     processing closures.
+ 
+     The value of is_serial must be false when do_marking_step is
+     being called by any of the worker threads in a work gang.
+     Examples include the concurrent marking code (CMMarkingTask),
+     the MT remark code, and the MT reference processing closures.
+ 
   *****************************************************************************/
  
  void CMTask::do_marking_step(double time_target_ms,
!                              bool do_termination,
!                              bool is_serial) {
    assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
    assert(concurrent() == _cm->concurrent(), "they should be the same");
  
    G1CollectorPolicy* g1_policy = _g1h->g1_policy();
    assert(_task_queues != NULL, "invariant");

*** 3896,3905 ****
--- 3956,3971 ----
    _claimed = true;
    _start_time_ms = os::elapsedVTime() * 1000.0;
    statsOnly( _interval_start_time_ms = _start_time_ms );
  
+   // If do_stealing is true then do_marking_step will attempt to
+   // steal work from the other CMTasks. It only makes sense to
+   // enable stealing when the termination protocol is enabled
+   // and do_marking_step() is not being called serially.
+   bool do_stealing = do_termination && !is_serial;
+ 
    double diff_prediction_ms =
      g1_policy->get_new_prediction(&_marking_step_diffs_ms);
    _time_target_ms = time_target_ms - diff_prediction_ms;
  
    // set up the variables that are used in the work-based scheme to

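The hunk above derives do_stealing locally instead of taking it as a parameter. For reference, a small self-contained sketch of that derivation against the (do_termination, is_serial) pairs passed by the callers elsewhere in this change; stealing_enabled() is an illustrative stand-in, not HotSpot code.

  // Illustrative sketch only: mirrors "do_stealing = do_termination && !is_serial".
  #include <cstdio>

  static bool stealing_enabled(bool do_termination, bool is_serial) {
    return do_termination && !is_serial;
  }

  int main() {
    // Concurrent marking and MT remark pass (true, false) -> stealing enabled.
    std::printf("concurrent / MT remark: %d\n", stealing_enabled(true, false));
    // Serial remark (no work gang) passes (true, true)    -> no stealing.
    std::printf("serial remark:          %d\n", stealing_enabled(true, true));
    // The keep-alive closure passes (false, is_serial)    -> never steals.
    std::printf("keep-alive closure:     %d\n", stealing_enabled(false, false));
    return 0;
  }
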
*** 4136,4149 ****
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
    }
  
    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
!   bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;
  
    if (finished) {
--- 4202,4217 ----
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
    }
  
    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
+ 
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
!   bool finished = (is_serial ||
!                    _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;
  
    if (finished) {

*** 4219,4242 ****
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
      }
  
      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.
  
      statsOnly( ++_aborted_overflow );
  
      // We clear the local state of this task...
      clear_region_fields();
  
      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
!     // At this point everything has bee re-initialised and we're
      // ready to restart.
    }
  
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
--- 4287,4318 ----
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
      }
  
+     if (!is_serial) {
+       // We only need to enter the sync barrier if being called
+       // from a parallel context
      _cm->enter_first_sync_barrier(_task_id);
+ 
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.
+     }
  
      statsOnly( ++_aborted_overflow );
  
      // We clear the local state of this task...
      clear_region_fields();
  
+     if (!is_serial) {
      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
!     }
!     // At this point, if we're during the concurrent phase of
!     // marking, everything has been re-initialized and we're
      // ready to restart.
    }
  
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "