
src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp

rev 7902 : [mq]: 8073052-Rename-and-clean-up-the-allocation-manager-hierarchy-in-g1Allocator
rev 7903 : imported patch 8073013-add-detailed-information-about-plab-memory-usage
rev 7904 : imported patch 8040162-avoid-reallocating-plab-allocators
rev 7908 : [mq]: 8073317-move-region-level-allocation-into-allocregionmanager

@@ -44,10 +44,11 @@
+#include "gc_implementation/g1/g1EvacStats.hpp"
 #include "gc_implementation/g1/g1GCPhaseTimes.hpp"
 #include "gc_implementation/g1/g1Log.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1ParScanThreadState.inline.hpp"
 #include "gc_implementation/g1/g1RegionToSpaceMapper.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
 #include "gc_implementation/g1/g1StringDedup.hpp"
 #include "gc_implementation/g1/g1YCTypes.hpp"
 #include "gc_implementation/g1/heapRegion.inline.hpp"

@@ -666,11 +667,11 @@
   assert(hr == NULL ||
          (hr->end() == new_end && hr->top() == new_top), "sanity");
   check_bitmaps("Humongous Region Allocation", first_hr);
 
   assert(first_hr->used() == word_size * HeapWordSize, "invariant");
-  _allocator->increase_used(first_hr->used());
+  increase_used(first_hr->used());
   _humongous_set.add(first_hr);
 
   return new_obj;
 }
 

@@ -849,26 +850,24 @@
     bool should_try_gc;
     uint gc_count_before;
 
     {
       MutexLockerEx x(Heap_lock);
-      result = _allocator->mutator_alloc_region(context)->attempt_allocation_locked(word_size,
-                                                                                    false /* bot_updates */);
+      result = _allocator->par_allocate_during_mutator_locked(word_size, false /* bot_updates */, context);
       if (result != NULL) {
         return result;
       }
 
       // If we reach here, attempt_allocation_locked() above failed to
       // allocate a new region. So the mutator alloc region should be NULL.
-      assert(_allocator->mutator_alloc_region(context)->get() == NULL, "only way to get here");
+//      assert(_allocator->mutator_alloc_region(context)->get() == NULL, "only way to get here");
 
       if (GC_locker::is_active_and_needs_gc()) {
         if (g1_policy()->can_expand_young_list()) {
           // No need for an ergo verbose message here,
           // can_expand_young_list() does this when it returns true.
-          result = _allocator->mutator_alloc_region(context)->attempt_allocation_force(word_size,
-                                                                                       false /* bot_updates */);
+          result = _allocator->par_allocate_during_mutator_force(word_size, false /* bot_updates */, context);
           if (result != NULL) {
             return result;
           }
         }
         should_try_gc = false;

@@ -924,12 +923,11 @@
     // allocation attempt in case another thread successfully
     // performed a collection and reclaimed enough space. We do the
     // first attempt (without holding the Heap_lock) here and the
     // follow-on attempt will be at the start of the next loop
     // iteration (after taking the Heap_lock).
-    result = _allocator->mutator_alloc_region(context)->attempt_allocation(word_size,
-                                                                           false /* bot_updates */);
+    result = _allocator->par_allocate_during_mutator(word_size, false /* bot_updates */, context);
     if (result != NULL) {
       return result;
     }
 
     // Give a warning if we seem to be looping forever.

@@ -1064,17 +1062,17 @@
 
 HeapWord* G1CollectedHeap::attempt_allocation_at_safepoint(size_t word_size,
                                                            AllocationContext_t context,
                                                            bool expect_null_mutator_alloc_region) {
   assert_at_safepoint(true /* should_be_vm_thread */);
+  /*
   assert(_allocator->mutator_alloc_region(context)->get() == NULL ||
                                              !expect_null_mutator_alloc_region,
          "the current alloc region was unexpectedly found to be non-NULL");
-
+*/
   if (!is_humongous(word_size)) {
-    return _allocator->mutator_alloc_region(context)->attempt_allocation_locked(word_size,
-                                                      false /* bot_updates */);
+    return _allocator->par_allocate_during_mutator_locked(word_size, false /* bot_updates */, context);
   } else {
     HeapWord* result = humongous_obj_allocate(word_size, context);
     if (result != NULL && g1_policy()->need_to_start_conc_mark("STW humongous allocation")) {
       g1_policy()->set_initiate_conc_mark_if_possible();
     }

@@ -1781,12 +1779,10 @@
   _humongous_is_live(),
   _has_humongous_reclaim_candidates(false),
   _free_regions_coming(false),
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
-  _survivor_plab_stats(YoungPLABSize, PLABWeight),
-  _old_plab_stats(OldPLABSize, PLABWeight),
   _expand_heap_after_alloc_failure(true),
   _surviving_young_words(NULL),
   _old_marking_cycles_started(0),
   _old_marking_cycles_completed(0),
   _concurrent_cycle_started(false),

@@ -2216,15 +2212,15 @@
 }
 
 
 // Computes the sum of the storage used by the various regions.
 size_t G1CollectedHeap::used() const {
-  return _allocator->used();
+  return _summary_bytes_used + _allocator->used_in_alloc_regions();
 }
 
 size_t G1CollectedHeap::used_unlocked() const {
-  return _allocator->used_unlocked();
+  return _summary_bytes_used;
 }
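
With this patch the summary byte count is owned by G1CollectedHeap itself (_summary_bytes_used); the allocator is only asked for the bytes still sitting in the currently active alloc regions. The increase_used()/set_used()/decrease_used() calls introduced throughout this file are presumably thin wrappers over that field. A minimal sketch of the assumed heap-side helpers (names taken from this diff, bodies not part of this webrev):

    // Sketch only: assumed G1CollectedHeap helpers backing the calls in this patch.
    void G1CollectedHeap::set_used(size_t bytes)      { _summary_bytes_used = bytes; }
    void G1CollectedHeap::increase_used(size_t bytes) { _summary_bytes_used += bytes; }
    void G1CollectedHeap::decrease_used(size_t bytes) {
      assert(_summary_bytes_used >= bytes, "invariant");
      _summary_bytes_used -= bytes;
    }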
 
 class SumUsedClosure: public HeapRegionClosure {
   size_t _used;
 public:

@@ -2714,24 +2710,11 @@
 size_t G1CollectedHeap::max_tlab_size() const {
   return align_size_down(_humongous_object_threshold_in_words - 1, MinObjAlignment);
 }
 
 size_t G1CollectedHeap::unsafe_max_tlab_alloc(Thread* ignored) const {
-  // Return the remaining space in the cur alloc region, but not less than
-  // the min TLAB size.
-
-  // Also, this value can be at most the humongous object threshold,
-  // since we can't allow tlabs to grow big enough to accommodate
-  // humongous objects.
-
-  HeapRegion* hr = _allocator->mutator_alloc_region(AllocationContext::current())->get();
-  size_t max_tlab = max_tlab_size() * wordSize;
-  if (hr == NULL) {
-    return max_tlab;
-  } else {
-    return MIN2(MAX2(hr->free(), (size_t) MinTLABSize), max_tlab);
-  }
+  return _allocator->unsafe_max_tlab_alloc();
 }
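
The TLAB sizing logic removed above now lives behind G1Allocator::unsafe_max_tlab_alloc(). A sketch of what the allocator-side method presumably looks like, simply re-hosting the deleted computation (the _g1h back-pointer and the mutator_alloc_region() accessor on the allocator are assumptions):

    // Sketch: assumed allocator-side implementation mirroring the code removed above.
    size_t G1Allocator::unsafe_max_tlab_alloc() {
      // Remaining space in the current mutator alloc region, clamped to at least
      // MinTLABSize and at most the humongous object threshold.
      HeapRegion* hr = mutator_alloc_region(AllocationContext::current())->get();
      size_t max_tlab = _g1h->max_tlab_size() * wordSize;
      if (hr == NULL) {
        return max_tlab;
      }
      return MIN2(MAX2(hr->free(), (size_t) MinTLABSize), max_tlab);
    }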
 
 size_t G1CollectedHeap::max_capacity() const {
   return _hrm.reserved().byte_size();
 }

@@ -3936,21 +3919,21 @@
                                              _young_list->last_survivor_region());
 
         _young_list->reset_auxilary_lists();
 
         if (evacuation_failed()) {
-          _allocator->set_used(recalculate_used());
+          set_used(recalculate_used());
           uint n_queues = MAX2((int)ParallelGCThreads, 1);
           for (uint i = 0; i < n_queues; i++) {
             if (_evacuation_failed_info_array[i].has_failed()) {
               _gc_tracer_stw->report_evacuation_failed(_evacuation_failed_info_array[i]);
             }
           }
         } else {
          // The "used" of the collection set regions has already been subtracted
           // when they were freed.  Add in the bytes evacuated.
-          _allocator->increase_used(g1_policy()->bytes_copied_during_gc());
+          increase_used(g1_policy()->bytes_copied_during_gc());
         }
 
         if (g1_policy()->during_initial_mark_pause()) {
           // We have to do this before we notify the CM threads that
           // they can start working to make sure that all the

@@ -4153,11 +4136,11 @@
   oop forward_ptr = old->forward_to_atomic(old);
   if (forward_ptr == NULL) {
     // Forward-to-self succeeded.
     assert(_par_scan_state != NULL, "par scan state");
     OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
-    uint queue_num = _par_scan_state->queue_num();
+    uint queue_num = _par_scan_state->worker_queue_id();
 
     _evacuation_failed = true;
     _evacuation_failed_info_array[queue_num].register_copy_failure(old->size());
     if (_evac_failure_closure != cl) {
       MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag);

@@ -4255,11 +4238,11 @@
     return;
   }
 
   oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
 
-  assert(_worker_id == _par_scan_state->queue_num(), "sanity");
+  assert(_worker_id == _par_scan_state->worker_queue_id(), "sanity");
 
   const InCSetState state = _g1->in_cset_state(obj);
   if (state.is_in_cset()) {
     oop forwardee;
     markOop m = obj->mark();

@@ -4297,10 +4280,17 @@
 
 template void G1ParCopyClosure<G1BarrierEvac, G1MarkNone>::do_oop_work(oop* p);
 template void G1ParCopyClosure<G1BarrierEvac, G1MarkNone>::do_oop_work(narrowOop* p);
 
 class G1ParEvacuateFollowersClosure : public VoidClosure {
+private:
+  double _start_term;
+  double _term_time;
+  size_t _term_attempts;
+
+  void start_term_time() { _term_attempts++; _start_term = os::elapsedTime(); }
+  void end_term_time() { _term_time += os::elapsedTime() - _start_term; }
 protected:
   G1CollectedHeap*              _g1h;
   G1ParScanThreadState*         _par_scan_state;
   RefToScanQueueSet*            _queues;
   ParallelTaskTerminator*       _terminator;

@@ -4313,23 +4303,27 @@
   G1ParEvacuateFollowersClosure(G1CollectedHeap* g1h,
                                 G1ParScanThreadState* par_scan_state,
                                 RefToScanQueueSet* queues,
                                 ParallelTaskTerminator* terminator)
     : _g1h(g1h), _par_scan_state(par_scan_state),
-      _queues(queues), _terminator(terminator) {}
+      _queues(queues), _terminator(terminator),
+      _start_term(0.0), _term_time(0.0), _term_attempts(0) {}
 
   void do_void();
 
+  double term_time() const { return _term_time; }
+  size_t term_attempts() const { return _term_attempts; }
+
 private:
   inline bool offer_termination();
 };
 
 bool G1ParEvacuateFollowersClosure::offer_termination() {
   G1ParScanThreadState* const pss = par_scan_state();
-  pss->start_term_time();
+  start_term_time();
   const bool res = terminator()->offer_termination();
-  pss->end_term_time();
+  end_term_time();
   return res;
 }
 
 void G1ParEvacuateFollowersClosure::do_void() {
   G1ParScanThreadState* const pss = par_scan_state();

@@ -4416,24 +4410,22 @@
 };
 
 class G1ParTask : public AbstractGangTask {
 protected:
   G1CollectedHeap*       _g1h;
-  RefToScanQueueSet      *_queues;
+  G1ParScanThreadState** _pss;
+  RefToScanQueueSet*     _queues;
   ParallelTaskTerminator _terminator;
   uint _n_workers;
 
-  Mutex _stats_lock;
-  Mutex* stats_lock() { return &_stats_lock; }
-
 public:
-  G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues)
+  G1ParTask(G1CollectedHeap* g1h, G1ParScanThreadState** pss, RefToScanQueueSet *task_queues)
     : AbstractGangTask("G1 collection"),
       _g1h(g1h),
+      _pss(pss),
       _queues(task_queues),
-      _terminator(0, _queues),
-      _stats_lock(Mutex::leaf, "parallel G1 stats lock", true)
+      _terminator(0, _queues)
   {}
 
   RefToScanQueueSet* queues() { return _queues; }
 
   RefToScanQueue *work_queue(int i) {

@@ -4493,30 +4485,28 @@
       ResourceMark rm;
       HandleMark   hm;
 
       ReferenceProcessor*             rp = _g1h->ref_processor_stw();
 
-      G1ParScanThreadState            pss(_g1h, worker_id, rp);
-      G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
-
-      pss.set_evac_failure_closure(&evac_failure_cl);
+      G1ParScanThreadState*           pss = _pss[worker_id];
+      pss->set_ref_processor(rp);
 
       bool only_young = _g1h->g1_policy()->gcs_are_young();
 
       // Non-IM young GC.
-      G1ParCopyClosure<G1BarrierNone, G1MarkNone>             scan_only_root_cl(_g1h, &pss, rp);
+      G1ParCopyClosure<G1BarrierNone, G1MarkNone>             scan_only_root_cl(_g1h, pss, rp);
       G1CLDClosure<G1MarkNone>                                scan_only_cld_cl(&scan_only_root_cl,
                                                                                only_young, // Only process dirty klasses.
                                                                                false);     // No need to claim CLDs.
       // IM young GC.
       //    Strong roots closures.
-      G1ParCopyClosure<G1BarrierNone, G1MarkFromRoot>         scan_mark_root_cl(_g1h, &pss, rp);
+      G1ParCopyClosure<G1BarrierNone, G1MarkFromRoot>         scan_mark_root_cl(_g1h, pss, rp);
       G1CLDClosure<G1MarkFromRoot>                            scan_mark_cld_cl(&scan_mark_root_cl,
                                                                                false, // Process all klasses.
                                                                                true); // Need to claim CLDs.
       //    Weak roots closures.
-      G1ParCopyClosure<G1BarrierNone, G1MarkPromotedFromRoot> scan_mark_weak_root_cl(_g1h, &pss, rp);
+      G1ParCopyClosure<G1BarrierNone, G1MarkPromotedFromRoot> scan_mark_weak_root_cl(_g1h, pss, rp);
       G1CLDClosure<G1MarkPromotedFromRoot>                    scan_mark_weak_cld_cl(&scan_mark_weak_root_cl,
                                                                                     false, // Process all klasses.
                                                                                     true); // Need to claim CLDs.
 
       G1CodeBlobClosure scan_only_code_cl(&scan_only_root_cl);

@@ -4548,52 +4538,85 @@
         weak_cld_cl    = &scan_only_cld_cl;
         strong_code_cl = &scan_only_code_cl;
       }
 
 
-      G1ParPushHeapRSClosure  push_heap_rs_cl(_g1h, &pss);
+      G1ParPushHeapRSClosure  push_heap_rs_cl(_g1h, pss);
 
-      pss.start_strong_roots();
+      double start_strong_roots = os::elapsedTime();
       _g1h->g1_process_roots(strong_root_cl,
                              weak_root_cl,
                              &push_heap_rs_cl,
                              strong_cld_cl,
                              weak_cld_cl,
                              strong_code_cl,
                              worker_id);
-
-      pss.end_strong_roots();
-
+      double strong_roots_time = os::elapsedTime() - start_strong_roots;
+      double evac_term_time = 0.0;
+      size_t evac_term_attempts = 0;
       {
         double start = os::elapsedTime();
-        G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
+        G1ParEvacuateFollowersClosure evac(_g1h, pss, _queues, &_terminator);
         evac.do_void();
         double elapsed_ms = (os::elapsedTime()-start)*1000.0;
-        double term_ms = pss.term_time()*1000.0;
-        _g1h->g1_policy()->phase_times()->add_obj_copy_time(worker_id, elapsed_ms-term_ms);
-        _g1h->g1_policy()->phase_times()->record_termination(worker_id, term_ms, pss.term_attempts());
-      }
-      _g1h->g1_policy()->record_thread_age_table(pss.age_table());
-      _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
+        evac_term_attempts = evac.term_attempts();
+        evac_term_time = evac.term_time();
+        double term_ms = evac_term_time * 1000.0;
+        _g1h->g1_policy()->phase_times()->add_obj_copy_time(worker_id, elapsed_ms - term_ms);
+        _g1h->g1_policy()->phase_times()->record_termination(worker_id, term_ms, evac.term_attempts());
+      }
+      _g1h->g1_policy()->record_thread_age_table(pss->age_table());
+      _g1h->update_surviving_young_words(pss->surviving_young_words()+1);
+      assert(pss->queue_is_empty(), "should be empty");
 
       if (PrintTerminationStats) {
-        MutexLocker x(stats_lock());
-        pss.print_termination_stats(worker_id);
+        MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
+        _g1h->print_termination_stats(gclog_or_tty,
+                                      worker_id,
+                                      os::elapsedTime() * 1000.0 - start_time_ms, /* elapsed time */
+                                      strong_roots_time * 1000.0,                 /* strong roots time */
+                                      evac_term_time * 1000.0,                    /* evac term time */
+                                      evac_term_attempts,                         /* evac term attempts */
+                                      pss->lab_waste(),                           /* alloc buffer waste */
+                                      pss->lab_undo_waste()                       /* undo waste */
+                                      );
       }
-
-      assert(pss.queue_is_empty(), "should be empty");
-
       // Close the inner scope so that the ResourceMark and HandleMark
       // destructors are executed here and are included as part of the
       // "GC Worker Time".
     }
 
     double end_time_ms = os::elapsedTime() * 1000.0;
     _g1h->g1_policy()->phase_times()->record_gc_worker_end_time(worker_id, end_time_ms);
   }
 };
 
+void G1CollectedHeap::print_termination_stats_hdr(outputStream* const st) {
+  st->print_raw_cr("GC Termination Stats");
+  st->print_raw_cr("     elapsed  --strong roots-- -------termination------- ------waste (KiB)------");
+  st->print_raw_cr("thr     ms        ms      %        ms      %    attempts  total   alloc    undo");
+  st->print_raw_cr("--- --------- --------- ------ --------- ------ -------- ------- ------- -------");
+}
+
+void G1CollectedHeap::print_termination_stats(outputStream* const st,
+                                              uint worker_id,
+                                              double elapsed_ms,
+                                              double strong_roots_ms,
+                                              double term_ms,
+                                              size_t term_attempts,
+                                              size_t alloc_buffer_waste,
+                                              size_t undo_waste) const {
+  st->print_cr("%3d %9.2f %9.2f %6.2f "
+               "%9.2f %6.2f " SIZE_FORMAT_W(8) " "
+               SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7) " " SIZE_FORMAT_W(7),
+               worker_id, elapsed_ms, strong_roots_ms, strong_roots_ms * 100 / elapsed_ms,
+               term_ms, term_ms * 100 / elapsed_ms, term_attempts,
+               (alloc_buffer_waste + undo_waste) * HeapWordSize / K,
+               alloc_buffer_waste * HeapWordSize / K,
+               undo_waste * HeapWordSize / K);
+}
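
Both printers above are new G1CollectedHeap members (this reporting used to live in G1ParScanThreadState, serialized by the now-removed per-task stats lock; the callers here use ParGCRareEvent_lock instead). Their declarations, which presumably go into g1CollectedHeap.hpp, would be roughly:

    // Assumed declarations matching the definitions above (the .hpp is not shown in this webrev).
    void print_termination_stats_hdr(outputStream* const st);
    void print_termination_stats(outputStream* const st,
                                 uint worker_id,
                                 double elapsed_ms,
                                 double strong_roots_ms,
                                 double term_ms,
                                 size_t term_attempts,
                                 size_t alloc_buffer_waste,
                                 size_t undo_waste) const;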
+
 // *** Common G1 Evacuation Stuff
 
 // This method is run in a GC worker.
 
 void

@@ -5237,20 +5260,23 @@
 // processing during G1 evacuation pauses.
 
 class G1STWRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
 private:
   G1CollectedHeap*   _g1h;
+  G1ParScanThreadState**  _pss;
   RefToScanQueueSet* _queues;
   FlexibleWorkGang*  _workers;
   int                _active_workers;
 
 public:
   G1STWRefProcTaskExecutor(G1CollectedHeap* g1h,
+                           G1ParScanThreadState** pss,
                         FlexibleWorkGang* workers,
                         RefToScanQueueSet *task_queues,
                         int n_workers) :
     _g1h(g1h),
+    _pss(pss),
     _queues(task_queues),
     _workers(workers),
     _active_workers(n_workers)
   {
     assert(n_workers > 0, "shouldn't call this otherwise");

@@ -5265,21 +5291,24 @@
 
 class G1STWRefProcTaskProxy: public AbstractGangTask {
   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
   ProcessTask&     _proc_task;
   G1CollectedHeap* _g1h;
-  RefToScanQueueSet *_task_queues;
+  G1ParScanThreadState** _pss;
+  RefToScanQueueSet* _task_queues;
   ParallelTaskTerminator* _terminator;
 
 public:
   G1STWRefProcTaskProxy(ProcessTask& proc_task,
                      G1CollectedHeap* g1h,
+                        G1ParScanThreadState** pss,
                      RefToScanQueueSet *task_queues,
                      ParallelTaskTerminator* terminator) :
     AbstractGangTask("Process reference objects in parallel"),
     _proc_task(proc_task),
     _g1h(g1h),
+    _pss(pss),
     _task_queues(task_queues),
     _terminator(terminator)
   {}
 
   virtual void work(uint worker_id) {

@@ -5287,31 +5316,29 @@
     ResourceMark rm;
     HandleMark   hm;
 
     G1STWIsAliveClosure is_alive(_g1h);
 
-    G1ParScanThreadState            pss(_g1h, worker_id, NULL);
-    G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL);
-
-    pss.set_evac_failure_closure(&evac_failure_cl);
+    G1ParScanThreadState*           pss = _pss[worker_id];
+    pss->set_ref_processor(NULL);
 
-    G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, &pss, NULL);
+    G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, pss, NULL);
 
-    G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL);
+    G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, pss, NULL);
 
     OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
 
     if (_g1h->g1_policy()->during_initial_mark_pause()) {
       // We also need to mark copied objects.
       copy_non_heap_cl = &copy_mark_non_heap_cl;
     }
 
     // Keep alive closure.
-    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss);
+    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, pss);
 
     // Complete GC closure
-    G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _task_queues, _terminator);
+    G1ParEvacuateFollowersClosure drain_queue(_g1h, pss, _task_queues, _terminator);
 
     // Call the reference processing task's work routine.
     _proc_task.work(worker_id, is_alive, keep_alive, drain_queue);
 
     // Note we cannot assert that the refs array is empty here as not all

@@ -5326,11 +5353,11 @@
 // task and has the worker threads execute it.
 void G1STWRefProcTaskExecutor::execute(ProcessTask& proc_task) {
   assert(_workers != NULL, "Need parallel worker threads.");
 
   ParallelTaskTerminator terminator(_active_workers, _queues);
-  G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _queues, &terminator);
+  G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _pss, _queues, &terminator);
 
   _g1h->set_par_threads(_active_workers);
   _workers->run_task(&proc_task_proxy);
   _g1h->set_par_threads(0);
 }

@@ -5373,37 +5400,36 @@
 // objects discovered by the CM ref processor.
 
 class G1ParPreserveCMReferentsTask: public AbstractGangTask {
 protected:
   G1CollectedHeap* _g1h;
+  G1ParScanThreadState** _pss;
   RefToScanQueueSet      *_queues;
   ParallelTaskTerminator _terminator;
   uint _n_workers;
 
 public:
-  G1ParPreserveCMReferentsTask(G1CollectedHeap* g1h,int workers, RefToScanQueueSet *task_queues) :
+  G1ParPreserveCMReferentsTask(G1CollectedHeap* g1h, G1ParScanThreadState** pss, int workers, RefToScanQueueSet *task_queues) :
     AbstractGangTask("ParPreserveCMReferents"),
     _g1h(g1h),
+    _pss(pss),
     _queues(task_queues),
     _terminator(workers, _queues),
     _n_workers(workers)
   { }
 
   void work(uint worker_id) {
     ResourceMark rm;
     HandleMark   hm;
 
-    G1ParScanThreadState            pss(_g1h, worker_id, NULL);
-    G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, NULL);
-
-    pss.set_evac_failure_closure(&evac_failure_cl);
-
-    assert(pss.queue_is_empty(), "both queue and overflow should be empty");
+    G1ParScanThreadState*           pss = _pss[worker_id];
+    pss->set_ref_processor(NULL);
+    assert(pss->queue_is_empty(), "both queue and overflow should be empty");
 
-    G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, &pss, NULL);
+    G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, pss, NULL);
 
-    G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL);
+    G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, pss, NULL);
 
     OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
 
     if (_g1h->g1_policy()->during_initial_mark_pause()) {
       // We also need to mark copied objects.

@@ -5413,11 +5439,11 @@
     // Is alive closure
     G1AlwaysAliveClosure always_alive(_g1h);
 
     // Copying keep alive closure. Applied to referent objects that need
     // to be copied.
-    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss);
+    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, pss);
 
     ReferenceProcessor* rp = _g1h->ref_processor_cm();
 
     uint limit = ReferenceProcessor::number_of_subclasses_of_ref() * rp->max_num_q();
     uint stride = MIN2(MAX2(_n_workers, 1U), limit);

@@ -5446,19 +5472,19 @@
         iter.move_to_next();
       }
     }
 
     // Drain the queue - which may cause stealing
-    G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _queues, &_terminator);
+    G1ParEvacuateFollowersClosure drain_queue(_g1h, pss, _queues, &_terminator);
     drain_queue.do_void();
     // Allocation buffers were retired at the end of G1ParEvacuateFollowersClosure
-    assert(pss.queue_is_empty(), "should be");
+    assert(pss->queue_is_empty(), "should be");
   }
 };
 
 // Weak Reference processing during an evacuation pause (part 1).
-void G1CollectedHeap::process_discovered_references(uint no_of_gc_workers) {
+void G1CollectedHeap::process_discovered_references(G1ParScanThreadState** pss_, uint no_of_gc_workers) {
   double ref_proc_start = os::elapsedTime();
 
   ReferenceProcessor* rp = _ref_processor_stw;
   assert(rp->discovery_enabled(), "should have been enabled");
 

@@ -5485,10 +5511,11 @@
 
   assert(no_of_gc_workers == workers()->active_workers(), "Need to reset active GC workers");
 
   set_par_threads(no_of_gc_workers);
   G1ParPreserveCMReferentsTask keep_cm_referents(this,
+                                                 pss_,
                                                  no_of_gc_workers,
                                                  _task_queues);
 
   workers()->run_task(&keep_cm_referents);
 

@@ -5501,37 +5528,34 @@
   // of JNI refs is serial and performed serially by the current thread
   // rather than by a worker. The following PSS will be used for processing
   // JNI refs.
 
   // Use only a single queue for this PSS.
-  G1ParScanThreadState            pss(this, 0, NULL);
+  G1ParScanThreadState*           pss = pss_[0];
+  pss->set_ref_processor(NULL);
+  assert(pss->queue_is_empty(), "pre-condition");
 
   // We do not embed a reference processor in the copying/scanning
   // closures while we're actually processing the discovered
   // reference objects.
-  G1ParScanHeapEvacFailureClosure evac_failure_cl(this, &pss, NULL);
 
-  pss.set_evac_failure_closure(&evac_failure_cl);
+  G1ParScanExtRootClosure        only_copy_non_heap_cl(this, pss, NULL);
 
-  assert(pss.queue_is_empty(), "pre-condition");
-
-  G1ParScanExtRootClosure        only_copy_non_heap_cl(this, &pss, NULL);
-
-  G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, &pss, NULL);
+  G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, pss, NULL);
 
   OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
 
   if (_g1h->g1_policy()->during_initial_mark_pause()) {
     // We also need to mark copied objects.
     copy_non_heap_cl = &copy_mark_non_heap_cl;
   }
 
   // Keep alive closure.
-  G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, &pss);
+  G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, pss);
 
   // Serial Complete GC closure
-  G1STWDrainQueueClosure drain_queue(this, &pss);
+  G1STWDrainQueueClosure drain_queue(this, pss);
 
   // Setup the soft refs policy...
   rp->setup_policy(false);
 
   ReferenceProcessorStats stats;

@@ -5546,11 +5570,11 @@
   } else {
     // Parallel reference processing
     assert(rp->num_q() == no_of_gc_workers, "sanity");
     assert(no_of_gc_workers <= rp->max_num_q(), "sanity");
 
-    G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, no_of_gc_workers);
+    G1STWRefProcTaskExecutor par_task_executor(this, pss_, workers(), _task_queues, no_of_gc_workers);
     stats = rp->process_discovered_references(&is_alive,
                                               &keep_alive,
                                               &drain_queue,
                                               &par_task_executor,
                                               _gc_timer_stw,

@@ -5558,11 +5582,11 @@
   }
 
   _gc_tracer_stw->report_gc_reference_stats(stats);
 
   // We have completed copying any necessary live referent objects.
-  assert(pss.queue_is_empty(), "both queue and overflow should be empty");
+  assert(pss->queue_is_empty(), "both queue and overflow should be empty");
 
   double ref_proc_time = os::elapsedTime() - ref_proc_start;
   g1_policy()->phase_times()->record_ref_proc_time(ref_proc_time * 1000.0);
 }
 

@@ -5584,11 +5608,11 @@
     assert(no_of_gc_workers == workers()->active_workers(),
            "Need to reset active workers");
     assert(rp->num_q() == no_of_gc_workers, "sanity");
     assert(no_of_gc_workers <= rp->max_num_q(), "sanity");
 
-    G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, no_of_gc_workers);
+    G1STWRefProcTaskExecutor par_task_executor(this, NULL, workers(), _task_queues, no_of_gc_workers);
     rp->enqueue_discovered_references(&par_task_executor);
   }
 
   rp->verify_no_references_recorded();
   assert(!rp->discovery_enabled(), "should have been disabled");

@@ -5625,11 +5649,16 @@
          n_workers == workers()->total_workers(),
          "If not dynamic should be using all the  workers");
   workers()->set_active_workers(n_workers);
   set_par_threads(n_workers);
 
-  G1ParTask g1_par_task(this, _task_queues);
+  G1ParScanThreadState** per_thread_states = NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC);
+  for (uint i = 0; i < n_workers; i++) {
+    per_thread_states[i] = new G1ParScanThreadState(this, i);
+  }
+
+  G1ParTask g1_par_task(this, per_thread_states, _task_queues);
 
   init_for_evac_failure(NULL);
 
   assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty");
   double start_par_time_sec = os::elapsedTime();

@@ -5641,11 +5670,13 @@
     if (g1_policy()->during_initial_mark_pause()) {
       ClassLoaderDataGraph::clear_claimed_marks();
     }
 
      // The individual threads will set their evac-failure closures.
-     if (PrintTerminationStats) G1ParScanThreadState::print_termination_stats_hdr();
+     if (PrintTerminationStats) {
+       print_termination_stats_hdr(gclog_or_tty);
+     }
      // These tasks use SharedHeap::_process_strong_tasks
      assert(UseDynamicNumberOfGCThreads ||
             workers()->active_workers() == workers()->total_workers(),
             "If not dynamic should be using all the  workers");
     workers()->run_task(&g1_par_task);

@@ -5670,18 +5701,25 @@
   // Process any discovered reference objects - we have
   // to do this _before_ we retire the GC alloc regions
   // as we may have to copy some 'reachable' referent
   // objects (and their reachable sub-graphs) that were
   // not copied during the pause.
-  process_discovered_references(n_workers);
+  process_discovered_references(per_thread_states, n_workers);
 
   if (G1StringDedup::is_enabled()) {
     G1STWIsAliveClosure is_alive(this);
     G1KeepAliveClosure keep_alive(this);
     G1StringDedup::unlink_or_oops_do(&is_alive, &keep_alive);
   }
 
+  for (uint i = 0; i < n_workers; i++) {
+    delete per_thread_states[i];
+  }
+  FREE_C_HEAP_ARRAY(G1ParScanThreadState*, per_thread_states);
+
+  record_obj_copy_mem_stats();
+
   _allocator->release_gc_alloc_regions(n_workers, evacuation_info);
   g1_rem_set()->cleanup_after_oops_into_collection_set_do();
 
   // Reset and re-enable the hot card cache.
   // Note the counts for the cards in the regions in the

@@ -5713,10 +5751,35 @@
 
   redirty_logged_cards();
   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
 }
 
+void G1CollectedHeap::record_obj_copy_mem_stats() {
+  record_obj_copy_mem_stats(InCSetState::Young);
+  record_obj_copy_mem_stats(InCSetState::Old);
+}
+
+void G1CollectedHeap::record_obj_copy_mem_stats(InCSetState which) {
+  G1EvacStats* stats = _allocator->evac_stats(which);
+
+  EventGCG1EvacuationMemoryStatistics e;
+  if (e.should_commit()) {
+    e.set_gcId(GCId::peek().id());
+    e.set_gen(InCSetState::to_gen_number(which));
+    e.set_allocated(stats->allocated() * HeapWordSize);
+    e.set_wasted(stats->wasted() * HeapWordSize);
+    e.set_used(stats->used() * HeapWordSize);
+    e.set_undo_waste(stats->undo_waste() * HeapWordSize);
+    e.set_region_end_waste(stats->region_end_waste() * HeapWordSize);
+    e.set_regions_refilled(stats->regions_refilled());
+    e.set_inline_allocated(stats->inline_allocated() * HeapWordSize);
+    e.set_failure_used(stats->failure_used() * HeapWordSize);
+    e.set_failure_waste(stats->failure_waste() * HeapWordSize);
+    e.commit();
+  }
+}
+
 void G1CollectedHeap::free_region(HeapRegion* hr,
                                   FreeRegionList* free_list,
                                   bool par,
                                   bool locked) {
   assert(!hr->is_free(), "the region should not be free");

@@ -5779,11 +5842,11 @@
     _hrm.insert_list_into_free_list(list);
   }
 }
 
 void G1CollectedHeap::decrement_summary_bytes(size_t bytes) {
-  _allocator->decrease_used(bytes);
+  decrease_used(bytes);
 }
 
 class G1ParCleanupCTTask : public AbstractGangTask {
   G1SATBCardTableModRefBS* _ct_bs;
   G1CollectedHeap* _g1h;

@@ -6113,10 +6176,15 @@
         cur->set_young_index_in_cset(-1);
       }
       cur->set_evacuation_failed(false);
       // The region is now considered to be old.
       cur->set_old();
+      // Do some allocation statistics accounting. Regions that failed evacuation
+      // are always made old, so there is no need to update anything in the young
+      // gen statistics.
+      size_t used_words = cur->marked_bytes() / HeapWordSize;
+      _allocator->evac_stats(InCSetState::Old)->add_failure_used_and_waste(used_words, HeapRegion::GrainWords - used_words);
       _old_set.add(cur);
       evacuation_info.increment_collectionset_used_after(cur->used());
     }
     cur = next;
   }
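
The accounting added above charges a region that failed evacuation entirely to the old-gen evacuation statistics: the words marked live in the region count as used, the remainder of the region as waste. add_failure_used_and_waste() is presumably just an accumulator on G1EvacStats; a minimal sketch under that assumption:

    // Sketch: assumed G1EvacStats accumulator used by the accounting above.
    void G1EvacStats::add_failure_used_and_waste(size_t used, size_t waste) {
      _failure_used  += used;   // may need atomic updates if ever called concurrently
      _failure_waste += waste;
    }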

@@ -6495,16 +6563,16 @@
 
   RebuildRegionSetsClosure cl(free_list_only, &_old_set, &_hrm);
   heap_region_iterate(&cl);
 
   if (!free_list_only) {
-    _allocator->set_used(cl.total_used());
+    set_used(cl.total_used());
   }
-  assert(_allocator->used_unlocked() == recalculate_used(),
+  assert(used_unlocked() == recalculate_used(),
         err_msg("inconsistent used_unlocked(), "
                  "value: "SIZE_FORMAT" recalculated: "SIZE_FORMAT,
-                 _allocator->used_unlocked(), recalculate_used()));
+                 used_unlocked(), recalculate_used()));
 }
 
 void G1CollectedHeap::set_refine_cte_cl_concurrency(bool concurrent) {
   _refine_cte_cl->set_concurrent(concurrent);
 }

@@ -6540,33 +6608,18 @@
                                                   size_t allocated_bytes) {
   assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */);
   assert(alloc_region->is_eden(), "all mutator alloc regions should be eden");
 
   g1_policy()->add_region_to_incremental_cset_lhs(alloc_region);
-  _allocator->increase_used(allocated_bytes);
+  increase_used(allocated_bytes);
   _hr_printer.retire(alloc_region);
   // We update the eden sizes here, when the region is retired,
   // instead of when it's allocated, since this is the point that its
  // used space has been recorded in _summary_bytes_used.
   g1mm()->update_eden_size();
 }
 
-void G1CollectedHeap::set_par_threads() {
-  // Don't change the number of workers.  Use the value previously set
-  // in the workgroup.
-  uint n_workers = workers()->active_workers();
-  assert(UseDynamicNumberOfGCThreads ||
-           n_workers == workers()->total_workers(),
-      "Otherwise should be using the total number of workers");
-  if (n_workers == 0) {
-    assert(false, "Should have been set in prior evacuation pause.");
-    n_workers = ParallelGCThreads;
-    workers()->set_active_workers(n_workers);
-  }
-  set_par_threads(n_workers);
-}
-
 // Methods for the GC alloc regions
 
 HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size,
                                                  uint count,
                                                  InCSetState dest) {

@@ -6611,10 +6664,25 @@
     _old_set.add(alloc_region);
   }
   _hr_printer.retire(alloc_region);
 }
 
+void G1CollectedHeap::set_par_threads() {
+  // Don't change the number of workers.  Use the value previously set
+  // in the workgroup.
+  uint n_workers = workers()->active_workers();
+  assert(UseDynamicNumberOfGCThreads ||
+           n_workers == workers()->total_workers(),
+      "Otherwise should be using the total number of workers");
+  if (n_workers == 0) {
+    assert(false, "Should have been set in prior evacuation pause.");
+    n_workers = ParallelGCThreads;
+    workers()->set_active_workers(n_workers);
+  }
+  set_par_threads(n_workers);
+}
+
 // Heap region set verification
 
 class VerifyRegionListsClosure : public HeapRegionClosure {
 private:
   HeapRegionSet*   _old_set;