--- old/src/share/vm/gc/cms/parNewGeneration.cpp	2015-05-20 11:05:15.345850041 +0200
+++ new/src/share/vm/gc/cms/parNewGeneration.cpp	2015-05-20 11:05:15.177844364 +0200
@@ -567,21 +567,19 @@
 }
 
 ParNewGenTask::ParNewGenTask(ParNewGeneration* gen, Generation* old_gen,
-                             HeapWord* young_old_boundary, ParScanThreadStateSet* state_set) :
+                             HeapWord* young_old_boundary, ParScanThreadStateSet* state_set,
+                             StrongRootsScope* strong_roots_scope) :
     AbstractGangTask("ParNewGeneration collection"),
     _gen(gen), _old_gen(old_gen),
     _young_old_boundary(young_old_boundary),
-    _state_set(state_set)
+    _state_set(state_set),
+    _strong_roots_scope(strong_roots_scope)
   {}
 
 // Reset the terminator for the given number of
 // active threads.
 void ParNewGenTask::set_for_termination(uint active_workers) {
   _state_set->reset(active_workers, _gen->promotion_failed());
-  // Should the heap be passed in?  There's only 1 for now so
-  // grab it instead.
-  GenCollectedHeap* gch = GenCollectedHeap::heap();
-  gch->set_n_termination(active_workers);
 }
 
 void ParNewGenTask::work(uint worker_id) {
@@ -603,10 +601,10 @@
                                            false);
 
   par_scan_state.start_strong_roots();
-  gch->gen_process_roots(_gen->level(),
+  gch->gen_process_roots(_strong_roots_scope,
+                         _gen->level(),
                          true,  // Process younger gens, if any,
                                 // as strong roots.
-                         false, // no scope; this is parallel code
                          GenCollectedHeap::SO_ScavengeCodeCache,
                          GenCollectedHeap::StrongAndWeakRoots,
                          &par_scan_state.to_space_root_closure(),
@@ -838,7 +836,6 @@
 {
   _state_set.flush();
   GenCollectedHeap* gch = GenCollectedHeap::heap();
-  gch->set_par_threads(0);  // 0 ==> non-parallel.
   gch->save_marks();
 }
 
@@ -952,20 +949,22 @@
                                          *to(), *this, *_old_gen, *task_queues(),
                                          _overflow_stacks, desired_plab_sz(), _term);
 
-  ParNewGenTask tsk(this, _old_gen, reserved().end(), &thread_state_set);
-  gch->set_par_threads(n_workers);
-  gch->rem_set()->prepare_for_younger_refs_iterate(true);
-  // It turns out that even when we're using 1 thread, doing the work in a
-  // separate thread causes wide variance in run times.  We can't help this
-  // in the multi-threaded case, but we special-case n=1 here to get
-  // repeatable measurements of the 1-thread overhead of the parallel code.
-  if (n_workers > 1) {
-    StrongRootsScope srs;
-    workers->run_task(&tsk);
-  } else {
-    StrongRootsScope srs;
-    tsk.work(0);
+  {
+    StrongRootsScope srs(n_workers);
+
+    ParNewGenTask tsk(this, _old_gen, reserved().end(), &thread_state_set, &srs);
+    gch->rem_set()->prepare_for_younger_refs_iterate(true);
+    // It turns out that even when we're using 1 thread, doing the work in a
+    // separate thread causes wide variance in run times.  We can't help this
+    // in the multi-threaded case, but we special-case n=1 here to get
+    // repeatable measurements of the 1-thread overhead of the parallel code.
+    if (n_workers > 1) {
+      workers->run_task(&tsk);
+    } else {
+      tsk.work(0);
+    }
   }
+
   thread_state_set.reset(0 /* Bad value in debug if not reset */,
                          promotion_failed());
 
@@ -995,7 +994,6 @@
                                               _gc_timer, _gc_tracer.gc_id());
   } else {
     thread_state_set.flush();
-    gch->set_par_threads(0);  // 0 ==> non-parallel.
     gch->save_marks();
     stats = rp->process_discovered_references(&is_alive, &keep_alive,
                                               &evacuate_followers, NULL,