--- old/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp 2015-05-13 14:03:41.677402342 +0200 +++ new/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp 2015-05-13 14:03:41.557398353 +0200 @@ -567,11 +567,13 @@ } ParNewGenTask::ParNewGenTask(ParNewGeneration* gen, Generation* old_gen, - HeapWord* young_old_boundary, ParScanThreadStateSet* state_set) : + HeapWord* young_old_boundary, ParScanThreadStateSet* state_set, + StrongRootsScope* strong_roots_scope) : AbstractGangTask("ParNewGeneration collection"), _gen(gen), _old_gen(old_gen), _young_old_boundary(young_old_boundary), - _state_set(state_set) + _state_set(state_set), + _strong_roots_scope(strong_roots_scope) {} // Reset the terminator for the given number of @@ -603,10 +605,10 @@ false); par_scan_state.start_strong_roots(); - gch->gen_process_roots(_gen->level(), + gch->gen_process_roots(_strong_roots_scope, + _gen->level(), true, // Process younger gens, if any, // as strong roots. - false, // no scope; this is parallel code GenCollectedHeap::SO_ScavengeCodeCache, GenCollectedHeap::StrongAndWeakRoots, &par_scan_state.to_space_root_closure(), @@ -952,20 +954,23 @@ *to(), *this, *_old_gen, *task_queues(), _overflow_stacks, desired_plab_sz(), _term); - ParNewGenTask tsk(this, _old_gen, reserved().end(), &thread_state_set); - gch->set_par_threads(n_workers); - gch->rem_set()->prepare_for_younger_refs_iterate(true); - // It turns out that even when we're using 1 thread, doing the work in a - // separate thread causes wide variance in run times. We can't help this - // in the multi-threaded case, but we special-case n=1 here to get - // repeatable measurements of the 1-thread overhead of the parallel code. - if (n_workers > 1) { - StrongRootsScope srs; - workers->run_task(&tsk); - } else { - StrongRootsScope srs; - tsk.work(0); + { + StrongRootsScope srs(n_workers); + + ParNewGenTask tsk(this, _old_gen, reserved().end(), &thread_state_set, &srs); + gch->set_par_threads(n_workers); + gch->rem_set()->prepare_for_younger_refs_iterate(true); + // It turns out that even when we're using 1 thread, doing the work in a + // separate thread causes wide variance in run times. We can't help this + // in the multi-threaded case, but we special-case n=1 here to get + // repeatable measurements of the 1-thread overhead of the parallel code. + if (n_workers > 1) { + workers->run_task(&tsk); + } else { + tsk.work(0); + } } + thread_state_set.reset(0 /* Bad value in debug if not reset */, promotion_failed());