rev 9846 : [mq]: par-scav-patch
rev 9847 : 8146987: Improve Parallel GC Full GC by caching results of live_words_in_range()
Summary: A large part of full GC time in the parallel scavenge collector is spent computing the number of live words within a memory range in order to decide where an object will move. Cache the result of live_words_in_range() and update it incrementally instead of recomputing it from scratch.
Reviewed-by: tschatzl, mgerdin
Contributed-by: ray alex <sky1young@gmail.com>
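
The caching itself lives alongside this file in the compaction-manager and mark-bitmap changes of the patch. As a rough standalone illustration of the idea only (hypothetical names and a toy bitmap, not the real ParMarkBitMap API): consecutive queries during compaction tend to share the same range start, so remembering the previous answer turns a full bitmap walk into counting just the new tail.

#include <cstddef>
#include <cstdio>

static const int HEAP_WORDS = 64;
static bool live[HEAP_WORDS];                // toy stand-in for the mark bitmap

static size_t count_live(int beg, int end) { // the "expensive" bitmap walk
  size_t n = 0;
  for (int i = beg; i < end; i++) n += live[i] ? 1 : 0;
  return n;
}

class LiveWordsCache {
  int    _beg = -1, _end = -1;               // previously queried range
  size_t _words = 0;                         // live words in [_beg, _end)
public:
  size_t live_words_in_range(int beg, int end) {
    if (beg == _beg && end >= _end) {
      _words += count_live(_end, end);       // hit: count only the new tail
    } else {
      _words = count_live(beg, end);         // miss: full walk
      _beg = beg;
    }
    _end = end;
    return _words;
  }
};

int main() {
  for (int i = 0; i < HEAP_WORDS; i += 2) live[i] = true;
  LiveWordsCache cache;
  std::printf("%zu\n", cache.live_words_in_range(0, 10)); // full walk: 5
  std::printf("%zu\n", cache.live_words_in_range(0, 20)); // incremental: 10
}
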
/*
 * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/systemDictionary.hpp"
#include "gc/parallel/gcTaskManager.hpp"
#include "gc/parallel/objectStartArray.hpp"
#include "gc/parallel/parMarkBitMap.hpp"
#include "gc/parallel/parallelScavengeHeap.hpp"
#include "gc/parallel/psCompactionManager.inline.hpp"
#include "gc/parallel/psOldGen.hpp"
#include "gc/parallel/psParallelCompact.inline.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "logging/log.hpp"
#include "memory/iterator.inline.hpp"
#include "oops/instanceKlass.inline.hpp"
#include "oops/instanceMirrorKlass.inline.hpp"
#include "oops/objArrayKlass.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.inline.hpp"

PSOldGen* ParCompactionManager::_old_gen = NULL;
ParCompactionManager** ParCompactionManager::_manager_array = NULL;

RegionTaskQueue** ParCompactionManager::_region_list = NULL;

OopTaskQueueSet* ParCompactionManager::_stack_array = NULL;
ParCompactionManager::ObjArrayTaskQueueSet*
  ParCompactionManager::_objarray_queues = NULL;
ObjectStartArray* ParCompactionManager::_start_array = NULL;
ParMarkBitMap* ParCompactionManager::_mark_bitmap = NULL;
RegionTaskQueueSet* ParCompactionManager::_region_array = NULL;

uint* ParCompactionManager::_recycled_stack_index = NULL;
int ParCompactionManager::_recycled_top = -1;
int ParCompactionManager::_recycled_bottom = -1;

ParCompactionManager::ParCompactionManager() :
    _action(CopyAndUpdate),
    _region_stack(NULL),
    _region_stack_index((uint)max_uintx) {

  ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();

  _old_gen = heap->old_gen();
  _start_array = old_gen()->start_array();

  marking_stack()->initialize();
  _objarray_stack.initialize();
}

ParCompactionManager::~ParCompactionManager() {
  // _recycled_stack_index was allocated with NEW_C_HEAP_ARRAY, so it must
  // be released with the matching FREE_C_HEAP_ARRAY rather than delete.
  FREE_C_HEAP_ARRAY(uint, _recycled_stack_index);
}

void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
  assert(PSParallelCompact::gc_task_manager() != NULL,
         "Needed for initialization");

  _mark_bitmap = mbm;

  uint parallel_gc_threads = PSParallelCompact::gc_task_manager()->workers();

  assert(_manager_array == NULL, "Attempt to initialize twice");
  _manager_array = NEW_C_HEAP_ARRAY(ParCompactionManager*, parallel_gc_threads+1, mtGC);
  guarantee(_manager_array != NULL, "Could not allocate manager_array");

  _region_list = NEW_C_HEAP_ARRAY(RegionTaskQueue*,
                                  parallel_gc_threads+1, mtGC);
  guarantee(_region_list != NULL, "Could not allocate region_list");

  _recycled_stack_index = NEW_C_HEAP_ARRAY(uint, parallel_gc_threads, mtGC);

  // parallel_gc_threads + 1 to be consistent with the number of
  // compaction managers.
  for (uint i = 0; i < parallel_gc_threads + 1; i++) {
    _region_list[i] = new RegionTaskQueue();
    region_list(i)->initialize();
  }

  _stack_array = new OopTaskQueueSet(parallel_gc_threads);
  guarantee(_stack_array != NULL, "Could not allocate stack_array");
  _objarray_queues = new ObjArrayTaskQueueSet(parallel_gc_threads);
  guarantee(_objarray_queues != NULL, "Could not allocate objarray_queues");
  _region_array = new RegionTaskQueueSet(parallel_gc_threads);
  guarantee(_region_array != NULL, "Could not allocate region_array");

  // Create and register the ParCompactionManager(s) for the worker threads.
  for (uint i = 0; i < parallel_gc_threads; i++) {
    _manager_array[i] = new ParCompactionManager();
    guarantee(_manager_array[i] != NULL, "Could not create ParCompactionManager");
    stack_array()->register_queue(i, _manager_array[i]->marking_stack());
    _objarray_queues->register_queue(i, &_manager_array[i]->_objarray_stack);
    region_array()->register_queue(i, region_list(i));
  }

  // The VMThread gets its own ParCompactionManager, which is not available
  // for work stealing.
  _manager_array[parallel_gc_threads] = new ParCompactionManager();
  guarantee(_manager_array[parallel_gc_threads] != NULL,
            "Could not create ParCompactionManager");
  assert(PSParallelCompact::gc_task_manager()->workers() != 0,
         "Not initialized?");
}

int ParCompactionManager::pop_recycled_stack_index() {
  assert(_recycled_bottom <= _recycled_top, "list is empty");
  // Get the next available index
  if (_recycled_bottom < _recycled_top) {
    uint cur, next, last;
    do {
      // Claim the slot after the current bottom by advancing
      // _recycled_bottom with a CAS; retry if another thread won the race.
      cur = _recycled_bottom;
      next = cur + 1;
      last = Atomic::cmpxchg(next, &_recycled_bottom, cur);
    } while (cur != last);
    return _recycled_stack_index[next];
  } else {
    return -1;
  }
}

void ParCompactionManager::push_recycled_stack_index(uint v) {
  // Get the next available index
  int cur = Atomic::add(1, &_recycled_top);
  _recycled_stack_index[cur] = v;
  assert(_recycled_bottom <= _recycled_top, "list top and bottom are wrong");
}
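
// As a standalone sketch of the claim protocol above (hypothetical names,
// std::atomic standing in for HotSpot's Atomic class, and ignoring this
// file's no-standard-library convention): _recycled_bottom is the index of
// the last consumed slot, so a consumer CASes it forward by one and owns
// the slot it advanced onto. Illustrative only, kept as a comment so the
// file stays unchanged:
//
//   #include <atomic>
//   #include <cstdint>
//
//   static uint32_t stack_index[16];
//   static std::atomic<int> bottom{-1};   // last consumed slot
//   static std::atomic<int> top{-1};      // last filled slot
//
//   int pop_index() {
//     int cur = bottom.load();
//     while (cur < top.load()) {
//       // On success this thread owns slot cur + 1; on failure
//       // compare_exchange_weak reloads 'cur' and we retry.
//       if (bottom.compare_exchange_weak(cur, cur + 1)) {
//         return (int)stack_index[cur + 1];
//       }
//     }
//     return -1;  // empty
//   }
//
//   void push_index(uint32_t v) {
//     // fetch_add returns the old top; the newly claimed slot is old + 1.
//     stack_index[top.fetch_add(1) + 1] = v;
//   }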

bool ParCompactionManager::should_update() {
  assert(action() != NotValid, "Action is not set");
  return (action() == ParCompactionManager::Update) ||
         (action() == ParCompactionManager::CopyAndUpdate) ||
         (action() == ParCompactionManager::UpdateAndCopy);
}

bool ParCompactionManager::should_copy() {
  assert(action() != NotValid, "Action is not set");
  return (action() == ParCompactionManager::Copy) ||
         (action() == ParCompactionManager::CopyAndUpdate) ||
         (action() == ParCompactionManager::UpdateAndCopy);
}

void ParCompactionManager::region_list_push(uint list_index,
                                            size_t region_index) {
  region_list(list_index)->push(region_index);
}

void ParCompactionManager::verify_region_list_empty(uint list_index) {
  assert(region_list(list_index)->is_empty(), "Not empty");
}

ParCompactionManager*
ParCompactionManager::gc_thread_compaction_manager(uint index) {
  assert(index < ParallelGCThreads, "index out of range");
  assert(_manager_array != NULL, "Sanity");
  return _manager_array[index];
}

void InstanceKlass::oop_pc_follow_contents(oop obj, ParCompactionManager* cm) {
  assert(obj != NULL, "can't follow the content of NULL object");

  cm->follow_klass(this);
  // Only mark the header and let the scan of the meta-data mark
  // everything else.

  ParCompactionManager::MarkAndPushClosure cl(cm);
  InstanceKlass::oop_oop_iterate_oop_maps<true>(obj, &cl);
}

void InstanceMirrorKlass::oop_pc_follow_contents(oop obj, ParCompactionManager* cm) {
  InstanceKlass::oop_pc_follow_contents(obj, cm);

  // Follow the klass field in the mirror.
  Klass* klass = java_lang_Class::as_Klass(obj);
  if (klass != NULL) {
    // An anonymous class doesn't have its own class loader, so the call
    // to follow_klass will mark and push its java mirror instead of the
    // class loader. When handling the java mirror for an anonymous class
    // we need to make sure its class loader data is claimed; this is done
    // by calling follow_class_loader explicitly. For non-anonymous classes
    // the call to follow_class_loader is made when the class loader itself
    // is handled.
    if (klass->is_instance_klass() && InstanceKlass::cast(klass)->is_anonymous()) {
      cm->follow_class_loader(klass->class_loader_data());
    } else {
      cm->follow_klass(klass);
    }
  } else {
    // If klass is NULL then this is a mirror for a primitive type.
    // We don't have to follow them, since they are handled as strong
    // roots in Universe::oops_do.
    assert(java_lang_Class::is_primitive(obj), "Sanity check");
  }

  ParCompactionManager::MarkAndPushClosure cl(cm);
  oop_oop_iterate_statics<true>(obj, &cl);
}

void InstanceClassLoaderKlass::oop_pc_follow_contents(oop obj, ParCompactionManager* cm) {
  InstanceKlass::oop_pc_follow_contents(obj, cm);

  ClassLoaderData* const loader_data = java_lang_ClassLoader::loader_data(obj);
  if (loader_data != NULL) {
    cm->follow_class_loader(loader_data);
  }
}

template <class T>
static void oop_pc_follow_contents_specialized(InstanceRefKlass* klass, oop obj, ParCompactionManager* cm) {
  T* referent_addr = (T*)java_lang_ref_Reference::referent_addr(obj);
  T heap_oop = oopDesc::load_heap_oop(referent_addr);
  log_develop_trace(gc, ref)("InstanceRefKlass::oop_pc_follow_contents " PTR_FORMAT, p2i(obj));
  if (!oopDesc::is_null(heap_oop)) {
    oop referent = oopDesc::decode_heap_oop_not_null(heap_oop);
    if (PSParallelCompact::mark_bitmap()->is_unmarked(referent) &&
        PSParallelCompact::ref_processor()->discover_reference(obj, klass->reference_type())) {
      // reference already enqueued, referent will be traversed later
      klass->InstanceKlass::oop_pc_follow_contents(obj, cm);
      log_develop_trace(gc, ref)("       Non NULL enqueued " PTR_FORMAT, p2i(obj));
      return;
    } else {
      // treat referent as normal oop
      log_develop_trace(gc, ref)("       Non NULL normal " PTR_FORMAT, p2i(obj));
      cm->mark_and_push(referent_addr);
    }
  }
  T* next_addr = (T*)java_lang_ref_Reference::next_addr(obj);
  // Treat discovered as normal oop, if ref is not "active",
  // i.e. if next is non-NULL.
  T next_oop = oopDesc::load_heap_oop(next_addr);
  if (!oopDesc::is_null(next_oop)) { // i.e. ref is not "active"
    T* discovered_addr = (T*)java_lang_ref_Reference::discovered_addr(obj);
    log_develop_trace(gc, ref)("   Process discovered as normal " PTR_FORMAT, p2i(discovered_addr));
    cm->mark_and_push(discovered_addr);
  }
  cm->mark_and_push(next_addr);
  klass->InstanceKlass::oop_pc_follow_contents(obj, cm);
}

void InstanceRefKlass::oop_pc_follow_contents(oop obj, ParCompactionManager* cm) {
  if (UseCompressedOops) {
    oop_pc_follow_contents_specialized<narrowOop>(this, obj, cm);
  } else {
    oop_pc_follow_contents_specialized<oop>(this, obj, cm);
  }
}

void ObjArrayKlass::oop_pc_follow_contents(oop obj, ParCompactionManager* cm) {
  cm->follow_klass(this);

  if (UseCompressedOops) {
    oop_pc_follow_contents_specialized<narrowOop>(objArrayOop(obj), 0, cm);
  } else {
    oop_pc_follow_contents_specialized<oop>(objArrayOop(obj), 0, cm);
  }
}

void TypeArrayKlass::oop_pc_follow_contents(oop obj, ParCompactionManager* cm) {
  assert(obj->is_typeArray(), "must be a type array");
  // Performance tweak: We skip iterating over the klass pointer since we
  // know that Universe::TypeArrayKlass never moves.
}

void ParCompactionManager::follow_marking_stacks() {
  do {
    // Drain the overflow stack first, to allow stealing from the marking stack.
    oop obj;
    while (marking_stack()->pop_overflow(obj)) {
      follow_contents(obj);
    }
    while (marking_stack()->pop_local(obj)) {
      follow_contents(obj);
    }

    // Process ObjArrays one at a time to avoid marking stack bloat.
    ObjArrayTask task;
    if (_objarray_stack.pop_overflow(task) || _objarray_stack.pop_local(task)) {
      follow_contents((objArrayOop)task.obj(), task.index());
    }
  } while (!marking_stacks_empty());

  assert(marking_stacks_empty(), "Sanity");
}
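
// The loop above takes at most one ObjArray chunk per pass, so a large
// array feeds work back in small steps instead of expanding all of its
// chunks onto the stack at once. A toy standalone sketch of that drain
// discipline (hypothetical integer "tasks", std::deque in place of the
// real task queues), kept as a comment so the file stays unchanged:
//
//   #include <deque>
//
//   static std::deque<int> objs;    // stand-in for the marking stack
//   static std::deque<int> chunks;  // stand-in for _objarray_stack
//
//   static void follow_obj(int o)   { if (o > 0) objs.push_back(o - 1); }
//   static void follow_chunk(int c) {
//     if (c > 0) chunks.push_back(c - 1);  // remainder of the array
//     objs.push_back(c);                   // elements become object work
//   }
//
//   static void drain() {
//     do {
//       while (!objs.empty()) { int o = objs.front(); objs.pop_front(); follow_obj(o); }
//       if (!chunks.empty())  { int c = chunks.front(); chunks.pop_front(); follow_chunk(c); }
//     } while (!objs.empty() || !chunks.empty());
//   }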

void ParCompactionManager::drain_region_stacks() {
  do {
    // Drain overflow stack first so other threads can steal.
    size_t region_index;
    while (region_stack()->pop_overflow(region_index)) {
      PSParallelCompact::fill_and_update_region(this, region_index);
    }

    while (region_stack()->pop_local(region_index)) {
      PSParallelCompact::fill_and_update_region(this, region_index);
    }
  } while (!region_stack()->is_empty());
}