129 assert(!mr.is_empty(), "unexpected empty region");
130 // convert address range into offset range
131 _bm.at_put_range(heapWordToOffset(mr.start()),
132 heapWordToOffset(mr.end()), false);
133 }
134
135 G1CMMarkStack::G1CMMarkStack() :
136 _max_chunk_capacity(0),
137 _base(NULL),
138 _chunk_capacity(0),
139 _out_of_memory(false),
140 _should_expand(false) {
141 set_empty();
142 }
143
144 bool G1CMMarkStack::resize(size_t new_capacity) {
145 assert(is_empty(), "Only resize when stack is empty.");
146 assert(new_capacity <= _max_chunk_capacity,
147 "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
148
149 TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::allocate_or_null(new_capacity);
150
151 if (new_base == NULL) {
152 log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
153 return false;
154 }
155 // Release old mapping.
156 if (_base != NULL) {
157 MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
158 }
159
160 _base = new_base;
161 _chunk_capacity = new_capacity;
162 set_empty();
163 _should_expand = false;
164
165 return true;
166 }
167
168 size_t G1CMMarkStack::capacity_alignment() {
169 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
170 }
171
172 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
173 guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
174
175 size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
176
177 _max_chunk_capacity = (size_t)align_size_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
178 size_t initial_chunk_capacity = (size_t)align_size_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
179
180 guarantee(initial_chunk_capacity <= _max_chunk_capacity,
181 "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
182 _max_chunk_capacity,
183 initial_chunk_capacity);
184
185 log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
186 initial_chunk_capacity, _max_chunk_capacity);
187
188 return resize(initial_chunk_capacity);
189 }
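// Note on units: initial_capacity and max_capacity are given in G1TaskQueueEntry
// slots. They are first rounded up to capacity_alignment(), which is a whole
// number of chunks' worth of entries that also respects the OS allocation
// granularity, and then converted into chunk counts before being handed to
// resize().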
190
191 void G1CMMarkStack::expand() {
192 // Clear expansion flag
193 _should_expand = false;
194
195 if (_chunk_capacity == _max_chunk_capacity) {
196     log_debug(gc)("Cannot expand the overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
197 return;
198 }
199 size_t old_capacity = _chunk_capacity;
200 // Double capacity if possible
201 size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
202
203 if (resize(new_capacity)) {
204 log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
205 old_capacity, new_capacity);
206 } else {
207 log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
208 old_capacity, new_capacity);
209 }
210 }
211
212 G1CMMarkStack::~G1CMMarkStack() {
213 if (_base != NULL) {
214 MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
215 }
216 }
217
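// The list helpers below manipulate the singly-linked chunk lists (threaded
// through TaskQueueEntryChunk::next) without any synchronization of their own.
// The callers serialize access by taking MarkStackChunkList_lock or
// MarkStackFreeList_lock, as add_chunk_to_chunk_list() and friends do below.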
218 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
219 elem->next = *list;
220 *list = elem;
221 }
222
223 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
224 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
225 add_chunk_to_list(&_chunk_list, elem);
226 _chunks_in_chunk_list++;
227 }
228
229 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
230 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
231 add_chunk_to_list(&_free_list, elem);
232 }
233
234 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
235 TaskQueueEntryChunk* result = *list;
236 if (result != NULL) {
237 *list = (*list)->next;
238 }
239 return result;
240 }
241
242 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
243 MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
244 TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
245 if (result != NULL) {
246 _chunks_in_chunk_list--;
247 }
248 return result;
249 }
250
251 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
252 MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
253 return remove_chunk_from_list(&_free_list);
254 }
255
256 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
257 // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
258     // Further, this limits _hwm to a value of _chunk_capacity + #threads, avoiding
259 // wraparound of _hwm.
260 if (_hwm >= _chunk_capacity) {
261 return NULL;
262 }
263
264 size_t cur_idx = Atomic::add(1, &_hwm) - 1;
265 if (cur_idx >= _chunk_capacity) {
266 return NULL;
267 }
268
269 TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
270 result->next = NULL;
271 return result;
272 }
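// A minimal standalone sketch (not part of this file; names are illustrative) of
// the same claim-by-increment pattern using C++11 atomics. The cheap "dirty"
// pre-check plus fetch_add means each thread can push the counter past the
// capacity at most once, which is what bounds _hwm to _chunk_capacity + #threads
// in allocate_new_chunk() above.
#include <atomic>
#include <cstddef>

static std::atomic<size_t> g_hwm(0);  // analogous to _hwm

// Claim one slot below 'capacity'. Returns the claimed index, or 'capacity'
// when the backing array is exhausted.
static size_t claim_index(size_t capacity) {
  if (g_hwm.load(std::memory_order_relaxed) >= capacity) {
    return capacity;                                    // dirty read, fast fail
  }
  size_t idx = g_hwm.fetch_add(1, std::memory_order_relaxed);
  return (idx < capacity) ? idx : capacity;             // lost the race at the end
}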
273
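// par_push_chunk() and par_pop_chunk() transfer whole chunks of EntriesPerChunk
// entries at a time: a push copies the caller's buffer into a chunk taken from
// the free list (or freshly carved out of the backing mapping) and publishes it
// on the chunk list; a pop does the reverse and recycles the chunk onto the free
// list. A push fails and sets _out_of_memory when no chunk can be obtained.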
274 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
275 // Get a new chunk.
276 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
277
278 if (new_chunk == NULL) {
279 // Did not get a chunk from the free list. Allocate from backing memory.
280 new_chunk = allocate_new_chunk();
281 }
282
283 if (new_chunk == NULL) {
284 _out_of_memory = true;
285 return false;
286 }
287
288 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
289
290 add_chunk_to_chunk_list(new_chunk);
291
292 return true;
293 }
294
295 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
296 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
297
298 if (cur == NULL) {
299 return false;
300 }
301
302 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
303
304 add_chunk_to_free_list(cur);
305 return true;
306 }
307
308 void G1CMMarkStack::set_empty() {
309 _chunks_in_chunk_list = 0;
310 _hwm = 0;
311 clear_out_of_memory();
312 _chunk_list = NULL;
313 _free_list = NULL;
314 }
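// set_empty() only resets the bookkeeping (high-water mark, list heads, chunk
// counter and the out-of-memory flag); it does not touch the backing mapping in
// _base, which is released or replaced only by resize() and the destructor.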
315
316 G1CMRootRegions::G1CMRootRegions() :
317 _cm(NULL), _scan_in_progress(false),
318 _should_abort(false), _claimed_survivor_index(0) { }
319
320 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
321 _survivors = survivors;
322 _cm = cm;
1991 }
1992 }
1993
1994 return NULL;
1995 }
1996
1997 #ifndef PRODUCT
1998 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1999 private:
2000 G1CollectedHeap* _g1h;
2001 const char* _phase;
2002 int _info;
2003
2004 public:
2005 VerifyNoCSetOops(const char* phase, int info = -1) :
2006 _g1h(G1CollectedHeap::heap()),
2007 _phase(phase),
2008 _info(info)
2009 { }
2010
2011 void operator()(G1TaskQueueEntry task_entry) const {
2012 if (task_entry.is_array_slice()) {
2013 guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
2014 return;
2015 }
2016 guarantee(task_entry.obj()->is_oop(),
2017 "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2018 p2i(task_entry.obj()), _phase, _info);
2019 guarantee(!_g1h->is_in_cset(task_entry.obj()),
2020 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2021 p2i(task_entry.obj()), _phase, _info);
2022 }
2023 };
2024
2025 void G1ConcurrentMark::verify_no_cset_oops() {
2026 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2027 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2028 return;
2029 }
2030
2031 // Verify entries on the global mark stack
2032 _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
2033
2034 // Verify entries on the task queues
2035 for (uint i = 0; i < _max_worker_id; ++i) {
2036 G1CMTaskQueue* queue = _task_queues->queue(i);
2037 queue->iterate(VerifyNoCSetOops("Queue", i));
2038 }
2039
2040 // Verify the global finger
2041 HeapWord* global_finger = finger();
2195 class G1CMBitMapClosure : public BitMapClosure {
2196 private:
2197 // the bitmap that is being iterated over
2198 G1CMBitMap* _nextMarkBitMap;
2199 G1ConcurrentMark* _cm;
2200 G1CMTask* _task;
2201
2202 public:
2203 G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
2204 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2205
2206 bool do_bit(size_t offset) {
2207 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2208 assert(_nextMarkBitMap->isMarked(addr), "invariant");
2209     assert(addr < _cm->finger(), "invariant");
2210 assert(addr >= _task->finger(), "invariant");
2211
2212 // We move that task's local finger along.
2213 _task->move_finger_to(addr);
2214
2215 _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
2216 // we only partially drain the local queue and global stack
2217 _task->drain_local_queue(true);
2218 _task->drain_global_stack(true);
2219
2220 // if the has_aborted flag has been raised, we need to bail out of
2221 // the iteration
2222 return !_task->has_aborted();
2223 }
2224 };
2225
2226 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2227 ReferenceProcessor* result = g1h->ref_processor_cm();
2228 assert(result != NULL, "CM reference processor should not be NULL");
2229 return result;
2230 }
2231
2232 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2233 G1ConcurrentMark* cm,
2234 G1CMTask* task)
2235 : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2386
2387 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2388 _refs_reached_limit = _real_refs_reached_limit;
2389 }
2390
2391 void G1CMTask::decrease_limits() {
2392 // This is called when we believe that we're going to do an infrequent
2393     // operation which will increase the per-byte scanning cost (i.e. moving
2394     // entries to/from the global stack). It basically tries to decrease the
2395 // scanning limit so that the clock is called earlier.
2396
2397 _words_scanned_limit = _real_words_scanned_limit -
2398 3 * words_scanned_period / 4;
2399 _refs_reached_limit = _real_refs_reached_limit -
2400 3 * refs_reached_period / 4;
2401 }
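// Pulling both limits back by three quarters of a period means that, assuming the
// limits had just been reset, only about a quarter of a period of scanning remains
// before the regular clock call (and its abort checks) runs again.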
2402
2403 void G1CMTask::move_entries_to_global_stack() {
2404 // Local array where we'll store the entries that will be popped
2405 // from the local queue.
2406 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2407
2408 size_t n = 0;
2409 G1TaskQueueEntry task_entry;
2410 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2411     buffer[n] = task_entry;
2412 ++n;
2413 }
2414 if (n < G1CMMarkStack::EntriesPerChunk) {
2415     buffer[n] = G1TaskQueueEntry();
2416 }
2417
2418 if (n > 0) {
2419 if (!_cm->mark_stack_push(buffer)) {
2420 set_has_aborted();
2421 }
2422 }
2423
2424 // This operation was quite expensive, so decrease the limits.
2425 decrease_limits();
2426 }
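// Note the terminator convention shared with get_entries_from_global_stack()
// below: a partially filled chunk is closed with a default-constructed (null)
// G1TaskQueueEntry, and the consumer stops copying as soon as it sees an entry
// for which is_null() returns true.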
2427
2428 bool G1CMTask::get_entries_from_global_stack() {
2429 // Local array where we'll store the entries that will be popped
2430 // from the global stack.
2431 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2432
2433 if (!_cm->mark_stack_pop(buffer)) {
2434 return false;
2435 }
2436
2437 // We did actually pop at least one entry.
2438 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2439 G1TaskQueueEntry task_entry = buffer[i];
2440 if (task_entry.is_null()) {
2441 break;
2442 }
2443 assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2444 bool success = _task_queue->push(task_entry);
2445 // We only call this when the local queue is empty or under a
2446 // given target limit. So, we do not expect this push to fail.
2447 assert(success, "invariant");
2448 }
2449
2450 // This operation was quite expensive, so decrease the limits
2451 decrease_limits();
2452 return true;
2453 }
2454
2455 void G1CMTask::drain_local_queue(bool partially) {
2456 if (has_aborted()) {
2457 return;
2458 }
2459
2460     // Decide what the target size is, depending on whether we're going to
2461 // drain it partially (so that other tasks can steal if they run out
2462 // of things to do) or totally (at the very end).
2463 size_t target_size;
2464 if (partially) {
2465 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2466 } else {
2467 target_size = 0;
2468 }
2469
2470 if (_task_queue->size() > target_size) {
2471 G1TaskQueueEntry entry;
2472 bool ret = _task_queue->pop_local(entry);
2473 while (ret) {
2474 scan_task_entry(entry);
2475 if (_task_queue->size() <= target_size || has_aborted()) {
2476 ret = false;
2477 } else {
2478 ret = _task_queue->pop_local(entry);
2479 }
2480 }
2481 }
2482 }
2483
2484 void G1CMTask::drain_global_stack(bool partially) {
2485 if (has_aborted()) return;
2486
2487 // We have a policy to drain the local queue before we attempt to
2488 // drain the global stack.
2489 assert(partially || _task_queue->size() == 0, "invariant");
2490
2491     // Decide what the target size is, depending on whether we're going to
2492     // drain it partially (so that other tasks can steal if they run out
2493     // of things to do) or totally (at the very end).
2494     // Notice that when draining the global mark stack partially, due to the raciness
2495     // of the mark stack size update we might in fact drop below the target, but
2496     // this is not a problem.
2497 // In case of total draining, we simply process until the global mark stack is
2498 // totally empty, disregarding the size counter.
2539 concurrent() ||
2540 satb_mq_set.completed_buffers_num() == 0, "invariant");
2541
2542     // Again, this was a potentially expensive operation; decrease the
2543     // limits to get the regular clock call early.
2544 decrease_limits();
2545 }
2546
2547 void G1CMTask::print_stats() {
2548 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2549 _worker_id, _calls);
2550 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2551 _elapsed_time_ms, _termination_time_ms);
2552 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2553 _step_times_ms.num(), _step_times_ms.avg(),
2554 _step_times_ms.sd());
2555 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms",
2556 _step_times_ms.maximum(), _step_times_ms.sum());
2557 }
2558
2559 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2560 return _task_queues->steal(worker_id, hash_seed, task_entry);
2561 }
2562
2563 /*****************************************************************************
2564
2565 The do_marking_step(time_target_ms, ...) method is the building
2566 block of the parallel marking framework. It can be called in parallel
2567 with other invocations of do_marking_step() on different tasks
2568 (but only one per task, obviously) and concurrently with the
2569 mutator threads, or during remark, hence it eliminates the need
2570 for two versions of the code. When called during remark, it will
2571 pick up from where the task left off during the concurrent marking
2572 phase. Interestingly, tasks are also claimable during evacuation
2573    pauses, since do_marking_step() ensures that it aborts before
2574 it needs to yield.
2575
2576 The data structures that it uses to do marking work are the
2577 following:
2578
2579 (1) Marking Bitmap. If there are gray objects that appear only
2580 on the bitmap (this happens either when dealing with an overflow
2863 // Try to reduce the number of available SATB buffers so that
2864 // remark has less work to do.
2865 drain_satb_buffers();
2866 }
2867
2868 // Since we've done everything else, we can now totally drain the
2869 // local queue and global stack.
2870 drain_local_queue(false);
2871 drain_global_stack(false);
2872
2873     // Attempt to steal work from other tasks' queues.
2874 if (do_stealing && !has_aborted()) {
2875 // We have not aborted. This means that we have finished all that
2876 // we could. Let's try to do some stealing...
2877
2878 // We cannot check whether the global stack is empty, since other
2879 // tasks might be pushing objects to it concurrently.
2880 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2881 "only way to reach here");
2882 while (!has_aborted()) {
2883 G1TaskQueueEntry entry;
2884 if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2885 scan_task_entry(entry);
2886
2887 // And since we're towards the end, let's totally drain the
2888 // local queue and global stack.
2889 drain_local_queue(false);
2890 drain_global_stack(false);
2891 } else {
2892 break;
2893 }
2894 }
2895 }
2896
2897 // We still haven't aborted. Now, let's try to get into the
2898 // termination protocol.
2899 if (do_termination && !has_aborted()) {
2900 // We cannot check whether the global stack is empty, since other
2901 // tasks might be concurrently pushing objects on it.
2902 // Separated the asserts so that we know which one fires.
2903 assert(_cm->out_of_regions(), "only way to reach here");
2904 assert(_task_queue->size() == 0, "only way to reach here");
2905 _termination_start_time_ms = os::elapsedVTime() * 1000.0;