254 }
255
256 G1CMMarkStack::OopChunk* G1CMMarkStack::allocate_new_chunk() {
257 // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
258 // Further, this limits _hwm to a value of _chunk_capacity + #threads, avoiding
259 // wraparound of _hwm.
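// (A thread only performs the increment below after the early-out check has
// seen _hwm below _chunk_capacity, so once the capacity is reached each
// thread can have at most one such increment still pending.)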
260 if (_hwm >= _chunk_capacity) {
261 return NULL;
262 }
263
264 size_t cur_idx = Atomic::add(1, &_hwm) - 1;
265 if (cur_idx >= _chunk_capacity) {
266 return NULL;
267 }
268
269 OopChunk* result = ::new (&_base[cur_idx]) OopChunk;
270 result->next = NULL;
271 return result;
272 }
273
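// Push a chunk of OopsPerChunk entries from ptr_arr onto the global mark
// stack: reuse a chunk from the free list if one is available, otherwise
// allocate one from the backing memory. Returns false (and records the
// out-of-memory condition) if no chunk could be obtained.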
274 bool G1CMMarkStack::par_push_chunk(oop* ptr_arr) {
275 // Get a new chunk.
276 OopChunk* new_chunk = remove_chunk_from_free_list();
277
278 if (new_chunk == NULL) {
279 // Did not get a chunk from the free list. Allocate from backing memory.
280 new_chunk = allocate_new_chunk();
281 }
282
283 if (new_chunk == NULL) {
284 _out_of_memory = true;
285 return false;
286 }
287
288 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, OopsPerChunk * sizeof(oop));
289
290 add_chunk_to_chunk_list(new_chunk);
291
292 return true;
293 }
294
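// Pop one chunk of OopsPerChunk entries from the global mark stack into
// ptr_arr and return the emptied chunk to the free list. Returns false if
// the chunk list is empty.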
295 bool G1CMMarkStack::par_pop_chunk(oop* ptr_arr) {
296 OopChunk* cur = remove_chunk_from_chunk_list();
297
298 if (cur == NULL) {
299 return false;
300 }
301
302 Copy::conjoint_memory_atomic(cur->data, ptr_arr, OopsPerChunk * sizeof(oop));
303
304 add_chunk_to_free_list(cur);
305 return true;
306 }
307
308 void G1CMMarkStack::set_empty() {
309 _chunks_in_chunk_list = 0;
310 _hwm = 0;
311 clear_out_of_memory();
312 _chunk_list = NULL;
313 _free_list = NULL;
314 }
315
316 G1CMRootRegions::G1CMRootRegions() :
317 _cm(NULL), _scan_in_progress(false),
318 _should_abort(false), _claimed_survivor_index(0) { }
319
320 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
321 _survivors = survivors;
322 _cm = cm;
1991 }
1992 }
1993
1994 return NULL;
1995 }
1996
1997 #ifndef PRODUCT
1998 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1999 private:
2000 G1CollectedHeap* _g1h;
2001 const char* _phase;
2002 int _info;
2003
2004 public:
2005 VerifyNoCSetOops(const char* phase, int info = -1) :
2006 _g1h(G1CollectedHeap::heap()),
2007 _phase(phase),
2008 _info(info)
2009 { }
2010
2011 void operator()(oop obj) const {
2012 guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || obj->is_oop(),
2013 "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2014 p2i(obj), _phase, _info);
2015 guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_in_cset(obj),
2016 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2017 p2i(obj), _phase, _info);
2018 }
2019 };
2020
2021 void G1ConcurrentMark::verify_no_cset_oops() {
2022 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2023 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2024 return;
2025 }
2026
2027 // Verify entries on the global mark stack
2028 _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
2029
2030 // Verify entries on the task queues
2031 for (uint i = 0; i < _max_worker_id; ++i) {
2032 G1CMTaskQueue* queue = _task_queues->queue(i);
2033 queue->iterate(VerifyNoCSetOops("Queue", i));
2034 }
2035
2036 // Verify the global finger
2037 HeapWord* global_finger = finger();
2382
2383 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2384 _refs_reached_limit = _real_refs_reached_limit;
2385 }
2386
2387 void G1CMTask::decrease_limits() {
2388 // This is called when we believe that we're about to do an infrequent
2389 // operation which will increase the per-byte scanning cost (e.g. moving
2390 // entries to/from the global stack). It decreases the scanning limits
2391 // so that the clock is called earlier.
2392
2393 _words_scanned_limit = _real_words_scanned_limit -
2394 3 * words_scanned_period / 4;
2395 _refs_reached_limit = _real_refs_reached_limit -
2396 3 * refs_reached_period / 4;
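// The real limits sit one full period ahead of the current counters (see the
// assignment to _real_refs_reached_limit above), so after this the next
// regular clock call comes after roughly a quarter of the usual period.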
2397 }
2398
2399 void G1CMTask::move_entries_to_global_stack() {
2400 // Local array where we'll store the entries that will be popped
2401 // from the local queue.
2402 oop buffer[G1CMMarkStack::OopsPerChunk];
2403
2404 size_t n = 0;
2405 oop obj;
2406 while (n < G1CMMarkStack::OopsPerChunk && _task_queue->pop_local(obj)) {
2407 buffer[n] = obj;
2408 ++n;
2409 }
2410 if (n < G1CMMarkStack::OopsPerChunk) {
2411 buffer[n] = NULL;
2412 }
2413
2414 if (n > 0) {
2415 if (!_cm->mark_stack_push(buffer)) {
2416 set_has_aborted();
2417 }
2418 }
2419
2420 // This operation was quite expensive, so decrease the limits.
2421 decrease_limits();
2422 }
2423
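// Transfer one chunk of entries from the global mark stack into the local
// task queue. Returns false if nothing could be popped from the global
// stack.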
2424 bool G1CMTask::get_entries_from_global_stack() {
2425 // Local array where we'll store the entries that will be popped
2426 // from the global stack.
2427 oop buffer[G1CMMarkStack::OopsPerChunk];
2428
2429 if (!_cm->mark_stack_pop(buffer)) {
2430 return false;
2431 }
2432
2433 // We did actually pop at least one entry.
2434 for (size_t i = 0; i < G1CMMarkStack::OopsPerChunk; ++i) {
2435 oop elem = buffer[i];
2436 if (elem == NULL) {
2437 break;
2438 }
2439 assert(G1CMObjArrayProcessor::is_array_slice(elem) || elem->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(elem));
2440 bool success = _task_queue->push(elem);
2441 // We only call this when the local queue is empty or under a
2442 // given target limit. So, we do not expect this push to fail.
2443 assert(success, "invariant");
2444 }
2445
2446 // This operation was quite expensive, so decrease the limits
2447 decrease_limits();
2448 return true;
2449 }
2450
2451 void G1CMTask::drain_local_queue(bool partially) {
2452 if (has_aborted()) {
2453 return;
2454 }
2455
2456 // Decide what the target size is, depending on whether we're going to
2457 // drain it partially (so that other tasks can steal if they run out
2458 // of things to do) or totally (at the very end).
2459 size_t target_size;
2460 if (partially) {
2461 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2462 } else {
2463 target_size = 0;
2464 }
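// A target_size of 0 means the loop below drains the queue completely.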
2465
2466 if (_task_queue->size() > target_size) {
2467 oop obj;
2468 bool ret = _task_queue->pop_local(obj);
2469 while (ret) {
2470 scan_object(obj);
2471 if (_task_queue->size() <= target_size || has_aborted()) {
2472 ret = false;
2473 } else {
2474 ret = _task_queue->pop_local(obj);
2475 }
2476 }
2477 }
2478 }
2479
2480 void G1CMTask::drain_global_stack(bool partially) {
2481 if (has_aborted()) return;
2482
2483 // We have a policy to drain the local queue before we attempt to
2484 // drain the global stack.
2485 assert(partially || _task_queue->size() == 0, "invariant");
2486
2487 // Decide what the target size is, depending on whether we're going to
2535 concurrent() ||
2536 satb_mq_set.completed_buffers_num() == 0, "invariant");
2537
2538 // Again, this was a potentially expensive operation, so decrease the
2539 // limits to get the regular clock call early.
2540 decrease_limits();
2541 }
2542
2543 void G1CMTask::print_stats() {
2544 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2545 _worker_id, _calls);
2546 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2547 _elapsed_time_ms, _termination_time_ms);
2548 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2549 _step_times_ms.num(), _step_times_ms.avg(),
2550 _step_times_ms.sd());
2551 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms",
2552 _step_times_ms.maximum(), _step_times_ms.sum());
2553 }
2554
2555 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
2556 return _task_queues->steal(worker_id, hash_seed, obj);
2557 }
2558
2559 /*****************************************************************************
2560
2561 The do_marking_step(time_target_ms, ...) method is the building
2562 block of the parallel marking framework. It can be called in parallel
2563 with other invocations of do_marking_step() on different tasks
2564 (but only one per task, obviously), either concurrently with the
2565 mutator threads or during remark, hence it eliminates the need
2566 for two versions of the code. When called during remark, it will
2567 pick up from where the task left off during the concurrent marking
2568 phase. Interestingly, tasks are also claimable during evacuation
2569 pauses, since do_marking_step() ensures that it aborts before
2570 it needs to yield.
2571
2572 The data structures that it uses to do marking work are the
2573 following:
2574
2575 (1) Marking Bitmap. If there are gray objects that appear only
2576 on the bitmap (this happens either when dealing with an overflow
2859 // Try to reduce the number of available SATB buffers so that
2860 // remark has less work to do.
2861 drain_satb_buffers();
2862 }
2863
2864 // Since we've done everything else, we can now totally drain the
2865 // local queue and global stack.
2866 drain_local_queue(false);
2867 drain_global_stack(false);
2868
2869 // Attempt to steal work from other tasks' queues.
2870 if (do_stealing && !has_aborted()) {
2871 // We have not aborted. This means that we have finished all that
2872 // we could. Let's try to do some stealing...
2873
2874 // We cannot check whether the global stack is empty, since other
2875 // tasks might be pushing objects to it concurrently.
2876 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2877 "only way to reach here");
2878 while (!has_aborted()) {
2879 oop obj;
2880 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
2881 scan_object(obj);
2882
2883 // And since we're towards the end, let's totally drain the
2884 // local queue and global stack.
2885 drain_local_queue(false);
2886 drain_global_stack(false);
2887 } else {
2888 break;
2889 }
2890 }
2891 }
2892
2893 // We still haven't aborted. Now, let's try to get into the
2894 // termination protocol.
2895 if (do_termination && !has_aborted()) {
2896 // We cannot check whether the global stack is empty, since other
2897 // tasks might be concurrently pushing objects on it.
2898 // Separated the asserts so that we know which one fires.
2899 assert(_cm->out_of_regions(), "only way to reach here");
|
254 }
255
256 G1CMMarkStack::OopChunk* G1CMMarkStack::allocate_new_chunk() {
257 // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
258 // Further, this limits _hwm to a value of _chunk_capacity + #threads, avoiding
259 // wraparound of _hwm.
260 if (_hwm >= _chunk_capacity) {
261 return NULL;
262 }
263
264 size_t cur_idx = Atomic::add(1, &_hwm) - 1;
265 if (cur_idx >= _chunk_capacity) {
266 return NULL;
267 }
268
269 OopChunk* result = ::new (&_base[cur_idx]) OopChunk;
270 result->next = NULL;
271 return result;
272 }
273
274 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
275 // Get a new chunk.
276 OopChunk* new_chunk = remove_chunk_from_free_list();
277
278 if (new_chunk == NULL) {
279 // Did not get a chunk from the free list. Allocate from backing memory.
280 new_chunk = allocate_new_chunk();
281 }
282
283 if (new_chunk == NULL) {
284 _out_of_memory = true;
285 return false;
286 }
287
288 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
289
290 add_chunk_to_chunk_list(new_chunk);
291
292 return true;
293 }
294
295 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
296 OopChunk* cur = remove_chunk_from_chunk_list();
297
298 if (cur == NULL) {
299 return false;
300 }
301
302 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
303
304 add_chunk_to_free_list(cur);
305 return true;
306 }
307
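// Reset the stack to its initial empty state. The backing memory is retained
// for reuse; the chunk list, free list and high-water mark are simply reset.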
308 void G1CMMarkStack::set_empty() {
309 _chunks_in_chunk_list = 0;
310 _hwm = 0;
311 clear_out_of_memory();
312 _chunk_list = NULL;
313 _free_list = NULL;
314 }
315
316 G1CMRootRegions::G1CMRootRegions() :
317 _cm(NULL), _scan_in_progress(false),
318 _should_abort(false), _claimed_survivor_index(0) { }
319
320 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
321 _survivors = survivors;
322 _cm = cm;
1991 }
1992 }
1993
1994 return NULL;
1995 }
1996
1997 #ifndef PRODUCT
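// Functor applied to every entry on the global mark stack and the task queues
// during verification: each entry must either be an array slice or a valid
// oop that is not in the collection set.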
1998 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1999 private:
2000 G1CollectedHeap* _g1h;
2001 const char* _phase;
2002 int _info;
2003
2004 public:
2005 VerifyNoCSetOops(const char* phase, int info = -1) :
2006 _g1h(G1CollectedHeap::heap()),
2007 _phase(phase),
2008 _info(info)
2009 { }
2010
2011 void operator()(G1TaskQueueEntry task_entry) const {
2012 guarantee(task_entry.is_array_slice() || task_entry.obj()->is_oop(),
2013 "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2014 p2i(task_entry.obj()), _phase, _info);
2015 guarantee(task_entry.is_array_slice() || !_g1h->is_in_cset(task_entry.obj()),
2016 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2017 p2i(task_entry.obj()), _phase, _info);
2018 }
2019 };
2020
2021 void G1ConcurrentMark::verify_no_cset_oops() {
2022 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2023 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2024 return;
2025 }
2026
2027 // Verify entries on the global mark stack
2028 _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
2029
2030 // Verify entries on the task queues
2031 for (uint i = 0; i < _max_worker_id; ++i) {
2032 G1CMTaskQueue* queue = _task_queues->queue(i);
2033 queue->iterate(VerifyNoCSetOops("Queue", i));
2034 }
2035
2036 // Verify the global finger
2037 HeapWord* global_finger = finger();
2382
2383 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2384 _refs_reached_limit = _real_refs_reached_limit;
2385 }
2386
2387 void G1CMTask::decrease_limits() {
2388 // This is called when we believe that we're about to do an infrequent
2389 // operation which will increase the per-byte scanning cost (e.g. moving
2390 // entries to/from the global stack). It decreases the scanning limits
2391 // so that the clock is called earlier.
2392
2393 _words_scanned_limit = _real_words_scanned_limit -
2394 3 * words_scanned_period / 4;
2395 _refs_reached_limit = _real_refs_reached_limit -
2396 3 * refs_reached_period / 4;
2397 }
2398
2399 void G1CMTask::move_entries_to_global_stack() {
2400 // Local array where we'll store the entries that will be popped
2401 // from the local queue.
2402 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2403
2404 size_t n = 0;
2405 G1TaskQueueEntry task_entry;
2406 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2407 buffer[n] = task_entry;
2408 ++n;
2409 }
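// If the chunk is not completely filled, terminate it with an empty entry so
// that get_entries_from_global_stack() knows where the valid entries end.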
2410 if (n < G1CMMarkStack::EntriesPerChunk) {
2411 buffer[n] = G1TaskQueueEntry();
2412 }
2413
2414 if (n > 0) {
2415 if (!_cm->mark_stack_push(buffer)) {
2416 set_has_aborted();
2417 }
2418 }
2419
2420 // This operation was quite expensive, so decrease the limits.
2421 decrease_limits();
2422 }
2423
2424 bool G1CMTask::get_entries_from_global_stack() {
2425 // Local array where we'll store the entries that will be popped
2426 // from the global stack.
2427 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2428
2429 if (!_cm->mark_stack_pop(buffer)) {
2430 return false;
2431 }
2432
2433 // We did actually pop at least one entry.
2434 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2435 G1TaskQueueEntry task_entry = buffer[i];
2436 if (task_entry.is_null()) {
2437 break;
2438 }
2439 assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2440 bool success = _task_queue->push(task_entry);
2441 // We only call this when the local queue is empty or under a
2442 // given target limit. So, we do not expect this push to fail.
2443 assert(success, "invariant");
2444 }
2445
2446 // This operation was quite expensive, so decrease the limits
2447 decrease_limits();
2448 return true;
2449 }
2450
2451 void G1CMTask::drain_local_queue(bool partially) {
2452 if (has_aborted()) {
2453 return;
2454 }
2455
2456 // Decide what the target size is, depending on whether we're going to
2457 // drain it partially (so that other tasks can steal if they run out
2458 // of things to do) or totally (at the very end).
2459 size_t target_size;
2460 if (partially) {
2461 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2462 } else {
2463 target_size = 0;
2464 }
2465
2466 if (_task_queue->size() > target_size) {
2467 G1TaskQueueEntry obj;
2468 bool ret = _task_queue->pop_local(obj);
2469 while (ret) {
2470 scan_object(obj);
2471 if (_task_queue->size() <= target_size || has_aborted()) {
2472 ret = false;
2473 } else {
2474 ret = _task_queue->pop_local(obj);
2475 }
2476 }
2477 }
2478 }
2479
2480 void G1CMTask::drain_global_stack(bool partially) {
2481 if (has_aborted()) return;
2482
2483 // We have a policy to drain the local queue before we attempt to
2484 // drain the global stack.
2485 assert(partially || _task_queue->size() == 0, "invariant");
2486
2487 // Decide what the target size is, depending on whether we're going to
2535 concurrent() ||
2536 satb_mq_set.completed_buffers_num() == 0, "invariant");
2537
2538 // Again, this was a potentially expensive operation, so decrease the
2539 // limits to get the regular clock call early.
2540 decrease_limits();
2541 }
2542
2543 void G1CMTask::print_stats() {
2544 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2545 _worker_id, _calls);
2546 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2547 _elapsed_time_ms, _termination_time_ms);
2548 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2549 _step_times_ms.num(), _step_times_ms.avg(),
2550 _step_times_ms.sd());
2551 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms",
2552 _step_times_ms.maximum(), _step_times_ms.sum());
2553 }
2554
2555 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2556 return _task_queues->steal(worker_id, hash_seed, task_entry);
2557 }
2558
2559 /*****************************************************************************
2560
2561 The do_marking_step(time_target_ms, ...) method is the building
2562 block of the parallel marking framework. It can be called in parallel
2563 with other invocations of do_marking_step() on different tasks
2564 (but only one per task, obviously), either concurrently with the
2565 mutator threads or during remark, hence it eliminates the need
2566 for two versions of the code. When called during remark, it will
2567 pick up from where the task left off during the concurrent marking
2568 phase. Interestingly, tasks are also claimable during evacuation
2569 pauses, since do_marking_step() ensures that it aborts before
2570 it needs to yield.
2571
2572 The data structures that it uses to do marking work are the
2573 following:
2574
2575 (1) Marking Bitmap. If there are gray objects that appear only
2576 on the bitmap (this happens either when dealing with an overflow
2859 // Try to reduce the number of available SATB buffers so that
2860 // remark has less work to do.
2861 drain_satb_buffers();
2862 }
2863
2864 // Since we've done everything else, we can now totally drain the
2865 // local queue and global stack.
2866 drain_local_queue(false);
2867 drain_global_stack(false);
2868
2869 // Attempt to steal work from other tasks' queues.
2870 if (do_stealing && !has_aborted()) {
2871 // We have not aborted. This means that we have finished all that
2872 // we could. Let's try to do some stealing...
2873
2874 // We cannot check whether the global stack is empty, since other
2875 // tasks might be pushing objects to it concurrently.
2876 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2877 "only way to reach here");
2878 while (!has_aborted()) {
2879 G1TaskQueueEntry obj;
2880 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
2881 scan_object(obj);
2882
2883 // And since we're towards the end, let's totally drain the
2884 // local queue and global stack.
2885 drain_local_queue(false);
2886 drain_global_stack(false);
2887 } else {
2888 break;
2889 }
2890 }
2891 }
2892
2893 // We still haven't aborted. Now, let's try to get into the
2894 // termination protocol.
2895 if (do_termination && !has_aborted()) {
2896 // We cannot check whether the global stack is empty, since other
2897 // tasks might be concurrently pushing objects on it.
2898 // Separated the asserts so that we know which one fires.
2899 assert(_cm->out_of_regions(), "only way to reach here");