254 }
255
256 G1CMMarkStack::OopChunk* G1CMMarkStack::allocate_new_chunk() {
257 // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
258 // Further, this limits _hwm to a value of _chunk_capacity + #threads, avoiding
259 // wraparound of _hwm.
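// (A thread only performs the increment below after the early-out check has
// seen _hwm below _chunk_capacity, so once the capacity is reached each
// thread can have at most one such increment still pending.)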
260 if (_hwm >= _chunk_capacity) {
261 return NULL;
262 }
263
264 size_t cur_idx = Atomic::add(1, &_hwm) - 1;
265 if (cur_idx >= _chunk_capacity) {
266 return NULL;
267 }
268
269 OopChunk* result = ::new (&_base[cur_idx]) OopChunk;
270 result->next = NULL;
271 return result;
272 }
273
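// Push a chunk of OopsPerChunk entries from ptr_arr onto the global mark
// stack: reuse a chunk from the free list if one is available, otherwise
// allocate one from the backing memory. Returns false (and records the
// out-of-memory condition) if no chunk could be obtained.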
274 bool G1CMMarkStack::par_push_chunk(oop* ptr_arr) {
275 // Get a new chunk.
276 OopChunk* new_chunk = remove_chunk_from_free_list();
277
278 if (new_chunk == NULL) {
279 // Did not get a chunk from the free list. Allocate from backing memory.
280 new_chunk = allocate_new_chunk();
281 }
282
283 if (new_chunk == NULL) {
284 _out_of_memory = true;
285 return false;
286 }
287
288 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, OopsPerChunk * sizeof(oop));
289
290 add_chunk_to_chunk_list(new_chunk);
291
292 return true;
293 }
294
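// Pop one chunk of OopsPerChunk entries from the global mark stack into
// ptr_arr and return the emptied chunk to the free list. Returns false if
// the chunk list is empty.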
295 bool G1CMMarkStack::par_pop_chunk(oop* ptr_arr) {
296 OopChunk* cur = remove_chunk_from_chunk_list();
297
298 if (cur == NULL) {
299 return false;
300 }
301
302 Copy::conjoint_memory_atomic(cur->data, ptr_arr, OopsPerChunk * sizeof(oop));
303
304 add_chunk_to_free_list(cur);
305 return true;
306 }
307
308 void G1CMMarkStack::set_empty() {
309 _chunks_in_chunk_list = 0;
310 _hwm = 0;
311 clear_out_of_memory();
312 _chunk_list = NULL;
313 _free_list = NULL;
314 }
315
316 G1CMRootRegions::G1CMRootRegions() :
317 _cm(NULL), _scan_in_progress(false),
318 _should_abort(false), _claimed_survivor_index(0) { }
319
320 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
321 _survivors = survivors;
322 _cm = cm;
1991 }
1992 }
1993
1994 return NULL;
1995 }
1996
1997 #ifndef PRODUCT
1998 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1999 private:
2000 G1CollectedHeap* _g1h;
2001 const char* _phase;
2002 int _info;
2003
2004 public:
2005 VerifyNoCSetOops(const char* phase, int info = -1) :
2006 _g1h(G1CollectedHeap::heap()),
2007 _phase(phase),
2008 _info(info)
2009 { }
2010
2011 void operator()(oop obj) const {
2012 guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || obj->is_oop(),
2013 "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2014 p2i(obj), _phase, _info);
2015 guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_in_cset(obj),
2016 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2017 p2i(obj), _phase, _info);
2018 }
2019 };
2020
2021 void G1ConcurrentMark::verify_no_cset_oops() {
2022 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2023 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2024 return;
2025 }
2026
2027 // Verify entries on the global mark stack
2028 _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
2029
2030 // Verify entries on the task queues
2031 for (uint i = 0; i < _max_worker_id; ++i) {
2032 G1CMTaskQueue* queue = _task_queues->queue(i);
2033 queue->iterate(VerifyNoCSetOops("Queue", i));
2034 }
2035
2036 // Verify the global finger
2037 HeapWord* global_finger = finger();
2382
2383 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2384 _refs_reached_limit = _real_refs_reached_limit;
2385 }
2386
2387 void G1CMTask::decrease_limits() {
2388 // This is called when we believe that we're about to do an infrequent
2389 // operation which will increase the per-byte scanning cost (e.g. moving
2390 // entries to/from the global stack). It decreases the scanning limits
2391 // so that the clock is called earlier.
2392
2393 _words_scanned_limit = _real_words_scanned_limit -
2394 3 * words_scanned_period / 4;
2395 _refs_reached_limit = _real_refs_reached_limit -
2396 3 * refs_reached_period / 4;
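// The real limits sit one full period ahead of the current counters (see the
// assignment to _real_refs_reached_limit above), so after this the next
// regular clock call comes after roughly a quarter of the usual period.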
2397 }
2398
2399 void G1CMTask::move_entries_to_global_stack() {
2400 // Local array where we'll store the entries that will be popped
2401 // from the local queue.
2402 oop buffer[G1CMMarkStack::OopsPerChunk];
2403
2404 size_t n = 0;
2405 oop obj;
2406 while (n < G1CMMarkStack::OopsPerChunk && _task_queue->pop_local(obj)) {
2407 buffer[n] = obj;
2408 ++n;
2409 }
2410 if (n < G1CMMarkStack::OopsPerChunk) {
2411 buffer[n] = NULL;
2412 }
2413
2414 if (n > 0) {
2415 if (!_cm->mark_stack_push(buffer)) {
2416 set_has_aborted();
2417 }
2418 }
2419
2420 // This operation was quite expensive, so decrease the limits.
2421 decrease_limits();
2422 }
2423
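// Transfer one chunk of entries from the global mark stack into the local
// task queue. Returns false if nothing could be popped from the global
// stack.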
2424 bool G1CMTask::get_entries_from_global_stack() {
2425 // Local array where we'll store the entries that will be popped
2426 // from the global stack.
2427 oop buffer[G1CMMarkStack::OopsPerChunk];
2428
2429 if (!_cm->mark_stack_pop(buffer)) {
2430 return false;
2431 }
2432
2433 // We did actually pop at least one entry.
2434 for (size_t i = 0; i < G1CMMarkStack::OopsPerChunk; ++i) {
2435 oop elem = buffer[i];
2436 if (elem == NULL) {
2437 break;
2438 }
2439 assert(G1CMObjArrayProcessor::is_array_slice(elem) || elem->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(elem));
2440 bool success = _task_queue->push(elem);
2441 // We only call this when the local queue is empty or under a
2442 // given target limit. So, we do not expect this push to fail.
2443 assert(success, "invariant");
2444 }
2445
2446 // This operation was quite expensive, so decrease the limits
2447 decrease_limits();
2448 return true;
2449 }
2450
2451 void G1CMTask::drain_local_queue(bool partially) {
2452 if (has_aborted()) {
2453 return;
2454 }
2455
2456 // Decide what the target size is, depending on whether we're going to
2457 // drain it partially (so that other tasks can steal if they run out
2458 // of things to do) or totally (at the very end).
2459 size_t target_size;
2460 if (partially) {
2461 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2462 } else {
2463 target_size = 0;
2464 }
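// A target_size of 0 means the loop below drains the queue completely.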
2465
2466 if (_task_queue->size() > target_size) {
2467 oop obj;
2468 bool ret = _task_queue->pop_local(obj);
2469 while (ret) {
2470 scan_object(obj);
2471 if (_task_queue->size() <= target_size || has_aborted()) {
2472 ret = false;
2473 } else {
2474 ret = _task_queue->pop_local(obj);
2475 }
2476 }
2477 }
2478 }
2479
2480 void G1CMTask::drain_global_stack(bool partially) {
2481 if (has_aborted()) return;
2482
2483 // We have a policy to drain the local queue before we attempt to
2484 // drain the global stack.
2485 assert(partially || _task_queue->size() == 0, "invariant");
2486
2487 // Decide what the target size is, depending on whether we're going to
2535 concurrent() ||
2536 satb_mq_set.completed_buffers_num() == 0, "invariant");
2537
2538 // Again, this was a potentially expensive operation, so decrease the
2539 // limits to get the regular clock call early.
2540 decrease_limits();
2541 }
2542
2543 void G1CMTask::print_stats() {
2544 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2545 _worker_id, _calls);
2546 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2547 _elapsed_time_ms, _termination_time_ms);
2548 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2549 _step_times_ms.num(), _step_times_ms.avg(),
2550 _step_times_ms.sd());
2551 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms",
2552 _step_times_ms.maximum(), _step_times_ms.sum());
2553 }
2554
2555 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
2556 return _task_queues->steal(worker_id, hash_seed, obj);
2557 }
2558
2559 /*****************************************************************************
2560
2561 The do_marking_step(time_target_ms, ...) method is the building
2562 block of the parallel marking framework. It can be called in parallel
2563 with other invocations of do_marking_step() on different tasks
2564 (but only one per task, obviously), either concurrently with the
2565 mutator threads or during remark, hence it eliminates the need
2566 for two versions of the code. When called during remark, it will
2567 pick up from where the task left off during the concurrent marking
2568 phase. Interestingly, tasks are also claimable during evacuation
2569 pauses, since do_marking_step() ensures that it aborts before
2570 it needs to yield.
2571
2572 The data structures that it uses to do marking work are the
2573 following:
2574
2575 (1) Marking Bitmap. If there are gray objects that appear only
2576 on the bitmap (this happens either when dealing with an overflow
2859 // Try to reduce the number of available SATB buffers so that
2860 // remark has less work to do.
2861 drain_satb_buffers();
2862 }
2863
2864 // Since we've done everything else, we can now totally drain the
2865 // local queue and global stack.
2866 drain_local_queue(false);
2867 drain_global_stack(false);
2868
2869 // Attempt to steal work from other tasks' queues.
2870 if (do_stealing && !has_aborted()) {
2871 // We have not aborted. This means that we have finished all that
2872 // we could. Let's try to do some stealing...
2873
2874 // We cannot check whether the global stack is empty, since other
2875 // tasks might be pushing objects to it concurrently.
2876 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2877 "only way to reach here");
2878 while (!has_aborted()) {
2879 oop obj;
2880 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
2881 scan_object(obj);
2882
2883 // And since we're towards the end, let's totally drain the
2884 // local queue and global stack.
2885 drain_local_queue(false);
2886 drain_global_stack(false);
2887 } else {
2888 break;
2889 }
2890 }
2891 }
2892
2893 // We still haven't aborted. Now, let's try to get into the
2894 // termination protocol.
2895 if (do_termination && !has_aborted()) {
2896 // We cannot check whether the global stack is empty, since other
2897 // tasks might be concurrently pushing objects on it.
2898 // Separated the asserts so that we know which one fires.
2899 assert(_cm->out_of_regions(), "only way to reach here");
|
254 }
255
256 G1CMMarkStack::OopChunk* G1CMMarkStack::allocate_new_chunk() {
257 // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
258 // Further, this limits _hwm to a value of _chunk_capacity + #threads, avoiding
259 // wraparound of _hwm.
260 if (_hwm >= _chunk_capacity) {
261 return NULL;
262 }
263
264 size_t cur_idx = Atomic::add(1, &_hwm) - 1;
265 if (cur_idx >= _chunk_capacity) {
266 return NULL;
267 }
268
269 OopChunk* result = ::new (&_base[cur_idx]) OopChunk;
270 result->next = NULL;
271 return result;
272 }
273
274 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
275 // Get a new chunk.
276 OopChunk* new_chunk = remove_chunk_from_free_list();
277
278 if (new_chunk == NULL) {
279 // Did not get a chunk from the free list. Allocate from backing memory.
280 new_chunk = allocate_new_chunk();
281 }
282
283 if (new_chunk == NULL) {
284 _out_of_memory = true;
285 return false;
286 }
287
288 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
289
290 add_chunk_to_chunk_list(new_chunk);
291
292 return true;
293 }
294
295 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
296 OopChunk* cur = remove_chunk_from_chunk_list();
297
298 if (cur == NULL) {
299 return false;
300 }
301
302 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
303
304 add_chunk_to_free_list(cur);
305 return true;
306 }
307
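// Reset the stack to its initial empty state. The backing memory is retained
// for reuse; the chunk list, free list and high-water mark are simply reset.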
308 void G1CMMarkStack::set_empty() {
309 _chunks_in_chunk_list = 0;
310 _hwm = 0;
311 clear_out_of_memory();
312 _chunk_list = NULL;
313 _free_list = NULL;
314 }
315
316 G1CMRootRegions::G1CMRootRegions() :
317 _cm(NULL), _scan_in_progress(false),
318 _should_abort(false), _claimed_survivor_index(0) { }
319
320 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
321 _survivors = survivors;
322 _cm = cm;
1991 }
1992 }
1993
1994 return NULL;
1995 }
1996
1997 #ifndef PRODUCT
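// Functor applied to every entry on the global mark stack and the task queues
// during verification: each entry must either be an array slice or a valid
// oop that is not in the collection set.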
1998 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1999 private:
2000 G1CollectedHeap* _g1h;
2001 const char* _phase;
2002 int _info;
2003
2004 public:
2005 VerifyNoCSetOops(const char* phase, int info = -1) :
2006 _g1h(G1CollectedHeap::heap()),
2007 _phase(phase),
2008 _info(info)
2009 { }
2010
2011 void operator()(G1TaskQueueEntry task_entry) const {
2012 guarantee(task_entry.is_array_slice() || task_entry.obj()->is_oop(),
2013 "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2014 p2i(task_entry.obj()), _phase, _info);
2015 guarantee(task_entry.is_array_slice() || !_g1h->is_in_cset(task_entry.obj()),
2016 "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2017 p2i(task_entry.obj()), _phase, _info);
2018 }
2019 };
2020
2021 void G1ConcurrentMark::verify_no_cset_oops() {
2022 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2023 if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2024 return;
2025 }
2026
2027 // Verify entries on the global mark stack
2028 _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
2029
2030 // Verify entries on the task queues
2031 for (uint i = 0; i < _max_worker_id; ++i) {
2032 G1CMTaskQueue* queue = _task_queues->queue(i);
2033 queue->iterate(VerifyNoCSetOops("Queue", i));
2034 }
2035
2036 // Verify the global finger
2037 HeapWord* global_finger = finger();
2382
2383 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2384 _refs_reached_limit = _real_refs_reached_limit;
2385 }
2386
2387 void G1CMTask::decrease_limits() {
2388 // This is called when we believe that we're about to do an infrequent
2389 // operation which will increase the per-byte scanning cost (e.g. moving
2390 // entries to/from the global stack). It decreases the scanning limits
2391 // so that the clock is called earlier.
2392
2393 _words_scanned_limit = _real_words_scanned_limit -
2394 3 * words_scanned_period / 4;
2395 _refs_reached_limit = _real_refs_reached_limit -
2396 3 * refs_reached_period / 4;
2397 }
2398
2399 void G1CMTask::move_entries_to_global_stack() {
2400 // Local array where we'll store the entries that will be popped
2401 // from the local queue.
2402 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2403
2404 size_t n = 0;
2405 G1TaskQueueEntry task_entry;
2406 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2407 buffer[n] = task_entry;
2408 ++n;
2409 }
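// If the chunk is not completely filled, terminate it with an empty entry so
// that get_entries_from_global_stack() knows where the valid entries end.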
2410 if (n < G1CMMarkStack::EntriesPerChunk) {
2411 buffer[n] = G1TaskQueueEntry();
2412 }
2413
2414 if (n > 0) {
2415 if (!_cm->mark_stack_push(buffer)) {
2416 set_has_aborted();
2417 }
2418 }
2419
2420 // This operation was quite expensive, so decrease the limits.
2421 decrease_limits();
2422 }
2423
2424 bool G1CMTask::get_entries_from_global_stack() {
2425 // Local array where we'll store the entries that will be popped
2426 // from the global stack.
2427 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2428
2429 if (!_cm->mark_stack_pop(buffer)) {
2430 return false;
2431 }
2432
2433 // We did actually pop at least one entry.
2434 for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2435 G1TaskQueueEntry task_entry = buffer[i];
2436 if (task_entry.is_null()) {
2437 break;
2438 }
2439 assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2440 bool success = _task_queue->push(task_entry);
2441 // We only call this when the local queue is empty or under a
2442 // given target limit. So, we do not expect this push to fail.
2443 assert(success, "invariant");
2444 }
2445
2446 // This operation was quite expensive, so decrease the limits
2447 decrease_limits();
2448 return true;
2449 }
2450
2451 void G1CMTask::drain_local_queue(bool partially) {
2452 if (has_aborted()) {
2453 return;
2454 }
2455
2456 // Decide what the target size is, depending on whether we're going to
2457 // drain it partially (so that other tasks can steal if they run out
2458 // of things to do) or totally (at the very end).
2459 size_t target_size;
2460 if (partially) {
2461 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2462 } else {
2463 target_size = 0;
2464 }
2465
2466 if (_task_queue->size() > target_size) {
2467 G1TaskQueueEntry obj;
2468 bool ret = _task_queue->pop_local(obj);
2469 while (ret) {
2470 scan_object(obj);
2471 if (_task_queue->size() <= target_size || has_aborted()) {
2472 ret = false;
2473 } else {
2474 ret = _task_queue->pop_local(obj);
2475 }
2476 }
2477 }
2478 }
2479
2480 void G1CMTask::drain_global_stack(bool partially) {
2481 if (has_aborted()) return;
2482
2483 // We have a policy to drain the local queue before we attempt to
2484 // drain the global stack.
2485 assert(partially || _task_queue->size() == 0, "invariant");
2486
2487 // Decide what the target size is, depending on whether we're going to
2535 concurrent() ||
2536 satb_mq_set.completed_buffers_num() == 0, "invariant");
2537
2538 // Again, this was a potentially expensive operation, so decrease the
2539 // limits to get the regular clock call early.
2540 decrease_limits();
2541 }
2542
2543 void G1CMTask::print_stats() {
2544 log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2545 _worker_id, _calls);
2546 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2547 _elapsed_time_ms, _termination_time_ms);
2548 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2549 _step_times_ms.num(), _step_times_ms.avg(),
2550 _step_times_ms.sd());
2551 log_debug(gc, stats)(" max = %1.2lfms, total = %1.2lfms",
2552 _step_times_ms.maximum(), _step_times_ms.sum());
2553 }
2554
2555 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2556 return _task_queues->steal(worker_id, hash_seed, task_entry);
2557 }
2558
2559 /*****************************************************************************
2560
2561 The do_marking_step(time_target_ms, ...) method is the building
2562 block of the parallel marking framework. It can be called in parallel
2563 with other invocations of do_marking_step() on different tasks
2564 (but only one per task, obviously), either concurrently with the
2565 mutator threads or during remark, hence it eliminates the need
2566 for two versions of the code. When called during remark, it will
2567 pick up from where the task left off during the concurrent marking
2568 phase. Interestingly, tasks are also claimable during evacuation
2569 pauses, since do_marking_step() ensures that it aborts before
2570 it needs to yield.
2571
2572 The data structures that it uses to do marking work are the
2573 following:
2574
2575 (1) Marking Bitmap. If there are gray objects that appear only
2576 on the bitmap (this happens either when dealing with an overflow
2859 // Try to reduce the number of available SATB buffers so that
2860 // remark has less work to do.
2861 drain_satb_buffers();
2862 }
2863
2864 // Since we've done everything else, we can now totally drain the
2865 // local queue and global stack.
2866 drain_local_queue(false);
2867 drain_global_stack(false);
2868
2869 // Attempt to steal work from other tasks' queues.
2870 if (do_stealing && !has_aborted()) {
2871 // We have not aborted. This means that we have finished all that
2872 // we could. Let's try to do some stealing...
2873
2874 // We cannot check whether the global stack is empty, since other
2875 // tasks might be pushing objects to it concurrently.
2876 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2877 "only way to reach here");
2878 while (!has_aborted()) {
2879 G1TaskQueueEntry obj;
2880 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
2881 scan_object(obj);
2882
2883 // And since we're towards the end, let's totally drain the
2884 // local queue and global stack.
2885 drain_local_queue(false);
2886 drain_global_stack(false);
2887 } else {
2888 break;
2889 }
2890 }
2891 }
2892
2893 // We still haven't aborted. Now, let's try to get into the
2894 // termination protocol.
2895 if (do_termination && !has_aborted()) {
2896 // We cannot check whether the global stack is empty, since other
2897 // tasks might be concurrently pushing objects on it.
2898 // Separated the asserts so that we know which one fires.
2899 assert(_cm->out_of_regions(), "only way to reach here");