
src/share/vm/gc/g1/g1ConcurrentMark.cpp

 129   assert(!mr.is_empty(), "unexpected empty region");
 130   // convert address range into offset range
 131   _bm.at_put_range(heapWordToOffset(mr.start()),
 132                    heapWordToOffset(mr.end()), false);
 133 }
 134 
 135 G1CMMarkStack::G1CMMarkStack() :
 136   _max_chunk_capacity(0),
 137   _base(NULL),
 138   _chunk_capacity(0),
 139   _out_of_memory(false),
 140   _should_expand(false) {
 141   set_empty();
 142 }
 143 
 144 bool G1CMMarkStack::resize(size_t new_capacity) {
 145   assert(is_empty(), "Only resize when stack is empty.");
 146   assert(new_capacity <= _max_chunk_capacity,
 147          "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
 148 
 149   OopChunk* new_base = MmapArrayAllocator<OopChunk, mtGC>::allocate_or_null(new_capacity);
 150 
 151   if (new_base == NULL) {
 152     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(OopChunk));
 153     return false;
 154   }
 155   // Release old mapping.
 156   if (_base != NULL) {
 157     MmapArrayAllocator<OopChunk, mtGC>::free(_base, _chunk_capacity);
 158   }
 159 
 160   _base = new_base;
 161   _chunk_capacity = new_capacity;
 162   set_empty();
 163   _should_expand = false;
 164 
 165   return true;
 166 }
 167 
 168 size_t G1CMMarkStack::capacity_alignment() {
 169   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(OopChunk)) / sizeof(void*);
 170 }
 171 
 172 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
 173   guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
 174 
 175   size_t const OopChunkSizeInVoidStar = sizeof(OopChunk) / sizeof(void*);
 176 
 177   _max_chunk_capacity = (size_t)align_size_up(max_capacity, capacity_alignment()) / OopChunkSizeInVoidStar;
 178   size_t initial_chunk_capacity = (size_t)align_size_up(initial_capacity, capacity_alignment()) / OopChunkSizeInVoidStar;
 179 
 180   guarantee(initial_chunk_capacity <= _max_chunk_capacity,
 181             "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
 182             _max_chunk_capacity,
 183             initial_chunk_capacity);
 184 
 185   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
 186                 initial_chunk_capacity, _max_chunk_capacity);
 187 
 188   return resize(initial_chunk_capacity);
 189 }
 190 
 191 void G1CMMarkStack::expand() {
 192   // Clear expansion flag
 193   _should_expand = false;
 194 
 195   if (_chunk_capacity == _max_chunk_capacity) {
 196     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
 197     return;
 198   }
 199   size_t old_capacity = _chunk_capacity;
 200   // Double capacity if possible
 201   size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 202 
 203   if (resize(new_capacity)) {
 204     log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 205                   old_capacity, new_capacity);
 206   } else {
 207     log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 208                     old_capacity, new_capacity);
 209   }
 210 }
 211 
 212 G1CMMarkStack::~G1CMMarkStack() {
 213   if (_base != NULL) {
 214     MmapArrayAllocator<OopChunk, mtGC>::free(_base, _chunk_capacity);
 215   }
 216 }
 217 
 218 void G1CMMarkStack::add_chunk_to_list(OopChunk* volatile* list, OopChunk* elem) {
 219   elem->next = *list;
 220   *list = elem;
 221 }
 222 
 223 void G1CMMarkStack::add_chunk_to_chunk_list(OopChunk* elem) {
 224   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 225   add_chunk_to_list(&_chunk_list, elem);
 226   _chunks_in_chunk_list++;
 227 }
 228 
 229 void G1CMMarkStack::add_chunk_to_free_list(OopChunk* elem) {
 230   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 231   add_chunk_to_list(&_free_list, elem);
 232 }
 233 
 234 G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_list(OopChunk* volatile* list) {
 235   OopChunk* result = *list;
 236   if (result != NULL) {
 237     *list = (*list)->next;
 238   }
 239   return result;
 240 }
 241 
 242 G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 243   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 244   OopChunk* result = remove_chunk_from_list(&_chunk_list);
 245   if (result != NULL) {
 246     _chunks_in_chunk_list--;
 247   }
 248   return result;
 249 }
 250 
 251 G1CMMarkStack::OopChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 252   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 253   return remove_chunk_from_list(&_free_list);
 254 }
 255 
 256 G1CMMarkStack::OopChunk* G1CMMarkStack::allocate_new_chunk() {
 257   // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
 258   // Further, this limits _hwm to a value of _chunk_capacity + #threads, avoiding
 259   // wraparound of _hwm.
 260   if (_hwm >= _chunk_capacity) {
 261     return NULL;
 262   }
 263 
 264   size_t cur_idx = Atomic::add(1, &_hwm) - 1;
 265   if (cur_idx >= _chunk_capacity) {
 266     return NULL;
 267   }
 268 
 269   OopChunk* result = ::new (&_base[cur_idx]) OopChunk;
 270   result->next = NULL;
 271   return result;
 272 }
 273 
 274 bool G1CMMarkStack::par_push_chunk(oop* ptr_arr) {
 275   // Get a new chunk.
 276   OopChunk* new_chunk = remove_chunk_from_free_list();
 277 
 278   if (new_chunk == NULL) {
 279     // Did not get a chunk from the free list. Allocate from backing memory.
 280     new_chunk = allocate_new_chunk();
 281   }
 282 
 283   if (new_chunk == NULL) {
 284     _out_of_memory = true;
 285     return false;
 286   }
 287 
 288   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, OopsPerChunk * sizeof(oop));
 289 
 290   add_chunk_to_chunk_list(new_chunk);
 291 
 292   return true;
 293 }
 294 
 295 bool G1CMMarkStack::par_pop_chunk(oop* ptr_arr) {
 296   OopChunk* cur = remove_chunk_from_chunk_list();
 297 
 298   if (cur == NULL) {
 299     return false;
 300   }
 301 
 302   Copy::conjoint_memory_atomic(cur->data, ptr_arr, OopsPerChunk * sizeof(oop));
 303 
 304   add_chunk_to_free_list(cur);
 305   return true;
 306 }
 307 
 308 void G1CMMarkStack::set_empty() {
 309   _chunks_in_chunk_list = 0;
 310   _hwm = 0;
 311   clear_out_of_memory();
 312   _chunk_list = NULL;
 313   _free_list = NULL;
 314 }
 315 
 316 G1CMRootRegions::G1CMRootRegions() :
 317   _cm(NULL), _scan_in_progress(false),
 318   _should_abort(false), _claimed_survivor_index(0) { }
 319 
 320 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
 321   _survivors = survivors;
 322   _cm = cm;


1991     }
1992   }
1993 
1994   return NULL;
1995 }
1996 
1997 #ifndef PRODUCT
1998 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1999 private:
2000   G1CollectedHeap* _g1h;
2001   const char* _phase;
2002   int _info;
2003 
2004 public:
2005   VerifyNoCSetOops(const char* phase, int info = -1) :
2006     _g1h(G1CollectedHeap::heap()),
2007     _phase(phase),
2008     _info(info)
2009   { }
2010 
2011   void operator()(oop obj) const {
2012     guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || obj->is_oop(),
2013               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2014               p2i(obj), _phase, _info);
2015     guarantee(G1CMObjArrayProcessor::is_array_slice(obj) || !_g1h->is_in_cset(obj),
2016               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2017               p2i(obj), _phase, _info);
2018   }
2019 };
2020 
2021 void G1ConcurrentMark::verify_no_cset_oops() {
2022   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2023   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2024     return;
2025   }
2026 
2027   // Verify entries on the global mark stack
2028   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
2029 
2030   // Verify entries on the task queues
2031   for (uint i = 0; i < _max_worker_id; ++i) {
2032     G1CMTaskQueue* queue = _task_queues->queue(i);
2033     queue->iterate(VerifyNoCSetOops("Queue", i));
2034   }
2035 
2036   // Verify the global finger
2037   HeapWord* global_finger = finger();


2191 class G1CMBitMapClosure : public BitMapClosure {
2192 private:
2193   // the bitmap that is being iterated over
2194   G1CMBitMap*                 _nextMarkBitMap;
2195   G1ConcurrentMark*           _cm;
2196   G1CMTask*                   _task;
2197 
2198 public:
2199   G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
2200     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2201 
2202   bool do_bit(size_t offset) {
2203     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2204     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2205     assert(addr < _cm->finger(), "invariant");
2206     assert(addr >= _task->finger(), "invariant");
2207 
2208     // We move that task's local finger along.
2209     _task->move_finger_to(addr);
2210 
2211     _task->scan_object(oop(addr));
2212     // we only partially drain the local queue and global stack
2213     _task->drain_local_queue(true);
2214     _task->drain_global_stack(true);
2215 
2216     // if the has_aborted flag has been raised, we need to bail out of
2217     // the iteration
2218     return !_task->has_aborted();
2219   }
2220 };
2221 
2222 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2223   ReferenceProcessor* result = g1h->ref_processor_cm();
2224   assert(result != NULL, "CM reference processor should not be NULL");
2225   return result;
2226 }
2227 
2228 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2229                                G1ConcurrentMark* cm,
2230                                G1CMTask* task)
2231   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),


2382 
2383   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2384   _refs_reached_limit       = _real_refs_reached_limit;
2385 }
2386 
2387 void G1CMTask::decrease_limits() {
2388   // This is called when we believe that we're going to do an infrequent
2389   // operation which will increase the per byte scanned cost (i.e. move
2390   // entries to/from the global stack). It basically tries to decrease the
2391   // scanning limit so that the clock is called earlier.
2392 
2393   _words_scanned_limit = _real_words_scanned_limit -
2394     3 * words_scanned_period / 4;
2395   _refs_reached_limit  = _real_refs_reached_limit -
2396     3 * refs_reached_period / 4;
2397 }
2398 
2399 void G1CMTask::move_entries_to_global_stack() {
2400   // Local array where we'll store the entries that will be popped
2401   // from the local queue.
2402   oop buffer[G1CMMarkStack::OopsPerChunk];
2403 
2404   size_t n = 0;
2405   oop obj;
2406   while (n < G1CMMarkStack::OopsPerChunk && _task_queue->pop_local(obj)) {
2407     buffer[n] = obj;
2408     ++n;
2409   }
2410   if (n < G1CMMarkStack::OopsPerChunk) {
2411     buffer[n] = NULL;
2412   }
2413 
2414   if (n > 0) {
2415     if (!_cm->mark_stack_push(buffer)) {
2416       set_has_aborted();
2417     }
2418   }
2419 
2420   // This operation was quite expensive, so decrease the limits.
2421   decrease_limits();
2422 }
2423 
2424 bool G1CMTask::get_entries_from_global_stack() {
2425   // Local array where we'll store the entries that will be popped
2426   // from the global stack.
2427   oop buffer[G1CMMarkStack::OopsPerChunk];
2428 
2429   if (!_cm->mark_stack_pop(buffer)) {
2430     return false;
2431   }
2432 
2433   // We did actually pop at least one entry.
2434   for (size_t i = 0; i < G1CMMarkStack::OopsPerChunk; ++i) {
2435     oop elem = buffer[i];
2436     if (elem == NULL) {
2437       break;
2438     }
2439     assert(G1CMObjArrayProcessor::is_array_slice(elem) || elem->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(elem));
2440     bool success = _task_queue->push(elem);
2441     // We only call this when the local queue is empty or under a
2442     // given target limit. So, we do not expect this push to fail.
2443     assert(success, "invariant");
2444   }
2445 
2446   // This operation was quite expensive, so decrease the limits
2447   decrease_limits();
2448   return true;
2449 }
2450 
2451 void G1CMTask::drain_local_queue(bool partially) {
2452   if (has_aborted()) {
2453     return;
2454   }
2455 
2456   // Decide what the target size is, depending on whether we're going to
2457   // drain it partially (so that other tasks can steal if they run out
2458   // of things to do) or totally (at the very end).
2459   size_t target_size;
2460   if (partially) {
2461     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2462   } else {
2463     target_size = 0;
2464   }
2465 
2466   if (_task_queue->size() > target_size) {
2467     oop obj;
2468     bool ret = _task_queue->pop_local(obj);
2469     while (ret) {
2470       scan_object(obj);
2471       if (_task_queue->size() <= target_size || has_aborted()) {
2472         ret = false;
2473       } else {
2474         ret = _task_queue->pop_local(obj);
2475       }
2476     }
2477   }
2478 }
2479 
2480 void G1CMTask::drain_global_stack(bool partially) {
2481   if (has_aborted()) return;
2482 
2483   // We have a policy to drain the local queue before we attempt to
2484   // drain the global stack.
2485   assert(partially || _task_queue->size() == 0, "invariant");
2486 
2487   // Decide what the target size is, depending on whether we're going to
2488   // drain it partially (so that other tasks can steal if they run out
2489   // of things to do) or totally (at the very end).
2490   // Notice that when draining the global mark stack partially, due to the raciness
2491   // of the mark stack size update, we might in fact drop below the target. But
2492   // this is not a problem.
2493   // In case of total draining, we simply process until the global mark stack is
2494   // totally empty, disregarding the size counter.


2535          concurrent() ||
2536          satb_mq_set.completed_buffers_num() == 0, "invariant");
2537 
2538   // again, this was a potentially expensive operation, so decrease the
2539   // limits to get the regular clock call early
2540   decrease_limits();
2541 }
2542 
2543 void G1CMTask::print_stats() {
2544   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2545                        _worker_id, _calls);
2546   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2547                        _elapsed_time_ms, _termination_time_ms);
2548   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2549                        _step_times_ms.num(), _step_times_ms.avg(),
2550                        _step_times_ms.sd());
2551   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
2552                        _step_times_ms.maximum(), _step_times_ms.sum());
2553 }
2554 
2555 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
2556   return _task_queues->steal(worker_id, hash_seed, obj);
2557 }
2558 
2559 /*****************************************************************************
2560 
2561     The do_marking_step(time_target_ms, ...) method is the building
2562     block of the parallel marking framework. It can be called in parallel
2563     with other invocations of do_marking_step() on different tasks
2564     (but only one per task, obviously) and concurrently with the
2565     mutator threads, or during remark, hence it eliminates the need
2566     for two versions of the code. When called during remark, it will
2567     pick up from where the task left off during the concurrent marking
2568     phase. Interestingly, tasks are also claimable during evacuation
2569     pauses, since do_marking_step() ensures that it aborts before
2570     it needs to yield.
2571 
2572     The data structures that it uses to do marking work are the
2573     following:
2574 
2575       (1) Marking Bitmap. If there are gray objects that appear only
2576       on the bitmap (this happens either when dealing with an overflow


2859     // Try to reduce the number of available SATB buffers so that
2860     // remark has less work to do.
2861     drain_satb_buffers();
2862   }
2863 
2864   // Since we've done everything else, we can now totally drain the
2865   // local queue and global stack.
2866   drain_local_queue(false);
2867   drain_global_stack(false);
2868 
2869   // Attempt at work stealing from other tasks' queues.
2870   if (do_stealing && !has_aborted()) {
2871     // We have not aborted. This means that we have finished all that
2872     // we could. Let's try to do some stealing...
2873 
2874     // We cannot check whether the global stack is empty, since other
2875     // tasks might be pushing objects to it concurrently.
2876     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2877            "only way to reach here");
2878     while (!has_aborted()) {
2879       oop obj;
2880       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
2881         scan_object(obj);
2882 
2883         // And since we're towards the end, let's totally drain the
2884         // local queue and global stack.
2885         drain_local_queue(false);
2886         drain_global_stack(false);
2887       } else {
2888         break;
2889       }
2890     }
2891   }
2892 
2893   // We still haven't aborted. Now, let's try to get into the
2894   // termination protocol.
2895   if (do_termination && !has_aborted()) {
2896     // We cannot check whether the global stack is empty, since other
2897     // tasks might be concurrently pushing objects on it.
2898     // Separated the asserts so that we know which one fires.
2899     assert(_cm->out_of_regions(), "only way to reach here");
2900     assert(_task_queue->size() == 0, "only way to reach here");
2901     _termination_start_time_ms = os::elapsedVTime() * 1000.0;




 129   assert(!mr.is_empty(), "unexpected empty region");
 130   // convert address range into offset range
 131   _bm.at_put_range(heapWordToOffset(mr.start()),
 132                    heapWordToOffset(mr.end()), false);
 133 }
 134 
 135 G1CMMarkStack::G1CMMarkStack() :
 136   _max_chunk_capacity(0),
 137   _base(NULL),
 138   _chunk_capacity(0),
 139   _out_of_memory(false),
 140   _should_expand(false) {
 141   set_empty();
 142 }
 143 
 144 bool G1CMMarkStack::resize(size_t new_capacity) {
 145   assert(is_empty(), "Only resize when stack is empty.");
 146   assert(new_capacity <= _max_chunk_capacity,
 147          "Trying to resize stack to " SIZE_FORMAT " chunks when the maximum is " SIZE_FORMAT, new_capacity, _max_chunk_capacity);
 148 
 149   TaskQueueEntryChunk* new_base = MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::allocate_or_null(new_capacity);
 150 
 151   if (new_base == NULL) {
 152     log_warning(gc)("Failed to reserve memory for new overflow mark stack with " SIZE_FORMAT " chunks and size " SIZE_FORMAT "B.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 153     return false;
 154   }
 155   // Release old mapping.
 156   if (_base != NULL) {
 157     MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
 158   }
 159 
 160   _base = new_base;
 161   _chunk_capacity = new_capacity;
 162   set_empty();
 163   _should_expand = false;
 164 
 165   return true;
 166 }
 167 
 168 size_t G1CMMarkStack::capacity_alignment() {
 169   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 170 }
 171 
 172 bool G1CMMarkStack::initialize(size_t initial_capacity, size_t max_capacity) {
 173   guarantee(_max_chunk_capacity == 0, "G1CMMarkStack already initialized.");
 174 
 175   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 176 
 177   _max_chunk_capacity = (size_t)align_size_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 178   size_t initial_chunk_capacity = (size_t)align_size_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 179 
 180   guarantee(initial_chunk_capacity <= _max_chunk_capacity,
 181             "Maximum chunk capacity " SIZE_FORMAT " smaller than initial capacity " SIZE_FORMAT,
 182             _max_chunk_capacity,
 183             initial_chunk_capacity);
 184 
 185   log_debug(gc)("Initialize mark stack with " SIZE_FORMAT " chunks, maximum " SIZE_FORMAT,
 186                 initial_chunk_capacity, _max_chunk_capacity);
 187 
 188   return resize(initial_chunk_capacity);
 189 }
 190 
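A minimal standalone sketch of the capacity arithmetic in capacity_alignment() and initialize() above. The granularity, entry size and chunk layout below are illustrative stand-ins, not the HotSpot values: a requested capacity in G1TaskQueueEntry units is rounded up to the least common multiple of the allocation granularity and the chunk size, then converted into a whole number of chunks.

  #include <cstddef>
  #include <cstdio>

  // Illustrative stand-ins for os::vm_allocation_granularity(), sizeof(G1TaskQueueEntry)
  // and sizeof(TaskQueueEntryChunk); the real values come from the VM.
  static const size_t kAllocGranularity = 4096;
  static const size_t kEntrySize        = sizeof(void*);
  static const size_t kEntriesPerChunk  = 128;
  static const size_t kChunkSize        = (kEntriesPerChunk + 1) * kEntrySize; // data + next pointer

  static size_t gcd(size_t a, size_t b) { while (b != 0) { size_t t = a % b; a = b; b = t; } return a; }
  static size_t lcm(size_t a, size_t b) { return a / gcd(a, b) * b; }
  static size_t align_up(size_t v, size_t alignment) { return (v + alignment - 1) / alignment * alignment; }

  int main() {
    // Mirrors capacity_alignment(): alignment expressed in entries, not bytes.
    size_t alignment_in_entries  = lcm(kAllocGranularity, kChunkSize) / kEntrySize;
    size_t chunk_size_in_entries = kChunkSize / kEntrySize;

    size_t requested_capacity = 100000; // requested capacity, in entries
    size_t chunk_capacity = align_up(requested_capacity, alignment_in_entries) / chunk_size_in_entries;

    printf("alignment = %zu entries, capacity = %zu chunks\n", alignment_in_entries, chunk_capacity);
    return 0;
  }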
 191 void G1CMMarkStack::expand() {
 192   // Clear expansion flag
 193   _should_expand = false;
 194 
 195   if (_chunk_capacity == _max_chunk_capacity) {
 196     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of " SIZE_FORMAT " chunks.", _chunk_capacity);
 197     return;
 198   }
 199   size_t old_capacity = _chunk_capacity;
 200   // Double capacity if possible
 201   size_t new_capacity = MIN2(old_capacity * 2, _max_chunk_capacity);
 202 
 203   if (resize(new_capacity)) {
 204     log_debug(gc)("Expanded mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 205                   old_capacity, new_capacity);
 206   } else {
 207     log_warning(gc)("Failed to expand mark stack capacity from " SIZE_FORMAT " to " SIZE_FORMAT " chunks",
 208                     old_capacity, new_capacity);
 209   }
 210 }
 211 
 212 G1CMMarkStack::~G1CMMarkStack() {
 213   if (_base != NULL) {
 214     MmapArrayAllocator<TaskQueueEntryChunk, mtGC>::free(_base, _chunk_capacity);
 215   }
 216 }
 217 
 218 void G1CMMarkStack::add_chunk_to_list(TaskQueueEntryChunk* volatile* list, TaskQueueEntryChunk* elem) {
 219   elem->next = *list;
 220   *list = elem;
 221 }
 222 
 223 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 224   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 225   add_chunk_to_list(&_chunk_list, elem);
 226   _chunks_in_chunk_list++;
 227 }
 228 
 229 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 230   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 231   add_chunk_to_list(&_free_list, elem);
 232 }
 233 
 234 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(TaskQueueEntryChunk* volatile* list) {
 235   TaskQueueEntryChunk* result = *list;
 236   if (result != NULL) {
 237     *list = (*list)->next;
 238   }
 239   return result;
 240 }
 241 
 242 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 243   MutexLockerEx x(MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 244   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 245   if (result != NULL) {
 246     _chunks_in_chunk_list--;
 247   }
 248   return result;
 249 }
 250 
 251 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 252   MutexLockerEx x(MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 253   return remove_chunk_from_list(&_free_list);
 254 }
 255 
 256 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::allocate_new_chunk() {
 257   // This dirty read of _hwm is okay because we only ever increase the _hwm in parallel code.
 258   // Further, this limits _hwm to a value of _chunk_capacity + #threads, avoiding
 259   // wraparound of _hwm.
 260   if (_hwm >= _chunk_capacity) {
 261     return NULL;
 262   }
 263 
 264   size_t cur_idx = Atomic::add(1, &_hwm) - 1;
 265   if (cur_idx >= _chunk_capacity) {
 266     return NULL;
 267   }
 268 
 269   TaskQueueEntryChunk* result = ::new (&_base[cur_idx]) TaskQueueEntryChunk;
 270   result->next = NULL;
 271   return result;
 272 }
 273 
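For reference, a small sketch of the bump-allocation pattern used by allocate_new_chunk() above, written against std::atomic instead of the HotSpot Atomic class. The cheap unsynchronized pre-check bounds how far the high-water mark can overshoot the capacity (by at most the number of threads), and the post-increment check decides whether this thread actually claimed a valid slot. The chunk layout and capacity here are assumptions for the example only.

  #include <atomic>
  #include <cstddef>
  #include <new>

  struct Chunk { Chunk* next; void* data[128]; };

  class BumpChunkAllocator {
    Chunk*              _base;      // pre-reserved backing array of chunks
    size_t              _capacity;  // number of chunks in _base
    std::atomic<size_t> _hwm;       // high-water mark: next unused index

  public:
    BumpChunkAllocator(Chunk* base, size_t capacity)
      : _base(base), _capacity(capacity), _hwm(0) { }

    Chunk* allocate() {
      // Dirty read: _hwm only ever grows, so this check merely keeps the
      // overshoot past _capacity bounded; correctness comes from the check below.
      if (_hwm.load(std::memory_order_relaxed) >= _capacity) {
        return nullptr;
      }
      size_t cur = _hwm.fetch_add(1);  // atomically claim an index
      if (cur >= _capacity) {
        return nullptr;                // lost the race for the last slots
      }
      Chunk* result = new (&_base[cur]) Chunk;
      result->next = nullptr;
      return result;
    }
  };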
 274 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 275   // Get a new chunk.
 276   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 277 
 278   if (new_chunk == NULL) {
 279     // Did not get a chunk from the free list. Allocate from backing memory.
 280     new_chunk = allocate_new_chunk();
 281   }
 282 
 283   if (new_chunk == NULL) {
 284     _out_of_memory = true;
 285     return false;
 286   }
 287 
 288   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 289 
 290   add_chunk_to_chunk_list(new_chunk);
 291 
 292   return true;
 293 }
 294 
 295 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 296   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 297 
 298   if (cur == NULL) {
 299     return false;
 300   }
 301 
 302   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 303 
 304   add_chunk_to_free_list(cur);
 305   return true;
 306 }
 307 
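To show the overall scheme of par_push_chunk()/par_pop_chunk(), here is a condensed, self-contained sketch: whole fixed-size buffers of entries move between a free list and a chunk list under short critical sections, while the payload copy happens outside any lock. It uses std::mutex and memcpy in place of MutexLockerEx and Copy::conjoint_memory_atomic, so it illustrates the idea rather than reproducing the VM code.

  #include <cstddef>
  #include <cstring>
  #include <mutex>

  typedef void* Entry;
  static const size_t kEntriesPerChunk = 128;   // assumed chunk payload size

  struct Chunk { Chunk* next; Entry data[kEntriesPerChunk]; };

  class ChunkedStack {
    Chunk*     _chunk_list = nullptr;  // chunks holding live entries
    Chunk*     _free_list  = nullptr;  // recycled, empty chunks
    std::mutex _chunk_lock;
    std::mutex _free_lock;

    static void push_on(Chunk*& list, Chunk* c) { c->next = list; list = c; }
    static Chunk* pop_from(Chunk*& list) {
      Chunk* c = list;
      if (c != nullptr) list = c->next;
      return c;
    }

  public:
    // Push one full buffer of kEntriesPerChunk entries.
    bool par_push_chunk(const Entry* buf) {
      Chunk* c;
      { std::lock_guard<std::mutex> g(_free_lock); c = pop_from(_free_list); }
      if (c == nullptr) {
        // The real code falls back to bump-allocating a chunk from backing
        // memory here; this sketch just reports out-of-memory.
        return false;
      }
      std::memcpy(c->data, buf, sizeof(c->data));  // copy payload outside the lock
      std::lock_guard<std::mutex> g(_chunk_lock);
      push_on(_chunk_list, c);
      return true;
    }

    // Pop one full buffer; returns false when the stack is empty.
    bool par_pop_chunk(Entry* buf) {
      Chunk* c;
      { std::lock_guard<std::mutex> g(_chunk_lock); c = pop_from(_chunk_list); }
      if (c == nullptr) return false;
      std::memcpy(buf, c->data, sizeof(c->data));
      std::lock_guard<std::mutex> g(_free_lock);
      push_on(_free_list, c);
      return true;
    }
  };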
 308 void G1CMMarkStack::set_empty() {
 309   _chunks_in_chunk_list = 0;
 310   _hwm = 0;
 311   clear_out_of_memory();
 312   _chunk_list = NULL;
 313   _free_list = NULL;
 314 }
 315 
 316 G1CMRootRegions::G1CMRootRegions() :
 317   _cm(NULL), _scan_in_progress(false),
 318   _should_abort(false), _claimed_survivor_index(0) { }
 319 
 320 void G1CMRootRegions::init(const G1SurvivorRegions* survivors, G1ConcurrentMark* cm) {
 321   _survivors = survivors;
 322   _cm = cm;


1991     }
1992   }
1993 
1994   return NULL;
1995 }
1996 
1997 #ifndef PRODUCT
1998 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
1999 private:
2000   G1CollectedHeap* _g1h;
2001   const char* _phase;
2002   int _info;
2003 
2004 public:
2005   VerifyNoCSetOops(const char* phase, int info = -1) :
2006     _g1h(G1CollectedHeap::heap()),
2007     _phase(phase),
2008     _info(info)
2009   { }
2010 
2011   void operator()(G1TaskQueueEntry task_entry) const {
2012     if (task_entry.is_array_slice()) {
2013       guarantee(_g1h->is_in_reserved(task_entry.slice()), "Slice " PTR_FORMAT " must be in heap.", p2i(task_entry.slice()));
2014       return;
2015     }
2016     guarantee(task_entry.obj()->is_oop(),
2017               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2018               p2i(task_entry.obj()), _phase, _info);
2019     guarantee(!_g1h->is_in_cset(task_entry.obj()),
2020               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2021               p2i(task_entry.obj()), _phase, _info);
2022   }
2023 };
2024 
2025 void G1ConcurrentMark::verify_no_cset_oops() {
2026   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2027   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2028     return;
2029   }
2030 
2031   // Verify entries on the global mark stack
2032   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
2033 
2034   // Verify entries on the task queues
2035   for (uint i = 0; i < _max_worker_id; ++i) {
2036     G1CMTaskQueue* queue = _task_queues->queue(i);
2037     queue->iterate(VerifyNoCSetOops("Queue", i));
2038   }
2039 
2040   // Verify the global finger
2041   HeapWord* global_finger = finger();


2195 class G1CMBitMapClosure : public BitMapClosure {
2196 private:
2197   // the bitmap that is being iterated over
2198   G1CMBitMap*                 _nextMarkBitMap;
2199   G1ConcurrentMark*           _cm;
2200   G1CMTask*                   _task;
2201 
2202 public:
2203   G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
2204     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2205 
2206   bool do_bit(size_t offset) {
2207     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2208     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2209     assert(addr < _cm->finger(), "invariant");
2210     assert(addr >= _task->finger(), "invariant");
2211 
2212     // We move that task's local finger along.
2213     _task->move_finger_to(addr);
2214 
2215     _task->scan_task_entry(G1TaskQueueEntry::from_oop(oop(addr)));
2216     // we only partially drain the local queue and global stack
2217     _task->drain_local_queue(true);
2218     _task->drain_global_stack(true);
2219 
2220     // if the has_aborted flag has been raised, we need to bail out of
2221     // the iteration
2222     return !_task->has_aborted();
2223   }
2224 };
2225 
2226 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2227   ReferenceProcessor* result = g1h->ref_processor_cm();
2228   assert(result != NULL, "CM reference processor should not be NULL");
2229   return result;
2230 }
2231 
2232 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2233                                G1ConcurrentMark* cm,
2234                                G1CMTask* task)
2235   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),


2386 
2387   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2388   _refs_reached_limit       = _real_refs_reached_limit;
2389 }
2390 
2391 void G1CMTask::decrease_limits() {
2392   // This is called when we believe that we're going to do an infrequent
2393   // operation which will increase the per byte scanned cost (i.e. move
2394   // entries to/from the global stack). It basically tries to decrease the
2395   // scanning limit so that the clock is called earlier.
2396 
2397   _words_scanned_limit = _real_words_scanned_limit -
2398     3 * words_scanned_period / 4;
2399   _refs_reached_limit  = _real_refs_reached_limit -
2400     3 * refs_reached_period / 4;
2401 }
2402 
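As a worked example of the adjustment in decrease_limits(): pulling the limit back by three quarters of the period means the next regular-clock check fires after at most a quarter of the usual scanning work. The period value below is an assumption for illustration, not the actual words_scanned_period constant.

  #include <cstddef>
  #include <cstdio>

  int main() {
    const size_t words_scanned_period = 12 * 1024;  // assumed example value
    size_t words_scanned              = 100000;     // progress so far (example)
    size_t real_limit                 = words_scanned + words_scanned_period;

    // The normal limit is a full period away; the decreased limit is only period/4 away.
    size_t decreased_limit = real_limit - 3 * words_scanned_period / 4;

    printf("clock after %zu words instead of %zu\n",
           decreased_limit - words_scanned, real_limit - words_scanned);
    return 0;
  }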
2403 void G1CMTask::move_entries_to_global_stack() {
2404   // Local array where we'll store the entries that will be popped
2405   // from the local queue.
2406   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2407 
2408   size_t n = 0;
2409   G1TaskQueueEntry task_entry;
2410   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2411     buffer[n].assign(task_entry);
2412     ++n;
2413   }
2414   if (n < G1CMMarkStack::EntriesPerChunk) {
2415     buffer[n].assign(G1TaskQueueEntry());
2416   }
2417 
2418   if (n > 0) {
2419     if (!_cm->mark_stack_push(buffer)) {
2420       set_has_aborted();
2421     }
2422   }
2423 
2424   // This operation was quite expensive, so decrease the limits.
2425   decrease_limits();
2426 }
2427 
2428 bool G1CMTask::get_entries_from_global_stack() {
2429   // Local array where we'll store the entries that will be popped
2430   // from the global stack.
2431   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2432 
2433   if (!_cm->mark_stack_pop(buffer)) {
2434     return false;
2435   }
2436 
2437   // We did actually pop at least one entry.
2438   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2439     G1TaskQueueEntry task_entry = buffer[i];
2440     if (task_entry.is_null()) {
2441       break;
2442     }
2443     assert(task_entry.is_array_slice() || task_entry.obj()->is_oop(), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.obj()));
2444     bool success = _task_queue->push(task_entry);
2445     // We only call this when the local queue is empty or under a
2446     // given target limit. So, we do not expect this push to fail.
2447     assert(success, "invariant");
2448   }
2449 
2450   // This operation was quite expensive, so decrease the limits
2451   decrease_limits();
2452   return true;
2453 }
2454 
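A short sketch of the buffer hand-off convention shared by move_entries_to_global_stack() and get_entries_from_global_stack() above: chunks always travel as full fixed-size arrays, and a partially filled buffer is terminated by a null entry that the consumer stops at. The Entry type and sizes are placeholders.

  #include <cstddef>

  typedef void* Entry;
  static const size_t kEntriesPerChunk = 128;   // assumed chunk payload size

  // Producer: copy up to a chunk's worth of entries, null-terminating a partial chunk.
  size_t fill_chunk_buffer(Entry* buffer, const Entry* local, size_t local_count) {
    size_t n = 0;
    while (n < kEntriesPerChunk && n < local_count) {
      buffer[n] = local[n];
      ++n;
    }
    if (n < kEntriesPerChunk) {
      buffer[n] = nullptr;             // sentinel marks the end of a partial chunk
    }
    return n;
  }

  // Consumer: process entries until the sentinel or the end of the chunk.
  size_t drain_chunk_buffer(const Entry* buffer, void (*process)(Entry)) {
    size_t i = 0;
    for (; i < kEntriesPerChunk; ++i) {
      if (buffer[i] == nullptr) break; // partial chunk ends here
      process(buffer[i]);
    }
    return i;
  }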
2455 void G1CMTask::drain_local_queue(bool partially) {
2456   if (has_aborted()) {
2457     return;
2458   }
2459 
2460   // Decide what the target size is, depending on whether we're going to
2461   // drain it partially (so that other tasks can steal if they run out
2462   // of things to do) or totally (at the very end).
2463   size_t target_size;
2464   if (partially) {
2465     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2466   } else {
2467     target_size = 0;
2468   }
2469 
2470   if (_task_queue->size() > target_size) {
2471     G1TaskQueueEntry entry;
2472     bool ret = _task_queue->pop_local(entry);
2473     while (ret) {
2474       scan_task_entry(entry);
2475       if (_task_queue->size() <= target_size || has_aborted()) {
2476         ret = false;
2477       } else {
2478         ret = _task_queue->pop_local(entry);
2479       }
2480     }
2481   }
2482 }
2483 
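A tiny sketch of the drain policy in drain_local_queue() above: a partial drain stops once the queue is back under a target derived from the queue capacity and a drain-target cap, while a total drain goes to zero. The constants and the deque-based queue are stand-ins, not the G1 task queue or the real GCDrainStackTargetSize flag, and the abort check between objects is omitted.

  #include <algorithm>
  #include <cstddef>
  #include <deque>

  typedef void* Entry;

  static const size_t kMaxElems             = 16 * 1024; // stand-in for _task_queue->max_elems()
  static const size_t kDrainStackTargetSize = 64;        // stand-in for GCDrainStackTargetSize

  void drain_local_queue(std::deque<Entry>& queue, bool partially, void (*scan)(Entry)) {
    // Partial drains leave work behind so other tasks can still steal;
    // the final, total drain empties the queue completely.
    size_t target_size = partially ? std::min(kMaxElems / 3, kDrainStackTargetSize) : 0;

    while (queue.size() > target_size) {
      Entry e = queue.back();   // pop from the local end, like pop_local()
      queue.pop_back();
      scan(e);
    }
  }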
2484 void G1CMTask::drain_global_stack(bool partially) {
2485   if (has_aborted()) return;
2486 
2487   // We have a policy to drain the local queue before we attempt to
2488   // drain the global stack.
2489   assert(partially || _task_queue->size() == 0, "invariant");
2490 
2491   // Decide what the target size is, depending on whether we're going to
2492   // drain it partially (so that other tasks can steal if they run out
2493   // of things to do) or totally (at the very end).
2494   // Notice that when draining the global mark stack partially, due to the raciness
2495   // of the mark stack size update, we might in fact drop below the target. But
2496   // this is not a problem.
2497   // In case of total draining, we simply process until the global mark stack is
2498   // totally empty, disregarding the size counter.


2539          concurrent() ||
2540          satb_mq_set.completed_buffers_num() == 0, "invariant");
2541 
2542   // again, this was a potentially expensive operation, so decrease the
2543   // limits to get the regular clock call early
2544   decrease_limits();
2545 }
2546 
2547 void G1CMTask::print_stats() {
2548   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2549                        _worker_id, _calls);
2550   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2551                        _elapsed_time_ms, _termination_time_ms);
2552   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2553                        _step_times_ms.num(), _step_times_ms.avg(),
2554                        _step_times_ms.sd());
2555   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
2556                        _step_times_ms.maximum(), _step_times_ms.sum());
2557 }
2558 
2559 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, G1TaskQueueEntry& task_entry) {
2560   return _task_queues->steal(worker_id, hash_seed, task_entry);
2561 }
2562 
2563 /*****************************************************************************
2564 
2565     The do_marking_step(time_target_ms, ...) method is the building
2566     block of the parallel marking framework. It can be called in parallel
2567     with other invocations of do_marking_step() on different tasks
2568     (but only one per task, obviously) and concurrently with the
2569     mutator threads, or during remark, hence it eliminates the need
2570     for two versions of the code. When called during remark, it will
2571     pick up from where the task left off during the concurrent marking
2572     phase. Interestingly, tasks are also claimable during evacuation
2573     pauses, since do_marking_step() ensures that it aborts before
2574     it needs to yield.
2575 
2576     The data structures that it uses to do marking work are the
2577     following:
2578 
2579       (1) Marking Bitmap. If there are gray objects that appear only
2580       on the bitmap (this happens either when dealing with an overflow


2863     // Try to reduce the number of available SATB buffers so that
2864     // remark has less work to do.
2865     drain_satb_buffers();
2866   }
2867 
2868   // Since we've done everything else, we can now totally drain the
2869   // local queue and global stack.
2870   drain_local_queue(false);
2871   drain_global_stack(false);
2872 
2873   // Attempt at work stealing from other tasks' queues.
2874   if (do_stealing && !has_aborted()) {
2875     // We have not aborted. This means that we have finished all that
2876     // we could. Let's try to do some stealing...
2877 
2878     // We cannot check whether the global stack is empty, since other
2879     // tasks might be pushing objects to it concurrently.
2880     assert(_cm->out_of_regions() && _task_queue->size() == 0,
2881            "only way to reach here");
2882     while (!has_aborted()) {
2883       G1TaskQueueEntry entry;
2884       if (_cm->try_stealing(_worker_id, &_hash_seed, entry)) {
2885         scan_task_entry(entry);
2886 
2887         // And since we're towards the end, let's totally drain the
2888         // local queue and global stack.
2889         drain_local_queue(false);
2890         drain_global_stack(false);
2891       } else {
2892         break;
2893       }
2894     }
2895   }
2896 
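A compact sketch of the stealing phase above: after the local queue and global stack are fully drained, the task repeatedly tries to take one entry from another worker's queue, re-drains completely after each successful steal, and leaves the loop when a steal attempt fails. The queue vector below is a single-threaded placeholder for the HotSpot task-queue set; no synchronization is modeled.

  #include <cstddef>
  #include <deque>
  #include <vector>

  typedef void* Entry;
  typedef std::deque<Entry> TaskQueue;

  // Try to take one entry from some other worker's queue.
  bool try_stealing(std::vector<TaskQueue>& queues, size_t self, Entry& out) {
    for (size_t i = 0; i < queues.size(); ++i) {
      if (i == self || queues[i].empty()) continue;
      out = queues[i].front();
      queues[i].pop_front();
      return true;
    }
    return false;
  }

  void steal_phase(std::vector<TaskQueue>& queues, size_t self,
                   void (*scan)(Entry), void (*drain_all)()) {
    Entry e;
    while (try_stealing(queues, self, e)) {
      scan(e);
      drain_all();   // totally drain the local queue and global stack after each steal
    }
  }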
2897   // We still haven't aborted. Now, let's try to get into the
2898   // termination protocol.
2899   if (do_termination && !has_aborted()) {
2900     // We cannot check whether the global stack is empty, since other
2901     // tasks might be concurrently pushing objects on it.
2902     // Separated the asserts so that we know which one fires.
2903     assert(_cm->out_of_regions(), "only way to reach here");
2904     assert(_task_queue->size() == 0, "only way to reach here");
2905     _termination_start_time_ms = os::elapsedVTime() * 1000.0;

