Print this page
rev 2724 : 6484965: G1: piggy-back liveness accounting phase on marking
Summary: Remove the separate counting phase of concurrent marking by tracking the amount of marked bytes and the cards spanned by marked objects in marking task/worker thread local data structures, which are updated as individual objects are marked.
Reviewed-by:

Split Close
Expand all
Collapse all
          --- old/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
          +++ new/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
↓ open down ↓ 3875 lines elided ↑ open up ↑
3876 3876      assert(_hr->is_in(obj_addr), "sanity");
3877 3877      size_t obj_size = obj->size();
3878 3878      _hr->update_bot_for_object(obj_addr, obj_size);
3879 3879      if (obj->is_forwarded() && obj->forwardee() == obj) {
3880 3880        // The object failed to move.
3881 3881        assert(!_g1->is_obj_dead(obj), "We should not be preserving dead objs.");
3882 3882        _cm->markPrev(obj);
3883 3883        assert(_cm->isPrevMarked(obj), "Should be marked!");
3884 3884        _prev_marked_bytes += (obj_size * HeapWordSize);
3885 3885        if (_g1->mark_in_progress() && !_g1->is_obj_ill(obj)) {
3886      -        _cm->markAndGrayObjectIfNecessary(obj);
     3886 +        _cm->markAndGrayObjectIfNecessary(obj, 0 /* worker_i */);
3887 3887        }
3888 3888        obj->set_mark(markOopDesc::prototype());
3889 3889        // While we were processing RSet buffers during the
3890 3890        // collection, we actually didn't scan any cards on the
3891 3891        // collection set, since we didn't want to update remembered
3892 3892        // sets with entries that point into the collection set, given
3893 3893        // that live objects from the collection set are about to move
3894 3894        // and such entries will be stale very soon. This change also
3895 3895        // dealt with a reliability issue which involved scanning a
3896 3896        // card in the collection set and coming across an array that
↓ open down ↓ 101 lines elided ↑ open up ↑
3998 3998    while (_evac_failure_scan_stack->length() > 0) {
3999 3999       oop obj = _evac_failure_scan_stack->pop();
4000 4000       _evac_failure_closure->set_region(heap_region_containing(obj));
4001 4001       obj->oop_iterate_backwards(_evac_failure_closure);
4002 4002    }
4003 4003  }
4004 4004  
4005 4005  oop
4006 4006  G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
4007 4007                                                 oop old,
4008      -                                               bool should_mark_root) {
     4008 +                                               bool should_mark_root,
     4009 +                                               int worker_i) {
4009 4010    assert(obj_in_cs(old),
4010 4011           err_msg("obj: "PTR_FORMAT" should still be in the CSet",
4011 4012                   (HeapWord*) old));
4012 4013    markOop m = old->mark();
4013 4014    oop forward_ptr = old->forward_to_atomic(old);
4014 4015    if (forward_ptr == NULL) {
4015 4016      // Forward-to-self succeeded.
4016 4017  
4017 4018      // should_mark_root will be true when this routine is called
4018 4019      // from a root scanning closure during an initial mark pause.
4019 4020      // In this case the thread that succeeds in self-forwarding the
4020 4021      // object is also responsible for marking the object.
4021 4022      if (should_mark_root) {
4022 4023        assert(!oopDesc::is_null(old), "shouldn't be");
4023      -      _cm->grayRoot(old);
     4024 +      _cm->grayRoot(old, worker_i);
4024 4025      }
4025 4026  
4026 4027      if (_evac_failure_closure != cl) {
4027 4028        MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag);
4028 4029        assert(!_drain_in_progress,
4029 4030               "Should only be true while someone holds the lock.");
4030 4031        // Set the global evac-failure closure to the current thread's.
4031 4032        assert(_evac_failure_closure == NULL, "Or locking has failed.");
4032 4033        set_evac_failure_closure(cl);
4033 4034        // Now do the common part.
↓ open down ↓ 85 lines elided ↑ open up ↑
4119 4120  }
4120 4121  
4121 4122  #ifndef PRODUCT
           // Debug-only (non-PRODUCT) verification closure over a GCLab's local
           // mark bitmap: for every set bit, the corresponding heap object must
           // also be marked in the concurrent-marking bitmap (enforced by the
           // guarantee below).
4122 4123  bool GCLabBitMapClosure::do_bit(size_t offset) {
           // Translate the bit offset back to the heap address it covers.
4123 4124    HeapWord* addr = _bitmap->offsetToHeapWord(offset);
4124 4125    guarantee(_cm->isMarked(oop(addr)), "it should be!");
           // true => keep iterating over the remaining bits.
4125 4126    return true;
4126 4127  }
4127 4128  #endif // PRODUCT
4128 4129  
4129      -G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
     4130 +G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size, int worker_i) :
4130 4131    ParGCAllocBuffer(gclab_word_size),
4131 4132    _should_mark_objects(false),
4132 4133    _bitmap(G1CollectedHeap::heap()->reserved_region().start(), gclab_word_size),
     4134 +  _worker_i(worker_i),
4133 4135    _retired(false)
4134 4136  {
4135 4137    //_should_mark_objects is set to true when G1ParCopyHelper needs to
4136 4138    // mark the forwarded location of an evacuated object.
4137 4139    // We set _should_mark_objects to true if marking is active, i.e. when we
4138 4140    // need to propagate a mark, or during an initial mark pause, i.e. when we
4139 4141    // need to mark objects immediately reachable by the roots.
4140 4142    if (G1CollectedHeap::heap()->mark_in_progress() ||
4141 4143        G1CollectedHeap::heap()->g1_policy()->during_initial_mark_pause()) {
4142 4144      _should_mark_objects = true;
↓ open down ↓ 1 lines elided ↑ open up ↑
4144 4146  }
4145 4147  
4146 4148  G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
4147 4149    : _g1h(g1h),
4148 4150      _refs(g1h->task_queue(queue_num)),
4149 4151      _dcq(&g1h->dirty_card_queue_set()),
4150 4152      _ct_bs((CardTableModRefBS*)_g1h->barrier_set()),
4151 4153      _g1_rem(g1h->g1_rem_set()),
4152 4154      _hash_seed(17), _queue_num(queue_num),
4153 4155      _term_attempts(0),
4154      -    _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived)),
4155      -    _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)),
     4156 +    _surviving_alloc_buffer(g1h->desired_plab_sz(GCAllocForSurvived), queue_num),
     4157 +    _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured), queue_num),
4156 4158      _age_table(false),
4157 4159      _strong_roots_time(0), _term_time(0),
4158 4160      _alloc_buffer_waste(0), _undo_waste(0)
4159 4161  {
4160 4162    // we allocate G1YoungSurvRateNumRegions plus one entries, since
4161 4163    // we "sacrifice" entry 0 to keep track of surviving bytes for
4162 4164    // non-young regions (where the age is -1)
4163 4165    // We also add a few elements at the beginning and at the end in
4164 4166    // an attempt to eliminate cache contention
4165 4167    size_t real_length = 1 + _g1h->g1_policy()->young_cset_length();
↓ open down ↓ 91 lines elided ↑ open up ↑
4257 4259  
4258 4260      while (refs()->pop_local(ref)) {
4259 4261        deal_with_reference(ref);
4260 4262      }
4261 4263    } while (!refs()->is_empty());
4262 4264  }
4263 4265  
           // Common base for G1's parallel scan closures. Caches the heap,
           // remembered-set and concurrent-mark pointers plus snapshots of the
           // current pause state (initial-mark pause? marking in progress?) so
           // the hot do_oop paths avoid repeated indirections.
4264 4266  G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
4265 4267    _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
4266 4268    _par_scan_state(par_scan_state),
           // New in this change: cache the worker id (== the scan state's task
           // queue number) so marking calls made from this closure can be
           // attributed to the per-worker liveness-counting structures that
           // this patch introduces (see the change summary at the top).
     4269 +  _worker_i(par_scan_state->queue_num()),
4267 4270    _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()),
4268 4271    _mark_in_progress(_g1->mark_in_progress()) { }
4269 4272  
           // Grays (marks and pushes for scanning) the object referenced by *p.
4270 4273  template <class T> void G1ParCopyHelper::mark_object(T* p) {
4271 4274    // This is called from do_oop_work for objects that are not
4272 4275    // in the collection set. Objects in the collection set
4273 4276    // are marked after they have been evacuated.
4274 4277  
           // Load the (possibly narrow) oop and decode it; a null reference is
           // simply ignored.
4275 4278    T heap_oop = oopDesc::load_heap_oop(p);
4276 4279    if (!oopDesc::is_null(heap_oop)) {
4277 4280      oop obj = oopDesc::decode_heap_oop(heap_oop);
4278 4281      HeapWord* addr = (HeapWord*)obj;
           // Only objects inside the G1 reserved heap can be grayed; anything
           // else (e.g. perm/non-heap references) is left alone.
4279 4282      if (_g1->is_in_g1_reserved(addr)) {
           // Change under review: grayRoot now takes the worker id so the
           // marked bytes/cards can be recorded in this worker's local
           // counting data rather than in a separate counting phase.
4280      -      _cm->grayRoot(oop(addr));
     4283 +      _cm->grayRoot(oop(addr), _worker_i);
4281 4284      }
4282 4285    }
4283 4286  }
4284 4287  
4285 4288  oop G1ParCopyHelper::copy_to_survivor_space(oop old, bool should_mark_root,
4286 4289                                                       bool should_mark_copy) {
4287 4290    size_t    word_sz = old->size();
4288 4291    HeapRegion* from_region = _g1->heap_region_containing_raw(old);
4289 4292    // +1 to make the -1 indexes valid...
4290 4293    int       young_index = from_region->young_index_in_cset()+1;
↓ open down ↓ 5 lines elided ↑ open up ↑
4296 4299                                             : m->age();
4297 4300    GCAllocPurpose alloc_purpose = g1p->evacuation_destination(from_region, age,
4298 4301                                                               word_sz);
4299 4302    HeapWord* obj_ptr = _par_scan_state->allocate(alloc_purpose, word_sz);
4300 4303    oop       obj     = oop(obj_ptr);
4301 4304  
4302 4305    if (obj_ptr == NULL) {
4303 4306      // This will either forward-to-self, or detect that someone else has
4304 4307      // installed a forwarding pointer.
4305 4308      OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
4306      -    return _g1->handle_evacuation_failure_par(cl, old, should_mark_root);
     4309 +    return _g1->handle_evacuation_failure_par(cl, old, should_mark_root, _worker_i);
4307 4310    }
4308 4311  
4309 4312    // We're going to allocate linearly, so might as well prefetch ahead.
4310 4313    Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
4311 4314  
4312 4315    oop forward_ptr = old->forward_to_atomic(obj);
4313 4316    if (forward_ptr == NULL) {
4314 4317      Copy::aligned_disjoint_words((HeapWord*) old, obj_ptr, word_sz);
4315 4318      if (g1p->track_object_age(alloc_purpose)) {
4316 4319        // We could simply do obj->incr_age(). However, this causes a
↓ open down ↓ 18 lines elided ↑ open up ↑
4335 4338          obj->set_mark(m);
4336 4339        }
4337 4340        _par_scan_state->age_table()->add(obj, word_sz);
4338 4341      } else {
4339 4342        obj->set_mark(m);
4340 4343      }
4341 4344  
4342 4345      // Mark the evacuated object or propagate "next" mark bit
4343 4346      if (should_mark_copy) {
4344 4347        if (!use_local_bitmaps ||
4345      -          !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) {
     4348 +          !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr, word_sz)) {
4346 4349          // if we couldn't mark it on the local bitmap (this happens when
4347 4350          // the object was not allocated in the GCLab), we have to bite
4348 4351          // the bullet and do the standard parallel mark
4349      -        _cm->markAndGrayObjectIfNecessary(obj);
     4352 +        _cm->markAndGrayObjectIfNecessary(obj, _worker_i);
4350 4353        }
4351 4354  
4352 4355        if (_g1->isMarkedNext(old)) {
4353 4356          // Unmark the object's old location so that marking
4354 4357          // doesn't think the old object is alive.
4355 4358          _cm->nextMarkBitMap()->parClear((HeapWord*)old);
     4359 +
     4360 +        // We could clear the count data for the old object here but
     4361 +        // currently we do not. Why don't we do this? The thread/task
     4362 +        // that marks a newly copied object is likely _not_ the thread/task
     4363 +        // that originally marked the old object. So, to clear the count
     4364 +        // data for the old object, we would have to scan the count
     4365 +        // data for all of the tasks (and clear the data for the old object
     4366 +        // in parallel with other threads adding to the count data). Even
     4367 +        // then we could clear a bit incorrectly (e.g. if the old object
     4368 +        // does not start or end on a card boundary). It's more important
     4369 +        // that we don't have missed bits that should've been set than
     4370 +        // having extra bits set.
     4371 +        //
     4372 +        // As a result the accumulated count data could be a superset
     4373 +        // of the data that is/would have been calculated by walking
     4374 +        // the marking bitmap.
4356 4375        }
4357 4376      }
4358 4377  
4359 4378      size_t* surv_young_words = _par_scan_state->surviving_young_words();
4360 4379      surv_young_words[young_index] += word_sz;
4361 4380  
4362 4381      if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
4363 4382        arrayOop(old)->set_length(0);
4364 4383        oop* old_p = set_partial_array_mask(old);
4365 4384        _par_scan_state->push_on_queue(old_p);
↓ open down ↓ 66 lines elided ↑ open up ↑
4432 4451        bool should_mark_copy = do_mark_object ||
4433 4452                                _during_initial_mark ||
4434 4453                                (_mark_in_progress && !_g1->is_obj_ill(obj));
4435 4454  
4436 4455        oop copy_oop = copy_to_survivor_space(obj, should_mark_root,
4437 4456                                                   should_mark_copy);
4438 4457        oopDesc::encode_store_heap_oop(p, copy_oop);
4439 4458      }
4440 4459      // When scanning the RS, we only care about objs in CS.
4441 4460      if (barrier == G1BarrierRS) {
4442      -      _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
     4461 +      assert(_worker_i == _par_scan_state->queue_num(), "sanity");
     4462 +      _par_scan_state->update_rs(_from, p, _worker_i);
4443 4463      }
4444 4464    } else {
4445 4465      // The object is not in collection set. If we're a root scanning
4446 4466      // closure during an initial mark pause (i.e. do_mark_object will
4447 4467      // be true) then attempt to mark the object.
4448 4468      if (do_mark_object) {
4449 4469        mark_object(p);
4450 4470      }
4451 4471    }
4452 4472  
4453 4473    if (barrier == G1BarrierEvac && obj != NULL) {
4454      -    _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
     4474 +    assert(_worker_i == _par_scan_state->queue_num(), "sanity");
     4475 +    _par_scan_state->update_rs(_from, p, _worker_i);
4455 4476    }
4456 4477  
4457 4478    if (do_gen_barrier && obj != NULL) {
4458 4479      par_do_barrier(p);
4459 4480    }
4460 4481  }
4461 4482  
4462 4483  template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(oop* p);
4463 4484  template void G1ParCopyClosure<false, G1BarrierEvac, false>::do_oop_work(narrowOop* p);
4464 4485  
↓ open down ↓ 1547 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX