1059 // (i.e., insertion of klass pointer) until after, so that it
1060 // atomically becomes a promoted object.
1061 if (promoInfo->tracking()) {
1062 promoInfo->track((PromotedObject*)obj, old->klass());
1063 }
1064 assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
1065 assert(!((FreeChunk*)obj_ptr)->is_free(), "Error, block will look free but show wrong size");
1066 assert(oopDesc::is_oop(old), "Will use and dereference old klass ptr below");
1067
1068 // Finally, install the klass pointer (this should be volatile).
1069 OrderAccess::storestore();
1070 obj->set_klass(old->klass());
1071 // We should now be able to calculate the right size for this object
1072 assert(oopDesc::is_oop(obj) && obj->size() == (int)word_sz, "Error, incorrect size computed for promoted object");
1073
1074 collector()->promoted(true, // parallel
1075 obj_ptr, old->is_objArray(), word_sz);
1076
1077 NOT_PRODUCT(
1078 Atomic::inc(&_numObjectsPromoted);
1079 Atomic::add_ptr(alloc_sz, &_numWordsPromoted);
1080 )
1081
1082 return obj;
1083 }
1084
// Called for GC worker "thread_num" when a parallel promotion cycle is
// done: retire the worker's local promotion buffer (ps->lab) so any
// unused space it holds is returned for reuse.
1085 void
1086 ConcurrentMarkSweepGeneration::
1087 par_promote_alloc_done(int thread_num) {
1088 CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1089 ps->lab.retire(thread_num);
1090 }
1091
1092 void
1093 ConcurrentMarkSweepGeneration::
1094 par_oop_since_save_marks_iterate_done(int thread_num) {
1095 CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1096 ParScanWithoutBarrierClosure* dummy_cl = NULL;
1097 ps->promo.promoted_oops_iterate_nv(dummy_cl);
1098
1099 // Because card-scanning has been completed, subsequent phases
3162 // Note that under the current task protocol, the
3163 // following assertion is true even of the spaces
3164 // expanded since the completion of the concurrent
3165 // marking. XXX This will likely change under a strict
3166 // ABORT semantics.
3167 // After perm removal the comparison was changed to
3168 // greater than or equal to from strictly greater than.
3169 // Before perm removal the highest address sweep would
3170 // have been at the end of perm gen but now is at the
3171 // end of the tenured gen.
3172 assert(_global_finger >= _cms_space->end(),
3173 "All tasks have been completed");
3174 DEBUG_ONLY(_collector->verify_overflow_empty();)
3175 }
3176
// Monotonically advance _global_finger to at least "f" via a CAS loop.
// Concurrent bumps may race: we retry until either our CAS succeeds or
// another thread has already pushed the finger to (or past) "f".
3177 void CMSConcMarkingTask::bump_global_finger(HeapWord* f) {
3178 HeapWord* read = _global_finger;
3179 HeapWord* cur = read;
3180 while (f > read) {
3181 cur = read;
// Typed Atomic::cmpxchg replaces the removed cmpxchg_ptr + cast;
// it returns the value actually observed in _global_finger.
3182 read = Atomic::cmpxchg(f, &_global_finger, cur);
3183 if (cur == read) {
3184 // our cas succeeded
3185 assert(_global_finger >= f, "protocol consistency");
3186 break;
3187 }
3188 }
3189 }
3190
3191 // This is really inefficient, and should be redone by
3192 // using (not yet available) block-read and -write interfaces to the
3193 // stack and the work_queue. XXX FIX ME !!!
3194 bool CMSConcMarkingTask::get_work_from_overflow_stack(CMSMarkStack* ovflw_stk,
3195 OopTaskQueue* work_q) {
3196 // Fast lock-free check
3197 if (ovflw_stk->length() == 0) {
3198 return false;
3199 }
3200 assert(work_q->size() == 0, "Shouldn't steal");
3201 MutexLockerEx ml(ovflw_stk->par_lock(),
3202 Mutex::_no_safepoint_check_flag);
7835 // prove to be expensive (quadratic in the amount of traffic)
7836 // when there are many objects in the overflow list and
7837 // there is much producer-consumer contention on the list.
7838 // *NOTE*: The overflow list manipulation code here and
7839 // in ParNewGeneration:: are very similar in shape,
7840 // except that in the ParNew case we use the old (from/eden)
7841 // copy of the object to thread the list via its klass word.
7842 // Because of the common code, if you make any changes in
7843 // the code below, please check the ParNew version to see if
7844 // similar changes might be needed.
7845 // CR 6797058 has been filed to consolidate the common code.
// Attempt to transfer up to "num" oops from the shared, lock-free
// _overflow_list onto the caller's private work queue "work_q".
// Protocol: claim the whole list by swapping in the sentinel BUSY,
// carve off a prefix of at most "num" elements, then CAS any remaining
// suffix back onto the global list.  Returns true if at least one
// element was transferred; false if the list was (or appeared) empty.
// "no_of_gc_threads" bounds the spin/sleep attempts made while some
// other thread holds the list in the BUSY state.
// (Uses the typed Atomic::xchg/cmpxchg/add operations; the old
// *_ptr variants and their casts are gone.)
7846 bool CMSCollector::par_take_from_overflow_list(size_t num,
7847 OopTaskQueue* work_q,
7848 int no_of_gc_threads) {
7849 assert(work_q->size() == 0, "First empty local work queue");
7850 assert(num < work_q->max_elems(), "Can't bite more than we can chew");
7851 if (_overflow_list == NULL) {
7852 return false;
7853 }
7854 // Grab the entire list; we'll put back a suffix
7855 oop prefix = cast_to_oop(Atomic::xchg((oopDesc*)BUSY, &_overflow_list));
7856 Thread* tid = Thread::current();
7857 // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
7858 // set to ParallelGCThreads.
7859 size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
7860 size_t sleep_time_millis = MAX2((size_t)1, num/100);
7861 // If the list is busy, we spin for a short while,
7862 // sleeping between attempts to get the list.
7863 for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
7864 os::sleep(tid, sleep_time_millis, false);
7865 if (_overflow_list == NULL) {
7866 // Nothing left to take
7867 return false;
7868 } else if (_overflow_list != BUSY) {
7869 // Try and grab the prefix
7870 prefix = cast_to_oop(Atomic::xchg((oopDesc*)BUSY, &_overflow_list));
7871 }
7872 }
7873 // If the list was found to be empty, or we spun long
7874 // enough, we give up and return empty-handed. If we leave
7875 // the list in the BUSY state below, it must be the case that
7876 // some other thread holds the overflow list and will set it
7877 // to a non-BUSY state in the future.
7878 if (prefix == NULL || prefix == BUSY) {
7879 // Nothing to take or waited long enough
7880 if (prefix == NULL) {
7881 // Write back the NULL in case we overwrote it with BUSY above
7882 // and it is still the same value.
7883 Atomic::cmpxchg((oopDesc*)NULL, &_overflow_list, (oopDesc*)BUSY);
7884 }
7885 return false;
7886 }
7887 assert(prefix != NULL && prefix != BUSY, "Error");
7888 size_t i = num;
7889 oop cur = prefix;
7890 // Walk down the first "num" objects, unless we reach the end.
7891 for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
7892 if (cur->mark() == NULL) {
7893 // We have "num" or fewer elements in the list, so there
7894 // is nothing to return to the global list.
7895 // Write back the NULL in lieu of the BUSY we wrote
7896 // above, if it is still the same value.
7897 if (_overflow_list == BUSY) {
7898 Atomic::cmpxchg((oopDesc*)NULL, &_overflow_list, (oopDesc*)BUSY);
7899 }
7900 } else {
7901 // Chop off the suffix and return it to the global list.
7902 assert(cur->mark() != BUSY, "Error");
7903 oop suffix_head = cur->mark(); // suffix will be put back on global list
7904 cur->set_mark(NULL); // break off suffix
7905 // It's possible that the list is still in the empty(busy) state
7906 // we left it in a short while ago; in that case we may be
7907 // able to place back the suffix without incurring the cost
7908 // of a walk down the list.
7909 oop observed_overflow_list = _overflow_list;
7910 oop cur_overflow_list = observed_overflow_list;
7911 bool attached = false;
7912 while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
7913 observed_overflow_list =
7914 Atomic::cmpxchg((oopDesc*)suffix_head, &_overflow_list, (oopDesc*)cur_overflow_list);
7915 if (cur_overflow_list == observed_overflow_list) {
7916 attached = true;
7917 break;
7918 } else cur_overflow_list = observed_overflow_list;
7919 }
7920 if (!attached) {
7921 // Too bad, someone else sneaked in (at least) an element; we'll need
7922 // to do a splice. Find tail of suffix so we can prepend suffix to global
7923 // list.
7924 for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
7925 oop suffix_tail = cur;
7926 assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
7927 "Tautology");
7928 observed_overflow_list = _overflow_list;
7929 do {
7930 cur_overflow_list = observed_overflow_list;
7931 if (cur_overflow_list != BUSY) {
7932 // Do the splice ...
7933 suffix_tail->set_mark(markOop(cur_overflow_list));
7934 } else { // cur_overflow_list == BUSY
7935 suffix_tail->set_mark(NULL);
7936 }
7937 // ... and try to place spliced list back on overflow_list ...
7938 observed_overflow_list =
7939 Atomic::cmpxchg((oopDesc*)suffix_head, &_overflow_list, (oopDesc*)cur_overflow_list);
7940 } while (cur_overflow_list != observed_overflow_list);
7941 // ... until we have succeeded in doing so.
7942 }
7943 }
7944
7945 // Push the prefix elements on work_q
7946 assert(prefix != NULL, "control point invariant");
7947 const markOop proto = markOopDesc::prototype();
7948 oop next;
7949 NOT_PRODUCT(ssize_t n = 0;)
// The list was threaded through the mark words; restore each mark to
// the prototype before handing the object to the work queue.
7950 for (cur = prefix; cur != NULL; cur = next) {
7951 next = oop(cur->mark());
7952 cur->set_mark(proto); // until proven otherwise
7953 assert(oopDesc::is_oop(cur), "Should be an oop");
7954 bool res = work_q->push(cur);
7955 assert(res, "Bit off more than we can chew?");
7956 NOT_PRODUCT(n++;)
7957 }
7958 #ifndef PRODUCT
7959 assert(_num_par_pushes >= n, "Too many pops?");
7960 Atomic::add(-n, &_num_par_pushes);
7961 #endif
7962 return true;
7963 }
7964
7965 // Single-threaded
// Push "p" onto the overflow list, threading the list through the
// objects' mark words: p's mark is overwritten with the old list head
// (preserve_mark_if_necessary saves the current mark first).  No
// atomics needed -- the caller has exclusive access.
7966 void CMSCollector::push_on_overflow_list(oop p) {
7967 NOT_PRODUCT(_num_par_pushes++;)
7968 assert(oopDesc::is_oop(p), "Not an oop")
;
7969 preserve_mark_if_necessary(p);
// Link via the mark word and install p as the new list head.
7970 p->set_mark((markOop)_overflow_list);
7971 _overflow_list = p;
7972 }
7973
7974 // Multi-threaded; use CAS to prepend to overflow list
// Push "p" onto the shared overflow list, linking through p's mark
// word.  If the observed head is the BUSY sentinel (another thread
// has claimed the list), link p to NULL instead, since BUSY is not a
// real object.  The CAS is retried until the head we installed against
// matches what we last observed.
7975 void CMSCollector::par_push_on_overflow_list(oop p) {
7976 NOT_PRODUCT(Atomic::inc(&_num_par_pushes);)
7977 assert(oopDesc::is_oop(p), "Not an oop");
7978 par_preserve_mark_if_necessary(p);
7979 oop observed_overflow_list = _overflow_list;
7980 oop cur_overflow_list;
7981 do {
7982 cur_overflow_list = observed_overflow_list;
7983 if (cur_overflow_list != BUSY) {
7984 p->set_mark(markOop(cur_overflow_list));
7985 } else {
7986 p->set_mark(NULL);
7987 }
// Typed Atomic::cmpxchg replaces the removed cmpxchg_ptr + oop cast.
7988 observed_overflow_list =
7989 Atomic::cmpxchg((oopDesc*)p, &_overflow_list, (oopDesc*)cur_overflow_list);
7990 } while (cur_overflow_list != observed_overflow_list);
7991 }
7992 #undef BUSY
7993
7994 // Single threaded
7995 // General Note on GrowableArray: pushes may silently fail
7996 // because we are (temporarily) out of C-heap for expanding
7997 // the stack. The problem is quite ubiquitous and affects
7998 // a lot of code in the JVM. The prudent thing for GrowableArray
7999 // to do (for now) is to exit with an error. However, that may
8000 // be too draconian in some cases because the caller may be
8001 // able to recover without much harm. For such cases, we
8002 // should probably introduce a "soft_push" method which returns
8003 // an indication of success or failure with the assumption that
8004 // the caller may be able to recover from a failure; code in
8005 // the VM can then be changed, incrementally, to deal with such
8006 // failures where possible, thus, incrementally hardening the VM
8007 // in such low resource situations.
8008 void CMSCollector::preserve_mark_work(oop p, markOop m) {
8009 _preserved_oop_stack.push(p);
|
1059 // (i.e., insertion of klass pointer) until after, so that it
1060 // atomically becomes a promoted object.
1061 if (promoInfo->tracking()) {
1062 promoInfo->track((PromotedObject*)obj, old->klass());
1063 }
1064 assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
1065 assert(!((FreeChunk*)obj_ptr)->is_free(), "Error, block will look free but show wrong size");
1066 assert(oopDesc::is_oop(old), "Will use and dereference old klass ptr below");
1067
1068 // Finally, install the klass pointer (this should be volatile).
1069 OrderAccess::storestore();
1070 obj->set_klass(old->klass());
1071 // We should now be able to calculate the right size for this object
1072 assert(oopDesc::is_oop(obj) && obj->size() == (int)word_sz, "Error, incorrect size computed for promoted object");
1073
1074 collector()->promoted(true, // parallel
1075 obj_ptr, old->is_objArray(), word_sz);
1076
1077 NOT_PRODUCT(
1078 Atomic::inc(&_numObjectsPromoted);
1079 Atomic::add(alloc_sz, &_numWordsPromoted);
1080 )
1081
1082 return obj;
1083 }
1084
// Called for GC worker "thread_num" when a parallel promotion cycle is
// done: retire the worker's local promotion buffer (ps->lab) so any
// unused space it holds is returned for reuse.
1085 void
1086 ConcurrentMarkSweepGeneration::
1087 par_promote_alloc_done(int thread_num) {
1088 CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1089 ps->lab.retire(thread_num);
1090 }
1091
1092 void
1093 ConcurrentMarkSweepGeneration::
1094 par_oop_since_save_marks_iterate_done(int thread_num) {
1095 CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1096 ParScanWithoutBarrierClosure* dummy_cl = NULL;
1097 ps->promo.promoted_oops_iterate_nv(dummy_cl);
1098
1099 // Because card-scanning has been completed, subsequent phases
3162 // Note that under the current task protocol, the
3163 // following assertion is true even of the spaces
3164 // expanded since the completion of the concurrent
3165 // marking. XXX This will likely change under a strict
3166 // ABORT semantics.
3167 // After perm removal the comparison was changed to
3168 // greater than or equal to from strictly greater than.
3169 // Before perm removal the highest address sweep would
3170 // have been at the end of perm gen but now is at the
3171 // end of the tenured gen.
3172 assert(_global_finger >= _cms_space->end(),
3173 "All tasks have been completed");
3174 DEBUG_ONLY(_collector->verify_overflow_empty();)
3175 }
3176
// Monotonically advance _global_finger to at least "f" via a CAS loop.
// Concurrent bumps may race: we retry until either our CAS succeeds or
// another thread has already pushed the finger to (or past) "f".
3177 void CMSConcMarkingTask::bump_global_finger(HeapWord* f) {
3178 HeapWord* read = _global_finger;
3179 HeapWord* cur = read;
3180 while (f > read) {
3181 cur = read;
// cmpxchg returns the value actually observed in _global_finger;
// equality with "cur" means our swap took effect.
3182 read = Atomic::cmpxchg(f, &_global_finger, cur);
3183 if (cur == read) {
3184 // our cas succeeded
3185 assert(_global_finger >= f, "protocol consistency");
3186 break;
3187 }
3188 }
3189 }
3190
3191 // This is really inefficient, and should be redone by
3192 // using (not yet available) block-read and -write interfaces to the
3193 // stack and the work_queue. XXX FIX ME !!!
3194 bool CMSConcMarkingTask::get_work_from_overflow_stack(CMSMarkStack* ovflw_stk,
3195 OopTaskQueue* work_q) {
3196 // Fast lock-free check
3197 if (ovflw_stk->length() == 0) {
3198 return false;
3199 }
3200 assert(work_q->size() == 0, "Shouldn't steal");
3201 MutexLockerEx ml(ovflw_stk->par_lock(),
3202 Mutex::_no_safepoint_check_flag);
7835 // prove to be expensive (quadratic in the amount of traffic)
7836 // when there are many objects in the overflow list and
7837 // there is much producer-consumer contention on the list.
7838 // *NOTE*: The overflow list manipulation code here and
7839 // in ParNewGeneration:: are very similar in shape,
7840 // except that in the ParNew case we use the old (from/eden)
7841 // copy of the object to thread the list via its klass word.
7842 // Because of the common code, if you make any changes in
7843 // the code below, please check the ParNew version to see if
7844 // similar changes might be needed.
7845 // CR 6797058 has been filed to consolidate the common code.
// Attempt to transfer up to "num" oops from the shared, lock-free
// _overflow_list onto the caller's private work queue "work_q".
// Protocol: claim the whole list by swapping in the sentinel BUSY,
// carve off a prefix of at most "num" elements, then CAS any remaining
// suffix back onto the global list.  Returns true if at least one
// element was transferred; false if the list was (or appeared) empty.
// "no_of_gc_threads" bounds the spin/sleep attempts made while some
// other thread holds the list in the BUSY state.
7846 bool CMSCollector::par_take_from_overflow_list(size_t num,
7847 OopTaskQueue* work_q,
7848 int no_of_gc_threads) {
7849 assert(work_q->size() == 0, "First empty local work queue");
7850 assert(num < work_q->max_elems(), "Can't bite more than we can chew");
7851 if (_overflow_list == NULL) {
7852 return false;
7853 }
7854 // Grab the entire list; we'll put back a suffix
7855 oop prefix = cast_to_oop(Atomic::xchg((oopDesc*)BUSY, &_overflow_list));
7856 Thread* tid = Thread::current();
7857 // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
7858 // set to ParallelGCThreads.
7859 size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
7860 size_t sleep_time_millis = MAX2((size_t)1, num/100);
7861 // If the list is busy, we spin for a short while,
7862 // sleeping between attempts to get the list.
7863 for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
7864 os::sleep(tid, sleep_time_millis, false);
7865 if (_overflow_list == NULL) {
7866 // Nothing left to take
7867 return false;
7868 } else if (_overflow_list != BUSY) {
7869 // Try and grab the prefix
7870 prefix = cast_to_oop(Atomic::xchg((oopDesc*)BUSY, &_overflow_list));
7871 }
7872 }
7873 // If the list was found to be empty, or we spun long
7874 // enough, we give up and return empty-handed. If we leave
7875 // the list in the BUSY state below, it must be the case that
7876 // some other thread holds the overflow list and will set it
7877 // to a non-BUSY state in the future.
7878 if (prefix == NULL || prefix == BUSY) {
7879 // Nothing to take or waited long enough
7880 if (prefix == NULL) {
7881 // Write back the NULL in case we overwrote it with BUSY above
7882 // and it is still the same value.
7883 Atomic::cmpxchg((oopDesc*)NULL, &_overflow_list, (oopDesc*)BUSY);
7884 }
7885 return false;
7886 }
7887 assert(prefix != NULL && prefix != BUSY, "Error");
7888 size_t i = num;
7889 oop cur = prefix;
7890 // Walk down the first "num" objects, unless we reach the end.
7891 for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
7892 if (cur->mark() == NULL) {
7893 // We have "num" or fewer elements in the list, so there
7894 // is nothing to return to the global list.
7895 // Write back the NULL in lieu of the BUSY we wrote
7896 // above, if it is still the same value.
7897 if (_overflow_list == BUSY) {
7898 Atomic::cmpxchg((oopDesc*)NULL, &_overflow_list, (oopDesc*)BUSY);
7899 }
7900 } else {
7901 // Chop off the suffix and return it to the global list.
7902 assert(cur->mark() != BUSY, "Error");
7903 oop suffix_head = cur->mark(); // suffix will be put back on global list
7904 cur->set_mark(NULL); // break off suffix
7905 // It's possible that the list is still in the empty(busy) state
7906 // we left it in a short while ago; in that case we may be
7907 // able to place back the suffix without incurring the cost
7908 // of a walk down the list.
7909 oop observed_overflow_list = _overflow_list;
7910 oop cur_overflow_list = observed_overflow_list;
7911 bool attached = false;
7912 while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
7913 observed_overflow_list =
7914 Atomic::cmpxchg((oopDesc*)suffix_head, &_overflow_list, (oopDesc*)cur_overflow_list);
7915 if (cur_overflow_list == observed_overflow_list) {
7916 attached = true;
7917 break;
7918 } else cur_overflow_list = observed_overflow_list;
7919 }
7920 if (!attached) {
7921 // Too bad, someone else sneaked in (at least) an element; we'll need
7922 // to do a splice. Find tail of suffix so we can prepend suffix to global
7923 // list.
7924 for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
7925 oop suffix_tail = cur;
7926 assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
7927 "Tautology");
7928 observed_overflow_list = _overflow_list;
7929 do {
7930 cur_overflow_list = observed_overflow_list;
7931 if (cur_overflow_list != BUSY) {
7932 // Do the splice ...
7933 suffix_tail->set_mark(markOop(cur_overflow_list));
7934 } else { // cur_overflow_list == BUSY
7935 suffix_tail->set_mark(NULL);
7936 }
7937 // ... and try to place spliced list back on overflow_list ...
7938 observed_overflow_list =
7939 Atomic::cmpxchg((oopDesc*)suffix_head, &_overflow_list, (oopDesc*)cur_overflow_list);
7940 } while (cur_overflow_list != observed_overflow_list);
7941 // ... until we have succeeded in doing so.
7942 }
7943 }
7944
7945 // Push the prefix elements on work_q
7946 assert(prefix != NULL, "control point invariant");
7947 const markOop proto = markOopDesc::prototype();
7948 oop next;
7949 NOT_PRODUCT(ssize_t n = 0;)
// The list was threaded through the mark words; restore each mark to
// the prototype before handing the object to the work queue.
7950 for (cur = prefix; cur != NULL; cur = next) {
7951 next = oop(cur->mark());
7952 cur->set_mark(proto); // until proven otherwise
7953 assert(oopDesc::is_oop(cur), "Should be an oop");
7954 bool res = work_q->push(cur);
7955 assert(res, "Bit off more than we can chew?");
7956 NOT_PRODUCT(n++;)
7957 }
7958 #ifndef PRODUCT
7959 assert(_num_par_pushes >= n, "Too many pops?");
7960 Atomic::add(-n, &_num_par_pushes);
7961 #endif
7962 return true;
7963 }
7964
7965 // Single-threaded
// Push "p" onto the overflow list, threading the list through the
// objects' mark words: p's mark is overwritten with the old list head
// (preserve_mark_if_necessary saves the current mark first).  No
// atomics needed -- the caller has exclusive access.
7966 void CMSCollector::push_on_overflow_list(oop p) {
7967 NOT_PRODUCT(_num_par_pushes++;)
7968 assert(oopDesc::is_oop(p), "Not an oop");
7969 preserve_mark_if_necessary(p);
// Link via the mark word and install p as the new list head.
7970 p->set_mark((markOop)_overflow_list);
7971 _overflow_list = p;
7972 }
7973
7974 // Multi-threaded; use CAS to prepend to overflow list
// Push "p" onto the shared overflow list, linking through p's mark
// word.  If the observed head is the BUSY sentinel (another thread
// has claimed the list), link p to NULL instead, since BUSY is not a
// real object.  The CAS is retried until the head we installed against
// matches what we last observed.
7975 void CMSCollector::par_push_on_overflow_list(oop p) {
7976 NOT_PRODUCT(Atomic::inc(&_num_par_pushes);)
7977 assert(oopDesc::is_oop(p), "Not an oop");
7978 par_preserve_mark_if_necessary(p);
7979 oop observed_overflow_list = _overflow_list;
7980 oop cur_overflow_list;
7981 do {
7982 cur_overflow_list = observed_overflow_list;
7983 if (cur_overflow_list != BUSY) {
7984 p->set_mark(markOop(cur_overflow_list));
7985 } else {
7986 p->set_mark(NULL);
7987 }
// cmpxchg returns the previously observed head; loop until it matches
// the head our link was computed against.
7988 observed_overflow_list =
7989 Atomic::cmpxchg((oopDesc*)p, &_overflow_list, (oopDesc*)cur_overflow_list);
7990 } while (cur_overflow_list != observed_overflow_list);
7991 }
7992 #undef BUSY
7993
7994 // Single threaded
7995 // General Note on GrowableArray: pushes may silently fail
7996 // because we are (temporarily) out of C-heap for expanding
7997 // the stack. The problem is quite ubiquitous and affects
7998 // a lot of code in the JVM. The prudent thing for GrowableArray
7999 // to do (for now) is to exit with an error. However, that may
8000 // be too draconian in some cases because the caller may be
8001 // able to recover without much harm. For such cases, we
8002 // should probably introduce a "soft_push" method which returns
8003 // an indication of success or failure with the assumption that
8004 // the caller may be able to recover from a failure; code in
8005 // the VM can then be changed, incrementally, to deal with such
8006 // failures where possible, thus, incrementally hardening the VM
8007 // in such low resource situations.
8008 void CMSCollector::preserve_mark_work(oop p, markOop m) {
8009 _preserved_oop_stack.push(p);
|