1059 // (i.e., insertion of klass pointer) until after, so that it
1060 // atomically becomes a promoted object.
1061 if (promoInfo->tracking()) {
1062 promoInfo->track((PromotedObject*)obj, old->klass());
1063 }
1064 assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
1065 assert(!((FreeChunk*)obj_ptr)->is_free(), "Error, block will look free but show wrong size");
1066 assert(oopDesc::is_oop(old), "Will use and dereference old klass ptr below");
1067
1068 // Finally, install the klass pointer (this should be volatile).
1069 OrderAccess::storestore();
1070 obj->set_klass(old->klass());
1071 // We should now be able to calculate the right size for this object
1072 assert(oopDesc::is_oop(obj) && obj->size() == (int)word_sz, "Error, incorrect size computed for promoted object");
1073
1074 collector()->promoted(true, // parallel
1075 obj_ptr, old->is_objArray(), word_sz);
1076
1077 NOT_PRODUCT(
1078 Atomic::inc(&_numObjectsPromoted);
1079 Atomic::add_ptr(alloc_sz, &_numWordsPromoted);
1080 )
1081
1082 return obj;
1083 }
1084
// Called for GC worker "thread_num" when a parallel promotion cycle is
// done: retire the worker's local promotion buffer (ps->lab) so any
// unused space it holds is returned for reuse.
1085 void
1086 ConcurrentMarkSweepGeneration::
1087 par_promote_alloc_done(int thread_num) {
1088 CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1089 ps->lab.retire(thread_num);
1090 }
1091
1092 void
1093 ConcurrentMarkSweepGeneration::
1094 par_oop_since_save_marks_iterate_done(int thread_num) {
1095 CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1096 ParScanWithoutBarrierClosure* dummy_cl = NULL;
1097 ps->promo.promoted_oops_iterate_nv(dummy_cl);
1098
1099 // Because card-scanning has been completed, subsequent phases
3162 // Note that under the current task protocol, the
3163 // following assertion is true even of the spaces
3164 // expanded since the completion of the concurrent
3165 // marking. XXX This will likely change under a strict
3166 // ABORT semantics.
3167 // After perm removal the comparison was changed to
3168 // greater than or equal to from strictly greater than.
3169 // Before perm removal the highest address sweep would
3170 // have been at the end of perm gen but now is at the
3171 // end of the tenured gen.
3172 assert(_global_finger >= _cms_space->end(),
3173 "All tasks have been completed");
3174 DEBUG_ONLY(_collector->verify_overflow_empty();)
3175 }
3176
// Monotonically advance _global_finger to at least "f" via a CAS loop.
// Concurrent bumps may race: we retry until either our CAS succeeds or
// another thread has already pushed the finger to (or past) "f".
3177 void CMSConcMarkingTask::bump_global_finger(HeapWord* f) {
3178 HeapWord* read = _global_finger;
3179 HeapWord* cur = read;
3180 while (f > read) {
3181 cur = read;
// Typed Atomic::cmpxchg replaces the removed cmpxchg_ptr + cast;
// it returns the value actually observed in _global_finger.
3182 read = Atomic::cmpxchg(f, &_global_finger, cur);
3183 if (cur == read) {
3184 // our cas succeeded
3185 assert(_global_finger >= f, "protocol consistency");
3186 break;
3187 }
3188 }
3189 }
3190
3191 // This is really inefficient, and should be redone by
3192 // using (not yet available) block-read and -write interfaces to the
3193 // stack and the work_queue. XXX FIX ME !!!
3194 bool CMSConcMarkingTask::get_work_from_overflow_stack(CMSMarkStack* ovflw_stk,
3195 OopTaskQueue* work_q) {
3196 // Fast lock-free check
3197 if (ovflw_stk->length() == 0) {
3198 return false;
3199 }
3200 assert(work_q->size() == 0, "Shouldn't steal");
3201 MutexLockerEx ml(ovflw_stk->par_lock(),
3202 Mutex::_no_safepoint_check_flag);
7835 // prove to be expensive (quadratic in the amount of traffic)
7836 // when there are many objects in the overflow list and
7837 // there is much producer-consumer contention on the list.
7838 // *NOTE*: The overflow list manipulation code here and
7839 // in ParNewGeneration:: are very similar in shape,
7840 // except that in the ParNew case we use the old (from/eden)
7841 // copy of the object to thread the list via its klass word.
7842 // Because of the common code, if you make any changes in
7843 // the code below, please check the ParNew version to see if
7844 // similar changes might be needed.
7845 // CR 6797058 has been filed to consolidate the common code.
// Attempt to transfer up to "num" oops from the shared, lock-free
// _overflow_list onto the caller's private work queue "work_q".
// Protocol: claim the whole list by swapping in the sentinel BUSY,
// carve off a prefix of at most "num" elements, then CAS any remaining
// suffix back onto the global list.  Returns true if at least one
// element was transferred; false if the list was (or appeared) empty.
// "no_of_gc_threads" bounds the spin/sleep attempts made while some
// other thread holds the list in the BUSY state.
// (Uses the typed Atomic::xchg/cmpxchg/add operations; the old
// *_ptr variants and their casts are gone.)
7846 bool CMSCollector::par_take_from_overflow_list(size_t num,
7847 OopTaskQueue* work_q,
7848 int no_of_gc_threads) {
7849 assert(work_q->size() == 0, "First empty local work queue");
7850 assert(num < work_q->max_elems(), "Can't bite more than we can chew");
7851 if (_overflow_list == NULL) {
7852 return false;
7853 }
7854 // Grab the entire list; we'll put back a suffix
7855 oop prefix = cast_to_oop(Atomic::xchg((oopDesc*)BUSY, &_overflow_list));
7856 Thread* tid = Thread::current();
7857 // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
7858 // set to ParallelGCThreads.
7859 size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
7860 size_t sleep_time_millis = MAX2((size_t)1, num/100);
7861 // If the list is busy, we spin for a short while,
7862 // sleeping between attempts to get the list.
7863 for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
7864 os::sleep(tid, sleep_time_millis, false);
7865 if (_overflow_list == NULL) {
7866 // Nothing left to take
7867 return false;
7868 } else if (_overflow_list != BUSY) {
7869 // Try and grab the prefix
7870 prefix = cast_to_oop(Atomic::xchg((oopDesc*)BUSY, &_overflow_list));
7871 }
7872 }
7873 // If the list was found to be empty, or we spun long
7874 // enough, we give up and return empty-handed. If we leave
7875 // the list in the BUSY state below, it must be the case that
7876 // some other thread holds the overflow list and will set it
7877 // to a non-BUSY state in the future.
7878 if (prefix == NULL || prefix == BUSY) {
7879 // Nothing to take or waited long enough
7880 if (prefix == NULL) {
7881 // Write back the NULL in case we overwrote it with BUSY above
7882 // and it is still the same value.
7883 Atomic::cmpxchg((oopDesc*)NULL, &_overflow_list, (oopDesc*)BUSY);
7884 }
7885 return false;
7886 }
7887 assert(prefix != NULL && prefix != BUSY, "Error");
7888 size_t i = num;
7889 oop cur = prefix;
7890 // Walk down the first "num" objects, unless we reach the end.
7891 for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
7892 if (cur->mark() == NULL) {
7893 // We have "num" or fewer elements in the list, so there
7894 // is nothing to return to the global list.
7895 // Write back the NULL in lieu of the BUSY we wrote
7896 // above, if it is still the same value.
7897 if (_overflow_list == BUSY) {
7898 Atomic::cmpxchg((oopDesc*)NULL, &_overflow_list, (oopDesc*)BUSY);
7899 }
7900 } else {
7901 // Chop off the suffix and return it to the global list.
7902 assert(cur->mark() != BUSY, "Error");
7903 oop suffix_head = cur->mark(); // suffix will be put back on global list
7904 cur->set_mark(NULL); // break off suffix
7905 // It's possible that the list is still in the empty(busy) state
7906 // we left it in a short while ago; in that case we may be
7907 // able to place back the suffix without incurring the cost
7908 // of a walk down the list.
7909 oop observed_overflow_list = _overflow_list;
7910 oop cur_overflow_list = observed_overflow_list;
7911 bool attached = false;
7912 while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
7913 observed_overflow_list =
7914 Atomic::cmpxchg((oopDesc*)suffix_head, &_overflow_list, (oopDesc*)cur_overflow_list);
7915 if (cur_overflow_list == observed_overflow_list) {
7916 attached = true;
7917 break;
7918 } else cur_overflow_list = observed_overflow_list;
7919 }
7920 if (!attached) {
7921 // Too bad, someone else sneaked in (at least) an element; we'll need
7922 // to do a splice. Find tail of suffix so we can prepend suffix to global
7923 // list.
7924 for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
7925 oop suffix_tail = cur;
7926 assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
7927 "Tautology");
7928 observed_overflow_list = _overflow_list;
7929 do {
7930 cur_overflow_list = observed_overflow_list;
7931 if (cur_overflow_list != BUSY) {
7932 // Do the splice ...
7933 suffix_tail->set_mark(markOop(cur_overflow_list));
7934 } else { // cur_overflow_list == BUSY
7935 suffix_tail->set_mark(NULL);
7936 }
7937 // ... and try to place spliced list back on overflow_list ...
7938 observed_overflow_list =
7939 Atomic::cmpxchg((oopDesc*)suffix_head, &_overflow_list, (oopDesc*)cur_overflow_list);
7940 } while (cur_overflow_list != observed_overflow_list);
7941 // ... until we have succeeded in doing so.
7942 }
7943 }
7944
7945 // Push the prefix elements on work_q
7946 assert(prefix != NULL, "control point invariant");
7947 const markOop proto = markOopDesc::prototype();
7948 oop next;
7949 NOT_PRODUCT(ssize_t n = 0;)
// The list was threaded through the mark words; restore each mark to
// the prototype before handing the object to the work queue.
7950 for (cur = prefix; cur != NULL; cur = next) {
7951 next = oop(cur->mark());
7952 cur->set_mark(proto); // until proven otherwise
7953 assert(oopDesc::is_oop(cur), "Should be an oop");
7954 bool res = work_q->push(cur);
7955 assert(res, "Bit off more than we can chew?");
7956 NOT_PRODUCT(n++;)
7957 }
7958 #ifndef PRODUCT
7959 assert(_num_par_pushes >= n, "Too many pops?");
7960 Atomic::add(-n, &_num_par_pushes);
7961 #endif
7962 return true;
7963 }
7964
7965 // Single-threaded
// Push "p" onto the overflow list, threading the list through the
// objects' mark words: p's mark is overwritten with the old list head
// (preserve_mark_if_necessary saves the current mark first).  No
// atomics needed -- the caller has exclusive access.
7966 void CMSCollector::push_on_overflow_list(oop p) {
7967 NOT_PRODUCT(_num_par_pushes++;)
7968 assert(oopDesc::is_oop(p), "Not an oop")
;
7969 preserve_mark_if_necessary(p);
// Link via the mark word and install p as the new list head.
7970 p->set_mark((markOop)_overflow_list);
7971 _overflow_list = p;
7972 }
7973
7974 // Multi-threaded; use CAS to prepend to overflow list
// Push "p" onto the shared overflow list, linking through p's mark
// word.  If the observed head is the BUSY sentinel (another thread
// has claimed the list), link p to NULL instead, since BUSY is not a
// real object.  The CAS is retried until the head we installed against
// matches what we last observed.
7975 void CMSCollector::par_push_on_overflow_list(oop p) {
7976 NOT_PRODUCT(Atomic::inc(&_num_par_pushes);)
7977 assert(oopDesc::is_oop(p), "Not an oop");
7978 par_preserve_mark_if_necessary(p);
7979 oop observed_overflow_list = _overflow_list;
7980 oop cur_overflow_list;
7981 do {
7982 cur_overflow_list = observed_overflow_list;
7983 if (cur_overflow_list != BUSY) {
7984 p->set_mark(markOop(cur_overflow_list));
7985 } else {
7986 p->set_mark(NULL);
7987 }
// Typed Atomic::cmpxchg replaces the removed cmpxchg_ptr + oop cast.
7988 observed_overflow_list =
7989 Atomic::cmpxchg((oopDesc*)p, &_overflow_list, (oopDesc*)cur_overflow_list);
7990 } while (cur_overflow_list != observed_overflow_list);
7991 }
7992 #undef BUSY
7993
7994 // Single threaded
7995 // General Note on GrowableArray: pushes may silently fail
7996 // because we are (temporarily) out of C-heap for expanding
7997 // the stack. The problem is quite ubiquitous and affects
7998 // a lot of code in the JVM. The prudent thing for GrowableArray
7999 // to do (for now) is to exit with an error. However, that may
8000 // be too draconian in some cases because the caller may be
8001 // able to recover without much harm. For such cases, we
8002 // should probably introduce a "soft_push" method which returns
8003 // an indication of success or failure with the assumption that
8004 // the caller may be able to recover from a failure; code in
8005 // the VM can then be changed, incrementally, to deal with such
8006 // failures where possible, thus, incrementally hardening the VM
8007 // in such low resource situations.
8008 void CMSCollector::preserve_mark_work(oop p, markOop m) {
8009 _preserved_oop_stack.push(p);
|
1059 // (i.e., insertion of klass pointer) until after, so that it
1060 // atomically becomes a promoted object.
1061 if (promoInfo->tracking()) {
1062 promoInfo->track((PromotedObject*)obj, old->klass());
1063 }
1064 assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
1065 assert(!((FreeChunk*)obj_ptr)->is_free(), "Error, block will look free but show wrong size");
1066 assert(oopDesc::is_oop(old), "Will use and dereference old klass ptr below");
1067
1068 // Finally, install the klass pointer (this should be volatile).
1069 OrderAccess::storestore();
1070 obj->set_klass(old->klass());
1071 // We should now be able to calculate the right size for this object
1072 assert(oopDesc::is_oop(obj) && obj->size() == (int)word_sz, "Error, incorrect size computed for promoted object");
1073
1074 collector()->promoted(true, // parallel
1075 obj_ptr, old->is_objArray(), word_sz);
1076
1077 NOT_PRODUCT(
1078 Atomic::inc(&_numObjectsPromoted);
1079 Atomic::add(alloc_sz, &_numWordsPromoted);
1080 )
1081
1082 return obj;
1083 }
1084
// Called for GC worker "thread_num" when a parallel promotion cycle is
// done: retire the worker's local promotion buffer (ps->lab) so any
// unused space it holds is returned for reuse.
1085 void
1086 ConcurrentMarkSweepGeneration::
1087 par_promote_alloc_done(int thread_num) {
1088 CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1089 ps->lab.retire(thread_num);
1090 }
1091
1092 void
1093 ConcurrentMarkSweepGeneration::
1094 par_oop_since_save_marks_iterate_done(int thread_num) {
1095 CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
1096 ParScanWithoutBarrierClosure* dummy_cl = NULL;
1097 ps->promo.promoted_oops_iterate_nv(dummy_cl);
1098
1099 // Because card-scanning has been completed, subsequent phases
3162 // Note that under the current task protocol, the
3163 // following assertion is true even of the spaces
3164 // expanded since the completion of the concurrent
3165 // marking. XXX This will likely change under a strict
3166 // ABORT semantics.
3167 // After perm removal the comparison was changed to
3168 // greater than or equal to from strictly greater than.
3169 // Before perm removal the highest address sweep would
3170 // have been at the end of perm gen but now is at the
3171 // end of the tenured gen.
3172 assert(_global_finger >= _cms_space->end(),
3173 "All tasks have been completed");
3174 DEBUG_ONLY(_collector->verify_overflow_empty();)
3175 }
3176
// Monotonically advance _global_finger to at least "f" via a CAS loop.
// Concurrent bumps may race: we retry until either our CAS succeeds or
// another thread has already pushed the finger to (or past) "f".
3177 void CMSConcMarkingTask::bump_global_finger(HeapWord* f) {
3178 HeapWord* read = _global_finger;
3179 HeapWord* cur = read;
3180 while (f > read) {
3181 cur = read;
// cmpxchg returns the value actually observed in _global_finger;
// equality with "cur" means our swap took effect.
3182 read = Atomic::cmpxchg(f, &_global_finger, cur);
3183 if (cur == read) {
3184 // our cas succeeded
3185 assert(_global_finger >= f, "protocol consistency");
3186 break;
3187 }
3188 }
3189 }
3190
3191 // This is really inefficient, and should be redone by
3192 // using (not yet available) block-read and -write interfaces to the
3193 // stack and the work_queue. XXX FIX ME !!!
3194 bool CMSConcMarkingTask::get_work_from_overflow_stack(CMSMarkStack* ovflw_stk,
3195 OopTaskQueue* work_q) {
3196 // Fast lock-free check
3197 if (ovflw_stk->length() == 0) {
3198 return false;
3199 }
3200 assert(work_q->size() == 0, "Shouldn't steal");
3201 MutexLockerEx ml(ovflw_stk->par_lock(),
3202 Mutex::_no_safepoint_check_flag);
7835 // prove to be expensive (quadratic in the amount of traffic)
7836 // when there are many objects in the overflow list and
7837 // there is much producer-consumer contention on the list.
7838 // *NOTE*: The overflow list manipulation code here and
7839 // in ParNewGeneration:: are very similar in shape,
7840 // except that in the ParNew case we use the old (from/eden)
7841 // copy of the object to thread the list via its klass word.
7842 // Because of the common code, if you make any changes in
7843 // the code below, please check the ParNew version to see if
7844 // similar changes might be needed.
7845 // CR 6797058 has been filed to consolidate the common code.
// Attempt to transfer up to "num" oops from the shared, lock-free
// _overflow_list onto the caller's private work queue "work_q".
// Protocol: claim the whole list by swapping in the sentinel BUSY,
// carve off a prefix of at most "num" elements, then CAS any remaining
// suffix back onto the global list.  Returns true if at least one
// element was transferred; false if the list was (or appeared) empty.
// "no_of_gc_threads" bounds the spin/sleep attempts made while some
// other thread holds the list in the BUSY state.
7846 bool CMSCollector::par_take_from_overflow_list(size_t num,
7847 OopTaskQueue* work_q,
7848 int no_of_gc_threads) {
7849 assert(work_q->size() == 0, "First empty local work queue");
7850 assert(num < work_q->max_elems(), "Can't bite more than we can chew");
7851 if (_overflow_list == NULL) {
7852 return false;
7853 }
7854 // Grab the entire list; we'll put back a suffix
7855 oop prefix = cast_to_oop(Atomic::xchg((oopDesc*)BUSY, &_overflow_list));
7856 Thread* tid = Thread::current();
7857 // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
7858 // set to ParallelGCThreads.
7859 size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
7860 size_t sleep_time_millis = MAX2((size_t)1, num/100);
7861 // If the list is busy, we spin for a short while,
7862 // sleeping between attempts to get the list.
7863 for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
7864 os::sleep(tid, sleep_time_millis, false);
7865 if (_overflow_list == NULL) {
7866 // Nothing left to take
7867 return false;
7868 } else if (_overflow_list != BUSY) {
7869 // Try and grab the prefix
7870 prefix = cast_to_oop(Atomic::xchg((oopDesc*)BUSY, &_overflow_list));
7871 }
7872 }
7873 // If the list was found to be empty, or we spun long
7874 // enough, we give up and return empty-handed. If we leave
7875 // the list in the BUSY state below, it must be the case that
7876 // some other thread holds the overflow list and will set it
7877 // to a non-BUSY state in the future.
7878 if (prefix == NULL || prefix == BUSY) {
7879 // Nothing to take or waited long enough
7880 if (prefix == NULL) {
7881 // Write back the NULL in case we overwrote it with BUSY above
7882 // and it is still the same value.
7883 Atomic::cmpxchg((oopDesc*)NULL, &_overflow_list, (oopDesc*)BUSY);
7884 }
7885 return false;
7886 }
7887 assert(prefix != NULL && prefix != BUSY, "Error");
7888 size_t i = num;
7889 oop cur = prefix;
7890 // Walk down the first "num" objects, unless we reach the end.
7891 for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
7892 if (cur->mark() == NULL) {
7893 // We have "num" or fewer elements in the list, so there
7894 // is nothing to return to the global list.
7895 // Write back the NULL in lieu of the BUSY we wrote
7896 // above, if it is still the same value.
7897 if (_overflow_list == BUSY) {
7898 Atomic::cmpxchg((oopDesc*)NULL, &_overflow_list, (oopDesc*)BUSY);
7899 }
7900 } else {
7901 // Chop off the suffix and return it to the global list.
7902 assert(cur->mark() != BUSY, "Error");
7903 oop suffix_head = cur->mark(); // suffix will be put back on global list
7904 cur->set_mark(NULL); // break off suffix
7905 // It's possible that the list is still in the empty(busy) state
7906 // we left it in a short while ago; in that case we may be
7907 // able to place back the suffix without incurring the cost
7908 // of a walk down the list.
7909 oop observed_overflow_list = _overflow_list;
7910 oop cur_overflow_list = observed_overflow_list;
7911 bool attached = false;
7912 while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
7913 observed_overflow_list =
7914 Atomic::cmpxchg((oopDesc*)suffix_head, &_overflow_list, (oopDesc*)cur_overflow_list);
7915 if (cur_overflow_list == observed_overflow_list) {
7916 attached = true;
7917 break;
7918 } else cur_overflow_list = observed_overflow_list;
7919 }
7920 if (!attached) {
7921 // Too bad, someone else sneaked in (at least) an element; we'll need
7922 // to do a splice. Find tail of suffix so we can prepend suffix to global
7923 // list.
7924 for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
7925 oop suffix_tail = cur;
7926 assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
7927 "Tautology");
7928 observed_overflow_list = _overflow_list;
7929 do {
7930 cur_overflow_list = observed_overflow_list;
7931 if (cur_overflow_list != BUSY) {
7932 // Do the splice ...
7933 suffix_tail->set_mark(markOop(cur_overflow_list));
7934 } else { // cur_overflow_list == BUSY
7935 suffix_tail->set_mark(NULL);
7936 }
7937 // ... and try to place spliced list back on overflow_list ...
7938 observed_overflow_list =
7939 Atomic::cmpxchg((oopDesc*)suffix_head, &_overflow_list, (oopDesc*)cur_overflow_list);
7940 } while (cur_overflow_list != observed_overflow_list);
7941 // ... until we have succeeded in doing so.
7942 }
7943 }
7944
7945 // Push the prefix elements on work_q
7946 assert(prefix != NULL, "control point invariant");
7947 const markOop proto = markOopDesc::prototype();
7948 oop next;
7949 NOT_PRODUCT(ssize_t n = 0;)
// The list was threaded through the mark words; restore each mark to
// the prototype before handing the object to the work queue.
7950 for (cur = prefix; cur != NULL; cur = next) {
7951 next = oop(cur->mark());
7952 cur->set_mark(proto); // until proven otherwise
7953 assert(oopDesc::is_oop(cur), "Should be an oop");
7954 bool res = work_q->push(cur);
7955 assert(res, "Bit off more than we can chew?");
7956 NOT_PRODUCT(n++;)
7957 }
7958 #ifndef PRODUCT
7959 assert(_num_par_pushes >= n, "Too many pops?");
7960 Atomic::add(-n, &_num_par_pushes);
7961 #endif
7962 return true;
7963 }
7964
7965 // Single-threaded
// Push "p" onto the overflow list, threading the list through the
// objects' mark words: p's mark is overwritten with the old list head
// (preserve_mark_if_necessary saves the current mark first).  No
// atomics needed -- the caller has exclusive access.
7966 void CMSCollector::push_on_overflow_list(oop p) {
7967 NOT_PRODUCT(_num_par_pushes++;)
7968 assert(oopDesc::is_oop(p), "Not an oop");
7969 preserve_mark_if_necessary(p);
// Link via the mark word and install p as the new list head.
7970 p->set_mark((markOop)_overflow_list);
7971 _overflow_list = p;
7972 }
7973
7974 // Multi-threaded; use CAS to prepend to overflow list
// Push "p" onto the shared overflow list, linking through p's mark
// word.  If the observed head is the BUSY sentinel (another thread
// has claimed the list), link p to NULL instead, since BUSY is not a
// real object.  The CAS is retried until the head we installed against
// matches what we last observed.
7975 void CMSCollector::par_push_on_overflow_list(oop p) {
7976 NOT_PRODUCT(Atomic::inc(&_num_par_pushes);)
7977 assert(oopDesc::is_oop(p), "Not an oop");
7978 par_preserve_mark_if_necessary(p);
7979 oop observed_overflow_list = _overflow_list;
7980 oop cur_overflow_list;
7981 do {
7982 cur_overflow_list = observed_overflow_list;
7983 if (cur_overflow_list != BUSY) {
7984 p->set_mark(markOop(cur_overflow_list));
7985 } else {
7986 p->set_mark(NULL);
7987 }
// cmpxchg returns the previously observed head; loop until it matches
// the head our link was computed against.
7988 observed_overflow_list =
7989 Atomic::cmpxchg((oopDesc*)p, &_overflow_list, (oopDesc*)cur_overflow_list);
7990 } while (cur_overflow_list != observed_overflow_list);
7991 }
7992 #undef BUSY
7993
7994 // Single threaded
7995 // General Note on GrowableArray: pushes may silently fail
7996 // because we are (temporarily) out of C-heap for expanding
7997 // the stack. The problem is quite ubiquitous and affects
7998 // a lot of code in the JVM. The prudent thing for GrowableArray
7999 // to do (for now) is to exit with an error. However, that may
8000 // be too draconian in some cases because the caller may be
8001 // able to recover without much harm. For such cases, we
8002 // should probably introduce a "soft_push" method which returns
8003 // an indication of success or failure with the assumption that
8004 // the caller may be able to recover from a failure; code in
8005 // the VM can then be changed, incrementally, to deal with such
8006 // failures where possible, thus, incrementally hardening the VM
8007 // in such low resource situations.
8008 void CMSCollector::preserve_mark_work(oop p, markOop m) {
8009 _preserved_oop_stack.push(p);
|