
src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp

rev 11970 : imported patch overflow_list

Old version (before the patch):

7812 // under contention.
7813 bool CMSCollector::simulate_overflow() {
7814   if (_overflow_counter-- <= 0) { // just being defensive
7815     _overflow_counter = CMSMarkStackOverflowInterval;
7816     return true;
7817   } else {
7818     return false;
7819   }
7820 }
7821 
7822 bool CMSCollector::par_simulate_overflow() {
7823   return simulate_overflow();
7824 }
7825 #endif
7826 
7827 // Single-threaded
7828 bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
7829   assert(stack->isEmpty(), "Expected precondition");
7830   assert(stack->capacity() > num, "Shouldn't bite more than can chew");
7831   size_t i = num;
7832   oop  cur = _overflow_list;
7833   const markOop proto = markOopDesc::prototype();
7834   NOT_PRODUCT(ssize_t n = 0;)
7835   for (oop next; i > 0 && cur != NULL; cur = next, i--) {
7836     next = oop(cur->mark());
7837     cur->set_mark(proto);   // until proven otherwise
7838     assert(cur->is_oop(), "Should be an oop");
7839     bool res = stack->push(cur);
7840     assert(res, "Bit off more than can chew?");
7841     NOT_PRODUCT(n++;)
7842   }
7843   _overflow_list = cur;
7844 #ifndef PRODUCT
7845   assert(_num_par_pushes >= n, "Too many pops?");
7846   _num_par_pushes -= n;
7847 #endif
7848   return !stack->isEmpty();
7849 }
7850 
7851 #define BUSY  (cast_to_oop<intptr_t>(0x1aff1aff))
7852 // (MT-safe) Get a prefix of at most "num" from the list.
7853 // The overflow list is chained through the mark word of
7854 // each object in the list. We fetch the entire list,
7855 // break off a prefix of the right size and return the
7856 // remainder. If other threads try to take objects from
7857 // the overflow list at that time, they will wait for
7858 // some time to see if data becomes available. If (and
7859 // only if) another thread places one or more object(s)
7860 // on the global list before we have returned the suffix
7861 // to the global list, we will walk down our local list
7862 // to find its end and append the global list to
7863 // our suffix before returning it. This suffix walk can
7864 // prove to be expensive (quadratic in the amount of traffic)
7865 // when there are many objects in the overflow list and
7866 // there is much producer-consumer contention on the list.
7867 // *NOTE*: The overflow list manipulation code here and
7868 // in ParNewGeneration:: are very similar in shape,
7869 // except that in the ParNew case we use the old (from/eden)
7870 // copy of the object to thread the list via its klass word.
7871 // Because of the common code, if you make any changes in
7872 // the code below, please check the ParNew version to see if
7873 // similar changes might be needed.
7874 // CR 6797058 has been filed to consolidate the common code.
7875 bool CMSCollector::par_take_from_overflow_list(size_t num,
7876                                                OopTaskQueue* work_q,
7877                                                int no_of_gc_threads) {
7878   assert(work_q->size() == 0, "First empty local work queue");
7879   assert(num < work_q->max_elems(), "Can't bite more than we can chew");
7880   if (_overflow_list == NULL) {
7881     return false;
7882   }
7883   // Grab the entire list; we'll put back a suffix
7884   oop prefix = cast_to_oop(Atomic::xchg_ptr(BUSY, &_overflow_list));
7885   Thread* tid = Thread::current();
7886   // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
7887   // set to ParallelGCThreads.
7888   size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
7889   size_t sleep_time_millis = MAX2((size_t)1, num/100);
7890   // If the list is busy, we spin for a short while,
7891   // sleeping between attempts to get the list.
7892   for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
7893     os::sleep(tid, sleep_time_millis, false);
7894     if (_overflow_list == NULL) {
7895       // Nothing left to take
7896       return false;
7897     } else if (_overflow_list != BUSY) {
7898       // Try and grab the prefix
7899       prefix = cast_to_oop(Atomic::xchg_ptr(BUSY, &_overflow_list));
7900     }
7901   }
7902   // If the list was found to be empty, or we spun long
7903   // enough, we give up and return empty-handed. If we leave
7904   // the list in the BUSY state below, it must be the case that
7905   // some other thread holds the overflow list and will set it
7906   // to a non-BUSY state in the future.
7907   if (prefix == NULL || prefix == BUSY) {
7908      // Nothing to take or waited long enough
7909      if (prefix == NULL) {
7910        // Write back the NULL in case we overwrote it with BUSY above
7911        // and it is still the same value.
7912        (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
7913      }
7914      return false;
7915   }
7916   assert(prefix != NULL && prefix != BUSY, "Error");
7917   size_t i = num;
7918   oop cur = prefix;
7919   // Walk down the first "num" objects, unless we reach the end.
7920   for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
7921   if (cur->mark() == NULL) {
7922     // We have "num" or fewer elements in the list, so there
7923     // is nothing to return to the global list.
7924     // Write back the NULL in lieu of the BUSY we wrote
7925     // above, if it is still the same value.
7926     if (_overflow_list == BUSY) {
7927       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
7928     }
7929   } else {
7930     // Chop off the suffix and return it to the global list.
7931     assert(cur->mark() != BUSY, "Error");
7932     oop suffix_head = cur->mark(); // suffix will be put back on global list
7933     cur->set_mark(NULL);           // break off suffix
7934     // It's possible that the list is still in the empty(busy) state
7935     // we left it in a short while ago; in that case we may be
7936     // able to place back the suffix without incurring the cost
7937     // of a walk down the list.
7938     oop observed_overflow_list = _overflow_list;
7939     oop cur_overflow_list = observed_overflow_list;
7940     bool attached = false;
7941     while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
7942       observed_overflow_list =
7943         (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
7944       if (cur_overflow_list == observed_overflow_list) {
7945         attached = true;
7946         break;
7947       } else cur_overflow_list = observed_overflow_list;
7948     }
7949     if (!attached) {
7950       // Too bad, someone else sneaked in (at least) an element; we'll need
7951       // to do a splice. Find tail of suffix so we can prepend suffix to global
7952       // list.
7953       for (cur = suffix_head; cur->mark() != NULL; cur = (oop)(cur->mark()));
7954       oop suffix_tail = cur;
7955       assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
7956              "Tautology");
7957       observed_overflow_list = _overflow_list;
7958       do {
7959         cur_overflow_list = observed_overflow_list;
7960         if (cur_overflow_list != BUSY) {
7961           // Do the splice ...
7962           suffix_tail->set_mark(markOop(cur_overflow_list));
7963         } else { // cur_overflow_list == BUSY
7964           suffix_tail->set_mark(NULL);
7965         }
7966         // ... and try to place spliced list back on overflow_list ...
7967         observed_overflow_list =
7968           (oop) Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
7969       } while (cur_overflow_list != observed_overflow_list);
7970       // ... until we have succeeded in doing so.
7971     }
7972   }
7973 
7974   // Push the prefix elements on work_q
7975   assert(prefix != NULL, "control point invariant");
7976   const markOop proto = markOopDesc::prototype();
7977   oop next;
7978   NOT_PRODUCT(ssize_t n = 0;)
7979   for (cur = prefix; cur != NULL; cur = next) {
7980     next = oop(cur->mark());
7981     cur->set_mark(proto);   // until proven otherwise
7982     assert(cur->is_oop(), "Should be an oop");
7983     bool res = work_q->push(cur);
7984     assert(res, "Bit off more than we can chew?");
7985     NOT_PRODUCT(n++;)
7986   }
7987 #ifndef PRODUCT
7988   assert(_num_par_pushes >= n, "Too many pops?");
7989   Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
7990 #endif
7991   return true;
7992 }
7993 
7994 // Single-threaded
7995 void CMSCollector::push_on_overflow_list(oop p) {
7996   NOT_PRODUCT(_num_par_pushes++;)
7997   assert(p->is_oop(), "Not an oop");
7998   preserve_mark_if_necessary(p);
7999   p->set_mark((markOop)_overflow_list);
8000   _overflow_list = p;
8001 }
8002 
8003 // Multi-threaded; use CAS to prepend to overflow list
8004 void CMSCollector::par_push_on_overflow_list(oop p) {
8005   NOT_PRODUCT(Atomic::inc_ptr(&_num_par_pushes);)
8006   assert(p->is_oop(), "Not an oop");
8007   par_preserve_mark_if_necessary(p);
8008   oop observed_overflow_list = _overflow_list;
8009   oop cur_overflow_list;
8010   do {
8011     cur_overflow_list = observed_overflow_list;
8012     if (cur_overflow_list != BUSY) {
8013       p->set_mark(markOop(cur_overflow_list));
8014     } else {
8015       p->set_mark(NULL);
8016     }
8017     observed_overflow_list =
8018       (oop) Atomic::cmpxchg_ptr(p, &_overflow_list, cur_overflow_list);
8019   } while (cur_overflow_list != observed_overflow_list);
8020 }
8021 #undef BUSY
8022 
8023 // Single threaded
8024 // General Note on GrowableArray: pushes may silently fail
8025 // because we are (temporarily) out of C-heap for expanding
8026 // the stack. The problem is quite ubiquitous and affects
8027 // a lot of code in the JVM. The prudent thing for GrowableArray
8028 // to do (for now) is to exit with an error. However, that may
8029 // be too draconian in some cases because the caller may be
8030 // able to recover without much harm. For such cases, we
8031 // should probably introduce a "soft_push" method which returns
8032 // an indication of success or failure with the assumption that
8033 // the caller may be able to recover from a failure; code in
8034 // the VM can then be changed, incrementally, to deal with such
8035 // failures where possible, thus, incrementally hardening the VM
8036 // in such low resource situations.
8037 void CMSCollector::preserve_mark_work(oop p, markOop m) {
8038   _preserved_oop_stack.push(p);



New version (with patch overflow_list applied):

7812 // under contention.
7813 bool CMSCollector::simulate_overflow() {
7814   if (_overflow_counter-- <= 0) { // just being defensive
7815     _overflow_counter = CMSMarkStackOverflowInterval;
7816     return true;
7817   } else {
7818     return false;
7819   }
7820 }
7821 
7822 bool CMSCollector::par_simulate_overflow() {
7823   return simulate_overflow();
7824 }
7825 #endif
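
The counter above lets stress runs inject a synthetic overflow once every
CMSMarkStackOverflowInterval calls, so the rarely-taken overflow paths get
regular exercise. A hedged sketch of the typical call-site shape, guarded by
the CMSMarkStackOverflowALot develop flag (illustrative only; _markStack and
obj are placeholders, and the real call sites in this file vary):

  bool simulate_overflow = false;
  NOT_PRODUCT(
    if (CMSMarkStackOverflowALot && _collector->simulate_overflow()) {
      // Pretend the push below failed, even if it would have succeeded.
      simulate_overflow = true;
    }
  )
  if (simulate_overflow || !_markStack->push(obj)) {
    // Take the same recovery path as a genuine mark-stack overflow,
    // e.g. deferring the object to the global overflow list.
    _collector->push_on_overflow_list(obj);
  }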
7826 
7827 // Single-threaded
7828 bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
7829   assert(stack->isEmpty(), "Expected precondition");
7830   assert(stack->capacity() > num, "Shouldn't bite more than can chew");
7831   size_t i = num;
7832   oop  cur = cast_to_oop<HeapWord*>(_overflow_list);
7833   const markOop proto = markOopDesc::prototype();
7834   NOT_PRODUCT(ssize_t n = 0;)
7835   for (oop next; i > 0 && cur != NULL; cur = next, i--) {
7836     next = oop(cur->mark());
7837     cur->set_mark(proto);   // until proven otherwise
7838     assert(cur->is_oop(), "Should be an oop");
7839     bool res = stack->push(cur);
7840     assert(res, "Bit off more than can chew?");
7841     NOT_PRODUCT(n++;)
7842   }
7843   _overflow_list = cast_from_oop<HeapWord*>(cur);
7844 #ifndef PRODUCT
7845   assert(_num_par_pushes >= n, "Too many pops?");
7846   _num_par_pushes -= n;
7847 #endif
7848   return !stack->isEmpty();
7849 }
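
To make the representation concrete: while an object sits on the overflow
list its mark word doubles as the "next" link, so the list needs no side
storage; popping restores the prototype mark before the object is used
again. A minimal single-threaded sketch of that idea (stand-alone C++ with
invented names, not the HotSpot types):

  struct Obj {
    uintptr_t mark;                       // stands in for the mark word
  };

  const uintptr_t PROTO_MARK = 0x1;       // stands in for markOopDesc::prototype()
  Obj* overflow_head = nullptr;

  void push_overflow(Obj* p) {
    p->mark = reinterpret_cast<uintptr_t>(overflow_head);  // borrow mark as link
    overflow_head = p;
  }

  Obj* pop_overflow() {
    Obj* p = overflow_head;
    if (p != nullptr) {
      overflow_head = reinterpret_cast<Obj*>(p->mark);
      p->mark = PROTO_MARK;               // "until proven otherwise"
    }
    return p;
  }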
7850 
7851 #define BUSY  ((HeapWord*)(0x1aff1aff))
7852 // (MT-safe) Get a prefix of at most "num" from the list.
7853 // The overflow list is chained through the mark word of
7854 // each object in the list. We fetch the entire list,
7855 // break off a prefix of the right size and return the
7856 // remainder. If other threads try to take objects from
7857 // the overflow list at that time, they will wait for
7858 // some time to see if data becomes available. If (and
7859 // only if) another thread places one or more object(s)
7860 // on the global list before we have returned the suffix
7861 // to the global list, we will walk down our local list
7862 // to find its end and append the global list to
7863 // our suffix before returning it. This suffix walk can
7864 // prove to be expensive (quadratic in the amount of traffic)
7865 // when there are many objects in the overflow list and
7866 // there is much producer-consumer contention on the list.
7867 // *NOTE*: The overflow list manipulation code here and
7868 // in ParNewGeneration:: are very similar in shape,
7869 // except that in the ParNew case we use the old (from/eden)
7870 // copy of the object to thread the list via its klass word.
7871 // Because of the common code, if you make any changes in
7872 // the code below, please check the ParNew version to see if
7873 // similar changes might be needed.
7874 // CR 6797058 has been filed to consolidate the common code.
7875 bool CMSCollector::par_take_from_overflow_list(size_t num,
7876                                                OopTaskQueue* work_q,
7877                                                int no_of_gc_threads) {
7878   assert(work_q->size() == 0, "First empty local work queue");
7879   assert(num < work_q->max_elems(), "Can't bite more than we can chew");
7880   if (_overflow_list == NULL) {
7881     return false;
7882   }
7883   // Grab the entire list; we'll put back a suffix
7884   HeapWord* prefix = (HeapWord*)Atomic::xchg_ptr(BUSY, &_overflow_list);
7885   Thread* tid = Thread::current();
7886   // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
7887   // set to ParallelGCThreads.
7888   size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
7889   size_t sleep_time_millis = MAX2((size_t)1, num/100);
7890   // If the list is busy, we spin for a short while,
7891   // sleeping between attempts to get the list.
7892   for (size_t spin = 0; prefix == BUSY && spin < CMSOverflowSpinCount; spin++) {
7893     os::sleep(tid, sleep_time_millis, false);
7894     if (_overflow_list == NULL) {
7895       // Nothing left to take
7896       return false;
7897     } else if (_overflow_list != BUSY) {
7898       // Try and grab the prefix
7899       prefix = (HeapWord*)Atomic::xchg_ptr(BUSY, &_overflow_list);
7900     }
7901   }
7902   // If the list was found to be empty, or we spun long
7903   // enough, we give up and return empty-handed. If we leave
7904   // the list in the BUSY state below, it must be the case that
7905   // some other thread holds the overflow list and will set it
7906   // to a non-BUSY state in the future.
7907   if (prefix == NULL || prefix == BUSY) {
7908      // Nothing to take or waited long enough
7909      if (prefix == NULL) {
7910        // Write back the NULL in case we overwrote it with BUSY above
7911        // and it is still the same value.
7912        (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
7913      }
7914      return false;
7915   }
7916   assert(prefix != NULL && prefix != BUSY, "Error");
7917   size_t i = num;
7918   oop cur = cast_to_oop<HeapWord*>(prefix);
7919   // Walk down the first "num" objects, unless we reach the end.
7920   for (; i > 1 && cur->mark() != NULL; cur = oop(cur->mark()), i--);
7921   if (cur->mark() == NULL) {
7922     // We have "num" or fewer elements in the list, so there
7923     // is nothing to return to the global list.
7924     // Write back the NULL in lieu of the BUSY we wrote
7925     // above, if it is still the same value.
7926     if (_overflow_list == BUSY) {
7927       (void) Atomic::cmpxchg_ptr(NULL, &_overflow_list, BUSY);
7928     }
7929   } else {
7930     // Chop off the suffix and return it to the global list.
7931     assert(cast_from_oop<HeapWord*>(cur->mark()) != BUSY, "Error");
7932     HeapWord *suffix_head = cast_from_oop<HeapWord*>(cur->mark()); // suffix will be put back on global list
7933     cur->set_mark(NULL);           // break off suffix
7934     // It's possible that the list is still in the empty(busy) state
7935     // we left it in a short while ago; in that case we may be
7936     // able to place back the suffix without incurring the cost
7937     // of a walk down the list.
7938     HeapWord* observed_overflow_list = _overflow_list;
7939     HeapWord* cur_overflow_list = observed_overflow_list;
7940     bool attached = false;
7941     while (observed_overflow_list == BUSY || observed_overflow_list == NULL) {
7942       observed_overflow_list =
7943         (HeapWord*)Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
7944       if (cur_overflow_list == observed_overflow_list) {
7945         attached = true;
7946         break;
7947       } else cur_overflow_list = observed_overflow_list;
7948     }
7949     if (!attached) {
7950       // Too bad, someone else sneaked in (at least) an element; we'll need
7951       // to do a splice. Find tail of suffix so we can prepend suffix to global
7952       // list.
7953       for (cur = cast_to_oop<HeapWord*>(suffix_head); cur->mark() != NULL; cur = (oop)(cur->mark()));
7954       oop suffix_tail = cur;
7955       assert(suffix_tail != NULL && suffix_tail->mark() == NULL,
7956              "Tautology");
7957       observed_overflow_list = _overflow_list;
7958       do {
7959         cur_overflow_list = observed_overflow_list;
7960         if (cur_overflow_list != BUSY) {
7961           // Do the splice ...
7962           suffix_tail->set_mark(markOop(cur_overflow_list));
7963         } else { // cur_overflow_list == BUSY
7964           suffix_tail->set_mark(NULL);
7965         }
7966         // ... and try to place spliced list back on overflow_list ...
7967         observed_overflow_list =
7968           (HeapWord*)Atomic::cmpxchg_ptr(suffix_head, &_overflow_list, cur_overflow_list);
7969       } while (cur_overflow_list != observed_overflow_list);
7970       // ... until we have succeeded in doing so.
7971     }
7972   }
7973 
7974   // Push the prefix elements on work_q
7975   assert(prefix != NULL, "control point invariant");
7976   const markOop proto = markOopDesc::prototype();
7977   oop next;
7978   NOT_PRODUCT(ssize_t n = 0;)
7979   for (cur = cast_to_oop<HeapWord*>(prefix); cur != NULL; cur = next) {
7980     next = oop(cur->mark());
7981     cur->set_mark(proto);   // until proven otherwise
7982     assert(cur->is_oop(), "Should be an oop");
7983     bool res = work_q->push(cur);
7984     assert(res, "Bit off more than we can chew?");
7985     NOT_PRODUCT(n++;)
7986   }
7987 #ifndef PRODUCT
7988   assert(_num_par_pushes >= n, "Too many pops?");
7989   Atomic::add_ptr(-(intptr_t)n, &_num_par_pushes);
7990 #endif
7991   return true;
7992 }
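
The protocol above can be modeled in isolation. The sketch below is a
stand-alone C++11 model, not the HotSpot code: std::atomic stands in for
Atomic::xchg_ptr/cmpxchg_ptr, the spin-and-sleep back-off is omitted, and
BUSY is safe as a sentinel because it can never be a real heap address. It
shows the two consumer-side steps: claim the whole list by exchanging in
BUSY, then return an unused, null-terminated suffix, with a cheap CAS fast
path while the list is still in the NULL/BUSY state we left behind and a
tail-walk splice when a producer got in first.

  #include <atomic>

  struct Node { Node* next; };

  Node* const BUSY = reinterpret_cast<Node*>(0x1aff1aff);  // never a real address
  std::atomic<Node*> overflow_list(nullptr);

  // Consumer: take everything, leaving BUSY so other takers know to wait.
  Node* claim_all() {
    return overflow_list.exchange(BUSY);
  }

  // Consumer: give back a detached, nullptr-terminated suffix.
  void return_suffix(Node* suffix_head) {
    Node* observed = overflow_list.load();
    // Fast path: the list is still in the nullptr/BUSY state we left it
    // in, so the suffix can be installed wholesale.
    while (observed == BUSY || observed == nullptr) {
      if (overflow_list.compare_exchange_strong(observed, suffix_head)) {
        return;  // attached without walking the suffix
      }
      // On failure, 'observed' now holds the current head; re-test.
    }
    // Slow path: a producer pushed in the meantime. Find our tail and
    // splice the current global list behind it, retrying the CAS until
    // it sticks. This walk is what makes heavy contention quadratic.
    Node* tail = suffix_head;
    while (tail->next != nullptr) tail = tail->next;
    Node* cur = overflow_list.load();
    do {
      tail->next = (cur == BUSY) ? nullptr : cur;
    } while (!overflow_list.compare_exchange_strong(cur, suffix_head));
  }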
7993 
7994 // Single-threaded
7995 void CMSCollector::push_on_overflow_list(oop p) {
7996   NOT_PRODUCT(_num_par_pushes++;)
7997   assert(p->is_oop(), "Not an oop");
7998   preserve_mark_if_necessary(p);
7999   p->set_mark((markOop)_overflow_list);
8000   _overflow_list = cast_from_oop<HeapWord*>(p);
8001 }
8002 
8003 // Multi-threaded; use CAS to prepend to overflow list
8004 void CMSCollector::par_push_on_overflow_list(oop p) {
8005   NOT_PRODUCT(Atomic::inc_ptr(&_num_par_pushes);)
8006   assert(p->is_oop(), "Not an oop");
8007   par_preserve_mark_if_necessary(p);
8008   HeapWord* observed_overflow_list = _overflow_list;
8009   HeapWord* cur_overflow_list;
8010   do {
8011     cur_overflow_list = observed_overflow_list;
8012     if (cur_overflow_list != BUSY) {
8013       p->set_mark(markOop(cur_overflow_list));
8014     } else {
8015       p->set_mark(NULL);
8016     }
8017     observed_overflow_list =
8018       (HeapWord*)Atomic::cmpxchg_ptr(cast_from_oop<HeapWord*>(p), &_overflow_list, cur_overflow_list);
8019   } while (cur_overflow_list != observed_overflow_list);
8020 }
8021 #undef BUSY
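
The producer side (par_push_on_overflow_list above) is the complementary
CAS loop. Continuing the sketch from the previous block (same Node, BUSY,
and overflow_list declarations): when a consumer currently holds the list,
the BUSY sentinel must not leak into a next link, so the new node starts a
fresh one-element list instead, exactly as the code above stores NULL
rather than BUSY into the mark word.

  // Producer: prepend one node, never storing the BUSY sentinel as a link.
  void push(Node* p) {
    Node* observed = overflow_list.load();
    do {
      p->next = (observed == BUSY) ? nullptr : observed;
    } while (!overflow_list.compare_exchange_strong(observed, p));
  }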
8022 
8023 // Single threaded
8024 // General Note on GrowableArray: pushes may silently fail
8025 // because we are (temporarily) out of C-heap for expanding
8026 // the stack. The problem is quite ubiquitous and affects
8027 // a lot of code in the JVM. The prudent thing for GrowableArray
8028 // to do (for now) is to exit with an error. However, that may
8029 // be too draconian in some cases because the caller may be
8030 // able to recover without much harm. For such cases, we
8031 // should probably introduce a "soft_push" method which returns
8032 // an indication of success or failure with the assumption that
8033 // the caller may be able to recover from a failure; code in
8034 // the VM can then be changed, incrementally, to deal with such
8035 // failures where possible, thus, incrementally hardening the VM
8036 // in such low resource situations.
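
As a concrete reading of the note above, a "soft_push" would simply turn
allocation failure into a boolean the caller can act on. A hypothetical
sketch (the names SoftStack and soft_push are invented for illustration
and exist nowhere in HotSpot; plain realloc stands in for the C-heap
allocator, and elements are assumed trivially copyable):

  #include <cstdlib>
  #include <cstddef>

  template <typename T>
  class SoftStack {
    T*     _data;
    size_t _len;
    size_t _cap;
   public:
    SoftStack() : _data(nullptr), _len(0), _cap(0) {}
    // Unlike a push that exits the VM on failed expansion, report the
    // failure and leave the stack unchanged so the caller can recover.
    bool soft_push(const T& e) {
      if (_len == _cap) {
        size_t new_cap = (_cap == 0) ? 16 : _cap * 2;
        T* p = static_cast<T*>(realloc(_data, new_cap * sizeof(T)));
        if (p == nullptr) return false;   // out of C-heap: caller decides
        _data = p;
        _cap  = new_cap;
      }
      _data[_len++] = e;
      return true;
    }
  };
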
8037 void CMSCollector::preserve_mark_work(oop p, markOop m) {
8038   _preserved_oop_stack.push(p);

