src/share/vm/gc_implementation/g1/concurrentMark.cpp

Print this page
rev 6345 : 8040803: G1: Concurrent mark hangs when mark stack overflows
Reviewed-by: TDB


 961  * this case we should not attempt to leave / enter the STS, otherwise
 962  * we'll either hit an assert (debug / fastdebug) or deadlock
 963  * (product). So we should only leave / enter the STS if we are
 964  * operating concurrently.
 965  *
 966  * Because the thread that does the sync barrier has left the STS, it
 967  * is possible for it to be suspended for a Full GC, or for an evacuation
 968  * pause to occur. This is actually safe, since entering the sync
 969  * barrier is one of the last things do_marking_step() does, and it
 970  * doesn't manipulate any data structures afterwards.
 971  */
 972 
 973 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
       // Synchronizes the calling worker on _first_overflow_barrier_sync.
       // When marking is running concurrently the worker leaves the
       // suspendible thread set (STS) around the wait and re-joins it
       // afterwards; worker 0 then resets the global marking state.
       // worker_id identifies the calling worker (used for logging and to
       // select the worker that performs the global reset).
 974   if (verbose_low()) {
 975     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 976   }
 977 
 978   if (concurrent()) {
 979     SuspendibleThreadSet::leave();
 980   }
 981   _first_overflow_barrier_sync.enter();


 982   if (concurrent()) {
 983     SuspendibleThreadSet::join();
 984   }
 985   // At this point everyone should have synced up and not be doing any
 986   // more work.
 987 
 988   if (verbose_low()) {



 989     gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 990   }

 991 
 992   // If we're executing the concurrent phase of marking, reset the marking
 993   // state; otherwise the marking state is reset after reference processing,
 994   // during the remark pause.
 995   // If we reset here as a result of an overflow during the remark we will
 996   // see assertion failures from any subsequent set_concurrency_and_phase()
 997   // calls.
 998   if (concurrent()) {



 999     // Let the task associated with worker 0 do this.
1000     if (worker_id == 0) {
1001       // Task 0 is responsible for clearing the global data structures.
1002       // We should be here because of an overflow. During STW we should
1003       // not clear the overflow flag since we rely on it being true when
1004       // we exit this method to abort the pause and restart concurrent
1005       // marking.
1006       reset_marking_state(true /* clear_overflow */);
1007       force_overflow()->update();
1008 
1009       if (G1Log::fine()) {
1010         gclog_or_tty->date_stamp(PrintGCDateStamps);
1011         gclog_or_tty->stamp(PrintGCTimeStamps);
1012         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1013       }
1014     }
1015   }
1016 
1017   // After this, each task should reset its own data structures and
1018   // then go into the second barrier.
1019 }
1020 
1021 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
       // Synchronizes the calling worker on _second_overflow_barrier_sync.
       // As with the first barrier, the STS is only left / re-joined when
       // marking is running concurrently. worker_id is used for logging only.
1022   if (verbose_low()) {
1023     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1024   }
1025 
1026   if (concurrent()) {
1027     SuspendibleThreadSet::leave();
1028   }
1029   _second_overflow_barrier_sync.enter();


1030   if (concurrent()) {
1031     SuspendibleThreadSet::join();
1032   }
1033   // At this point everything should be re-initialized and ready to go.
1034 
1035   if (verbose_low()) {



1036     gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1037   }

1038 }
1039 
1040 #ifndef PRODUCT
1041 void ForceOverflowSettings::init() {
       // Loads the forced-overflow budget from G1ConcMarkForceOverflow and
       // then calls update(), which consumes one unit of that budget (if
       // any) to decide the initial value of _force.
1042   _num_remaining = G1ConcMarkForceOverflow;
1043   _force = false;
1044   update();
1045 }
1046 
1047 void ForceOverflowSettings::update() {
       // While budget remains, consume one unit of _num_remaining and set
       // _force; once the budget is exhausted, clear _force.
1048   if (_num_remaining > 0) {
1049     _num_remaining -= 1;
1050     _force = true;
1051   } else {
1052     _force = false;
1053   }
1054 }
1055 
1056 bool ForceOverflowSettings::should_force() {
1057   if (_force) {


3223     }
3224   }
3225 }
3226 
3227 // abandon current marking iteration due to a Full GC
3228 void ConcurrentMark::abort() {
3229   // Clear all marks to force marking thread to do nothing
3230   _nextMarkBitMap->clearAll();
3231 
3232   // Note we cannot clear the previous marking bitmap here
3233   // since VerifyDuringGC verifies the objects marked during
3234   // a full GC against the previous bitmap.
3235 
3236   // Clear the liveness counting data
3237   clear_all_count_data();
3238   // Empty mark stack
3239   reset_marking_state();
       // Clear the region fields of every marking task
3240   for (uint i = 0; i < _max_worker_id; ++i) {
3241     _tasks[i]->clear_region_fields();
3242   }


       // Record that this marking iteration has been aborted
3243   _has_aborted = true;
3244 
3245   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3246   satb_mq_set.abandon_partial_marking();
3247   // This can be called either during or outside marking, we'll read
3248   // the expected_active value from the SATB queue set.
3249   satb_mq_set.set_active_all_threads(
3250                                  false, /* new active value */
3251                                  satb_mq_set.is_active() /* expected_active */);
3252 
3253   _g1h->trace_heap_after_concurrent_cycle();
3254   _g1h->register_concurrent_cycle_end();
3255 }
3256 
3257 static void print_ms_time_info(const char* prefix, const char* name,
3258                                NumberSeq& ns) {
3259   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3260                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3261   if (ns.num() > 0) {
3262     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",




 961  * this case we should not attempt to leave / enter the STS, otherwise
 962  * we'll either hit an assert (debug / fastdebug) or deadlock
 963  * (product). So we should only leave / enter the STS if we are
 964  * operating concurrently.
 965  *
 966  * Because the thread that does the sync barrier has left the STS, it
 967  * is possible for it to be suspended for a Full GC, or for an evacuation
 968  * pause to occur. This is actually safe, since entering the sync
 969  * barrier is one of the last things do_marking_step() does, and it
 970  * doesn't manipulate any data structures afterwards.
 971  */
 972 
 973 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
       // Synchronizes the calling worker on _first_overflow_barrier_sync.
       // When marking is running concurrently the worker leaves the
       // suspendible thread set (STS) around the wait and re-joins it
       // afterwards. Unless the barrier was aborted, worker 0 then resets
       // the global marking state. worker_id identifies the calling worker
       // (used for logging and to select the worker that does the reset).
 974   if (verbose_low()) {
 975     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 976   }
 977 
 978   if (concurrent()) {
 979     SuspendibleThreadSet::leave();
 980   }
 981 
       // A false return from enter() is treated as "the barrier was aborted".
 982   bool barrier_aborted = !_first_overflow_barrier_sync.enter();
 983 
 984   if (concurrent()) {
 985     SuspendibleThreadSet::join();
 986   }
 987   // At this point everyone should have synced up and not be doing any
 988   // more work.
 989 
 990   if (verbose_low()) {
 991     if (barrier_aborted) {
 992       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
 993     } else {
 994       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 995     }
 996   }
 997 
 998   // If we're executing the concurrent phase of marking, reset the marking
 999   // state; otherwise the marking state is reset after reference processing,
1000   // during the remark pause.
1001   // If we reset here as a result of an overflow during the remark we will
1002   // see assertion failures from any subsequent set_concurrency_and_phase()
1003   // calls.
1004   // If the barrier aborted we don't need to reset the marking state here
1005   // since ConcurrentMark::abort() did that for us and we will now ignore
1006   // the overflow condition and just abort the whole marking phase.
1007   if (!barrier_aborted && concurrent()) {
1008     // Let the task associated with worker 0 do this.
1009     if (worker_id == 0) {
1010       // Task 0 is responsible for clearing the global data structures.
1011       // We should be here because of an overflow. During STW we should
1012       // not clear the overflow flag since we rely on it being true when
1013       // we exit this method to abort the pause and restart concurrent
1014       // marking.
1015       reset_marking_state(true /* clear_overflow */);
1016       force_overflow()->update();
1017 
1018       if (G1Log::fine()) {
1019         gclog_or_tty->date_stamp(PrintGCDateStamps);
1020         gclog_or_tty->stamp(PrintGCTimeStamps);
1021         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1022       }
1023     }
1024   }
1025 
1026   // After this, each task should reset its own data structures and
1027   // then go into the second barrier.
1028 }
1029 
1030 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
       // Synchronizes the calling worker on _second_overflow_barrier_sync.
       // As with the first barrier, the STS is only left / re-joined when
       // marking is running concurrently. worker_id is used for logging only.
1031   if (verbose_low()) {
1032     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1033   }
1034 
1035   if (concurrent()) {
1036     SuspendibleThreadSet::leave();
1037   }
1038 
       // A false return from enter() is treated as "the barrier was aborted";
       // here the result only affects which message is logged below.
1039   bool barrier_aborted = !_second_overflow_barrier_sync.enter();
1040 
1041   if (concurrent()) {
1042     SuspendibleThreadSet::join();
1043   }
1044   // At this point everything should be re-initialized and ready to go.
1045 
1046   if (verbose_low()) {
1047     if (barrier_aborted) {
1048       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1049     } else {
1050       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1051     }
1052   }
1053 }
1054 
1055 #ifndef PRODUCT
1056 void ForceOverflowSettings::init() {
       // Loads the forced-overflow budget from G1ConcMarkForceOverflow and
       // then calls update(), which consumes one unit of that budget (if
       // any) to decide the initial value of _force.
1057   _num_remaining = G1ConcMarkForceOverflow;
1058   _force = false;
1059   update();
1060 }
1061 
1062 void ForceOverflowSettings::update() {
       // While budget remains, consume one unit of _num_remaining and set
       // _force; once the budget is exhausted, clear _force.
1063   if (_num_remaining > 0) {
1064     _num_remaining -= 1;
1065     _force = true;
1066   } else {
1067     _force = false;
1068   }
1069 }
1070 
1071 bool ForceOverflowSettings::should_force() {
1072   if (_force) {


3238     }
3239   }
3240 }
3241 
3242 // abandon current marking iteration due to a Full GC
3243 void ConcurrentMark::abort() {
3244   // Clear all marks to force marking thread to do nothing
3245   _nextMarkBitMap->clearAll();
3246 
3247   // Note we cannot clear the previous marking bitmap here
3248   // since VerifyDuringGC verifies the objects marked during
3249   // a full GC against the previous bitmap.
3250 
3251   // Clear the liveness counting data
3252   clear_all_count_data();
3253   // Empty mark stack
3254   reset_marking_state();
       // Clear the region fields of every marking task
3255   for (uint i = 0; i < _max_worker_id; ++i) {
3256     _tasks[i]->clear_region_fields();
3257   }
       // Abort both overflow barriers. enter_first_sync_barrier() and
       // enter_second_sync_barrier() treat a false return from enter() as an
       // aborted barrier; presumably abort() is what makes enter() return
       // false for workers waiting there -- confirm against the barrier
       // implementation.
3258   _first_overflow_barrier_sync.abort();
3259   _second_overflow_barrier_sync.abort();
       // Record that this marking iteration has been aborted
3260   _has_aborted = true;
3261 
3262   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3263   satb_mq_set.abandon_partial_marking();
3264   // This can be called either during or outside marking, we'll read
3265   // the expected_active value from the SATB queue set.
3266   satb_mq_set.set_active_all_threads(
3267                                  false, /* new active value */
3268                                  satb_mq_set.is_active() /* expected_active */);
3269 
3270   _g1h->trace_heap_after_concurrent_cycle();
3271   _g1h->register_concurrent_cycle_end();
3272 }
3273 
3274 static void print_ms_time_info(const char* prefix, const char* name,
3275                                NumberSeq& ns) {
3276   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3277                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3278   if (ns.num() > 0) {
3279     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",