# HG changeset patch
# Parent  cb2d18ff1bbf74a2a58925f8ad3af2713a7bca7e

diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
@@ -355,6 +355,7 @@
   _alloc_seq_at_last_gc_start(0),
   _alloc_seq_at_last_gc_end(0),
   _safepoint_workers(NULL),
+  _stalled_threads(0),
 #ifdef ASSERT
   _heap_expansion_count(0),
 #endif
@@ -1586,15 +1587,68 @@
 
   concurrent_thread()->try_set_full_gc();
   cancel_concgc(_oom_evacuation);
 
-  if ((! Thread::current()->is_GC_task_thread()) && (! Thread::current()->is_ConcurrentGC_thread())) {
-    assert(! Threads_lock->owned_by_self()
-           || SafepointSynchronize::is_at_safepoint(), "must not hold Threads_lock here");
-    log_warning(gc)("OOM during evacuation. Let Java thread wait until evacuation finishes.");
-    while (is_evacuation_in_progress()) { // wait.
-      Thread::current()->_ParkEvent->park(1);
+  if (Thread::current()->is_Java_thread()) {
+    switch (JavaThread::current()->thread_state()) {
+      case _thread_in_Java: {
+        /*
+         This method is called from WB leaf call, and handles evacuation failure
+         during WB execution. Since this is a leaf call, we cannot actually
+         block on any interesting mutex. Instead, we are left actively waiting
+         for transition to failing STW operation to begin.
+
+         In doing so, we have to make sure that we don't leave until we can
+         guarantee the to-space invariant holds. In the basic case, WB failure would
+         dereference via fwdptr after getting from here, in case some other
+         thread had managed to evacuate the same object. This is possible
+         because other threads might still have space in their GCLABs.
+
+         In the more advanced case, there is a race due to non-atomic
+         evac_in_progress transition. Consider thread A is stuck here waiting
+         for the evacuation to be over. Control thread drops
+         evacuation_in_progress preparing for Full GC. Thread B misses that
+         update, and successfully evacuates the object, does the write to
+         to-copy. But, before Thread B is able to install the fwdptr, thread A
+         discovers evac_in_progress is down, exits from here, reads the fwdptr,
+         discovers old from-copy, and stores there. Thread B then wakes up and
+         installs to-copy. This breaks the to-space invariant, and silently
+         corrupts the heap: we accepted two writes to separate copies of the
+         object.
+
+         The way out is to wait for all threads to park at the safepoint for
+         Full GC, before returning from here. The intent is that all threads
+         that cannot proceed with evacuation would end up here, and we will
+         transition to Full GC when that happens. This should guarantee no other
+         thread is able to evac the object in question under our feet, without
+         this thread ever knowing about it.
+        */
+        assert(! Threads_lock->owned_by_self()
+               || SafepointSynchronize::is_at_safepoint(), "must not hold Threads_lock here");
+        log_warning(gc)("OOM during evacuation. Let Java thread wait until evacuation finishes.");
+        increase_stalled_threads();
+
+        // Wait until all GC workers have settled down.
+        while (is_evacuation_in_progress()) { // wait.
+          Thread::current()->_ParkEvent->park(1);
+        }
+
+        // Wait until all Java threads that are not stalled in oom_during_evacuation()
+        // have arrived at the safepoint.
+        while (stalled_threads() != SafepointSynchronize::still_running()) { // wait.
+          Thread::current()->_ParkEvent->park(1);
+        }
+        decrease_stalled_threads();
+        break;
+      }
+      case _thread_in_vm: {
+        // Immediately safepoint.
+        ThreadBlockInVM block(JavaThread::current());
+        break;
+      }
+      default:
+        JavaThread::current()->print_thread_state_on(tty); ShouldNotReachHere();
     }
   }
 }
 
 HeapWord* ShenandoahHeap::tlab_post_allocation_setup(HeapWord* obj) {
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
@@ -198,6 +198,8 @@
 
   ShenandoahSharedEnumFlag _cancelled_concgc;
 
+  volatile int _stalled_threads;
+
   ReferenceProcessor* _ref_processor;
 
   ShenandoahForwardedIsAliveClosure _forwarded_is_alive;
@@ -337,6 +339,16 @@
   inline bool is_concurrent_partial_in_progress() const;
   inline bool need_update_refs() const;
 
+  int stalled_threads() { return OrderAccess::load_acquire(&_stalled_threads); }
+  void increase_stalled_threads() {
+    Atomic::inc(&_stalled_threads);
+    OrderAccess::fence();
+  }
+  void decrease_stalled_threads() {
+    Atomic::dec(&_stalled_threads);
+    OrderAccess::fence();
+  }
+
   inline bool region_in_collection_set(size_t region_index) const;
 
   // Mainly there to avoid accidentally calling the templated
diff --git a/src/hotspot/share/runtime/safepoint.cpp b/src/hotspot/share/runtime/safepoint.cpp
--- a/src/hotspot/share/runtime/safepoint.cpp
+++ b/src/hotspot/share/runtime/safepoint.cpp
@@ -73,6 +73,7 @@
 SafepointSynchronize::SynchronizeState volatile SafepointSynchronize::_state = SafepointSynchronize::_not_synchronized;
 volatile int SafepointSynchronize::_waiting_to_block = 0;
+volatile int SafepointSynchronize::_still_running = 0;
 volatile int SafepointSynchronize::_safepoint_counter = 0;
 int SafepointSynchronize::_current_jni_active_count = 0;
 long SafepointSynchronize::_end_of_last_safepoint = 0;
 
@@ -114,6 +115,7 @@
   _waiting_to_block = nof_threads;
   TryingToBlock = 0 ;
   int still_running = nof_threads;
+  OrderAccess::release_store(&_still_running, still_running);
 
   // Save the starting time, so that it can be compared to see if this has taken
   // too long to complete.
@@ -211,6 +213,7 @@
       cur_state->examine_state_of_thread();
       if (!cur_state->is_running()) {
         still_running--;
+        OrderAccess::release_store(&_still_running, still_running);
         // consider adjusting steps downward:
         //   steps = 0
         //   steps -= NNN
@@ -316,6 +319,7 @@
       assert(iterations < (uint)max_jint, "We have been iterating in the safepoint loop too long");
     }
     assert(still_running == 0, "sanity check");
+    assert(SafepointSynchronize::still_running() == 0, "sanity check");
 
     if (PrintSafepointStatistics) {
       update_statistics_on_spin_end();
diff --git a/src/hotspot/share/runtime/safepoint.hpp b/src/hotspot/share/runtime/safepoint.hpp
--- a/src/hotspot/share/runtime/safepoint.hpp
+++ b/src/hotspot/share/runtime/safepoint.hpp
@@ -106,6 +106,7 @@
  private:
   static volatile SynchronizeState _state;     // Threads might read this flag directly, without acquiring the Threads_lock
   static volatile int _waiting_to_block;       // number of threads we are waiting for to block
+  static volatile int _still_running;          // number of threads that are still running
   static int _current_jni_active_count;        // Counts the number of active critical natives during the safepoint
 
   // This counter is used for fast versions of jni_GetField.
@@ -173,6 +174,8 @@
 
   static void block(JavaThread *thread);
   static void signal_thread_at_safepoint() { _waiting_to_block--; }
 
+  static int still_running() { return OrderAccess::load_acquire(&_still_running); }
+
   // Exception handling for page polling
   static void handle_polling_page_exception(JavaThread *thread);