
src/hotspot/share/gc/g1/g1CollectedHeap.cpp

rev 48467 : 8137099: G1 needs to "upgrade" GC within the safepoint if it can't allocate during that safepoint to avoid OoME
Summary: If memory allocation fails during a minor GC, start a full GC within the same VM operation, in the same safepoint. This avoids a race in which the GC locker can prevent the full GC from occurring, and thereby a premature OoME.
Reviewed-by:
Contributed-by: thomas.schatzl@oracle.com, axel.siebenborn@sap.com
rev 48468 : imported patch 8137099-phohensee-review
rev 48469 : imported patch 8137099-sjohanns-messages
rev 48470 : [mq]: 8137099-erikd-review
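
The summary above describes the core of the fix: once the allocating thread is inside the GC VM operation's safepoint, it stays there until either the allocation succeeds or a full GC (ultimately one that also clears soft references) has been tried, so the GC locker can no longer defer the full collection. A standalone sketch of that control flow, using hypothetical stand-in types with stub bodies (the real logic lives in G1CollectedHeap::satisfy_failed_allocation() and the G1 VM operations):

#include <cstddef>
using std::size_t;

// Illustration-only stand-in; not the HotSpot API. Stub bodies keep the
// sketch compilable.
struct Heap {
  void* try_allocate(size_t /*word_size*/)        { return nullptr; }  // stub
  void* expand_and_allocate(size_t /*word_size*/) { return nullptr; }  // stub
  void do_full_collection(bool /*clear_all_soft_refs*/) {}             // stub
};

// Runs at a safepoint, inside a single VM operation. Because the thread never
// leaves the safepoint between the young GC and the full GC, the GC locker
// cannot slip in and defer the full collection, which is the race that
// previously led to a premature OoME.
void* satisfy_failed_allocation_sketch(Heap* heap, size_t word_size) {
  if (void* r = heap->try_allocate(word_size)) return r;        // retry after the young GC
  if (void* r = heap->expand_and_allocate(word_size)) return r; // favor expansion over collection
  heap->do_full_collection(false /* clear_all_soft_refs */);    // "upgrade" to a full GC
  if (void* r = heap->try_allocate(word_size)) return r;
  heap->do_full_collection(true /* clear_all_soft_refs */);     // last-ditch full GC
  return heap->try_allocate(word_size);  // NULL here means a genuine OoME
}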

@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2018, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.

@@ -443,76 +443,28 @@
 
 HeapWord* G1CollectedHeap::allocate_new_tlab(size_t word_size) {
   assert_heap_not_locked_and_not_at_safepoint();
   assert(!is_humongous(word_size), "we do not allow humongous TLABs");
 
-  uint dummy_gc_count_before;
-  uint dummy_gclocker_retry_count = 0;
-  return attempt_allocation(word_size, &dummy_gc_count_before, &dummy_gclocker_retry_count);
+  return attempt_allocation(word_size);
 }
 
 HeapWord*
 G1CollectedHeap::mem_allocate(size_t word_size,
                               bool*  gc_overhead_limit_was_exceeded) {
   assert_heap_not_locked_and_not_at_safepoint();
 
-  // Loop until the allocation is satisfied, or unsatisfied after GC.
-  for (uint try_count = 1, gclocker_retry_count = 0; /* we'll return */; try_count += 1) {
-    uint gc_count_before;
-
-    HeapWord* result = NULL;
-    if (!is_humongous(word_size)) {
-      result = attempt_allocation(word_size, &gc_count_before, &gclocker_retry_count);
-    } else {
-      result = attempt_allocation_humongous(word_size, &gc_count_before, &gclocker_retry_count);
-    }
-    if (result != NULL) {
-      return result;
-    }
-
-    // Create the garbage collection operation...
-    VM_G1CollectForAllocation op(gc_count_before, word_size);
-    op.set_allocation_context(AllocationContext::current());
-
-    // ...and get the VM thread to execute it.
-    VMThread::execute(&op);
-
-    if (op.prologue_succeeded() && op.pause_succeeded()) {
-      // If the operation was successful we'll return the result even
-      // if it is NULL. If the allocation attempt failed immediately
-      // after a Full GC, it's unlikely we'll be able to allocate now.
-      HeapWord* result = op.result();
-      if (result != NULL && !is_humongous(word_size)) {
-        // Allocations that take place on VM operations do not do any
-        // card dirtying and we have to do it here. We only have to do
-        // this for non-humongous allocations, though.
-        dirty_young_block(result, word_size);
+  if (is_humongous(word_size)) {
+    return attempt_allocation_humongous(word_size);
  }
-      return result;
-    } else {
-      if (gclocker_retry_count > GCLockerRetryAllocationCount) {
-        return NULL;
-      }
-      assert(op.result() == NULL,
-             "the result should be NULL if the VM op did not succeed");
-    }
-
-    // Give a warning if we seem to be looping forever.
-    if ((QueuedAllocationWarningCount > 0) &&
-        (try_count % QueuedAllocationWarningCount == 0)) {
-      log_warning(gc)("G1CollectedHeap::mem_allocate retries %d times", try_count);
-    }
-  }
-
-  ShouldNotReachHere();
-  return NULL;
+  return attempt_allocation(word_size);
 }
 
 HeapWord* G1CollectedHeap::attempt_allocation_slow(size_t word_size,
-                                                   AllocationContext_t context,
-                                                   uint* gc_count_before_ret,
-                                                   uint* gclocker_retry_count_ret) {
+                                                   AllocationContext_t context) {
+  ResourceMark rm; // For retrieving the thread names in log messages.
+
   // Make sure you read the note in attempt_allocation_humongous().
 
   assert_heap_not_locked_and_not_at_safepoint();
   assert(!is_humongous(word_size), "attempt_allocation_slow() should not "
          "be called for humongous allocation requests");

@@ -523,95 +475,94 @@
   // We will loop until a) we manage to successfully perform the
   // allocation or b) we successfully schedule a collection which
   // fails to perform the allocation. b) is the only case when we'll
   // return NULL.
   HeapWord* result = NULL;
-  for (int try_count = 1; /* we'll return */; try_count += 1) {
+  for (uint try_count = 1, gclocker_retry_count = 0; /* we'll return */; try_count += 1) {
     bool should_try_gc;
     uint gc_count_before;
 
     {
       MutexLockerEx x(Heap_lock);
       result = _allocator->attempt_allocation_locked(word_size, context);
       if (result != NULL) {
         return result;
       }
 
-      if (GCLocker::is_active_and_needs_gc()) {
-        if (g1_policy()->can_expand_young_list()) {
-          // No need for an ergo verbose message here,
-          // can_expand_young_list() does this when it returns true.
+      // If the GCLocker is active and we are bound for a GC, try expanding the
+      // young gen. This is different from the case where only GCLocker::needs_gc()
+      // is set: since the GCLocker is still active here, expanding avoids a
+      // potentially long wait for it to be released.
+      if (GCLocker::is_active_and_needs_gc() && g1_policy()->can_expand_young_list()) {
+        // No need for an ergo message here, can_expand_young_list() does this when
+        // it returns true.
        result = _allocator->attempt_allocation_force(word_size, context);
        if (result != NULL) {
          return result;
        }
      }
-        should_try_gc = false;
-      } else {
-        // The GCLocker may not be active but the GCLocker initiated
-        // GC may not yet have been performed (GCLocker::needs_gc()
-        // returns true). In this case we do not try this GC and
-        // wait until the GCLocker initiated GC is performed, and
-        // then retry the allocation.
-        if (GCLocker::needs_gc()) {
-          should_try_gc = false;
-        } else {
+      // Only try a GC if the GCLocker does not signal the need for a GC. Wait until
+      // the GCLocker initiated GC has been performed and then retry. This includes
+      // the case when the GCLocker is not active but a GCLocker initiated GC has
+      // not yet been performed.
+      should_try_gc = !GCLocker::needs_gc();
      // Read the GC count while still holding the Heap_lock.
      gc_count_before = total_collections();
-          should_try_gc = true;
-        }
-      }
     }
 
     if (should_try_gc) {
       bool succeeded;
       result = do_collection_pause(word_size, gc_count_before, &succeeded,
                                    GCCause::_g1_inc_collection_pause);
       if (result != NULL) {
         assert(succeeded, "only way to get back a non-NULL result");
+        log_trace(gc, alloc)("%s: Successfully scheduled collection returning " PTR_FORMAT,
+                             Thread::current()->name(), p2i(result));
         return result;
       }
 
       if (succeeded) {
-        // If we get here we successfully scheduled a collection which
-        // failed to allocate. No point in trying to allocate
-        // further. We'll just return NULL.
-        MutexLockerEx x(Heap_lock);
-        *gc_count_before_ret = total_collections();
+        // We successfully scheduled a collection which failed to allocate. No
+        // point in trying to allocate further. We'll just return NULL.
+        log_trace(gc, alloc)("%s: Successfully scheduled collection failing to allocate "
+                             SIZE_FORMAT " words", Thread::current()->name(), word_size);
         return NULL;
       }
+      log_trace(gc, alloc)("%s: Unsuccessfully scheduled collection allocating " SIZE_FORMAT " words",
+                           Thread::current()->name(), word_size);
     } else {
-      if (*gclocker_retry_count_ret > GCLockerRetryAllocationCount) {
-        MutexLockerEx x(Heap_lock);
-        *gc_count_before_ret = total_collections();
+      // Failed to schedule a collection.
+      if (gclocker_retry_count > GCLockerRetryAllocationCount) {
+        log_warning(gc, alloc)("%s: Retried waiting for GCLocker too often allocating "
+                               SIZE_FORMAT " words", Thread::current()->name(), word_size);
         return NULL;
       }
+      log_trace(gc, alloc)("%s: Stall until clear", Thread::current()->name());
       // The GCLocker is either active or the GCLocker initiated
       // GC has not yet been performed. Stall until it is and
       // then retry the allocation.
       GCLocker::stall_until_clear();
-      (*gclocker_retry_count_ret) += 1;
+      gclocker_retry_count += 1;
     }
 
     // We can reach here if we were unsuccessful in scheduling a
     // collection (because another thread beat us to it) or if we were
    // stalled due to the GC locker. In either case we should retry the
     // allocation attempt in case another thread successfully
     // performed a collection and reclaimed enough space. We do the
     // first attempt (without holding the Heap_lock) here and the
     // follow-on attempt will be at the start of the next loop
     // iteration (after taking the Heap_lock).
+
     result = _allocator->attempt_allocation(word_size, context);
     if (result != NULL) {
       return result;
     }
 
     // Give a warning if we seem to be looping forever.
     if ((QueuedAllocationWarningCount > 0) &&
         (try_count % QueuedAllocationWarningCount == 0)) {
-      log_warning(gc)("G1CollectedHeap::attempt_allocation_slow() "
-                      "retries %d times", try_count);
+      log_warning(gc, alloc)("%s: Retried allocation %u times for " SIZE_FORMAT " words",
+                             Thread::current()->name(), try_count, word_size);
     }
   }
 
   ShouldNotReachHere();
   return NULL;
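
Taking stock of the rewritten loop above: the GC-locker retry counter and the GC count are now plain locals, both sampled under the Heap_lock, and the thread either schedules a pause keyed to that count or stalls on the locker, with a bounded number of locker retries. Reduced to a standalone sketch with stand-in names and stub bodies (not the HotSpot API; the stubs make it compile and terminate):

#include <cstddef>
using std::size_t;

// Stand-ins for GCLocker, Heap_lock, total_collections() and
// do_collection_pause().
struct Env {
  bool gclocker_needs_gc()      { return false; }   // stub
  unsigned total_collections()  { return 0; }       // stub
  void* allocate_locked(size_t) { return nullptr; } // stub
  void* allocate(size_t)        { return nullptr; } // stub
  void* collect_and_allocate(size_t, unsigned /*gc_count_before*/, bool* ok) {
    *ok = true; return nullptr;                     // stub
  }
  void stall_until_gclocker_clear() {}              // stub
  void lock_heap() {}                               // stub
  void unlock_heap() {}                             // stub
};

const unsigned kGCLockerRetryMax = 2;  // stands in for GCLockerRetryAllocationCount

void* attempt_allocation_slow_sketch(Env* env, size_t words) {
  for (unsigned try_count = 1, locker_retries = 0; ; try_count++) {
    bool should_try_gc;
    unsigned gc_count_before;

    env->lock_heap();
    if (void* r = env->allocate_locked(words)) { env->unlock_heap(); return r; }
    // Only try a GC when no GC-locker-initiated GC is pending, and sample the
    // collection count while still holding the lock, so a pause scheduled by
    // another thread in the meantime is detected as already done.
    should_try_gc = !env->gclocker_needs_gc();
    gc_count_before = env->total_collections();
    env->unlock_heap();

    if (should_try_gc) {
      bool succeeded;
      void* r = env->collect_and_allocate(words, gc_count_before, &succeeded);
      if (r != nullptr) return r;     // pause ran and the allocation succeeded
      if (succeeded) return nullptr;  // pause ran but could not make space: give up
      // Otherwise another thread beat us to the pause; retry below.
    } else {
      if (locker_retries++ > kGCLockerRetryMax) return nullptr;
      env->stall_until_gclocker_clear();
    }
    // Lock-free retry before looping back to the locked attempt.
    if (void* r = env->allocate(words)) return r;
    if (try_count % 64 == 0) {
      // The real code emits a log warning here, gated by
      // QueuedAllocationWarningCount.
    }
  }
}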

@@ -828,25 +779,20 @@
       increase_used(fill_size * HeapWordSize);
     }
   }
 }
 
-inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size,
-                                                     uint* gc_count_before_ret,
-                                                     uint* gclocker_retry_count_ret) {
+inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size) {
   assert_heap_not_locked_and_not_at_safepoint();
   assert(!is_humongous(word_size), "attempt_allocation() should not "
          "be called for humongous allocation requests");
 
   AllocationContext_t context = AllocationContext::current();
   HeapWord* result = _allocator->attempt_allocation(word_size, context);
 
   if (result == NULL) {
-    result = attempt_allocation_slow(word_size,
-                                     context,
-                                     gc_count_before_ret,
-                                     gclocker_retry_count_ret);
+    result = attempt_allocation_slow(word_size, context);
   }
   assert_heap_not_locked();
   if (result != NULL) {
     dirty_young_block(result, word_size);
   }
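
With the retry state gone from the signature, attempt_allocation() itself is back to the plain fast-path/slow-path shape, and it now dirties the young-gen cards for every successful result, including allocations satisfied inside the GC VM operation (which is why mem_allocate's separate card-dirtying code further up could be deleted). A minimal sketch of the shape, with hypothetical stub helpers:

#include <cstddef>
using std::size_t;

// Hypothetical stubs standing in for _allocator->attempt_allocation(),
// attempt_allocation_slow() and dirty_young_block().
static void* fast_path_allocate(size_t) { return nullptr; }
static void* slow_path_allocate(size_t) { return nullptr; }
static void  dirty_young_cards(void*, size_t) {}

void* attempt_allocation_sketch(size_t word_size) {
  void* result = fast_path_allocate(word_size);  // lock-free inline attempt
  if (result == nullptr) {
    result = slow_path_allocate(word_size);      // may lock, stall, or GC
  }
  if (result != nullptr) {
    // One dirtying point for both paths, outside any safepoint.
    dirty_young_cards(result, word_size);
  }
  return result;
}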

@@ -923,13 +869,13 @@
                               HeapRegion::GrainWords * HeapWordSize * uncommitted_regions);
   }
   decrease_used(size_used);
 }
 
-HeapWord* G1CollectedHeap::attempt_allocation_humongous(size_t word_size,
-                                                        uint* gc_count_before_ret,
-                                                        uint* gclocker_retry_count_ret) {
+HeapWord* G1CollectedHeap::attempt_allocation_humongous(size_t word_size) {
+  ResourceMark rm; // For retrieving the thread names in log messages.
+
   // The structure of this method has a lot of similarities to
   // attempt_allocation_slow(). The reason these two were not merged
   // into a single one is that such a method would require several "if
   // allocation is not humongous do this, otherwise do that"
   // conditional paths which would obscure its flow. In fact, an early

@@ -956,14 +902,15 @@
   // We will loop until a) we manage to successfully perform the
   // allocation or b) we successfully schedule a collection which
   // fails to perform the allocation. b) is the only case when we'll
   // return NULL.
   HeapWord* result = NULL;
-  for (int try_count = 1; /* we'll return */; try_count += 1) {
+  for (uint try_count = 1, gclocker_retry_count = 0; /* we'll return */; try_count += 1) {
     bool should_try_gc;
     uint gc_count_before;
 
     {
       MutexLockerEx x(Heap_lock);
 
       // Given that humongous objects are not allocated in young
       // regions, we'll first try to do the allocation without doing a

@@ -973,73 +920,67 @@
         size_t size_in_regions = humongous_obj_size_in_regions(word_size);
         g1_policy()->add_bytes_allocated_in_old_since_last_gc(size_in_regions * HeapRegion::GrainBytes);
         return result;
       }
 
-      if (GCLocker::is_active_and_needs_gc()) {
-        should_try_gc = false;
-      } else {
-         // The GCLocker may not be active but the GCLocker initiated
-        // GC may not yet have been performed (GCLocker::needs_gc()
-        // returns true). In this case we do not try this GC and
-        // wait until the GCLocker initiated GC is performed, and
-        // then retry the allocation.
-        if (GCLocker::needs_gc()) {
-          should_try_gc = false;
-        } else {
+      // Only try a GC if the GCLocker does not signal the need for a GC. Wait until
+      // the GCLocker initiated GC has been performed and then retry. This includes
+      // the case when the GCLocker is not active but a GCLocker initiated GC has
+      // not yet been performed.
+      should_try_gc = !GCLocker::needs_gc();
      // Read the GC count while still holding the Heap_lock.
      gc_count_before = total_collections();
-          should_try_gc = true;
-        }
-      }
     }
 
     if (should_try_gc) {
-      // If we failed to allocate the humongous object, we should try to
-      // do a collection pause (if we're allowed) in case it reclaims
-      // enough space for the allocation to succeed after the pause.
-
       bool succeeded;
       result = do_collection_pause(word_size, gc_count_before, &succeeded,
                                    GCCause::_g1_humongous_allocation);
       if (result != NULL) {
         assert(succeeded, "only way to get back a non-NULL result");
+        log_trace(gc, alloc)("%s: Successfully scheduled collection returning " PTR_FORMAT,
+                             Thread::current()->name(), p2i(result));
         return result;
       }
 
       if (succeeded) {
-        // If we get here we successfully scheduled a collection which
-        // failed to allocate. No point in trying to allocate
-        // further. We'll just return NULL.
-        MutexLockerEx x(Heap_lock);
-        *gc_count_before_ret = total_collections();
+        // We successfully scheduled a collection which failed to allocate. No
+        // point in trying to allocate further. We'll just return NULL.
+        log_trace(gc, alloc)("%s: Successfully scheduled collection failing to allocate "
+                             SIZE_FORMAT " words", Thread::current()->name(), word_size);
         return NULL;
       }
+      log_trace(gc, alloc)("%s: Unsuccessfully scheduled collection allocating " SIZE_FORMAT " words",
+                           Thread::current()->name(), word_size);
     } else {
-      if (*gclocker_retry_count_ret > GCLockerRetryAllocationCount) {
-        MutexLockerEx x(Heap_lock);
-        *gc_count_before_ret = total_collections();
+      // Failed to schedule a collection.
+      if (gclocker_retry_count > GCLockerRetryAllocationCount) {
+        log_warning(gc, alloc)("%s: Retried waiting for GCLocker too often allocating "
+                               SIZE_FORMAT " words", Thread::current()->name(), word_size);
         return NULL;
       }
+      log_trace(gc, alloc)("%s: Stall until clear", Thread::current()->name());
       // The GCLocker is either active or the GCLocker initiated
       // GC has not yet been performed. Stall until it is and
       // then retry the allocation.
       GCLocker::stall_until_clear();
-      (*gclocker_retry_count_ret) += 1;
+      gclocker_retry_count += 1;
     }
 
     // We can reach here if we were unsuccessful in scheduling a
     // collection (because another thread beat us to it) or if we were
    // stalled due to the GC locker. In either case we should retry the
     // allocation attempt in case another thread successfully
-    // performed a collection and reclaimed enough space.  Give a
-    // warning if we seem to be looping forever.
+    // performed a collection and reclaimed enough space.
+    // Humongous object allocation always needs a lock, so we wait for the retry
+    // in the next iteration of the loop, unlike in the regular allocation case.
+    // Give a warning if we seem to be looping forever.
 
     if ((QueuedAllocationWarningCount > 0) &&
         (try_count % QueuedAllocationWarningCount == 0)) {
-      log_warning(gc)("G1CollectedHeap::attempt_allocation_humongous() "
-                      "retries %d times", try_count);
+      log_warning(gc, alloc)("%s: Retried allocation %u times for " SIZE_FORMAT " words",
+                             Thread::current()->name(), try_count, word_size);
     }
   }
 
   ShouldNotReachHere();
   return NULL;
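
The humongous loop above follows the same protocol as attempt_allocation_slow(), with two differences the comments call out: the allocation itself only ever happens under the Heap_lock, so there is no lock-free retry at the bottom of the loop, and a success is accounted as old-generation bytes. Reusing the Env stand-in and kGCLockerRetryMax from the earlier sketch:

void* attempt_allocation_humongous_sketch(Env* env, size_t words) {
  for (unsigned locker_retries = 0; ; ) {  // warning logic as in the earlier sketch
    bool should_try_gc;
    unsigned gc_count_before;

    env->lock_heap();
    if (void* r = env->allocate_locked(words)) {
      env->unlock_heap();
      // The real code also records the regions spanned by 'words' as
      // old-gen bytes allocated since the last GC.
      return r;
    }
    should_try_gc = !env->gclocker_needs_gc();
    gc_count_before = env->total_collections();
    env->unlock_heap();

    if (should_try_gc) {
      bool succeeded;
      void* r = env->collect_and_allocate(words, gc_count_before, &succeeded);
      if (r != nullptr) return r;
      if (succeeded) return nullptr;
    } else {
      if (locker_retries++ > kGCLockerRetryMax) return nullptr;
      env->stall_until_gclocker_clear();
    }
    // No lock-free attempt here: humongous allocation always needs the lock,
    // so the retry happens at the top of the next iteration.
  }
}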

@@ -1337,21 +1278,19 @@
   HeapWord* result =
     attempt_allocation_at_safepoint(word_size,
                                     context,
                                     expect_null_mutator_alloc_region);
   if (result != NULL) {
-    assert(*gc_succeeded, "sanity");
     return result;
   }
 
   // In a G1 heap, we're supposed to keep allocation from failing by
   // incremental pauses.  Therefore, at least for now, we'll favor
   // expansion over collection.  (This might change in the future if we can
   // do something smarter than full collection to satisfy a failed alloc.)
   result = expand_and_allocate(word_size, context);
   if (result != NULL) {
-    assert(*gc_succeeded, "sanity");
     return result;
   }
 
   if (do_gc) {
     // Expansion didn't work, we'll try to do a Full GC.

@@ -1399,22 +1338,20 @@
                                             false, /* clear_all_soft_refs */
                                             true,  /* expect_null_mutator_alloc_region */
                                             succeeded);
 
   if (result != NULL) {
-    assert(*succeeded, "sanity");
     return result;
   }
 
   assert(!collector_policy()->should_clear_all_soft_refs(),
          "Flag should have been handled and cleared prior to this point");
 
   // What else?  We might try synchronous finalization later.  If the total
   // space available is large enough for the allocation, then a more
   // complete compaction phase than we've tried so far might be
   // appropriate.
-  assert(*succeeded, "sanity");
   return NULL;
 }
 
 // Attempting to expand the heap sufficiently
 // to support an allocation of the given "word_size".  If

@@ -2145,11 +2082,11 @@
   }
 
   // This notify_all() will ensure that a thread that called
  // System.gc() (with ExplicitGCInvokesConcurrent set or not)
  // and is waiting for a full GC to finish will be woken up. It is
-  // waiting in VM_G1IncCollectionPause::doit_epilogue().
+  // waiting in VM_G1CollectForAllocation::doit_epilogue().
   FullGCCount_lock->notify_all();
 }
 
 void G1CollectedHeap::collect(GCCause::Cause cause) {
   assert_heap_not_locked();

@@ -2173,17 +2110,16 @@
 
     if (should_do_concurrent_full_gc(cause)) {
       // Schedule an initial-mark evacuation pause that will start a
       // concurrent cycle. We're setting word_size to 0 which means that
       // we are not requesting a post-GC allocation.
-      VM_G1IncCollectionPause op(gc_count_before,
-                                 0,     /* word_size */
+      VM_G1CollectForAllocation op(0,     /* word_size */
+                                   gc_count_before,
+                                   cause,
                                   true,  /* should_initiate_conc_mark */
                                   g1_policy()->max_pause_time_ms(),
-                                 cause);
-      op.set_allocation_context(AllocationContext::current());
-
+                                   AllocationContext::current());
       VMThread::execute(&op);
       if (!op.pause_succeeded()) {
         if (old_marking_count_before == _old_marking_cycles_started) {
           retry_gc = op.should_retry_gc();
         } else {

@@ -2202,15 +2138,16 @@
       if (cause == GCCause::_gc_locker || cause == GCCause::_wb_young_gc
           DEBUG_ONLY(|| cause == GCCause::_scavenge_alot)) {
 
         // Schedule a standard evacuation pause. We're setting word_size
         // to 0 which means that we are not requesting a post-GC allocation.
-        VM_G1IncCollectionPause op(gc_count_before,
-                                   0,     /* word_size */
+        VM_G1CollectForAllocation op(0,     /* word_size */
+                                     gc_count_before,
+                                     cause,
                                     false, /* should_initiate_conc_mark */
                                     g1_policy()->max_pause_time_ms(),
-                                   cause);
+                                     AllocationContext::current());
         VMThread::execute(&op);
       } else {
         // Schedule a Full GC.
         VM_G1CollectFull op(gc_count_before, full_gc_count_before, cause);
         VMThread::execute(&op);
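
Both call sites in collect() above, and do_collection_pause() further down, now construct the same operation: VM_G1IncCollectionPause is gone, and VM_G1CollectForAllocation takes the allocation context in its constructor instead of via set_allocation_context(). Based purely on the argument order visible in this patch, a caller requesting a plain evacuation pause with no post-GC allocation looks roughly like this (a sketch against HotSpot-internal types, not compilable on its own):

  VM_G1CollectForAllocation op(0,      /* word_size: no post-GC allocation */
                               gc_count_before,
                               GCCause::_g1_inc_collection_pause,
                               false,  /* should_initiate_conc_mark */
                               g1_policy()->max_pause_time_ms(),
                               AllocationContext::current());
  VMThread::execute(&op);
  if (op.prologue_succeeded() && op.pause_succeeded()) {
    // The pause ran; for word_size > 0, op.result() would hold the allocation.
  }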

@@ -2617,17 +2554,16 @@
 HeapWord* G1CollectedHeap::do_collection_pause(size_t word_size,
                                                uint gc_count_before,
                                                bool* succeeded,
                                                GCCause::Cause gc_cause) {
   assert_heap_not_locked_and_not_at_safepoint();
-  VM_G1IncCollectionPause op(gc_count_before,
-                             word_size,
+  VM_G1CollectForAllocation op(word_size,
+                               gc_count_before,
+                               gc_cause,
                               false, /* should_initiate_conc_mark */
                               g1_policy()->max_pause_time_ms(),
-                             gc_cause);
-
-  op.set_allocation_context(AllocationContext::current());
+                               AllocationContext::current());
   VMThread::execute(&op);
 
   HeapWord* result = op.result();
   bool ret_succeeded = op.prologue_succeeded() && op.pause_succeeded();
   assert(result == NULL || ret_succeeded,