# HG changeset patch
# User rkennke
# Date 1526646577 -7200
#      Fri May 18 14:29:37 2018 +0200
# Node ID 54b80b681e4274655f01360ed09377cdd289cfd0
# Parent  eaf79c646395b1311bf938ff31249ddede75d861
[backport] Use PLAB for evacuations instead of TLAB

diff --git a/src/hotspot/share/gc/shared/collectedHeap.cpp b/src/hotspot/share/gc/shared/collectedHeap.cpp
--- a/src/hotspot/share/gc/shared/collectedHeap.cpp
+++ b/src/hotspot/share/gc/shared/collectedHeap.cpp
@@ -625,10 +625,6 @@
   return 0;
 }
 
-void CollectedHeap::accumulate_statistics_all_gclabs() {
-  // Default implementation does nothing.
-}
-
 #ifndef CC_INTERP
 void CollectedHeap::compile_prepare_oop(MacroAssembler* masm, Register obj) {
   // Default implementation does nothing.
diff --git a/src/hotspot/share/gc/shared/collectedHeap.hpp b/src/hotspot/share/gc/shared/collectedHeap.hpp
--- a/src/hotspot/share/gc/shared/collectedHeap.hpp
+++ b/src/hotspot/share/gc/shared/collectedHeap.hpp
@@ -640,9 +640,6 @@
   // perform cleanup tasks serially in the VMThread.
   virtual WorkGang* get_safepoint_workers() { return NULL; }
 
-  // Accumulate additional statistics from GCLABs.
-  virtual void accumulate_statistics_all_gclabs();
-
   // Support for object pinning. This is used by JNI Get*Critical()
   // and Release*Critical() family of functions. If supported, the GC
   // must guarantee that pinned objects never move.
diff --git a/src/hotspot/share/gc/shared/plab.cpp b/src/hotspot/share/gc/shared/plab.cpp
--- a/src/hotspot/share/gc/shared/plab.cpp
+++ b/src/hotspot/share/gc/shared/plab.cpp
@@ -44,7 +44,9 @@
   _end(NULL), _hard_end(NULL), _allocated(0), _wasted(0), _undo_wasted(0)
 {
   // ArrayOopDesc::header_size depends on command line initialization.
-  AlignmentReserve = oopDesc::header_size() > MinObjAlignment ? align_object_size(arrayOopDesc::header_size(T_INT)) : 0;
+  int rsv_regular = oopDesc::header_size() + (int) Universe::heap()->oop_extra_words();
+  int rsv_array   = align_object_size(arrayOopDesc::header_size(T_INT) + Universe::heap()->oop_extra_words());
+  AlignmentReserve = rsv_regular > MinObjAlignment ? rsv_array : 0;
   assert(min_size() > AlignmentReserve,
          "Minimum PLAB size " SIZE_FORMAT " must be larger than alignment reserve " SIZE_FORMAT " "
          "to be able to contain objects", min_size(), AlignmentReserve);
@@ -82,14 +84,18 @@
 size_t PLAB::retire_internal() {
   size_t result = 0;
   if (_top < _hard_end) {
-    CollectedHeap::fill_with_object(_top, _hard_end);
+    assert(pointer_delta(_hard_end, _top) >= (size_t)(oopDesc::header_size() + Universe::heap()->oop_extra_words()),
+           "better have enough space left to fill with dummy");
+    HeapWord* obj = Universe::heap()->tlab_post_allocation_setup(_top);
+    CollectedHeap::fill_with_object(obj, _hard_end);
     result += invalidate();
   }
   return result;
 }
 
 void PLAB::add_undo_waste(HeapWord* obj, size_t word_sz) {
-  CollectedHeap::fill_with_object(obj, word_sz);
+  HeapWord* fill_start = Universe::heap()->tlab_post_allocation_setup(obj);
+  CollectedHeap::fill_with_object(fill_start, word_sz - pointer_delta(fill_start, obj));
   _undo_wasted += word_sz;
 }
 
diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp
--- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp
+++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp
@@ -54,8 +54,6 @@
     thread->tlab().initialize_statistics();
   }
 
-  Universe::heap()->accumulate_statistics_all_gclabs();
-
   // Publish new stats if some allocation occurred.
   if (global_stats()->allocation() != 0) {
     global_stats()->publish();
@@ -69,7 +67,7 @@
   size_t used     = Universe::heap()->tlab_used(thread);
 
   _gc_waste += (unsigned)remaining();
-  size_t total_allocated = _gclab ? thread->allocated_bytes_gclab() : thread->allocated_bytes();
+  size_t total_allocated = thread->allocated_bytes();
   size_t allocated_since_last_gc = total_allocated - _allocated_before_last_gc;
   _allocated_before_last_gc = total_allocated;
 
@@ -114,11 +112,7 @@
     invariants();
 
     if (retire) {
-      if (_gclab) {
-        myThread()->incr_allocated_bytes_gclab(used_bytes());
-      } else {
-        myThread()->incr_allocated_bytes(used_bytes());
-      }
+      myThread()->incr_allocated_bytes(used_bytes());
     }
 
     HeapWord* obj = Universe::heap()->tlab_post_allocation_setup(top());
@@ -194,9 +188,7 @@
   invariants();
 }
 
-void ThreadLocalAllocBuffer::initialize(bool gclab) {
-  _initialized = true;
-  _gclab = gclab;
+void ThreadLocalAllocBuffer::initialize() {
   initialize(NULL,                    // start
              NULL,                    // top
              NULL);                   // end
@@ -253,8 +245,7 @@
   // During jvm startup, the main thread is initialized
   // before the heap is initialized.  So reinitialize it now.
   guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread");
-  Thread::current()->tlab().initialize(false);
-  Thread::current()->gclab().initialize(true);
+  Thread::current()->tlab().initialize();
 
   log_develop_trace(gc, tlab)("TLAB min: " SIZE_FORMAT " initial: " SIZE_FORMAT " max: " SIZE_FORMAT,
                                min_size(), Thread::current()->tlab().initial_desired_size(), max_size());
@@ -317,27 +308,9 @@
 }
 
 Thread* ThreadLocalAllocBuffer::myThread() {
-  ByteSize gclab_offset = Thread::gclab_start_offset();
-  ByteSize tlab_offset = Thread::tlab_start_offset();
-  ByteSize offs = _gclab ? gclab_offset : tlab_offset;
-  Thread* thread = (Thread*)(((char *)this) +
-                   in_bytes(start_offset()) - in_bytes(offs));
-#ifdef ASSERT
-  assert(this == (_gclab ? &thread->gclab() : &thread->tlab()), "must be");
-#endif
-  return thread;
-}
-
-size_t ThreadLocalAllocBuffer::end_reserve() {
-  int reserve_size = typeArrayOopDesc::header_size(T_INT);
-  return MAX2(reserve_size, _reserve_for_allocation_prefetch);
-}
-
-void ThreadLocalAllocBuffer::rollback(size_t size) {
-  HeapWord* old_top = top();
-  if (old_top != NULL) { // Pathological case: we accept that we can't rollback.
-    set_top(old_top - size);
-  }
+  return (Thread*)(((char *)this) +
+                   in_bytes(start_offset()) -
+                   in_bytes(Thread::tlab_start_offset()));
 }
 
 
diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
--- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
+++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
@@ -61,8 +61,8 @@
 
   AdaptiveWeightedAverage _allocation_fraction;  // fraction of eden allocated in tlabs
 
-  bool _gclab;
-  bool _initialized;
+  void accumulate_statistics();
+  void initialize_statistics();
 
   void set_start(HeapWord* start)                { _start = start; }
   void set_end(HeapWord* end)                    { _end = end; }
@@ -81,6 +81,9 @@
   // Make parsable and release it.
   void reset();
 
+  // Resize based on amount of allocation, etc.
+  void resize();
+
   void invariants() const { assert(top() >= start() && top() <= end(), "invalid tlab"); }
 
   void initialize(HeapWord* start, HeapWord* top, HeapWord* end);
@@ -101,12 +104,10 @@
   static GlobalTLABStats* global_stats() { return _global_stats; }
 
 public:
-  ThreadLocalAllocBuffer() : _allocation_fraction(TLABAllocationWeight), _allocated_before_last_gc(0), _initialized(false) {
+  ThreadLocalAllocBuffer() : _allocation_fraction(TLABAllocationWeight), _allocated_before_last_gc(0) {
     // do nothing.  tlabs must be inited by initialize() calls
   }
 
-  bool is_initialized() const { return _initialized; };
-
   static size_t min_size()                       { return align_object_size(MinTLABSize / HeapWordSize) + alignment_reserve(); }
   static size_t max_size()                       { assert(_max_size != 0, "max_size not set up"); return _max_size; }
   static size_t max_size_in_bytes()              { return max_size() * BytesPerWord; }
@@ -127,17 +128,11 @@
   // Allocate size HeapWords. The memory is NOT initialized to zero.
   inline HeapWord* allocate(size_t size);
 
-  // Resize based on amount of allocation, etc.
-  void resize();
-
-  void accumulate_statistics();
-  void initialize_statistics();
-
-  // Rolls back a single allocation of the given size.
-  void rollback(size_t size);
-
   // Reserve space at the end of TLAB
-  static size_t end_reserve();
+  static size_t end_reserve() {
+    int reserve_size = typeArrayOopDesc::header_size(T_INT);
+    return MAX2(reserve_size, _reserve_for_allocation_prefetch);
+  }
   static size_t alignment_reserve()              { return align_object_size(end_reserve()); }
   static size_t alignment_reserve_in_bytes()     { return alignment_reserve() * HeapWordSize; }
 
@@ -165,7 +160,7 @@
   static void resize_all_tlabs();
 
   void fill(HeapWord* start, HeapWord* top, size_t new_size);
-  void initialize(bool gclab);
+  void initialize();
 
   static size_t refill_waste_limit_increment()   { return TLABWasteIncrement; }
 
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
@@ -27,6 +27,7 @@
 #include "gc/shared/gcTimer.hpp"
 #include "gc/shared/gcTraceTime.inline.hpp"
 #include "gc/shared/parallelCleaning.hpp"
+#include "gc/shared/plab.hpp"
 
 #include "gc/shenandoah/brooksPointer.hpp"
 #include "gc/shenandoah/shenandoahAllocTracker.hpp"
@@ -373,6 +374,8 @@
   _alloc_tracker(NULL),
   _cycle_memory_manager("Shenandoah Cycles", "end of GC cycle"),
   _stw_memory_manager("Shenandoah Pauses", "end of GC pause"),
+  _mutator_gclab_stats(new PLABStats("Shenandoah mutator GCLAB stats", OldPLABSize, PLABWeight)),
+  _collector_gclab_stats(new PLABStats("Shenandoah collector GCLAB stats", YoungPLABSize, PLABWeight)),
   _memory_pool(NULL)
 {
   log_info(gc, init)("Parallel GC threads: "UINT32_FORMAT, ParallelGCThreads);
@@ -548,23 +551,21 @@
 class ShenandoahInitGCLABClosure : public ThreadClosure {
 public:
   void do_thread(Thread* thread) {
-    thread->gclab().initialize(true);
+    ShenandoahHeap::heap()->initialize_gclab(thread);
   }
 };
 
 void ShenandoahHeap::post_initialize() {
   CollectedHeap::post_initialize();
-  if (UseTLAB) {
-    MutexLocker ml(Threads_lock);
-
-    ShenandoahInitGCLABClosure init_gclabs;
-    Threads::java_threads_do(&init_gclabs);
-    gc_threads_do(&init_gclabs);
-
-    // gclab can not be initialized early during VM startup, as it can not determinate its max_size.
-    // Now, we will let WorkGang to initialize gclab when new worker is created.
-    _workers->set_initialize_gclab();
-  }
+  MutexLocker ml(Threads_lock);
+
+  ShenandoahInitGCLABClosure init_gclabs;
+  Threads::java_threads_do(&init_gclabs);
+  gc_threads_do(&init_gclabs);
+
+  // The gclab cannot be initialized early during VM startup, because its max_size cannot be determined yet.
+  // From now on, let the WorkGang initialize the gclab whenever a new worker is created.
+  _workers->set_initialize_gclab();
 
   _scm->initialize(_max_workers);
   _full_gc->initialize(_gc_timer);
@@ -677,30 +678,28 @@
 HeapWord* ShenandoahHeap::allocate_from_gclab_slow(Thread* thread, size_t size) {
   // Retain tlab and allocate object in shared space if
   // the amount free in the tlab is too large to discard.
-  if (thread->gclab().free() > thread->gclab().refill_waste_limit()) {
-    thread->gclab().record_slow_allocation(size);
+  PLAB* gclab = thread->gclab();
+
+  // Discard gclab and allocate a new one.
+  // To minimize fragmentation, the last GCLAB may be smaller than the rest.
+  gclab->retire();
+  // Figure out size of new GCLAB
+  size_t new_gclab_size;
+  if (thread->is_Java_thread()) {
+    new_gclab_size = _mutator_gclab_stats->desired_plab_sz(Threads::number_of_threads());
+  } else {
+    new_gclab_size = _collector_gclab_stats->desired_plab_sz(workers()->active_workers());
+  }
+
+  // Allocate a new GCLAB...
+  HeapWord* gclab_buf = allocate_new_gclab(new_gclab_size);
+  if (gclab_buf == NULL) {
     return NULL;
   }
 
-  // Discard gclab and allocate a new one.
-  // To minimize fragmentation, the last GCLAB may be smaller than the rest.
-  size_t new_gclab_size = thread->gclab().compute_size(size);
-
-  thread->gclab().clear_before_allocation();
-
-  if (new_gclab_size == 0) {
-    return NULL;
-  }
-
-  // Allocate a new GCLAB...
-  HeapWord* obj = allocate_new_gclab(new_gclab_size);
-  if (obj == NULL) {
-    return NULL;
-  }
-
   if (ZeroTLAB) {
     // ..and clear it.
-    Copy::zero_to_words(obj, new_gclab_size);
+    Copy::zero_to_words(gclab_buf, new_gclab_size);
   } else {
     // ...and zap just allocated object.
 #ifdef ASSERT
@@ -708,11 +707,11 @@
     // ensure that the returned space is not considered parsable by
     // any concurrent GC thread.
     size_t hdr_size = oopDesc::header_size();
-    Copy::fill_to_words(obj + hdr_size, new_gclab_size - hdr_size, badHeapWordVal);
+    Copy::fill_to_words(gclab_buf + hdr_size, new_gclab_size - hdr_size, badHeapWordVal);
 #endif // ASSERT
   }
-  thread->gclab().fill(obj, obj + size, new_gclab_size);
-  return obj;
+  gclab->set_buf(gclab_buf, new_gclab_size);
+  return gclab->allocate(size);
 }
 
 HeapWord* ShenandoahHeap::allocate_new_tlab(size_t word_size) {
@@ -1081,18 +1080,20 @@
   ShenandoahRetireTLABClosure(bool retire) : _retire(retire) {}
 
   void do_thread(Thread* thread) {
-    assert(thread->gclab().is_initialized(), "GCLAB should be initialized for %s", thread->name());
-    thread->gclab().make_parsable(_retire);
+    PLAB* gclab = thread->gclab();
+    if (gclab != NULL) {
+      gclab->retire();
+    }
   }
 };
 
 void ShenandoahHeap::make_tlabs_parsable(bool retire_tlabs) {
   if (UseTLAB) {
     CollectedHeap::ensure_parsability(retire_tlabs);
-    ShenandoahRetireTLABClosure cl(retire_tlabs);
-    Threads::java_threads_do(&cl);
-    gc_threads_do(&cl);
   }
+  ShenandoahRetireTLABClosure cl(retire_tlabs);
+  Threads::java_threads_do(&cl);
+  gc_threads_do(&cl);
 }
 
 
@@ -1204,28 +1205,18 @@
   return ShenandoahHeapRegion::max_tlab_size_bytes();
 }
 
-class ShenandoahResizeGCLABClosure : public ThreadClosure {
-public:
-  void do_thread(Thread* thread) {
-    assert(thread->gclab().is_initialized(), "GCLAB should be initialized for %s", thread->name());
-    thread->gclab().resize();
-  }
-};
-
-void ShenandoahHeap::resize_all_tlabs() {
-  CollectedHeap::resize_all_tlabs();
-
-  ShenandoahResizeGCLABClosure cl;
-  Threads::java_threads_do(&cl);
-  gc_threads_do(&cl);
-}
-
 class ShenandoahAccumulateStatisticsGCLABClosure : public ThreadClosure {
 public:
   void do_thread(Thread* thread) {
-    assert(thread->gclab().is_initialized(), "GCLAB should be initialized for %s", thread->name());
-    thread->gclab().accumulate_statistics();
-    thread->gclab().initialize_statistics();
+    ShenandoahHeap* heap = ShenandoahHeap::heap();
+    PLAB* gclab = thread->gclab();
+    if (gclab != NULL) {
+      if (thread->is_Java_thread()) {
+        gclab->flush_and_retire_stats(heap->mutator_gclab_stats());
+      } else {
+        gclab->flush_and_retire_stats(heap->collector_gclab_stats());
+      }
+    }
   }
 };
 
@@ -1233,6 +1224,8 @@
   ShenandoahAccumulateStatisticsGCLABClosure cl;
   Threads::java_threads_do(&cl);
   gc_threads_do(&cl);
+  _mutator_gclab_stats->adjust_desired_plab_sz();
+  _collector_gclab_stats->adjust_desired_plab_sz();
 }
 
 bool  ShenandoahHeap::can_elide_tlab_store_barriers() const {
@@ -1492,7 +1485,7 @@
 
   set_concurrent_mark_in_progress(true);
   // We need to reset all TLABs because we'd lose marks on all objects allocated in them.
-  if (UseTLAB) {
+  {
     ShenandoahGCPhase phase(ShenandoahPhaseTimings::make_parsable);
     make_tlabs_parsable(true);
   }
@@ -1572,6 +1565,7 @@
 void ShenandoahHeap::op_final_evac() {
   assert(ShenandoahSafepoint::is_at_shenandoah_safepoint(), "Should be at safepoint");
 
+  accumulate_statistics_all_gclabs();
   set_evacuation_in_progress(false);
   if (ShenandoahVerify) {
     verifier()->verify_after_evacuation();
@@ -1684,6 +1678,10 @@
 
 void ShenandoahHeap::op_full(GCCause::Cause cause) {
   full_gc()->do_it(cause);
+  if (UseTLAB) {
+    ShenandoahGCPhase phase(ShenandoahPhaseTimings::full_gc_resize_tlabs);
+    resize_all_tlabs();
+  }
 }
 
 void ShenandoahHeap::op_degenerated(ShenandoahDegenPoint point) {
@@ -2291,6 +2289,7 @@
     verifier()->verify_before_updaterefs();
   }
 
+  accumulate_statistics_all_gclabs();
   set_evacuation_in_progress(false);
   set_update_refs_in_progress(true);
   make_tlabs_parsable(true);
@@ -2892,3 +2891,17 @@
 char ShenandoahHeap::gc_state() {
   return _gc_state.raw_value();
 }
+
+void ShenandoahHeap::initialize_gclab(Thread* thread) {
+  if (thread->is_Java_thread()) {
+    thread->set_gclab(new PLAB(OldPLABSize));
+  } else {
+    thread->set_gclab(new PLAB(YoungPLABSize));
+  }
+}
+
+void ShenandoahHeap::finalize_mutator_gclab(Thread* thread) {
+  thread->gclab()->flush_and_retire_stats(ShenandoahHeap::heap()->mutator_gclab_stats());
+  delete thread->gclab();
+  thread->set_gclab(NULL);
+}
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.hpp
@@ -33,6 +33,7 @@
 #include "services/memoryManager.hpp"
 
 class ConcurrentGCTimer;
+class PLABStats;
 class ShenandoahAsserts;
 class ShenandoahAllocTracker;
 class ShenandoahCollectorPolicy;
@@ -319,6 +320,9 @@
 
   ShenandoahSharedEnumFlag<GCCycleMode> _gc_cycle_mode;
 
+  PLABStats* _mutator_gclab_stats;
+  PLABStats* _collector_gclab_stats;
+
 #ifdef ASSERT
   int     _heap_expansion_count;
 #endif
@@ -370,8 +374,6 @@
   void safe_object_iterate(ObjectClosure* cl) /* override */;
   size_t unsafe_max_tlab_alloc(Thread *thread) const /* override */;
   size_t max_tlab_size() const /* override */;
-  void resize_all_tlabs() /* override */;
-  void accumulate_statistics_all_gclabs() /* override */;
   HeapWord* tlab_post_allocation_setup(HeapWord* obj) /* override */;
   uint oop_extra_words() /* override */;
   size_t tlab_used(Thread* ignored) const /* override */;
@@ -403,6 +405,10 @@
   ShenandoahPhaseTimings*   phase_timings()     const { return _phase_timings; }
   ShenandoahAllocTracker*   alloc_tracker()     const { return _alloc_tracker; }
 
+  void accumulate_statistics_all_gclabs();
+  PLABStats* mutator_gclab_stats()   const { return _mutator_gclab_stats; }
+  PLABStats* collector_gclab_stats() const { return _collector_gclab_stats; }
+
   inline ShenandoahHeapRegion* const heap_region_containing(const void* addr) const;
   inline size_t heap_region_index_containing(const void* addr) const;
   inline bool requires_marking(const void* entry) const;
@@ -581,6 +587,9 @@
   // Call after finished with evacuation.
   void leave_evacuation();
 
+  void initialize_gclab(Thread* thread);
+  void finalize_mutator_gclab(Thread* thread);
+
 private:
   template<class T>
   inline void marked_object_iterate(ShenandoahHeapRegion* region, T* cl, HeapWord* limit);
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp
@@ -26,6 +26,7 @@
 
 #include "classfile/javaClasses.inline.hpp"
 #include "gc/shared/markBitMap.inline.hpp"
+#include "gc/shared/plab.hpp"
 #include "gc/shared/threadLocalAllocBuffer.inline.hpp"
 #include "gc/shared/suspendibleThreadSet.hpp"
 #include "gc/shenandoah/brooksPointer.inline.hpp"
@@ -253,22 +254,19 @@
 }
 
 inline HeapWord* ShenandoahHeap::allocate_from_gclab(Thread* thread, size_t size) {
-  if (UseTLAB) {
-    if (!thread->gclab().is_initialized()) {
-      assert(!thread->is_Java_thread() && !thread->is_Worker_thread(),
-             "Performance: thread should have GCLAB: %s", thread->name());
-      // No GCLABs in this thread, fallback to shared allocation
-      return NULL;
-    }
-    HeapWord* obj = thread->gclab().allocate(size);
-    if (obj != NULL) {
-      return obj;
-    }
-    // Otherwise...
-    return allocate_from_gclab_slow(thread, size);
-  } else {
+  PLAB* gclab = thread->gclab();
+  if (gclab == NULL) {
+    assert(!thread->is_Java_thread() && !thread->is_Worker_thread(),
+           "Performance: thread should have GCLAB: %s", thread->name());
+    // No GCLABs in this thread, fallback to shared allocation
     return NULL;
   }
+  HeapWord* obj = gclab->allocate(size);
+  if (obj != NULL) {
+    return obj;
+  }
+  // Otherwise...
+  return allocate_from_gclab_slow(thread, size);
 }
 
 inline oop ShenandoahHeap::evacuate_object(oop p, Thread* thread) {
@@ -349,7 +347,7 @@
     // have to explicitly overwrite the copy with the filler object. With that overwrite,
     // we have to keep the fwdptr initialized and pointing to our (stale) copy.
     if (alloc_from_gclab) {
-      thread->gclab().rollback(size_with_fwdptr);
+      thread->gclab()->undo_allocation(filler, size_with_fwdptr);
     } else {
       fill_with_object(copy, size_no_fwdptr);
     }
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahMarkCompact.cpp b/src/hotspot/share/gc/shenandoah/shenandoahMarkCompact.cpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahMarkCompact.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahMarkCompact.cpp
@@ -225,11 +225,6 @@
       ShenandoahGCPhase phase(ShenandoahPhaseTimings::full_gc_heapdumps);
       heap->post_full_gc_dump(_gc_timer);
     }
-
-    if (UseTLAB) {
-      ShenandoahGCPhase phase(ShenandoahPhaseTimings::full_gc_resize_tlabs);
-      heap->resize_all_tlabs();
-    }
   }
 
 
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahWorkGroup.cpp b/src/hotspot/share/gc/shenandoah/shenandoahWorkGroup.cpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahWorkGroup.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahWorkGroup.cpp
@@ -57,7 +57,7 @@
 AbstractGangWorker* ShenandoahWorkGang::install_worker(uint which) {
   AbstractGangWorker* worker = WorkGang::install_worker(which);
   if (_initialize_gclab) {
-    worker->gclab().initialize(true);
+    ShenandoahHeap::heap()->initialize_gclab(worker);
   }
 
   return worker;
diff --git a/src/hotspot/share/runtime/thread.cpp b/src/hotspot/share/runtime/thread.cpp
--- a/src/hotspot/share/runtime/thread.cpp
+++ b/src/hotspot/share/runtime/thread.cpp
@@ -308,6 +308,9 @@
 #endif // ASSERT
 
   _oom_during_evac = 0;
+#ifdef INCLUDE_ALL_GCS
+  _gclab = NULL;
+#endif
 }
 
 void Thread::set_oom_during_evac(bool oom) {
@@ -2044,8 +2047,8 @@
   if (UseG1GC || (UseShenandoahGC && (ShenandoahSATBBarrier || ShenandoahKeepAliveBarrier || ShenandoahStoreValEnqueueBarrier))) {
     flush_barrier_queues();
   }
-  if (UseShenandoahGC && UseTLAB && gclab().is_initialized()) {
-    gclab().make_parsable(true);
+  if (UseShenandoahGC && gclab() != NULL) {
+    ShenandoahHeap::heap()->finalize_mutator_gclab(this);
   }
 #endif // INCLUDE_ALL_GCS
 
@@ -2105,6 +2108,10 @@
   assert(dirty_queue.is_active(), "dirty card queue should be active");
 
   _gc_state = _gc_state_global;
+
+  if (UseShenandoahGC) {
+    ShenandoahHeap::heap()->initialize_gclab(this);
+  }
 }
 #endif // INCLUDE_ALL_GCS
 
@@ -2132,8 +2139,8 @@
   if (UseG1GC || (UseShenandoahGC && (ShenandoahSATBBarrier || ShenandoahKeepAliveBarrier || ShenandoahStoreValEnqueueBarrier))) {
     flush_barrier_queues();
   }
-  if (UseShenandoahGC && UseTLAB && gclab().is_initialized()) {
-    gclab().make_parsable(true);
+  if (UseShenandoahGC && gclab() != NULL) {
+    ShenandoahHeap::heap()->finalize_mutator_gclab(this); // flushes stats, then deletes and clears the PLAB so it is not leaked
+  }
 #endif // INCLUDE_ALL_GCS
 
diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp
--- a/src/hotspot/share/runtime/thread.hpp
+++ b/src/hotspot/share/runtime/thread.hpp
@@ -91,6 +91,10 @@
 
 class WorkerThread;
 
+#ifdef INCLUDE_ALL_GCS
+class PLAB;
+#endif
+
 // Class hierarchy
 // - Thread
 //   - NamedThread
@@ -321,12 +325,11 @@
   volatile void* _polling_page;                 // Thread local polling page
 
   ThreadLocalAllocBuffer _tlab;                 // Thread-local eden
-  ThreadLocalAllocBuffer _gclab;                // Thread-local allocation buffer for GC (e.g. evacuation)
+#ifdef INCLUDE_ALL_GCS
+  PLAB*                  _gclab;                // Thread-local allocation buffer for GC (e.g. evacuation)
+#endif
   jlong _allocated_bytes;                       // Cumulative number of bytes allocated on
                                                 // the Java heap
-  jlong _allocated_bytes_gclab;                 // Cumulative number of bytes allocated on
-                                                // the Java heap, in GCLABs
-
   mutable TRACE_DATA _trace_data;               // Thread-local data for tracing
 
   ThreadExt _ext;
@@ -508,23 +511,20 @@
   ThreadLocalAllocBuffer& tlab()                 { return _tlab; }
   void initialize_tlab() {
     if (UseTLAB) {
-      tlab().initialize(false);
-      gclab().initialize(true);
+      tlab().initialize();
     }
   }
 
   // Thread-Local GC Allocation Buffer (GCLAB) support
-  ThreadLocalAllocBuffer& gclab()                { return _gclab; }
-
+#ifdef INCLUDE_ALL_GCS
+  PLAB* gclab()                                  { return _gclab; }
+  void set_gclab(PLAB* gclab)                    { _gclab = gclab; }
+#endif
   jlong allocated_bytes()               { return _allocated_bytes; }
   void set_allocated_bytes(jlong value) { _allocated_bytes = value; }
   void incr_allocated_bytes(jlong size) { _allocated_bytes += size; }
   inline jlong cooked_allocated_bytes();
 
-  jlong allocated_bytes_gclab()                { return _allocated_bytes_gclab; }
-  void set_allocated_bytes_gclab(jlong value)  { _allocated_bytes_gclab = value; }
-  void incr_allocated_bytes_gclab(jlong size)  { _allocated_bytes_gclab += size; }
-
   TRACE_DEFINE_THREAD_TRACE_DATA_OFFSET;
   TRACE_DATA* trace_data() const        { return &_trace_data; }
   bool is_trace_suspend()               { return (_suspend_flags & _trace_flag) != 0; }
@@ -710,10 +710,6 @@
 
 #undef TLAB_FIELD_OFFSET
 
-  static ByteSize gclab_start_offset()         { return byte_offset_of(Thread, _gclab) + ThreadLocalAllocBuffer::start_offset(); }
-  static ByteSize gclab_top_offset()           { return byte_offset_of(Thread, _gclab) + ThreadLocalAllocBuffer::top_offset(); }
-  static ByteSize gclab_end_offset()           { return byte_offset_of(Thread, _gclab) + ThreadLocalAllocBuffer::end_offset(); }
-
   static ByteSize allocated_bytes_offset()       { return byte_offset_of(Thread, _allocated_bytes); }
 
  public: