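# HG changeset patch
# Parent 30d6eb7c2df9543997eff38c9145b110c32a9056
Shenandoah: drop the separate per-thread GCLAB and use Thread::_tlab for GC threads

- Thread: remove _gclab, _allocated_bytes_gclab, gclab() and the gclab_*_offset() accessors.
- ThreadLocalAllocBuffer: remove the _gclab flag; initialize() no longer takes an argument.
- ShenandoahHeap: GCLAB operations go through thread->tlab() and iterate GC threads only;
  allocate_from_gclab() uses the buffer only for worker threads.
- x86_64 stub generator: disable the assembly fast path that read JavaThread::gclab_top_offset().
- Log "OOM during evacuation" at info level instead of warning level.
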
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -849,102 +849,9 @@
       __ push(rcx);
     }
 
-    if (UseTLAB && ShenandoahAsmWB) {
-
-      Register new_obj = r8;
-      __ movptr(new_obj, Address(r15_thread, JavaThread::gclab_top_offset()));
-      __ testptr(new_obj, new_obj);
-      __ jcc(Assembler::zero, slow_case); // No TLAB.
-
-      __ load_klass(rcx, rax);
-
-      // Figure out object size.
-      __ movl(rcx, Address(rcx, Klass::layout_helper_offset()));
-      __ testl(rcx, Klass::_lh_instance_slow_path_bit);
-      // test to see if it has a finalizer or is malformed in some way
-      __ jcc(Assembler::notZero, slow_case);
-      __ cmpl(rcx, Klass::_lh_neutral_value); // Make sure it's an instance (LH > 0)
-      __ jcc(Assembler::lessEqual, not_an_instance); // Thrashes rcx, returns size in rcx. Uses rax.
-      __ bind(is_array);
-
-      // Size in rdi, new_obj in r8, src obj in rax
-
-      Register new_obj_end = rdi;
-      int oop_extra_words = Universe::heap()->oop_extra_words();
-      __ addq(rcx, oop_extra_words * HeapWordSize);
-      __ lea(new_obj_end, Address(new_obj, rcx, Address::times_1));
-      __ cmpptr(new_obj_end, Address(r15_thread, JavaThread::gclab_end_offset()));
-      __ jcc(Assembler::above, slow_case);
-      __ subq(rcx, oop_extra_words * HeapWordSize);
-
-      // Store Brooks pointer and adjust start of newobj.
-      Universe::heap()->compile_prepare_oop(_masm, new_obj);
-
-      // Size in rcx, new_obj in r8, src obj in rax
-
-      // Copy object.
-      Label loop;
-      if (!c_abi) {
-        __ push(rdi); // Save new_obj_end
-        __ push(rsi);
-      } else {
-        __ mov(r9, rdi); // Save new_obj_end
-      }
-      __ shrl(rcx, 3);   // Make it num-64-bit-words
-      __ mov(rdi, r8); // Mov dst into rdi
-      __ mov(rsi, rax); // Src into rsi.
-      __ rep_mov();
-      if (!c_abi) {
-        __ pop(rsi); // Restore rsi.
-        __ pop(rdi); // Restore new_obj_end
-      } else {
-        __ mov(rdi, r9); // Restore new_obj_end
-      }
-
-      // Src obj still in rax.
-      if (os::is_MP()) {
-        __ lock();
-      }
-      __ cmpxchgptr(new_obj, Address(rax, BrooksPointer::byte_offset(), Address::times_1));
-      __ jccb(Assembler::notEqual, done); // Failed. Updated object in rax.
-      // Otherwise, we succeeded.
-      __ mov(rax, new_obj);
-      __ movptr(Address(r15_thread, JavaThread::gclab_top_offset()), new_obj_end);
-      __ bind(done);
-
-      if (!c_abi) {
-        __ pop(rcx);
-        __ pop(r8);
-        __ pop(rdi);
-      }
-
-      __ ret(0);
-
-      __ bind(not_an_instance);
-      if (!c_abi) {
-        __ push(rdx);
-      }
-      // Layout_helper bits are in rcx
-      __ movl(rdx, rcx); // Move layout_helper bits to rdx
-      __ movl(rdi, Address(rax, arrayOopDesc::length_offset_in_bytes()));
-      __ shrl(rcx, Klass::_lh_log2_element_size_shift);
-      __ andl(rcx, Klass::_lh_log2_element_size_mask);
-      __ shll(rdi); // Shifts left by number of bits in rcx (CL)
-      __ shrl(rdx, Klass::_lh_header_size_shift);
-      __ andl(rdx, Klass::_lh_header_size_mask);
-      __ addl(rdi, rdx);
-      // Round up.
-      __ addl(rdi, HeapWordSize-1);
-      __ andl(rdi, -HeapWordSize);
-      if (!c_abi) {
-        __ pop(rdx);
-      }
-      // Move size (rdi) into rcx
-      __ movl(rcx, rdi);
-      __ jmp(is_array);
-
-      __ bind(slow_case);
-    }
+    // The assembly fast path that lived here (guarded by UseTLAB && ShenandoahAsmWB) copied
+    // the object into the thread's GCLAB via JavaThread::gclab_top_offset()/gclab_end_offset().
+    // Those accessors no longer exist, so control always continues with the slow path below.
 
     if (!c_abi) {
       __ push(rdx);
diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp
--- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp
+++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp
@@ -68,7 +68,7 @@
   size_t used     = Universe::heap()->tlab_used(thread);
 
   _gc_waste += (unsigned)remaining();
-  size_t total_allocated = _gclab ? thread->allocated_bytes_gclab() : thread->allocated_bytes();
+  size_t total_allocated = thread->allocated_bytes();
   size_t allocated_since_last_gc = total_allocated - _allocated_before_last_gc;
   _allocated_before_last_gc = total_allocated;
 
@@ -113,11 +113,7 @@
     invariants();
 
     if (retire) {
-      if (_gclab) {
-        myThread()->incr_allocated_bytes_gclab(used_bytes());
-      } else {
-        myThread()->incr_allocated_bytes(used_bytes());
-      }
+      myThread()->incr_allocated_bytes(used_bytes());
     }
 
     HeapWord* obj = Universe::heap()->tlab_post_allocation_setup(top());
@@ -193,9 +189,8 @@
   invariants();
 }
 
-void ThreadLocalAllocBuffer::initialize(bool gclab) {
+void ThreadLocalAllocBuffer::initialize() {
   _initialized = true;
-  _gclab = gclab;
   initialize(NULL,                    // start
              NULL,                    // top
              NULL);                   // end
@@ -252,8 +247,7 @@
   // During jvm startup, the main (primordial) thread is initialized
   // before the heap is initialized.  So reinitialize it now.
   guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread");
-  Thread::current()->tlab().initialize(false);
-  Thread::current()->gclab().initialize(true);
+  Thread::current()->tlab().initialize();
 
   log_develop_trace(gc, tlab)("TLAB min: " SIZE_FORMAT " initial: " SIZE_FORMAT " max: " SIZE_FORMAT,
                                min_size(), Thread::current()->tlab().initial_desired_size(), max_size());
@@ -316,15 +310,9 @@
 }
 
 Thread* ThreadLocalAllocBuffer::myThread() {
-  ByteSize gclab_offset = Thread::gclab_start_offset();
-  ByteSize tlab_offset = Thread::tlab_start_offset();
-  ByteSize offs = _gclab ? gclab_offset : tlab_offset;
-  Thread* thread = (Thread*)(((char *)this) +
-                   in_bytes(start_offset()) - in_bytes(offs));
-#ifdef ASSERT
-  assert(this == (_gclab ? &thread->gclab() : &thread->tlab()), "must be");
-#endif
-  return thread;
+  return (Thread*)(((char *)this) +  // Recover the Thread that embeds this buffer: take this buffer's address,
+                   in_bytes(start_offset()) -  // advance by start_offset() within the buffer,
+                   in_bytes(Thread::tlab_start_offset()));  // then subtract Thread::tlab_start_offset(). NOTE(review): presumably offset of _tlab plus start_offset(); its definition is not in this patch
 }
 
 size_t ThreadLocalAllocBuffer::end_reserve() {
diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
--- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
+++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp
@@ -61,7 +61,6 @@
 
   AdaptiveWeightedAverage _allocation_fraction;  // fraction of eden allocated in tlabs
 
-  bool _gclab;
   bool _initialized;
 
   void set_start(HeapWord* start)                { _start = start; }
@@ -165,7 +164,7 @@
   static void resize_all_tlabs();
 
   void fill(HeapWord* start, HeapWord* top, size_t new_size);
-  void initialize(bool gclab);
+  void initialize();
 
   static size_t refill_waste_limit_increment()   { return TLABWasteIncrement; }
 
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp
@@ -524,7 +524,7 @@
 class ShenandoahInitGCLABClosure : public ThreadClosure {
 public:
   void do_thread(Thread* thread) {
-    thread->gclab().initialize(true);
+    thread->tlab().initialize();  // gclab() is gone: the thread's tlab() slot now serves as its GCLAB
   }
 };
 
@@ -533,7 +533,6 @@
     MutexLocker ml(Threads_lock);
 
     ShenandoahInitGCLABClosure init_gclabs;
-    Threads::java_threads_do(&init_gclabs);
     gc_threads_do(&init_gclabs);
 
     // gclab can not be initialized early during VM startup, as it can not determinate its max_size.
@@ -638,16 +637,16 @@
 HeapWord* ShenandoahHeap::allocate_from_gclab_slow(Thread* thread, size_t size) {
   // Retain tlab and allocate object in shared space if
   // the amount free in the tlab is too large to discard.
-  if (thread->gclab().free() > thread->gclab().refill_waste_limit()) {
-    thread->gclab().record_slow_allocation(size);
+  if (thread->tlab().free() > thread->tlab().refill_waste_limit()) {
+    thread->tlab().record_slow_allocation(size);
     return NULL;
   }
 
   // Discard gclab and allocate a new one.
   // To minimize fragmentation, the last GCLAB may be smaller than the rest.
-  size_t new_gclab_size = thread->gclab().compute_size(size);
+  size_t new_gclab_size = thread->tlab().compute_size(size);
 
-  thread->gclab().clear_before_allocation();
+  thread->tlab().clear_before_allocation();
 
   if (new_gclab_size == 0) {
     return NULL;
@@ -672,7 +671,7 @@
     Copy::fill_to_words(obj + hdr_size, new_gclab_size - hdr_size, badHeapWordVal);
 #endif // ASSERT
   }
-  thread->gclab().fill(obj, obj + size, new_gclab_size);
+  thread->tlab().fill(obj, obj + size, new_gclab_size);
   return obj;
 }
 
@@ -1031,8 +1030,8 @@
   ShenandoahRetireTLABClosure(bool retire) : _retire(retire) {}
 
   void do_thread(Thread* thread) {
-    assert(thread->gclab().is_initialized(), "GCLAB should be initialized for %s", thread->name());
-    thread->gclab().make_parsable(_retire);
+    assert(thread->tlab().is_initialized(), "GCLAB should be initialized for %s", thread->name());
+    thread->tlab().make_parsable(_retire);
   }
 };
 
@@ -1040,7 +1039,6 @@
   if (UseTLAB) {
     CollectedHeap::ensure_parsability(retire_tlabs);
     ShenandoahRetireTLABClosure cl(retire_tlabs);
-    Threads::java_threads_do(&cl);
     gc_threads_do(&cl);
   }
 }
@@ -1200,8 +1198,8 @@
 class ShenandoahResizeGCLABClosure : public ThreadClosure {
 public:
   void do_thread(Thread* thread) {
-    assert(thread->gclab().is_initialized(), "GCLAB should be initialized for %s", thread->name());
-    thread->gclab().resize();
+    assert(thread->tlab().is_initialized(), "GCLAB should be initialized for %s", thread->name());
+    thread->tlab().resize();
   }
 };
 
@@ -1209,22 +1207,20 @@
   CollectedHeap::resize_all_tlabs();
 
   ShenandoahResizeGCLABClosure cl;
-  Threads::java_threads_do(&cl);
   gc_threads_do(&cl);
 }
 
 class ShenandoahAccumulateStatisticsGCLABClosure : public ThreadClosure {
 public:
   void do_thread(Thread* thread) {
-    assert(thread->gclab().is_initialized(), "GCLAB should be initialized for %s", thread->name());
-    thread->gclab().accumulate_statistics();
-    thread->gclab().initialize_statistics();
+    assert(thread->tlab().is_initialized(), "GCLAB should be initialized for %s", thread->name());
+    thread->tlab().accumulate_statistics();
+    thread->tlab().initialize_statistics();
   }
 };
 
 void ShenandoahHeap::accumulate_statistics_all_gclabs() {
   ShenandoahAccumulateStatisticsGCLABClosure cl;
-  Threads::java_threads_do(&cl);
   gc_threads_do(&cl);
 }
 
@@ -1589,7 +1585,7 @@
   if ((! Thread::current()->is_GC_task_thread()) && (! Thread::current()->is_ConcurrentGC_thread())) {
     assert(! Threads_lock->owned_by_self()
            || SafepointSynchronize::is_at_safepoint(), "must not hold Threads_lock here");
-    log_warning(gc)("OOM during evacuation. Let Java thread wait until evacuation finishes.");
+    log_info(gc)("OOM during evacuation. Let Java thread wait until evacuation finishes.");
     while (is_evacuation_in_progress()) { // wait.
       Thread::current()->_ParkEvent->park(1);
     }
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp
@@ -274,14 +274,10 @@
 }
 
 inline HeapWord* ShenandoahHeap::allocate_from_gclab(Thread* thread, size_t size) {
-  if (UseTLAB) {
-    if (!thread->gclab().is_initialized()) {
-      assert(!thread->is_Java_thread() && !thread->is_Worker_thread(),
-             "Performance: thread should have GCLAB: %s", thread->name());
-      // No GCLABs in this thread, fallback to shared allocation
-      return NULL;
-    }
-    HeapWord* obj = thread->gclab().allocate(size);
+  if (UseTLAB && thread->is_Worker_thread()) {
+    assert(thread->tlab().is_initialized(),
+           "Performance: thread should have GCLAB: %s", thread->name());
+    HeapWord* obj = thread->tlab().allocate(size);
     if (obj != NULL) {
       return obj;
     }
@@ -374,7 +370,7 @@
     // have to explicitly overwrite the copy with the filler object. With that overwrite,
     // we have to keep the fwdptr initialized and pointing to our (stale) copy.
     if (alloc_from_gclab) {
-      thread->gclab().rollback(size_with_fwdptr);
+      thread->tlab().rollback(size_with_fwdptr);
     } else {
       fill_with_object(copy, size_no_fwdptr);
     }
diff --git a/src/hotspot/share/gc/shenandoah/shenandoahWorkGroup.cpp b/src/hotspot/share/gc/shenandoah/shenandoahWorkGroup.cpp
--- a/src/hotspot/share/gc/shenandoah/shenandoahWorkGroup.cpp
+++ b/src/hotspot/share/gc/shenandoah/shenandoahWorkGroup.cpp
@@ -57,7 +57,7 @@
 AbstractGangWorker* ShenandoahWorkGang::install_worker(uint which) {
   AbstractGangWorker* worker = WorkGang::install_worker(which);
   if (_initialize_gclab) {
-    worker->gclab().initialize(true);
+    worker->tlab().initialize();
   }
 
   return worker;
diff --git a/src/hotspot/share/runtime/thread.cpp b/src/hotspot/share/runtime/thread.cpp
--- a/src/hotspot/share/runtime/thread.cpp
+++ b/src/hotspot/share/runtime/thread.cpp
@@ -1912,9 +1912,6 @@
   if (UseG1GC || (UseShenandoahGC && (ShenandoahSATBBarrier || ShenandoahConditionalSATBBarrier || ShenandoahKeepAliveBarrier))) {
     flush_barrier_queues();
   }
-  if (UseShenandoahGC && UseTLAB && gclab().is_initialized()) {
-    gclab().make_parsable(true);
-  }
 #endif // INCLUDE_ALL_GCS
 
   log_info(os, thread)("JavaThread %s (tid: " UINTX_FORMAT ").",
@@ -1997,9 +1994,6 @@
   if (UseG1GC || (UseShenandoahGC && (ShenandoahSATBBarrier || ShenandoahConditionalSATBBarrier || ShenandoahKeepAliveBarrier))) {
     flush_barrier_queues();
   }
-  if (UseShenandoahGC && UseTLAB && gclab().is_initialized()) {
-    gclab().make_parsable(true);
-  }
 #endif // INCLUDE_ALL_GCS
 
   Threads::remove(this);
diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp
--- a/src/hotspot/share/runtime/thread.hpp
+++ b/src/hotspot/share/runtime/thread.hpp
@@ -272,12 +272,8 @@
   friend class GCLocker;
 
   ThreadLocalAllocBuffer _tlab;                 // Thread-local eden
-  ThreadLocalAllocBuffer _gclab;                // Thread-local allocation buffer for GC (e.g. evacuation)
   jlong _allocated_bytes;                       // Cumulative number of bytes allocated on
                                                 // the Java heap
-  jlong _allocated_bytes_gclab;                 // Cumulative number of bytes allocated on
-                                                // the Java heap, in GCLABs
-
   mutable TRACE_DATA _trace_data;               // Thread-local data for tracing
 
   ThreadExt _ext;
@@ -446,23 +442,15 @@
   ThreadLocalAllocBuffer& tlab()                 { return _tlab; }
   void initialize_tlab() {
     if (UseTLAB) {
-      tlab().initialize(false);
-      gclab().initialize(true);
+      tlab().initialize();  // sole buffer set up here now that gclab() has been removed from Thread
     }
   }
 
-  // Thread-Local GC Allocation Buffer (GCLAB) support
-  ThreadLocalAllocBuffer& gclab()                { return _gclab; }
-
   jlong allocated_bytes()               { return _allocated_bytes; }
   void set_allocated_bytes(jlong value) { _allocated_bytes = value; }
   void incr_allocated_bytes(jlong size) { _allocated_bytes += size; }
   inline jlong cooked_allocated_bytes();
 
-  jlong allocated_bytes_gclab()                { return _allocated_bytes_gclab; }
-  void set_allocated_bytes_gclab(jlong value)  { _allocated_bytes_gclab = value; }
-  void incr_allocated_bytes_gclab(jlong size)  { _allocated_bytes_gclab += size; }
-
   TRACE_DEFINE_THREAD_TRACE_DATA_OFFSET;
   TRACE_DATA* trace_data() const        { return &_trace_data; }
   bool is_trace_suspend()               { return (_suspend_flags & _trace_flag) != 0; }
@@ -643,10 +631,6 @@
 
 #undef TLAB_FIELD_OFFSET
 
-  static ByteSize gclab_start_offset()         { return byte_offset_of(Thread, _gclab) + ThreadLocalAllocBuffer::start_offset(); }
-  static ByteSize gclab_top_offset()           { return byte_offset_of(Thread, _gclab) + ThreadLocalAllocBuffer::top_offset(); }
-  static ByteSize gclab_end_offset()           { return byte_offset_of(Thread, _gclab) + ThreadLocalAllocBuffer::end_offset(); }
-
   static ByteSize allocated_bytes_offset()       { return byte_offset_of(Thread, _allocated_bytes); }
 
  public: