# HG changeset patch # Parent 30d6eb7c2df9543997eff38c9145b110c32a9056 diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp --- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp @@ -849,102 +849,102 @@ __ push(rcx); } - if (UseTLAB && ShenandoahAsmWB) { - - Register new_obj = r8; - __ movptr(new_obj, Address(r15_thread, JavaThread::gclab_top_offset())); - __ testptr(new_obj, new_obj); - __ jcc(Assembler::zero, slow_case); // No TLAB. - - __ load_klass(rcx, rax); - - // Figure out object size. - __ movl(rcx, Address(rcx, Klass::layout_helper_offset())); - __ testl(rcx, Klass::_lh_instance_slow_path_bit); - // test to see if it has a finalizer or is malformed in some way - __ jcc(Assembler::notZero, slow_case); - __ cmpl(rcx, Klass::_lh_neutral_value); // Make sure it's an instance (LH > 0) - __ jcc(Assembler::lessEqual, not_an_instance); // Thrashes rcx, returns size in rcx. Uses rax. - __ bind(is_array); - - // Size in rdi, new_obj in r8, src obj in rax - - Register new_obj_end = rdi; - int oop_extra_words = Universe::heap()->oop_extra_words(); - __ addq(rcx, oop_extra_words * HeapWordSize); - __ lea(new_obj_end, Address(new_obj, rcx, Address::times_1)); - __ cmpptr(new_obj_end, Address(r15_thread, JavaThread::gclab_end_offset())); - __ jcc(Assembler::above, slow_case); - __ subq(rcx, oop_extra_words * HeapWordSize); - - // Store Brooks pointer and adjust start of newobj. - Universe::heap()->compile_prepare_oop(_masm, new_obj); - - // Size in rcx, new_obj in r8, src obj in rax - - // Copy object. - Label loop; - if (!c_abi) { - __ push(rdi); // Save new_obj_end - __ push(rsi); - } else { - __ mov(r9, rdi); // Save new_obj_end - } - __ shrl(rcx, 3); // Make it num-64-bit-words - __ mov(rdi, r8); // Mov dst into rdi - __ mov(rsi, rax); // Src into rsi. - __ rep_mov(); - if (!c_abi) { - __ pop(rsi); // Restore rsi. - __ pop(rdi); // Restore new_obj_end - } else { - __ mov(rdi, r9); // Restore new_obj_end - } - - // Src obj still in rax. - if (os::is_MP()) { - __ lock(); - } - __ cmpxchgptr(new_obj, Address(rax, BrooksPointer::byte_offset(), Address::times_1)); - __ jccb(Assembler::notEqual, done); // Failed. Updated object in rax. - // Otherwise, we succeeded. - __ mov(rax, new_obj); - __ movptr(Address(r15_thread, JavaThread::gclab_top_offset()), new_obj_end); - __ bind(done); - - if (!c_abi) { - __ pop(rcx); - __ pop(r8); - __ pop(rdi); - } - - __ ret(0); - - __ bind(not_an_instance); - if (!c_abi) { - __ push(rdx); - } - // Layout_helper bits are in rcx - __ movl(rdx, rcx); // Move layout_helper bits to rdx - __ movl(rdi, Address(rax, arrayOopDesc::length_offset_in_bytes())); - __ shrl(rcx, Klass::_lh_log2_element_size_shift); - __ andl(rcx, Klass::_lh_log2_element_size_mask); - __ shll(rdi); // Shifts left by number of bits in rcx (CL) - __ shrl(rdx, Klass::_lh_header_size_shift); - __ andl(rdx, Klass::_lh_header_size_mask); - __ addl(rdi, rdx); - // Round up. - __ addl(rdi, HeapWordSize-1); - __ andl(rdi, -HeapWordSize); - if (!c_abi) { - __ pop(rdx); - } - // Move size (rdi) into rcx - __ movl(rcx, rdi); - __ jmp(is_array); - - __ bind(slow_case); - } + // if (UseTLAB && ShenandoahAsmWB) { + + // Register new_obj = r8; + // __ movptr(new_obj, Address(r15_thread, JavaThread::gclab_top_offset())); + // __ testptr(new_obj, new_obj); + // __ jcc(Assembler::zero, slow_case); // No TLAB. + + // __ load_klass(rcx, rax); + + // // Figure out object size. + // __ movl(rcx, Address(rcx, Klass::layout_helper_offset())); + // __ testl(rcx, Klass::_lh_instance_slow_path_bit); + // // test to see if it has a finalizer or is malformed in some way + // __ jcc(Assembler::notZero, slow_case); + // __ cmpl(rcx, Klass::_lh_neutral_value); // Make sure it's an instance (LH > 0) + // __ jcc(Assembler::lessEqual, not_an_instance); // Thrashes rcx, returns size in rcx. Uses rax. + // __ bind(is_array); + + // // Size in rdi, new_obj in r8, src obj in rax + + // Register new_obj_end = rdi; + // int oop_extra_words = Universe::heap()->oop_extra_words(); + // __ addq(rcx, oop_extra_words * HeapWordSize); + // __ lea(new_obj_end, Address(new_obj, rcx, Address::times_1)); + // __ cmpptr(new_obj_end, Address(r15_thread, JavaThread::gclab_end_offset())); + // __ jcc(Assembler::above, slow_case); + // __ subq(rcx, oop_extra_words * HeapWordSize); + + // // Store Brooks pointer and adjust start of newobj. + // Universe::heap()->compile_prepare_oop(_masm, new_obj); + + // // Size in rcx, new_obj in r8, src obj in rax + + // // Copy object. + // Label loop; + // if (!c_abi) { + // __ push(rdi); // Save new_obj_end + // __ push(rsi); + // } else { + // __ mov(r9, rdi); // Save new_obj_end + // } + // __ shrl(rcx, 3); // Make it num-64-bit-words + // __ mov(rdi, r8); // Mov dst into rdi + // __ mov(rsi, rax); // Src into rsi. + // __ rep_mov(); + // if (!c_abi) { + // __ pop(rsi); // Restore rsi. + // __ pop(rdi); // Restore new_obj_end + // } else { + // __ mov(rdi, r9); // Restore new_obj_end + // } + + // // Src obj still in rax. + // if (os::is_MP()) { + // __ lock(); + // } + // __ cmpxchgptr(new_obj, Address(rax, BrooksPointer::byte_offset(), Address::times_1)); + // __ jccb(Assembler::notEqual, done); // Failed. Updated object in rax. + // // Otherwise, we succeeded. + // __ mov(rax, new_obj); + // __ movptr(Address(r15_thread, JavaThread::gclab_top_offset()), new_obj_end); + // __ bind(done); + + // if (!c_abi) { + // __ pop(rcx); + // __ pop(r8); + // __ pop(rdi); + // } + + // __ ret(0); + + // __ bind(not_an_instance); + // if (!c_abi) { + // __ push(rdx); + // } + // // Layout_helper bits are in rcx + // __ movl(rdx, rcx); // Move layout_helper bits to rdx + // __ movl(rdi, Address(rax, arrayOopDesc::length_offset_in_bytes())); + // __ shrl(rcx, Klass::_lh_log2_element_size_shift); + // __ andl(rcx, Klass::_lh_log2_element_size_mask); + // __ shll(rdi); // Shifts left by number of bits in rcx (CL) + // __ shrl(rdx, Klass::_lh_header_size_shift); + // __ andl(rdx, Klass::_lh_header_size_mask); + // __ addl(rdi, rdx); + // // Round up. + // __ addl(rdi, HeapWordSize-1); + // __ andl(rdi, -HeapWordSize); + // if (!c_abi) { + // __ pop(rdx); + // } + // // Move size (rdi) into rcx + // __ movl(rcx, rdi); + // __ jmp(is_array); + + // __ bind(slow_case); + // } if (!c_abi) { __ push(rdx); diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp --- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp +++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.cpp @@ -68,7 +68,7 @@ size_t used = Universe::heap()->tlab_used(thread); _gc_waste += (unsigned)remaining(); - size_t total_allocated = _gclab ? thread->allocated_bytes_gclab() : thread->allocated_bytes(); + size_t total_allocated = thread->allocated_bytes(); size_t allocated_since_last_gc = total_allocated - _allocated_before_last_gc; _allocated_before_last_gc = total_allocated; @@ -113,11 +113,7 @@ invariants(); if (retire) { - if (_gclab) { - myThread()->incr_allocated_bytes_gclab(used_bytes()); - } else { - myThread()->incr_allocated_bytes(used_bytes()); - } + myThread()->incr_allocated_bytes(used_bytes()); } HeapWord* obj = Universe::heap()->tlab_post_allocation_setup(top()); @@ -193,9 +189,8 @@ invariants(); } -void ThreadLocalAllocBuffer::initialize(bool gclab) { +void ThreadLocalAllocBuffer::initialize() { _initialized = true; - _gclab = gclab; initialize(NULL, // start NULL, // top NULL); // end @@ -252,8 +247,7 @@ // During jvm startup, the main (primordial) thread is initialized // before the heap is initialized. So reinitialize it now. guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread"); - Thread::current()->tlab().initialize(false); - Thread::current()->gclab().initialize(true); + Thread::current()->tlab().initialize(); log_develop_trace(gc, tlab)("TLAB min: " SIZE_FORMAT " initial: " SIZE_FORMAT " max: " SIZE_FORMAT, min_size(), Thread::current()->tlab().initial_desired_size(), max_size()); @@ -316,15 +310,9 @@ } Thread* ThreadLocalAllocBuffer::myThread() { - ByteSize gclab_offset = Thread::gclab_start_offset(); - ByteSize tlab_offset = Thread::tlab_start_offset(); - ByteSize offs = _gclab ? gclab_offset : tlab_offset; - Thread* thread = (Thread*)(((char *)this) + - in_bytes(start_offset()) - in_bytes(offs)); -#ifdef ASSERT - assert(this == (_gclab ? &thread->gclab() : &thread->tlab()), "must be"); -#endif - return thread; + return (Thread*)(((char *)this) + + in_bytes(start_offset()) - + in_bytes(Thread::tlab_start_offset())); } size_t ThreadLocalAllocBuffer::end_reserve() { diff --git a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp --- a/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp +++ b/src/hotspot/share/gc/shared/threadLocalAllocBuffer.hpp @@ -61,7 +61,6 @@ AdaptiveWeightedAverage _allocation_fraction; // fraction of eden allocated in tlabs - bool _gclab; bool _initialized; void set_start(HeapWord* start) { _start = start; } @@ -165,7 +164,7 @@ static void resize_all_tlabs(); void fill(HeapWord* start, HeapWord* top, size_t new_size); - void initialize(bool gclab); + void initialize(); static size_t refill_waste_limit_increment() { return TLABWasteIncrement; } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp @@ -524,7 +524,7 @@ class ShenandoahInitGCLABClosure : public ThreadClosure { public: void do_thread(Thread* thread) { - thread->gclab().initialize(true); + thread->tlab().initialize(); } }; @@ -533,7 +533,6 @@ MutexLocker ml(Threads_lock); ShenandoahInitGCLABClosure init_gclabs; - Threads::java_threads_do(&init_gclabs); gc_threads_do(&init_gclabs); // gclab can not be initialized early during VM startup, as it can not determinate its max_size. @@ -638,16 +637,16 @@ HeapWord* ShenandoahHeap::allocate_from_gclab_slow(Thread* thread, size_t size) { // Retain tlab and allocate object in shared space if // the amount free in the tlab is too large to discard. - if (thread->gclab().free() > thread->gclab().refill_waste_limit()) { - thread->gclab().record_slow_allocation(size); + if (thread->tlab().free() > thread->tlab().refill_waste_limit()) { + thread->tlab().record_slow_allocation(size); return NULL; } // Discard gclab and allocate a new one. // To minimize fragmentation, the last GCLAB may be smaller than the rest. - size_t new_gclab_size = thread->gclab().compute_size(size); + size_t new_gclab_size = thread->tlab().compute_size(size); - thread->gclab().clear_before_allocation(); + thread->tlab().clear_before_allocation(); if (new_gclab_size == 0) { return NULL; @@ -672,7 +671,7 @@ Copy::fill_to_words(obj + hdr_size, new_gclab_size - hdr_size, badHeapWordVal); #endif // ASSERT } - thread->gclab().fill(obj, obj + size, new_gclab_size); + thread->tlab().fill(obj, obj + size, new_gclab_size); return obj; } @@ -1031,8 +1030,8 @@ ShenandoahRetireTLABClosure(bool retire) : _retire(retire) {} void do_thread(Thread* thread) { - assert(thread->gclab().is_initialized(), "GCLAB should be initialized for %s", thread->name()); - thread->gclab().make_parsable(_retire); + assert(thread->tlab().is_initialized(), "GCLAB should be initialized for %s", thread->name()); + thread->tlab().make_parsable(_retire); } }; @@ -1040,7 +1039,6 @@ if (UseTLAB) { CollectedHeap::ensure_parsability(retire_tlabs); ShenandoahRetireTLABClosure cl(retire_tlabs); - Threads::java_threads_do(&cl); gc_threads_do(&cl); } } @@ -1200,8 +1198,8 @@ class ShenandoahResizeGCLABClosure : public ThreadClosure { public: void do_thread(Thread* thread) { - assert(thread->gclab().is_initialized(), "GCLAB should be initialized for %s", thread->name()); - thread->gclab().resize(); + assert(thread->tlab().is_initialized(), "GCLAB should be initialized for %s", thread->name()); + thread->tlab().resize(); } }; @@ -1209,22 +1207,20 @@ CollectedHeap::resize_all_tlabs(); ShenandoahResizeGCLABClosure cl; - Threads::java_threads_do(&cl); gc_threads_do(&cl); } class ShenandoahAccumulateStatisticsGCLABClosure : public ThreadClosure { public: void do_thread(Thread* thread) { - assert(thread->gclab().is_initialized(), "GCLAB should be initialized for %s", thread->name()); - thread->gclab().accumulate_statistics(); - thread->gclab().initialize_statistics(); + assert(thread->tlab().is_initialized(), "GCLAB should be initialized for %s", thread->name()); + thread->tlab().accumulate_statistics(); + thread->tlab().initialize_statistics(); } }; void ShenandoahHeap::accumulate_statistics_all_gclabs() { ShenandoahAccumulateStatisticsGCLABClosure cl; - Threads::java_threads_do(&cl); gc_threads_do(&cl); } @@ -1589,7 +1585,7 @@ if ((! Thread::current()->is_GC_task_thread()) && (! Thread::current()->is_ConcurrentGC_thread())) { assert(! Threads_lock->owned_by_self() || SafepointSynchronize::is_at_safepoint(), "must not hold Threads_lock here"); - log_warning(gc)("OOM during evacuation. Let Java thread wait until evacuation finishes."); + log_info(gc)("OOM during evacuation. Let Java thread wait until evacuation finishes."); while (is_evacuation_in_progress()) { // wait. Thread::current()->_ParkEvent->park(1); } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp b/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp --- a/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahHeap.inline.hpp @@ -274,14 +274,10 @@ } inline HeapWord* ShenandoahHeap::allocate_from_gclab(Thread* thread, size_t size) { - if (UseTLAB) { - if (!thread->gclab().is_initialized()) { - assert(!thread->is_Java_thread() && !thread->is_Worker_thread(), - "Performance: thread should have GCLAB: %s", thread->name()); - // No GCLABs in this thread, fallback to shared allocation - return NULL; - } - HeapWord* obj = thread->gclab().allocate(size); + if (UseTLAB && thread->is_Worker_thread()) { + assert(thread->tlab().is_initialized(), + "Performance: thread should have GCLAB: %s", thread->name()); + HeapWord* obj = thread->tlab().allocate(size); if (obj != NULL) { return obj; } @@ -374,7 +370,7 @@ // have to explicitly overwrite the copy with the filler object. With that overwrite, // we have to keep the fwdptr initialized and pointing to our (stale) copy. if (alloc_from_gclab) { - thread->gclab().rollback(size_with_fwdptr); + thread->tlab().rollback(size_with_fwdptr); } else { fill_with_object(copy, size_no_fwdptr); } diff --git a/src/hotspot/share/gc/shenandoah/shenandoahWorkGroup.cpp b/src/hotspot/share/gc/shenandoah/shenandoahWorkGroup.cpp --- a/src/hotspot/share/gc/shenandoah/shenandoahWorkGroup.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahWorkGroup.cpp @@ -57,7 +57,7 @@ AbstractGangWorker* ShenandoahWorkGang::install_worker(uint which) { AbstractGangWorker* worker = WorkGang::install_worker(which); if (_initialize_gclab) { - worker->gclab().initialize(true); + worker->tlab().initialize(); } return worker; diff --git a/src/hotspot/share/runtime/thread.cpp b/src/hotspot/share/runtime/thread.cpp --- a/src/hotspot/share/runtime/thread.cpp +++ b/src/hotspot/share/runtime/thread.cpp @@ -1912,9 +1912,6 @@ if (UseG1GC || (UseShenandoahGC && (ShenandoahSATBBarrier || ShenandoahConditionalSATBBarrier || ShenandoahKeepAliveBarrier))) { flush_barrier_queues(); } - if (UseShenandoahGC && UseTLAB && gclab().is_initialized()) { - gclab().make_parsable(true); - } #endif // INCLUDE_ALL_GCS log_info(os, thread)("JavaThread %s (tid: " UINTX_FORMAT ").", @@ -1997,9 +1994,6 @@ if (UseG1GC || (UseShenandoahGC && (ShenandoahSATBBarrier || ShenandoahConditionalSATBBarrier || ShenandoahKeepAliveBarrier))) { flush_barrier_queues(); } - if (UseShenandoahGC && UseTLAB && gclab().is_initialized()) { - gclab().make_parsable(true); - } #endif // INCLUDE_ALL_GCS Threads::remove(this); diff --git a/src/hotspot/share/runtime/thread.hpp b/src/hotspot/share/runtime/thread.hpp --- a/src/hotspot/share/runtime/thread.hpp +++ b/src/hotspot/share/runtime/thread.hpp @@ -272,12 +272,8 @@ friend class GCLocker; ThreadLocalAllocBuffer _tlab; // Thread-local eden - ThreadLocalAllocBuffer _gclab; // Thread-local allocation buffer for GC (e.g. evacuation) jlong _allocated_bytes; // Cumulative number of bytes allocated on // the Java heap - jlong _allocated_bytes_gclab; // Cumulative number of bytes allocated on - // the Java heap, in GCLABs - mutable TRACE_DATA _trace_data; // Thread-local data for tracing ThreadExt _ext; @@ -446,23 +442,15 @@ ThreadLocalAllocBuffer& tlab() { return _tlab; } void initialize_tlab() { if (UseTLAB) { - tlab().initialize(false); - gclab().initialize(true); + tlab().initialize(); } } - // Thread-Local GC Allocation Buffer (GCLAB) support - ThreadLocalAllocBuffer& gclab() { return _gclab; } - jlong allocated_bytes() { return _allocated_bytes; } void set_allocated_bytes(jlong value) { _allocated_bytes = value; } void incr_allocated_bytes(jlong size) { _allocated_bytes += size; } inline jlong cooked_allocated_bytes(); - jlong allocated_bytes_gclab() { return _allocated_bytes_gclab; } - void set_allocated_bytes_gclab(jlong value) { _allocated_bytes_gclab = value; } - void incr_allocated_bytes_gclab(jlong size) { _allocated_bytes_gclab += size; } - TRACE_DEFINE_THREAD_TRACE_DATA_OFFSET; TRACE_DATA* trace_data() const { return &_trace_data; } bool is_trace_suspend() { return (_suspend_flags & _trace_flag) != 0; } @@ -643,10 +631,6 @@ #undef TLAB_FIELD_OFFSET - static ByteSize gclab_start_offset() { return byte_offset_of(Thread, _gclab) + ThreadLocalAllocBuffer::start_offset(); } - static ByteSize gclab_top_offset() { return byte_offset_of(Thread, _gclab) + ThreadLocalAllocBuffer::top_offset(); } - static ByteSize gclab_end_offset() { return byte_offset_of(Thread, _gclab) + ThreadLocalAllocBuffer::end_offset(); } - static ByteSize allocated_bytes_offset() { return byte_offset_of(Thread, _allocated_bytes); } public: