--- old/src/hotspot/cpu/x86/interp_masm_x86.cpp 2017-12-13 15:36:02.264783027 -0500 +++ new/src/hotspot/cpu/x86/interp_masm_x86.cpp 2017-12-13 15:36:01.968781559 -0500 @@ -1123,38 +1123,36 @@ // Code below is taking care of recycling TLVB memory, no safepoint should // occur between this point and the end of the remove_activation() method - Label vtbuffer_slow, vtbuffer_done, no_buffered_value_returned; const Register thread1 = NOT_LP64(rcx) LP64_ONLY(r15_thread); const uintptr_t chunk_mask = VTBufferChunk::chunk_mask(); - NOT_LP64(get_thread(thread1)); - cmpptr(Address(thread1, JavaThread::return_buffered_value_offset()), (intptr_t)NULL_WORD); - jcc(Assembler::equal, no_buffered_value_returned); - movptr(rbx, Address(rbp, frame::interpreter_frame_vt_alloc_ptr_offset * wordSize)); - call_VM_leaf(CAST_FROM_FN_PTR(address, - InterpreterRuntime::return_value_step2), rax, rbx); - NOT_LP64(get_thread(thread1)); - get_vm_result(rax, thread1); - jmp(vtbuffer_done); - bind(no_buffered_value_returned); + + Label TLVB_cleanup_done; + if (state == qtos) { + Label no_buffered_value_returned; + if (ReturnValuesInThreadLocalBuffer) { + NOT_LP64(get_thread(thread1)); + cmpptr(Address(thread1, JavaThread::return_buffered_value_offset()), (intptr_t)NULL_WORD); + jcc(Assembler::equal, no_buffered_value_returned); + movptr(rbx, Address(rbp, frame::interpreter_frame_vt_alloc_ptr_offset * wordSize)); + call_VM_leaf(CAST_FROM_FN_PTR(address, + InterpreterRuntime::relocate_return_value), rax, rbx); + NOT_LP64(get_thread(thread1)); + get_vm_result(rax, thread1); + jmp(TLVB_cleanup_done); // clean up has been performed during relocation + bind(no_buffered_value_returned); + } + } movptr(rbx, Address(rbp, frame::interpreter_frame_vt_alloc_ptr_offset * wordSize)); NOT_LP64(get_thread(thread1)); movptr(rcx, Address(thread1, JavaThread::vt_alloc_ptr_offset())); cmpptr(rbx, rcx); - jcc(Assembler::equal, vtbuffer_done); - andptr(rbx, chunk_mask); - andptr(rcx, chunk_mask); - cmpptr(rbx, rcx); - 
jcc(Assembler::notEqual, vtbuffer_slow); - movptr(rbx, Address(rbp, frame::interpreter_frame_vt_alloc_ptr_offset * wordSize)); - movptr(Address(thread1, JavaThread::vt_alloc_ptr_offset()), rbx); - jmp(vtbuffer_done); - bind(vtbuffer_slow); + jcc(Assembler::equal, TLVB_cleanup_done); push(state); movptr(rbx, Address(rbp, frame::interpreter_frame_vt_alloc_ptr_offset * wordSize)); call_VM_leaf(CAST_FROM_FN_PTR(address, - InterpreterRuntime::recycle_vtbuffer), rbx); + InterpreterRuntime::recycle_vtbuffer), rbx); pop(state); - bind(vtbuffer_done); + bind(TLVB_cleanup_done); // remove activation // get sender sp --- old/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp 2017-12-13 15:36:02.908786220 -0500 +++ new/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp 2017-12-13 15:36:02.640784891 -0500 @@ -256,6 +256,19 @@ // across a GC if there's one. __ super_call_VM_leaf(StubRoutines::store_value_type_fields_to_buf()); __ bind(skip); + + if (ReturnValuesInThreadLocalBuffer) { + // vt_alloc_ptr adjustment + Label no_adjustment; + __ cmpptr(rax, Address(r15_thread, in_bytes(JavaThread::vt_alloc_ptr_offset()))); + __ jcc(Assembler::notEqual, no_adjustment); + __ load_klass(rbx, rax); + __ movl(r13, Address(rbx, Klass::layout_helper_offset())); + __ lea(r14, Address(rax, r13, Address::times_1)); + __ movptr(Address(r15_thread, in_bytes(JavaThread::vt_alloc_ptr_offset())), r14); + __ bind(no_adjustment); + } + #endif } --- old/src/hotspot/cpu/x86/templateTable_x86.cpp 2017-12-13 15:36:03.436788839 -0500 +++ new/src/hotspot/cpu/x86/templateTable_x86.cpp 2017-12-13 15:36:03.204787688 -0500 @@ -2677,10 +2677,24 @@ } #endif if (state == qtos) { - const Register thread1 = NOT_LP64(rcx) LP64_ONLY(r15_thread); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::return_value), rax); - NOT_LP64(__ get_thread(thread1)); - __ get_vm_result(rax, thread1); +#ifndef _LP64 + ShouldNotReachHere(); +#else + Label not_buffered; + __ lea(r14, 
ExternalAddress(VTBuffer::_base)); + __ cmpptr(rax, r14); + __ jcc(Assembler::below, not_buffered); + __ lea(r14, ExternalAddress(VTBuffer::_end)); + __ cmpptr(rax, r14); + __ jcc(Assembler::aboveEqual, not_buffered); + if (ReturnValuesInThreadLocalBuffer) { + __ movptr((Address(r15_thread, JavaThread::return_buffered_value_offset())), rax); + } else { + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::reallocate_value_in_heap), rax); + __ get_vm_result(rax, r15_thread); + } + __ bind(not_buffered); +#endif } // Narrow result if state is itos but result type is smaller. --- old/src/hotspot/share/interpreter/interpreterRuntime.cpp 2017-12-13 15:36:04.044791853 -0500 +++ new/src/hotspot/share/interpreter/interpreterRuntime.cpp 2017-12-13 15:36:03.780790544 -0500 @@ -590,44 +590,30 @@ VTBuffer::fix_frame_vt_alloc_ptr(f, VTBufferChunk::chunk(thread->vt_alloc_ptr())); IRT_END -IRT_ENTRY(void, InterpreterRuntime::return_value(JavaThread* thread, oopDesc* obj)) - if (!VTBuffer::is_in_vt_buffer(obj)) { - thread->set_vm_result(obj); - return; - } +IRT_ENTRY(void, InterpreterRuntime::reallocate_value_in_heap(JavaThread* thread, oopDesc* obj)) + assert(VTBuffer::is_in_vt_buffer(obj), "must be called on buffered value"); + assert(!ReturnValuesInThreadLocalBuffer, "Should re-allocate value if ReturnValuesInThreadLocalBuffer is true"); + + Handle obj_h(THREAD, obj); assert(obj->klass()->is_value(), "Sanity check"); ValueKlass* vk = ValueKlass::cast(obj->klass()); - RegisterMap reg_map(thread, false); - frame current_frame = thread->last_frame(); - frame caller_frame = current_frame.sender(&reg_map); - if (!caller_frame.is_interpreted_frame()) { - // caller is not an interpreted frame, creating a new value in Java heap - Handle obj_h(THREAD, obj); - instanceOop res = vk->allocate_instance(CHECK); - Handle res_h(THREAD, res); - // copy value - vk->value_store(vk->data_for_oop(obj_h()), - vk->data_for_oop(res_h()), true, false); - thread->set_vm_result(res_h()); - 
return; - } else { - // A buffered value is being returned to an interpreted frame, - // but the work has to be delayed to remove_activation() because - // the frame cannot be modified now (GC can run at the safepoint - // when exiting runtime, and frame layout must be kept consistent - // with the OopMap). - thread->set_return_buffered_value(obj); - thread->set_vm_result(obj); - } + instanceOop res = vk->allocate_instance(CHECK); + Handle res_h(THREAD, res); + // copy value + vk->value_store(vk->data_for_oop(obj_h()), + vk->data_for_oop(res_h()), true, false); + thread->set_vm_result(res_h()); IRT_END -IRT_LEAF(void, InterpreterRuntime::return_value_step2(oopDesc* obj, void* alloc_ptr)) - +IRT_LEAF(void, InterpreterRuntime::relocate_return_value(oopDesc* obj, void* alloc_ptr)) JavaThread* thread = (JavaThread*)Thread::current(); assert(obj == thread->return_buffered_value(), "Consistency check"); assert(!Universe::heap()->is_in_reserved(obj), "Should only apply to buffered values"); oop dest = VTBuffer::relocate_return_value(thread, alloc_ptr, obj); + ValueKlass* vklass = ValueKlass::cast(dest->klass()); + void* start = (char*)(oopDesc*)dest + vklass->size_helper() * wordSize; + VTBuffer::recycle_vtbuffer(thread, start); thread->set_return_buffered_value(NULL); thread->set_vm_result(dest); IRT_END --- old/src/hotspot/share/interpreter/interpreterRuntime.hpp 2017-12-13 15:36:04.616794690 -0500 +++ new/src/hotspot/share/interpreter/interpreterRuntime.hpp 2017-12-13 15:36:04.352793381 -0500 @@ -122,8 +122,8 @@ // Value Buffers support static void recycle_vtbuffer(void *alloc_ptr); static void recycle_buffered_values(JavaThread* thread); - static void return_value(JavaThread* thread, oopDesc* obj); - static void return_value_step2(oopDesc* obj, void* alloc_ptr); + static void reallocate_value_in_heap(JavaThread* thread, oopDesc* obj); + static void relocate_return_value(oopDesc* obj, void* alloc_ptr); static void check_areturn(JavaThread* thread, oopDesc* obj); static 
void fix_frame_vt_alloc_ptr(JavaThread* thread); --- old/src/hotspot/share/memory/vtBuffer.cpp 2017-12-13 15:36:05.204797606 -0500 +++ new/src/hotspot/share/memory/vtBuffer.cpp 2017-12-13 15:36:04.932796257 -0500 @@ -42,12 +42,14 @@ int VTBuffer::_total_allocated = 0; int VTBuffer::_total_failed = 0; address VTBuffer::_base = NULL; +address VTBuffer::_end = NULL; address VTBuffer::_commit_ptr; size_t VTBuffer::_size; void VTBuffer::init() { if ((!(EnableValhalla || EnableMVT)) || ValueTypesBufferMaxMemory == 0) { _base = NULL; + _end = NULL; _commit_ptr = NULL; _size = 0; return; @@ -62,9 +64,11 @@ ValueTypesBufferMaxMemory = 0; _size = 0; _commit_ptr = NULL; + _end = NULL; } else { _commit_ptr = _base; _size = size; + _end = _base + _size; } } --- old/src/hotspot/share/memory/vtBuffer.hpp 2017-12-13 15:36:05.836800740 -0500 +++ new/src/hotspot/share/memory/vtBuffer.hpp 2017-12-13 15:36:05.564799391 -0500 @@ -136,8 +136,11 @@ class VTBuffer : AllStatic { friend class VMStructs; + friend class TemplateTable; + friend class InterpreterRuntime; private: static address _base; + static address _end; static size_t _size; static address _commit_ptr; --- old/src/hotspot/share/runtime/globals.hpp 2017-12-13 15:36:06.400803536 -0500 +++ new/src/hotspot/share/runtime/globals.hpp 2017-12-13 15:36:06.132802207 -0500 @@ -4128,6 +4128,9 @@ \ develop(bool, StressValueTypeReturnedAsFields, false, \ "stress return of fields instead of a value type reference") \ + \ + product(bool, ReturnValuesInThreadLocalBuffer, false, \ + "Interpreter returns values in TLVB when possible") \