--- old/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp	2018-09-14 14:46:13.683823408 +0300
+++ new/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp	2018-09-14 14:46:13.487810979 +0300
@@ -24,6 +24,8 @@
 
 #include "precompiled.hpp"
 #include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "runtime/thread.hpp"
 
 #define __ masm->
 
@@ -166,3 +168,118 @@
                                      Register obj1, Register obj2) {
   __ cmp(obj1, obj2);
 }
+
+// Puts the address of the allocated object into register `obj` and the end of the allocated object into register `obj_end`.
+void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, Register obj_end, Register tmp1, Register tmp2,
+                                        RegisterOrConstant size_expression, Label& slow_case) {
+  if (!Universe::heap()->supports_inline_contig_alloc()) {
+    __ b(slow_case);
+    return;
+  }
+
+  CollectedHeap* ch = Universe::heap();
+
+  const Register top_addr = tmp1;
+  const Register heap_end = tmp2;
+
+  if (size_expression.is_register()) {
+    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
+  } else {
+    assert_different_registers(obj, obj_end, top_addr, heap_end);
+  }
+
+  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw()); // TODO-AARCH64 check performance
+  if (load_const) {
+    __ mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
+  } else {
+    __ ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
+  }
+
+  // Calculate the new heap top by adding the size of the object
+  Label retry;
+  __ bind(retry);
+
+#ifdef AARCH64
+  __ ldxr(obj, top_addr);
+#else
+  __ ldr(obj, Address(top_addr));
+#endif // AARCH64
+
+  __ ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
+  __ add_rc(obj_end, obj, size_expression);
+
+  // Check if obj_end wrapped around, i.e. obj_end < obj; if so, take the slow case
+  __ cmp(obj_end, obj);
+  __ b(slow_case, lo);
+
+  // Take the slow case if the allocation does not fit below the heap end
+  __ cmp(obj_end, heap_end);
+  __ b(slow_case, hi);
+
+  // Update the heap top atomically; retry on contention
+#ifdef AARCH64
+  __ stxr(heap_end/*scratched*/, obj_end, top_addr);
+  __ cbnz_w(heap_end, retry);
+#else
+  __ atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
+  __ b(retry, ne);
+#endif // AARCH64
+
+  incr_allocated_bytes(masm, size_expression, tmp1);
+}
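+
+// For reference, a pseudo-C++ sketch of the fast path above (names are
+// illustrative, not HotSpot API): eden is carved out of a region shared by all
+// threads, so the top pointer must be advanced with a CAS (LL/SC on AArch64)
+// and the allocation retried on contention:
+//
+//   do {
+//     obj     = *eden_top;
+//     obj_end = obj + size;
+//     if (obj_end < obj)      goto slow_case;   // size wrapped around
+//     if (obj_end > heap_end) goto slow_case;   // does not fit in eden
+//   } while (!CAS(eden_top, /*expected*/ obj, /*new*/ obj_end));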
+
+// Puts the address of the allocated object into register `obj` and the end of the allocated object into register `obj_end`.
+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, Register obj_end, Register tmp1,
+                                        RegisterOrConstant size_expression, Label& slow_case) {
+  const Register tlab_end = tmp1;
+  assert_different_registers(obj, obj_end, tlab_end);
+
+  // TLABs are thread-local, so a plain bump-pointer update suffices (no CAS needed)
+  __ ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
+  __ ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
+  __ add_rc(obj_end, obj, size_expression);
+  __ cmp(obj_end, tlab_end);
+  __ b(slow_case, hi);
+  __ str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
+}
+
+void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, RegisterOrConstant size_in_bytes, Register tmp) {
+#ifdef AARCH64
+  __ ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+  __ add_rc(tmp, tmp, size_in_bytes);
+  __ str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+#else
+  // Bump the total bytes allocated by this thread
+  Label done;
+
+  // Borrow Rthread as the base address of the allocation counter
+  Register Ralloc = Rthread;
+  __ add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
+
+  // Fast path: add to the low word and store it if the addition did not carry
+  __ ldr(tmp, Address(Ralloc));
+  __ adds(tmp, tmp, size_in_bytes);
+  __ str(tmp, Address(Ralloc), cc);
+  __ b(done, cc);
+
+  // Otherwise increment the high word and store the counter single-copy atomically; this is an
+  // unlikely scenario on typical embedded systems, as it means more than 4GB has been allocated.
+  // The ldrd/strd instructions used for that require an even/odd pair of registers. Such a pair
+  // could be difficult to reserve at a higher level, so this routine picks one itself.
+  Register low, high;
+  // Select either R0/R1 or R2/R3
+  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
+    low = R2;
+    high = R3;
+  } else {
+    low = R0;
+    high = R1;
+  }
+  __ push(RegisterSet(low, high));
+
+  __ ldrd(low, Address(Ralloc));
+  __ adds(low, low, size_in_bytes);
+  __ adc(high, high, 0);
+  __ strd(low, Address(Ralloc));
+
+  __ pop(RegisterSet(low, high));
+
+  __ bind(done);
+
+  // Unborrow Rthread
+  __ sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
+#endif // AARCH64
+}
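
Note on the 32-bit path of incr_allocated_bytes above: the per-thread allocated-bytes
counter is 64 bits wide, so on AArch32 the common case is a single 32-bit add and store
of the low word, with a carry propagated into the high word only when that add overflows.
A minimal stand-alone C++ sketch of the logic (a hypothetical helper, not part of the
patch; the real code additionally relies on strd for a single-copy-atomic 64-bit store,
which plain C++ does not guarantee):

    #include <cstdint>

    // counter[0] is the low word, counter[1] the high word of the 64-bit total.
    static void incr_allocated_bytes_sketch(uint32_t counter[2], uint32_t size_in_bytes) {
      uint32_t low = counter[0];
      uint32_t sum = low + size_in_bytes;
      if (sum >= low) {
        counter[0] = sum;   // no carry: the common case, one 32-bit store
        return;
      }
      counter[0] = sum;     // carry: also bump the high word
      counter[1] += 1;      // (the assembly stores both words with a single strd)
    }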