# HG changeset patch
# User enevill
# Date 1457893965 0
#      Sun Mar 13 18:32:45 2016 +0000
# Node ID 236adba84ea6996b20acd3efa071c48fa59c2658
# Parent  cdc9ef77a4da4c011cffdaa15583c957af3b4a97
8151775: aarch64: add support for 8.1 LSE atomic operations
Reviewed-by: aph

diff --git a/src/cpu/aarch64/vm/assembler_aarch64.hpp b/src/cpu/aarch64/vm/assembler_aarch64.hpp
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp
@@ -1221,6 +1221,38 @@
   INSN(caspal, true, true)
 #undef INSN
 
+  // 8.1 Atomic operations
+  void lse_atomic(Register Rs, Register Rt, Register Rn,
+                  enum operand_size sz, int op1, int op2, bool a, bool r) {
+    starti;
+    f(sz, 31, 30), f(0b111000, 29, 24), f(a, 23), f(r, 22), f(1, 21);
+    rf(Rs, 16), f(op1, 15), f(op2, 14, 12), f(0, 11, 10), rf(Rn, 5), rf(Rt, 0);
+  }
+
+#define INSN(NAME, NAME_A, NAME_L, NAME_AL, op1, op2)                   \
+  void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) {   \
+    lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, false);                 \
+  }                                                                     \
+  void NAME_A(operand_size sz, Register Rs, Register Rt, Register Rn) { \
+    lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, false);                  \
+  }                                                                     \
+  void NAME_L(operand_size sz, Register Rs, Register Rt, Register Rn) { \
+    lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, true);                  \
+  }                                                                     \
+  void NAME_AL(operand_size sz, Register Rs, Register Rt, Register Rn) {\
+    lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, true);                   \
+  }
+  INSN(ldadd, ldadda, ldaddl, ldaddal, 0, 0b000);
+  INSN(ldbic, ldbica, ldbicl, ldbical, 0, 0b001);
+  INSN(ldeor, ldeora, ldeorl, ldeoral, 0, 0b010);
+  INSN(ldorr, ldorra, ldorrl, ldorral, 0, 0b011);
+  INSN(ldsmax, ldsmaxa, ldsmaxl, ldsmaxal, 0, 0b100);
+  INSN(ldsmin, ldsmina, ldsminl, ldsminal, 0, 0b101);
+  INSN(ldumax, ldumaxa, ldumaxl, ldumaxal, 0, 0b110);
+  INSN(ldumin, ldumina, lduminl, lduminal, 0, 0b111);
+  INSN(swp, swpa, swpl, swpal, 1, 0b000);
+#undef INSN
+
   // Load register (literal)
 #define INSN(NAME, opc, V)                                              \
   void NAME(Register Rt, address dest) {                                \
diff --git a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp
@@ -3120,6 +3120,7 @@
   Address addr = as_Address(src->as_address_ptr(), noreg);
   BasicType type = src->type();
   bool is_oop = type == T_OBJECT || type == T_ARRAY;
+  Assembler::operand_size sz = Assembler::xword;
 
   void (MacroAssembler::* lda)(Register Rd, Register Ra);
   void (MacroAssembler::* add)(Register Rd, Register Rn, RegisterOrConstant increment);
@@ -3130,6 +3131,7 @@
       lda = &MacroAssembler::ldaxrw;
       add = &MacroAssembler::addw;
       stl = &MacroAssembler::stlxrw;
+      sz = Assembler::word;
       break;
     case T_LONG:
       lda = &MacroAssembler::ldaxr;
@@ -3142,6 +3144,7 @@
         lda = &MacroAssembler::ldaxrw;
         add = &MacroAssembler::addw;
         stl = &MacroAssembler::stlxrw;
+        sz = Assembler::word;
       } else {
         lda = &MacroAssembler::ldaxr;
         add = &MacroAssembler::add;
@@ -3170,14 +3173,23 @@
         assert_different_registers(inc.as_register(), dst, addr.base(), tmp, rscratch1, rscratch2);
       }
-      Label again;
       __ lea(tmp, addr);
-      __ prfm(Address(tmp), PSTL1STRM);
-      __ bind(again);
-      (_masm->*lda)(dst, tmp);
-      (_masm->*add)(rscratch1, dst, inc);
-      (_masm->*stl)(rscratch2, rscratch1, tmp);
-      __ cbnzw(rscratch2, again);
+      if (UseLSE) {
+        if (inc.is_register()) {
+          __ ldaddal(sz, inc.as_register(), dst, tmp);
+        } else {
+          __ mov(rscratch2, inc.as_constant());
+          __ ldaddal(sz, rscratch2, dst, tmp);
+        }
+      } else {
+        Label again;
+        __ prfm(Address(tmp), PSTL1STRM);
+        __ bind(again);
+        (_masm->*lda)(dst, tmp);
+        (_masm->*add)(rscratch1, dst, inc);
+        (_masm->*stl)(rscratch2, rscratch1, tmp);
+        __ cbnzw(rscratch2, again);
+      }
       break;
     }
     case lir_xchg:
@@ -3190,13 +3202,17 @@
         obj = rscratch1;
       }
       assert_different_registers(obj, addr.base(), tmp, rscratch2, dst);
-      Label again;
       __ lea(tmp, addr);
-      __ prfm(Address(tmp), PSTL1STRM);
-      __ bind(again);
-      (_masm->*lda)(dst, tmp);
-      (_masm->*stl)(rscratch2, obj, tmp);
-      __ cbnzw(rscratch2, again);
+      if (UseLSE) {
+        __ swp(sz, obj, dst, tmp);
+      } else {
+        Label again;
+        __ prfm(Address(tmp), PSTL1STRM);
+        __ bind(again);
+        (_masm->*lda)(dst, tmp);
+        (_masm->*stl)(rscratch2, obj, tmp);
+        __ cbnzw(rscratch2, again);
+      }
       if (is_oop && UseCompressedOops) {
         __ decode_heap_oop(dst);
       }
diff --git a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
@@ -1637,6 +1637,11 @@
 }
 
 void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
+  if (UseLSE) {
+    mov(tmp, 1);
+    ldadd(Assembler::word, tmp, zr, counter_addr);
+    return;
+  }
   Label retry_load;
   prfm(Address(counter_addr), PSTL1STRM);
   bind(retry_load);
@@ -2172,8 +2177,18 @@
   return a != b.as_register() && a != c && b.as_register() != c;
 }
 
-#define ATOMIC_OP(LDXR, OP, IOP, STXR)                                  \
+#define ATOMIC_OP(LDXR, OP, IOP, AOP, STXR, sz)                         \
 void MacroAssembler::atomic_##OP(Register prev, RegisterOrConstant incr, Register addr) { \
+  if (UseLSE) {                                                         \
+    prev = prev->is_valid() ? prev : zr;                                \
+    if (incr.is_register()) {                                           \
+      AOP(sz, incr.as_register(), prev, addr);                          \
+    } else {                                                            \
+      mov(rscratch2, incr.as_constant());                               \
+      AOP(sz, rscratch2, prev, addr);                                   \
+    }                                                                   \
+    return;                                                             \
+  }                                                                     \
   Register result = rscratch2;                                          \
   if (prev->is_valid())                                                 \
     result = different(prev, incr, addr) ? prev : rscratch2;            \
@@ -2190,13 +2205,18 @@
   }                                                                     \
 }
 
-ATOMIC_OP(ldxr, add, sub, stxr)
-ATOMIC_OP(ldxrw, addw, subw, stxrw)
+ATOMIC_OP(ldxr, add, sub, ldadd, stxr, Assembler::xword)
+ATOMIC_OP(ldxrw, addw, subw, ldadd, stxrw, Assembler::word)
 
 #undef ATOMIC_OP
 
-#define ATOMIC_XCHG(OP, LDXR, STXR)                                     \
+#define ATOMIC_XCHG(OP, LDXR, STXR, sz)                                 \
 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) { \
+  if (UseLSE) {                                                         \
+    prev = prev->is_valid() ? prev : zr;                                \
+    swp(sz, newv, prev, addr);                                          \
+    return;                                                             \
+  }                                                                     \
   Register result = rscratch2;                                          \
   if (prev->is_valid())                                                 \
     result = different(prev, newv, addr) ? prev : rscratch2;            \
@@ -2211,8 +2231,8 @@
     mov(prev, result);                                                  \
 }
 
-ATOMIC_XCHG(xchg, ldxr, stxr)
-ATOMIC_XCHG(xchgw, ldxrw, stxrw)
+ATOMIC_XCHG(xchg, ldxr, stxr, Assembler::xword)
+ATOMIC_XCHG(xchgw, ldxrw, stxrw, Assembler::word)
 
 #undef ATOMIC_XCHG
 
diff --git a/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp b/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp
--- a/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp
+++ b/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp
@@ -1981,16 +1981,21 @@
     Register rscratch3 = r0;
     __ push(rscratch1);
     __ push(rscratch2);
-    __ push(rscratch3);
-    Label L;
     __ mov(rscratch2, (address) &BytecodeCounter::_counter_value);
-    __ prfm(Address(rscratch2), PSTL1STRM);
-    __ bind(L);
-    __ ldxr(rscratch1, rscratch2);
-    __ add(rscratch1, rscratch1, 1);
-    __ stxr(rscratch3, rscratch1, rscratch2);
-    __ cbnzw(rscratch3, L);
-    __ pop(rscratch3);
+    if (UseLSE) {
+      __ mov(rscratch1, 1);
+      __ ldadd(Assembler::xword, rscratch1, zr, rscratch2);
+    } else {
+      __ push(rscratch3);
+      Label L;
+      __ prfm(Address(rscratch2), PSTL1STRM);
+      __ bind(L);
+      __ ldxr(rscratch1, rscratch2);
+      __ add(rscratch1, rscratch1, 1);
+      __ stxr(rscratch3, rscratch1, rscratch2);
+      __ cbnzw(rscratch3, L);
+      __ pop(rscratch3);
+    }
     __ pop(rscratch2);
     __ pop(rscratch1);
   }
diff --git a/src/cpu/aarch64/vm/vm_version_aarch64.hpp b/src/cpu/aarch64/vm/vm_version_aarch64.hpp
--- a/src/cpu/aarch64/vm/vm_version_aarch64.hpp
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.hpp
@@ -73,6 +73,7 @@
     CPU_SHA1 = (1<<5),
     CPU_SHA2 = (1<<6),
     CPU_CRC32 = (1<<7),
+    CPU_LSE = (1<<8),
     CPU_A53MAC = (1 << 30),
     CPU_DMB_ATOMICS = (1 << 31),
   };
diff --git a/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp b/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
--- a/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
+++ b/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp
@@ -36,6 +36,19 @@
 #define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE);
 #define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
 
+// CASALW w2, w0, [x1]
+#define CASALW ".word 0b10001000111000101111110000100000;"
+// CASAL x2, x0, [x1]
+#define CASAL ".word 0b11001000111000101111110000100000;"
+// LDADDALW w0, w2, [x1]
+#define LDADDALW ".word 0b10111000111000000000000000100010;"
+// LDADDAL x0, x2, [x1]
+#define LDADDAL ".word 0b11111000111000000000000000100010;"
+// SWPW w0, w2, [x1]
+#define SWPW ".word 0b10111000001000001000000000100010;"
+// SWP x0, x2, [x1]
+#define SWP ".word 0b11111000001000001000000000100010;"
+
 inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; }
 inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; }
 inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; }
@@ -51,6 +64,17 @@
 inline jint Atomic::add(jint add_value, volatile jint* dest)
 {
+  if (UseLSE) {
+    register jint r_add_value asm("w0") = add_value;
+    register volatile jint *r_dest asm("x1") = dest;
+    register jint r_result asm("w2");
+    __asm volatile(LDADDALW
+                   : [_result]"=r"(r_result)
+                   : [_add_value]"r"(r_add_value),
+                     [_dest]"r"(r_dest)
+                   : "memory");
+    return r_result+add_value;
+  }
   return __sync_add_and_fetch(dest, add_value);
 }
 
@@ -76,6 +100,18 @@
 inline jint Atomic::xchg (jint exchange_value, volatile jint* dest)
 {
+  if (UseLSE) {
+    register jint r_exchange_value asm("w0") = exchange_value;
+    register volatile jint *r_dest asm("x1") = dest;
+    register jint r_result asm("w2");
+    __asm volatile(SWPW
+                   : [_result]"=r"(r_result)
+                   : [_exchange_value]"r"(r_exchange_value),
+                     [_dest]"r"(r_dest)
+                   : "memory");
+    FULL_MEM_BARRIER;
+    return r_result;
+  }
   jint res = __sync_lock_test_and_set (dest, exchange_value);
   FULL_MEM_BARRIER;
   return res;
@@ -87,9 +123,19 @@
                                     (volatile intptr_t*) dest);
 }
 
-
 inline jint Atomic::cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value)
 {
+  if (UseLSE) {
+    register jint r_exchange_value asm("w0") = exchange_value;
+    register volatile jint *r_dest asm("x1") = dest;
+    register jint r_compare_value asm("w2") = compare_value;
+    __asm volatile(CASALW
+                   : [_compare_value]"+r"(r_compare_value)
+                   : [_exchange_value]"r"(r_exchange_value),
+                     [_dest]"r"(r_dest)
+                   : "memory");
+    return r_compare_value;
+  }
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
 }
 
@@ -98,6 +144,17 @@
 inline intptr_t Atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest)
 {
+  if (UseLSE) {
+    register intptr_t r_add_value asm("x0") = add_value;
+    register volatile intptr_t *r_dest asm("x1") = dest;
+    register intptr_t r_result asm("x2");
+    __asm volatile(LDADDAL
+                   : [_result]"=r"(r_result)
+                   : [_add_value]"r"(r_add_value),
+                     [_dest]"r"(r_dest)
+                   : "memory");
+    return r_result+add_value;
+  }
   return __sync_add_and_fetch(dest, add_value);
 }
 
@@ -118,6 +175,18 @@
 inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
 {
+  if (UseLSE) {
+    register intptr_t r_exchange_value asm("x0") = exchange_value;
+    register volatile intptr_t *r_dest asm("x1") = dest;
+    register intptr_t r_result asm("x2");
+    __asm volatile(SWP
+                   : [_result]"=r"(r_result)
+                   : [_exchange_value]"r"(r_exchange_value),
+                     [_dest]"r"(r_dest)
+                   : "memory");
+    FULL_MEM_BARRIER;
+    return r_result;
+  }
   intptr_t res = __sync_lock_test_and_set (dest, exchange_value);
   FULL_MEM_BARRIER;
   return res;
@@ -125,11 +194,33 @@
 
 inline jlong Atomic::cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value)
 {
+  if (UseLSE) {
+    register jlong r_exchange_value asm("x0") = exchange_value;
+    register volatile jlong *r_dest asm("x1") = dest;
+    register jlong r_compare_value asm("x2") = compare_value;
+    __asm volatile(CASAL
+                   : [_compare_value]"+r"(r_compare_value)
+                   : [_exchange_value]"r"(r_exchange_value),
+                     [_dest]"r"(r_dest)
+                   : "memory");
+    return r_compare_value;
+  }
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
 }
 
 inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value)
 {
+  if (UseLSE) {
+    register intptr_t r_exchange_value asm("x0") = exchange_value;
+    register volatile intptr_t *r_dest asm("x1") = dest;
+    register intptr_t r_compare_value asm("x2") = compare_value;
+    __asm volatile(CASAL
+                   : [_compare_value]"+r"(r_compare_value)
+                   : [_exchange_value]"r"(r_exchange_value),
+                     [_dest]"r"(r_dest)
+                   : "memory");
+    return r_compare_value;
+  }
   return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
 }