
src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp

rev 49898 : 8202080: Introduce ordering semantics for Atomic::add and other RMW atomics
Reviewed-by:

*** 1,8 ****
  /*
!  * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
!  * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.
--- 1,8 ----
  /*
!  * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
!  * Copyright (c) 2012, 2018 SAP SE. All rights reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.
*** 24,51 ****
   */

  #ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
  #define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP

! #ifndef _LP64
! #error "Atomic currently only impleneted for PPC64"
  #endif

  #include "utilities/debug.hpp"

  // Implementation of class atomic
  //
  // machine barrier instructions:
  //
! // - ppc_sync            two-way memory barrier, aka fence
! // - ppc_lwsync          orders  Store|Store,
  //                                Load|Store,
  //                                Load|Load,
  //                               but not Store|Load
! // - ppc_eieio           orders  memory accesses for device memory (only)
! // - ppc_isync           invalidates speculatively executed instructions
  //                       From the POWER ISA 2.06 documentation:
  //                        "[...] an isync instruction prevents the execution of
  //                       instructions following the isync until instructions
  //                       preceding the isync have completed, [...]"
  //                       From IBM's AIX assembler reference:
--- 24,51 ----
   */

  #ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
  #define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP

! #ifndef PPC64
! #error "Atomic currently only implemented for PPC64"
  #endif

  #include "utilities/debug.hpp"

  // Implementation of class atomic
  //
  // machine barrier instructions:
  //
! // - sync                two-way memory barrier, aka fence
! // - lwsync              orders  Store|Store,
  //                                Load|Store,
  //                                Load|Load,
  //                               but not Store|Load
! // - eieio               orders  memory accesses for device memory (only)
! // - isync               invalidates speculatively executed instructions
  //                       From the POWER ISA 2.06 documentation:
  //                        "[...] an isync instruction prevents the execution of
  //                       instructions following the isync until instructions
  //                       preceding the isync have completed, [...]"
  //                       From IBM's AIX assembler reference:
*** 58,72 ****
  //                       environment established by the previous instructions."
  //
  // semantic barrier instructions:
  // (as defined in orderAccess.hpp)
  //
! // - ppc_release         orders  Store|Store,       (maps to ppc_lwsync)
  //                                Load|Store
! // - ppc_acquire         orders  Load|Store,        (maps to ppc_lwsync)
  //                                Load|Load
! // - ppc_fence           orders  Store|Store,       (maps to ppc_sync)
  //                                Load|Store,
  //                                Load|Load,
  //                                Store|Load
  //
--- 58,72 ----
  //                       environment established by the previous instructions."
  //
  // semantic barrier instructions:
  // (as defined in orderAccess.hpp)
  //
! // - release             orders  Store|Store,       (maps to lwsync)
  //                                Load|Store
! // - acquire             orders  Load|Store,        (maps to lwsync)
  //                                Load|Load
! // - fence               orders  Store|Store,       (maps to sync)
  //                                Load|Store,
  //                                Load|Load,
  //                                Store|Load
  //
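Context for the hunks below: the strasm_* names spliced into the inline-assembly templates are plain string macros for the corresponding PPC instructions. Their base definitions sit just above the first changed hunk and are not part of this diff; the acquire/fence aliases visible at the top of the next hunk map onto them. A rough sketch of what the base macros presumably look like (exact spelling and padding may differ in the file):

  #define strasm_sync      "\n  sync  \n"    // full two-way barrier
  #define strasm_lwsync    "\n  lwsync\n"    // Store|Store, Load|Store, Load|Load
  #define strasm_isync     "\n  isync \n"    // discard speculatively executed instructions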
*** 77,160 ****
  #define strasm_acquire                    strasm_lwsync
  #define strasm_fence                      strasm_sync
  #define strasm_nobarrier                  ""
  #define strasm_nobarrier_clobber_memory   ""

  template<size_t byte_size>
  struct Atomic::PlatformAdd
    : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
  {
    template<typename I, typename D>
!   D add_and_fetch(I add_value, D volatile* dest) const;
  };

  template<>
  template<typename I, typename D>
! inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
    STATIC_ASSERT(4 == sizeof(I));
    STATIC_ASSERT(4 == sizeof(D));

    D result;

    __asm__ __volatile__ (
-     strasm_lwsync
      "1: lwarx   %0,  0, %2    \n"
      "   add     %0, %0, %1    \n"
      "   stwcx.  %0,  0, %2    \n"
      "   bne-    1b            \n"
-     strasm_isync
      : /*%0*/"=&r" (result)
      : /*%1*/"r" (add_value), /*%2*/"r" (dest)
      : "cc", "memory" );

    return result;
  }

  template<>
  template<typename I, typename D>
! inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) const {
    STATIC_ASSERT(8 == sizeof(I));
    STATIC_ASSERT(8 == sizeof(D));

    D result;

    __asm__ __volatile__ (
-     strasm_lwsync
      "1: ldarx   %0,  0, %2    \n"
      "   add     %0, %0, %1    \n"
      "   stdcx.  %0,  0, %2    \n"
      "   bne-    1b            \n"
-     strasm_isync
      : /*%0*/"=&r" (result)
      : /*%1*/"r" (add_value), /*%2*/"r" (dest)
      : "cc", "memory" );

    return result;
  }

  template<>
  template<typename T>
  inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
!                                              T volatile* dest) const {
!   STATIC_ASSERT(4 == sizeof(T));
    // Note that xchg doesn't necessarily do an acquire
    // (see synchronizer.cpp).

    T old_value;
    const uint64_t zero = 0;

    __asm__ __volatile__ (
-     /* lwsync */
-     strasm_lwsync
      /* atomic loop */
      "1:                                                 \n"
      "   lwarx   %[old_value], %[dest], %[zero]          \n"
      "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
      "   bne-    1b                                      \n"
-     /* isync */
-     strasm_sync
      /* exit */
      "2:                                                 \n"
      /* out */
      : [old_value]       "=&r"   (old_value),
                          "=m"    (*dest)
--- 77,187 ----
  #define strasm_acquire                    strasm_lwsync
  #define strasm_fence                      strasm_sync
  #define strasm_nobarrier                  ""
  #define strasm_nobarrier_clobber_memory   ""

+ inline void pre_membar(atomic_memory_order order) {
+   switch (order) {
+     case memory_order_relaxed:
+     case memory_order_consume:
+     case memory_order_acquire:  break;
+     case memory_order_release:
+     case memory_order_acq_rel:  __asm__ __volatile__ (strasm_lwsync); break;
+     default /*conservative*/ :  __asm__ __volatile__ (strasm_sync);   break;
+   }
+ }
+
+ inline void post_membar(atomic_memory_order order) {
+   switch (order) {
+     case memory_order_relaxed:
+     case memory_order_consume:
+     case memory_order_release:  break;
+     case memory_order_acquire:
+     case memory_order_acq_rel:  __asm__ __volatile__ (strasm_isync);  break;
+     default /*conservative*/ :  __asm__ __volatile__ (strasm_sync);   break;
+   }
+ }
+
  template<size_t byte_size>
  struct Atomic::PlatformAdd
    : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
  {
    template<typename I, typename D>
!   D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
  };

  template<>
  template<typename I, typename D>
! inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
!                                                atomic_memory_order order) const {
    STATIC_ASSERT(4 == sizeof(I));
    STATIC_ASSERT(4 == sizeof(D));

    D result;

+   pre_membar(order);
+
    __asm__ __volatile__ (
      "1: lwarx   %0,  0, %2    \n"
      "   add     %0, %0, %1    \n"
      "   stwcx.  %0,  0, %2    \n"
      "   bne-    1b            \n"
      : /*%0*/"=&r" (result)
      : /*%1*/"r" (add_value), /*%2*/"r" (dest)
      : "cc", "memory" );

+   post_membar(order);
+
    return result;
  }

  template<>
  template<typename I, typename D>
! inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
!                                                atomic_memory_order order) const {
    STATIC_ASSERT(8 == sizeof(I));
    STATIC_ASSERT(8 == sizeof(D));

    D result;

+   pre_membar(order);
+
    __asm__ __volatile__ (
      "1: ldarx   %0,  0, %2    \n"
      "   add     %0, %0, %1    \n"
      "   stdcx.  %0,  0, %2    \n"
      "   bne-    1b            \n"
      : /*%0*/"=&r" (result)
      : /*%1*/"r" (add_value), /*%2*/"r" (dest)
      : "cc", "memory" );

+   post_membar(order);
+
    return result;
  }

  template<>
  template<typename T>
  inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
!                                              T volatile* dest,
!                                              atomic_memory_order order) const {
    // Note that xchg doesn't necessarily do an acquire
    // (see synchronizer.cpp).

    T old_value;
    const uint64_t zero = 0;

+   pre_membar(order);
+
    __asm__ __volatile__ (
      /* atomic loop */
      "1:                                                 \n"
      "   lwarx   %[old_value], %[dest], %[zero]          \n"
      "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
      "   bne-    1b                                      \n"
      /* exit */
      "2:                                                 \n"
      /* out */
      : [old_value]       "=&r"   (old_value),
                          "=m"    (*dest)
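To make the effect of the new order parameter concrete, here is a call-site sketch. It assumes the atomic.hpp front end from this change (not shown in this file) forwards an atomic_memory_order argument that defaults to the conservative ordering; the call shapes are illustrative, only the barrier selection follows directly from the pre_membar/post_membar switches above:

  volatile int _counter = 0;

  void bump() {
    // Default (conservative) ordering: pre_membar emits sync and
    // post_membar emits sync around the lwarx/add/stwcx. loop.
    Atomic::add(1, &_counter);

    // Relaxed ordering: both pre_membar and post_membar are no-ops,
    // so only the ll/sc retry loop itself is executed.
    Atomic::add(1, &_counter, memory_order_relaxed);

    // acq_rel: lwsync before the loop (release), isync after it (acquire).
    Atomic::add(1, &_counter, memory_order_acq_rel);
  }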
*** 166,199 ****
      /* clobber */
      : "cc", "memory" );

    return old_value;
  }

  template<>
  template<typename T>
  inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
!                                              T volatile* dest) const {
    STATIC_ASSERT(8 == sizeof(T));
    // Note that xchg doesn't necessarily do an acquire
    // (see synchronizer.cpp).

    T old_value;
    const uint64_t zero = 0;

    __asm__ __volatile__ (
-     /* lwsync */
-     strasm_lwsync
      /* atomic loop */
      "1:                                                 \n"
      "   ldarx   %[old_value], %[dest], %[zero]          \n"
      "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
      "   bne-    1b                                      \n"
-     /* isync */
-     strasm_sync
      /* exit */
      "2:                                                 \n"
      /* out */
      : [old_value]       "=&r"   (old_value),
                          "=m"    (*dest)
--- 193,227 ----
      /* clobber */
      : "cc", "memory" );

+   post_membar(order);
+
    return old_value;
  }

  template<>
  template<typename T>
  inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
!                                              T volatile* dest,
!                                              atomic_memory_order order) const {
    STATIC_ASSERT(8 == sizeof(T));
    // Note that xchg doesn't necessarily do an acquire
    // (see synchronizer.cpp).

    T old_value;
    const uint64_t zero = 0;

+   pre_membar(order);
+
    __asm__ __volatile__ (
      /* atomic loop */
      "1:                                                 \n"
      "   ldarx   %[old_value], %[dest], %[zero]          \n"
      "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
      "   bne-    1b                                      \n"
      /* exit */
      "2:                                                 \n"
      /* out */
      : [old_value]       "=&r"   (old_value),
                          "=m"    (*dest)
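For readers unfamiliar with the PPC ll/sc idiom used in the xchg and add loops: lwarx/ldarx load a value and set a reservation, stwcx./stdcx. store only if the reservation still holds, and bne- retries otherwise. A rough C-level equivalent of the 4-byte exchange loop, using a GCC __sync builtin to stand in for the store-conditional (illustrative only, not how this port is actually built):

  // Keep re-reading the location and attempting to publish exchange_value
  // until the compare-and-swap (modelling stwcx.) succeeds; the value read
  // in the successful round is returned, as in PlatformXchg above.
  static inline int xchg_sketch(int exchange_value, volatile int* dest) {
    int old_value;
    do {
      old_value = *dest;  // corresponds to lwarx
    } while (!__sync_bool_compare_and_swap(dest, old_value, exchange_value));
    return old_value;
  }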
*** 205,241 ****
      /* clobber */
      : "cc", "memory" );

!   return old_value;
! }
!
! inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
!   if (order != memory_order_relaxed) {
!     __asm__ __volatile__ (
!       /* fence */
!       strasm_sync
!       );
!   }
! }
!
! inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
!   if (order != memory_order_relaxed) {
!     __asm__ __volatile__ (
!       /* fence */
!       strasm_sync
!       );
!   }
  }

  template<>
  template<typename T>
  inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                  T volatile* dest,
                                                  T compare_value,
!                                                 cmpxchg_memory_order order) const {
    STATIC_ASSERT(1 == sizeof(T));

    // Note that cmpxchg guarantees a two-way memory barrier across
    // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not
    // specified otherwise (see atomic.hpp).
--- 233,253 ----
      /* clobber */
      : "cc", "memory" );

!   post_membar(order);
!
!   return old_value;
  }

  template<>
  template<typename T>
  inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                  T volatile* dest,
                                                  T compare_value,
!                                                 atomic_memory_order order) const {
    STATIC_ASSERT(1 == sizeof(T));

    // Note that cmpxchg guarantees a two-way memory barrier across
    // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not
    // specified otherwise (see atomic.hpp).
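The hunk above also changes the barrier strength around cmpxchg: the removed cmpxchg_pre_membar/cmpxchg_post_membar only distinguished memory_order_relaxed (no barriers) from everything else (a full sync on both sides), whereas the new pre_membar/post_membar pick the weakest barrier that still provides the requested semantics. Summarised from the two switch statements earlier in the new file:

  requested order                 pre_membar   post_membar
  memory_order_relaxed            (none)       (none)
  memory_order_consume            (none)       (none)
  memory_order_acquire            (none)       isync
  memory_order_release            lwsync       (none)
  memory_order_acq_rel            lwsync       isync
  default (conservative)          sync         sync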
*** 252,262 ****
               masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
               xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

    unsigned int old_value, value32;

!   cmpxchg_pre_membar(order);

    __asm__ __volatile__ (
      /* simple guard */
      "   lbz     %[old_value], 0(%[dest])                  \n"
      "   cmpw    %[masked_compare_val], %[old_value]       \n"
--- 264,274 ----
               masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
               xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

    unsigned int old_value, value32;

!   pre_membar(order);

    __asm__ __volatile__ (
      /* simple guard */
      "   lbz     %[old_value], 0(%[dest])                  \n"
      "   cmpw    %[masked_compare_val], %[old_value]       \n"
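The context lines above hint at how the 1-byte variant works: PPC has no byte-sized load-reserve/store-conditional, so the byte is compared and exchanged by operating on the aligned 32-bit word that contains it (the masked values and xor_value feed that word-sized loop, and the lbz/cmpw "simple guard" rejects obviously failing attempts early). A rough, hedged C-level sketch of the general technique, not the exact register-level sequence, most of which is unchanged context and therefore not shown in this hunk:

  #include <stdint.h>

  // Emulate a 1-byte CAS on top of a 4-byte CAS over the containing aligned
  // word. __sync_bool_compare_and_swap stands in for the lwarx/stwcx. loop;
  // byte placement within the word is simplified (big-endian AIX indexes the
  // byte from the other end of the word).
  static unsigned char byte_cmpxchg_sketch(volatile unsigned char* dest,
                                           unsigned char compare_value,
                                           unsigned char exchange_value) {
    volatile uint32_t* aligned = (volatile uint32_t*)((uintptr_t)dest & ~(uintptr_t)3);
    unsigned int shift = (unsigned int)((uintptr_t)dest & 3) * 8;  // byte position in the word
    for (;;) {
      uint32_t old_word = *aligned;
      unsigned char old_byte = (unsigned char)(old_word >> shift);
      if (old_byte != compare_value) {
        return old_byte;                                           // the "simple guard"
      }
      uint32_t new_word = (old_word & ~((uint32_t)0xFF << shift)) |
                          ((uint32_t)exchange_value << shift);
      if (__sync_bool_compare_and_swap(aligned, old_word, new_word)) {
        return old_byte;
      }
    }
  }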
*** 291,321 ****
      /* clobber */
      : "cc", "memory" );

!   cmpxchg_post_membar(order);

    return PrimitiveConversions::cast<T>((unsigned char)old_value);
  }

  template<>
  template<typename T>
  inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                  T volatile* dest,
                                                  T compare_value,
!                                                 cmpxchg_memory_order order) const {
    STATIC_ASSERT(4 == sizeof(T));

    // Note that cmpxchg guarantees a two-way memory barrier across
    // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not
    // specified otherwise (see atomic.hpp).

    T old_value;
    const uint64_t zero = 0;

!   cmpxchg_pre_membar(order);

    __asm__ __volatile__ (
      /* simple guard */
      "   lwz     %[old_value], 0(%[dest])                \n"
      "   cmpw    %[compare_value], %[old_value]          \n"
--- 303,333 ----
      /* clobber */
      : "cc", "memory" );

!   post_membar(order);

    return PrimitiveConversions::cast<T>((unsigned char)old_value);
  }

  template<>
  template<typename T>
  inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                  T volatile* dest,
                                                  T compare_value,
!                                                 atomic_memory_order order) const {
    STATIC_ASSERT(4 == sizeof(T));

    // Note that cmpxchg guarantees a two-way memory barrier across
    // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not
    // specified otherwise (see atomic.hpp).

    T old_value;
    const uint64_t zero = 0;

!   pre_membar(order);

    __asm__ __volatile__ (
      /* simple guard */
      "   lwz     %[old_value], 0(%[dest])                \n"
      "   cmpw    %[compare_value], %[old_value]          \n"
--- 303,333 ----
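A motivating call-site pattern for the weaker orderings is a CAS retry loop in which failed attempts need no fencing at all. A minimal sketch, assuming the Atomic::cmpxchg front end keeps its (exchange_value, dest, compare_value) argument order and gains the optional order parameter introduced by this change:

  volatile int _flags = 0;

  void set_flag_bit(int bit) {
    int old_flags;
    do {
      old_flags = _flags;
      // A relaxed CAS avoids paying for two sync instructions on every
      // failed iteration; callers that publish data through the flag word
      // would use a stronger order (or rely on the conservative default).
    } while (Atomic::cmpxchg(old_flags | bit, &_flags, old_flags,
                             memory_order_relaxed) != old_flags);
  }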
*** 341,371 ****
      /* clobber */
      : "cc", "memory" );

!   cmpxchg_post_membar(order);

    return old_value;
  }

  template<>
  template<typename T>
  inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                  T volatile* dest,
                                                  T compare_value,
!                                                 cmpxchg_memory_order order) const {
    STATIC_ASSERT(8 == sizeof(T));

    // Note that cmpxchg guarantees a two-way memory barrier across
    // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not
    // specified otherwise (see atomic.hpp).

    T old_value;
    const uint64_t zero = 0;

!   cmpxchg_pre_membar(order);

    __asm__ __volatile__ (
      /* simple guard */
      "   ld      %[old_value], 0(%[dest])                \n"
      "   cmpd    %[compare_value], %[old_value]          \n"
--- 353,383 ----
      /* clobber */
      : "cc", "memory" );

!   post_membar(order);

    return old_value;
  }

  template<>
  template<typename T>
  inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                  T volatile* dest,
                                                  T compare_value,
!                                                 atomic_memory_order order) const {
    STATIC_ASSERT(8 == sizeof(T));

    // Note that cmpxchg guarantees a two-way memory barrier across
    // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not
    // specified otherwise (see atomic.hpp).

    T old_value;
    const uint64_t zero = 0;

!   pre_membar(order);

    __asm__ __volatile__ (
      /* simple guard */
      "   ld      %[old_value], 0(%[dest])                \n"
      "   cmpd    %[compare_value], %[old_value]          \n"
*** 391,401 ****
      /* clobber */
      : "cc", "memory" );

!   cmpxchg_post_membar(order);

    return old_value;
  }

  #undef strasm_sync
--- 403,413 ----
      /* clobber */
      : "cc", "memory" );

!   post_membar(order);

    return old_value;
  }

  #undef strasm_sync