/* * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * Copyright (c) 2015, Linaro Ltd. All rights reserved. * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. * */ #ifndef OS_CPU_LINUX_AARCH32_VM_ATOMIC_LINUX_AARCH32_INLINE_HPP #define OS_CPU_LINUX_AARCH32_VM_ATOMIC_LINUX_AARCH32_INLINE_HPP #include "runtime/os.hpp" #include "vm_version_aarch32.hpp" // Implementation of class atomic // various toolchains set different symbols to indicate that ARMv7 architecture is set as a target // starting from v7 use more lightweight barrier instructions #if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) #define FULL_MEM_BARRIER __asm__ __volatile__ ("dmb ish" : : : "memory") #define READ_MEM_BARRIER __asm__ __volatile__ ("dmb ish" : : : "memory") #define WRITE_MEM_BARRIER __asm__ __volatile__ ("dmb ishst" : : : "memory") #else #define FULL_MEM_BARRIER __sync_synchronize() #define READ_MEM_BARRIER __asm__ __volatile__ ("mcr p15,0,r0,c7,c10,5" : : : "memory") #define WRITE_MEM_BARRIER __asm__ __volatile__ ("mcr p15,0,r0,c7,c10,5" : : : "memory") #endif template<> template inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const { STATIC_ASSERT(8 == sizeof(T)); // have seen a few toolchains which only set a subset of appropriate defines // and as well do not provide atomic API, hence so complicated condition #if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8)) register long long res; __asm__ __volatile__ ( "ldrexd %Q[res], %R[res], [%[addr]]" : [res] "=r" (res) : [addr] "r" (reinterpret_cast(src)) : "memory"); return PrimitiveConversions::cast(res); #else return PrimitiveConversions::cast(__atomic_load_n(reinterpret_cast(src), __ATOMIC_RELAXED)); #endif } template<> template inline void Atomic::PlatformStore<8>::operator()(T store_value, T volatile* dest) const { STATIC_ASSERT(8 == sizeof(T)); // have seen a few toolchains which only set a subset of appropriate defines // and as well do not provide atomic API, hence so complicated condition #if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8)) // the below is only supported since ARMv6K, adapt otherwise register long long t1; register int t3; __asm__ __volatile__ ( "repeat_%=:\n\t" "ldrexd %Q[t1],%R[t1],[%[addr]]\n\t" "strexd %[t3],%Q[val],%R[val],[%[addr]]\n\t" "cmp %[t3],#0\n\t" "bne repeat_%=" : [t1] "=&r" (t1), [t3] "=&r" (t3) : [val] "r" (PrimitiveConversions::cast(store_value)), [addr] "r" (reinterpret_cast(dest)) : "memory"); #else __atomic_store_n(reinterpret_cast(dest), PrimitiveConversions::cast(store_value), __ATOMIC_RELAXED); #endif } template struct Atomic::PlatformAdd : Atomic::AddAndFetch > { template D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const; }; template<> template inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(I)); STATIC_ASSERT(4 == sizeof(D)); return __sync_add_and_fetch(dest, add_value); } template template inline T Atomic::PlatformXchg::operator()(T exchange_value, T volatile* dest, atomic_memory_order order) const { STATIC_ASSERT(byte_size == sizeof(T)); T res = __sync_lock_test_and_set(dest, exchange_value); FULL_MEM_BARRIER; return res; } // No direct support for cmpxchg of bytes; emulate using int. template<> struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; template<> template inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, T volatile* dest, T compare_value, atomic_memory_order order) const { STATIC_ASSERT(4 == sizeof(T)); if (order == memory_order_relaxed) { T value = compare_value; __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return value; } else { return __sync_val_compare_and_swap(dest, compare_value, exchange_value); } } template<> template inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, T volatile* dest, T compare_value, atomic_memory_order order) const { STATIC_ASSERT(8 == sizeof(T)); if (order == memory_order_relaxed) { // have seen a few toolchains which only set a subset of appropriate defines // and as well do not provide dword CAS, hence so complicated condition #if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8)) register long long old_value; register int store_result; __asm__ __volatile__ ( "mov %[res],#1\n\t" "repeat_%=:\n\t" "ldrexd %Q[old],%R[old],[%[addr]]\n\t" "cmp %Q[old], %Q[cmpr]\n\t" "ittt eq\n\t" "cmpeq %R[old], %R[cmpr]\n\t" "strexdeq %[res],%Q[exch],%R[exch],[%[addr]]\n\t" "cmpeq %[res],#1\n\t" "beq repeat_%=" : [old] "=&r" (old_value), [res] "=&r" (store_result) : [exch] "r" (exchange_value), [cmpr] "r" (compare_value), [addr] "r" (dest) : "memory"); return old_value; #else T value = compare_value; __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false, __ATOMIC_RELAXED, __ATOMIC_RELAXED); return value; #endif } else { #if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8)) register long long old_value; register int store_result; __asm__ __volatile__ ( "dmb ish\n\t" "mov %[res],#1\n\t" "repeat_%=:\n\t" "ldrexd %Q[old],%R[old],[%[addr]]\n\t" "cmp %Q[old], %Q[cmpr]\n\t" "ittt eq\n\t" "cmpeq %R[old], %R[cmpr]\n\t" "strexdeq %[res],%Q[exch],%R[exch],[%[addr]]\n\t" "cmpeq %[res],#1\n\t" "beq repeat_%=\n\t" "dmb ish" : [old] "=&r" (old_value), [res] "=&r" (store_result) : [exch] "r" (exchange_value), [cmpr] "r" (compare_value), [addr] "r" (dest) : "memory"); return old_value; #else return __sync_val_compare_and_swap(dest, compare_value, exchange_value); #endif } } #endif // OS_CPU_LINUX_AARCH32_VM_ATOMIC_LINUX_AARCH32_INLINE_HPP