--- /dev/null	2018-09-25 19:25:43.000000000 +0300
+++ new/src/hotspot/os_cpu/linux_aarch32/atomic_linux_aarch32.hpp	2018-09-25 19:25:43.000000000 +0300
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2015, Linaro Ltd. All rights reserved.
+ * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef OS_CPU_LINUX_AARCH32_VM_ATOMIC_LINUX_AARCH32_INLINE_HPP
+#define OS_CPU_LINUX_AARCH32_VM_ATOMIC_LINUX_AARCH32_INLINE_HPP
+
+#include "runtime/os.hpp"
+#include "vm_version_aarch32.hpp"
+
+// Implementation of class atomic
+
+// Various toolchains set different symbols to indicate that ARMv7 is the
+// target architecture. Starting from v7, use the more lightweight DMB
+// barrier instructions; before that, fall back to the CP15 barrier.
+#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__)
+#define FULL_MEM_BARRIER  __asm__ __volatile__ ("dmb ish"   : : : "memory")
+#define READ_MEM_BARRIER  __asm__ __volatile__ ("dmb ish"   : : : "memory")
+#define WRITE_MEM_BARRIER __asm__ __volatile__ ("dmb ishst" : : : "memory")
+#else
+#define FULL_MEM_BARRIER  __sync_synchronize()
+// CP15 c7, c10, 5 is the pre-v7 encoding of the data memory barrier.
+#define READ_MEM_BARRIER  __asm__ __volatile__ ("mcr p15,0,r0,c7,c10,5" : : : "memory")
+#define WRITE_MEM_BARRIER __asm__ __volatile__ ("mcr p15,0,r0,c7,c10,5" : : : "memory")
+#endif
+
+template<>
+template<typename T>
+inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  // Some toolchains set only a subset of the appropriate defines and also do
+  // not provide the __atomic API, hence the complicated condition below.
+#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8))
+  // LDREXD reads a doubleword atomically (available since ARMv6K).
+  register long long res;
+  __asm__ __volatile__ (
+      "ldrexd %Q[res], %R[res], [%[addr]]"
+      : [res] "=r" (res)
+      : [addr] "r" (reinterpret_cast<const volatile long long*>(src))
+      : "memory");
+  return PrimitiveConversions::cast<T>(res);
+#else
+  return PrimitiveConversions::cast<T>(
+      __atomic_load_n(reinterpret_cast<const volatile long long*>(src),
+                      __ATOMIC_RELAXED));
+#endif
+}
+
+template<>
+template<typename T>
+inline void Atomic::PlatformStore<8>::operator()(T store_value,
+                                                 T volatile* dest) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  // Some toolchains set only a subset of the appropriate defines and also do
+  // not provide the __atomic API, hence the complicated condition below.
+#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8))
+  // The LDREXD/STREXD sequence below is only supported since ARMv6K; adapt otherwise.
+  register long long t1;
+  register int t3;
+  __asm__ __volatile__ (
+      "repeat_%=:\n\t"
+      "ldrexd %Q[t1],%R[t1],[%[addr]]\n\t"
+      "strexd %[t3],%Q[val],%R[val],[%[addr]]\n\t"
+      "cmp %[t3],#0\n\t"
+      "bne repeat_%="
+      : [t1] "=&r" (t1),
+        [t3] "=&r" (t3)
+      : [val] "r" (PrimitiveConversions::cast<long long>(store_value)),
+        [addr] "r" (reinterpret_cast<volatile long long*>(dest))
+      : "memory");
+#else
+  __atomic_store_n(reinterpret_cast<volatile long long*>(dest),
+                   PrimitiveConversions::cast<long long>(store_value),
+                   __ATOMIC_RELAXED);
+#endif
+}
+
+template<size_t byte_size>
+struct Atomic::PlatformAdd
+  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
+{
+  template<typename I, typename D>
+  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
+};
+
+template<>
+template<typename I, typename D>
+inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
+                                               atomic_memory_order order) const {
+  STATIC_ASSERT(4 == sizeof(I));
+  STATIC_ASSERT(4 == sizeof(D));
+  return __sync_add_and_fetch(dest, add_value);
+}
+
+template<size_t byte_size>
+template<typename T>
+inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value,
+                                                     T volatile* dest,
+                                                     atomic_memory_order order) const {
+  STATIC_ASSERT(byte_size == sizeof(T));
+  T res = __sync_lock_test_and_set(dest, exchange_value);
+  FULL_MEM_BARRIER;
+  return res;
+}
+
+// No direct support for cmpxchg of bytes; emulate using int.
+template<>
+struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
+
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                atomic_memory_order order) const {
+  STATIC_ASSERT(4 == sizeof(T));
+  if (order == memory_order_relaxed) {
+    T value = compare_value;
+    __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false,
+                              __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+    return value;
+  } else {
+    return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
+  }
+}
+
+template<>
+template<typename T>
+inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
+                                                T volatile* dest,
+                                                T compare_value,
+                                                atomic_memory_order order) const {
+  STATIC_ASSERT(8 == sizeof(T));
+  if (order == memory_order_relaxed) {
+    // Some toolchains set only a subset of the appropriate defines and also do
+    // not provide a doubleword CAS builtin, hence the complicated condition below.
+#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8))
+    register long long old_value;
+    register int store_result;
+    __asm__ __volatile__ (
+        "mov %[res],#1\n\t"
+        "repeat_%=:\n\t"
+        "ldrexd %Q[old],%R[old],[%[addr]]\n\t"
+        "cmp %Q[old], %Q[cmpr]\n\t"
+        "ittt eq\n\t"
+        "cmpeq %R[old], %R[cmpr]\n\t"
+        "strexdeq %[res],%Q[exch],%R[exch],[%[addr]]\n\t"
+        "cmpeq %[res],#1\n\t"
+        "beq repeat_%="
+        : [old] "=&r" (old_value),
+          [res] "=&r" (store_result)
+        : [exch] "r" (exchange_value),
+          [cmpr] "r" (compare_value),
+          [addr] "r" (dest)
+        : "memory");
+    return old_value;
+#else
+    T value = compare_value;
+    __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false,
+                              __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+    return value;
+#endif
+  } else {
+#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8))
+    register long long old_value;
+    register int store_result;
+    __asm__ __volatile__ (
+        "dmb ish\n\t"
+        "mov %[res],#1\n\t"
+        "repeat_%=:\n\t"
+        "ldrexd %Q[old],%R[old],[%[addr]]\n\t"
+        "cmp %Q[old], %Q[cmpr]\n\t"
+        "ittt eq\n\t"
+        "cmpeq %R[old], %R[cmpr]\n\t"
+        "strexdeq %[res],%Q[exch],%R[exch],[%[addr]]\n\t"
+        "cmpeq %[res],#1\n\t"
+        "beq repeat_%=\n\t"
+        "dmb ish"
+        : [old] "=&r" (old_value),
+          [res] "=&r" (store_result)
+        : [exch] "r" (exchange_value),
+          [cmpr] "r" (compare_value),
+          [addr] "r" (dest)
+        : "memory");
+    return old_value;
+#else
+    return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
+#endif
+  }
+}
+
+#endif // OS_CPU_LINUX_AARCH32_VM_ATOMIC_LINUX_AARCH32_INLINE_HPP
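
For reviewers who want to exercise the intended semantics of these hooks off-target, the following is a hypothetical, standalone C++ sketch (not part of the patch). It mirrors only the builtin-based fallback paths of the header, using the same GCC/Clang builtins the patch relies on (__atomic_load_n, __atomic_store_n, __atomic_compare_exchange, __sync_add_and_fetch, __sync_lock_test_and_set, __sync_val_compare_and_swap, __sync_synchronize); the helper names such as relaxed_load64 and cmpxchg64 are illustrative and do not exist in HotSpot.

// Hypothetical sketch mirroring the fallback paths of atomic_linux_aarch32.hpp.
#include <cstdint>
#include <cstdio>

// 64-bit relaxed load/store, as in the PlatformLoad<8>/PlatformStore<8> fallbacks.
static int64_t relaxed_load64(const volatile int64_t* src) {
  return __atomic_load_n(src, __ATOMIC_RELAXED);
}
static void relaxed_store64(volatile int64_t* dest, int64_t value) {
  __atomic_store_n(dest, value, __ATOMIC_RELAXED);
}

// Full-barrier add, as in PlatformAdd<4>::add_and_fetch.
static int32_t add_and_fetch32(volatile int32_t* dest, int32_t add_value) {
  return __sync_add_and_fetch(dest, add_value);
}

// Exchange followed by a full barrier, as in PlatformXchg.
static int32_t xchg32(volatile int32_t* dest, int32_t exchange_value) {
  int32_t res = __sync_lock_test_and_set(dest, exchange_value); // acquire semantics only
  __sync_synchronize();                                         // FULL_MEM_BARRIER equivalent
  return res;
}

// Compare-and-swap returning the old value, as in PlatformCmpxchg<8>:
// relaxed order uses __atomic_compare_exchange, anything stronger falls back
// to the conservative (full-barrier) __sync builtin.
static int64_t cmpxchg64(volatile int64_t* dest, int64_t compare_value,
                         int64_t exchange_value, bool relaxed) {
  if (relaxed) {
    int64_t value = compare_value;
    __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false,
                              __ATOMIC_RELAXED, __ATOMIC_RELAXED);
    return value;  // holds the old value whether or not the exchange happened
  }
  return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
}

int main() {
  volatile int64_t v = 0;
  relaxed_store64(&v, 42);
  long long old = cmpxchg64(&v, 42, 7, /*relaxed=*/true);   // succeeds, returns 42
  std::printf("cas old=%lld now=%lld\n", old, (long long)relaxed_load64(&v));

  volatile int32_t c = 0;
  std::printf("add=%d xchg old=%d\n", add_and_fetch32(&c, 5), xchg32(&c, 9));
  return 0;
}

On a 32-bit ARM host the 64-bit __atomic calls may be lowered to libatomic (link with -latomic if needed); the LDREXD/STREXD inline-assembly paths in the patch exist precisely for toolchains that set only a subset of the architecture defines or do not provide these builtins at all.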