/*
 * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, Red Hat Inc. All rights reserved.
 * Copyright (c) 2015, Linaro Ltd. All rights reserved.
 * Copyright (c) 2015-2018, Azul Systems, Inc. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_AARCH32_VM_ATOMIC_LINUX_AARCH32_INLINE_HPP
#define OS_CPU_LINUX_AARCH32_VM_ATOMIC_LINUX_AARCH32_INLINE_HPP

#include "runtime/os.hpp"
#include "vm_version_aarch32.hpp"

// Implementation of class atomic

// Various toolchains define different macros to indicate that ARMv7 is the target architecture.
// Starting from v7, the more lightweight barrier instructions can be used.
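// On ARMv7+ "dmb ish" is a full barrier and "dmb ishst" orders only stores within the
// inner shareable domain.  Older cores fall back to __sync_synchronize() and the CP15
// c7,c10,5 operation, which is the legacy ARMv6 encoding of a data memory barrier.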
#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__)
#define FULL_MEM_BARRIER  __asm__ __volatile__ ("dmb ish"   : : : "memory")
#define READ_MEM_BARRIER  __asm__ __volatile__ ("dmb ish"   : : : "memory")
#define WRITE_MEM_BARRIER __asm__ __volatile__ ("dmb ishst" : : : "memory")
#else
#define FULL_MEM_BARRIER  __sync_synchronize()
#define READ_MEM_BARRIER  __asm__ __volatile__ ("mcr p15,0,r0,c7,c10,5" : : : "memory")
#define WRITE_MEM_BARRIER __asm__ __volatile__ ("mcr p15,0,r0,c7,c10,5" : : : "memory")
#endif

template<>
template<typename T>
inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
  STATIC_ASSERT(8 == sizeof(T));
// We have seen a few toolchains that set only a subset of the appropriate defines
// and also do not provide the atomic API, hence the complicated condition below.
#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8))
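  // A single LDREXD is a single-copy atomic 64-bit load on ARMv6K and later; no
  // matching store-exclusive is needed just to read the value atomically.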
  register long long res;
  __asm__ __volatile__ (
      "ldrexd %Q[res], %R[res], [%[addr]]"
      : [res] "=r" (res)
      : [addr] "r" (reinterpret_cast<const volatile jlong*>(src))
      : "memory");
  return PrimitiveConversions::cast<T>(res);
#else
  return PrimitiveConversions::cast<T>(__atomic_load_n(reinterpret_cast<const volatile jlong*>(src),
                                                       __ATOMIC_RELAXED));
#endif
}

template<>
template<typename T>
inline void Atomic::PlatformStore<8>::operator()(T store_value,
                                                 T volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(T));
// We have seen a few toolchains that set only a subset of the appropriate defines
// and also do not provide the atomic API, hence the complicated condition below.
#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8))
  // The exclusive-access instructions below are only supported since ARMv6K; adapt otherwise.
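  // A plain STRD is not guaranteed to be single-copy atomic, so the 64-bit store is done
  // with an LDREXD/STREXD retry loop; STREXD writes 0 to t3 when the store succeeds.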
  register long long t1;
  register int t3;
  __asm__ __volatile__ (
      "repeat_%=:\n\t"
      "ldrexd %Q[t1],%R[t1],[%[addr]]\n\t"
      "strexd %[t3],%Q[val],%R[val],[%[addr]]\n\t"
      "cmp %[t3],#0\n\t"
      "bne repeat_%="
      : [t1] "=&r" (t1),
        [t3] "=&r" (t3)
      : [val] "r" (PrimitiveConversions::cast<jlong>(store_value)),
        [addr] "r" (reinterpret_cast<volatile jlong*>(dest))
      : "memory");
#else
  __atomic_store_n(reinterpret_cast<volatile jlong*>(dest),
                   PrimitiveConversions::cast<jlong>(store_value), __ATOMIC_RELAXED);
#endif
}

template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};

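// __sync_add_and_fetch implies a full memory barrier, so the conservative default
// ordering is provided regardless of the requested order.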
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));
  return __sync_add_and_fetch(dest, add_value);
}

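// __sync_lock_test_and_set is an atomic exchange, but it is only an acquire barrier,
// so an explicit FULL_MEM_BARRIER is needed afterwards to get full-fence semantics.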
template<size_t byte_size>
template<typename T>
inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(byte_size == sizeof(T));
  T res = __sync_lock_test_and_set(dest, exchange_value);
  FULL_MEM_BARRIER;
  return res;
}

// No direct support for cmpxchg of bytes; emulate using int.
template<>
struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};

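// For memory_order_relaxed use the __atomic builtin with relaxed ordering; any stronger
// order falls back to __sync_val_compare_and_swap, which implies a full barrier.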
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));
  if (order == memory_order_relaxed) {
    T value = compare_value;
    __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false,
                              __ATOMIC_RELAXED, __ATOMIC_RELAXED);
    return value;
  } else {
    return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
  }
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));
  if (order == memory_order_relaxed) {
// We have seen a few toolchains that set only a subset of the appropriate defines
// and also do not provide a doubleword CAS, hence the complicated condition below.
#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8))
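    // LDREXD/STREXD compare-and-swap loop: the %Q and %R operand modifiers select the
    // low and high words of a 64-bit operand.  STREXD is attempted only when both halves
    // match compare_value and writes 0 to res on success, which ends the loop.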
    register long long old_value;
    register int store_result;
    __asm__ __volatile__ (
      "mov %[res],#1\n\t"
      "repeat_%=:\n\t"
      "ldrexd %Q[old],%R[old],[%[addr]]\n\t"
      "cmp %Q[old], %Q[cmpr]\n\t"
      "ittt eq\n\t"
      "cmpeq %R[old], %R[cmpr]\n\t"
      "strexdeq %[res],%Q[exch],%R[exch],[%[addr]]\n\t"
      "cmpeq %[res],#1\n\t"
      "beq repeat_%="
      : [old] "=&r" (old_value),
        [res] "=&r" (store_result)
      : [exch] "r" (exchange_value),
        [cmpr] "r" (compare_value),
        [addr] "r" (dest)
      : "memory");
    return old_value;
#else
    T value = compare_value;
    __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false,
                              __ATOMIC_RELAXED, __ATOMIC_RELAXED);
    return value;
#endif
  } else {
#if (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6K__) || (defined(__ARM_FEATURE_LDREX) && (__ARM_FEATURE_LDREX & 8))
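    // Same LDREXD/STREXD loop as the relaxed case, bracketed by "dmb ish" barriers to
    // provide the conservative (fully fenced) ordering.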
    register long long old_value;
    register int store_result;
    __asm__ __volatile__ (
      "dmb ish\n\t"
      "mov %[res],#1\n\t"
      "repeat_%=:\n\t"
      "ldrexd %Q[old],%R[old],[%[addr]]\n\t"
      "cmp %Q[old], %Q[cmpr]\n\t"
      "ittt eq\n\t"
      "cmpeq %R[old], %R[cmpr]\n\t"
      "strexdeq %[res],%Q[exch],%R[exch],[%[addr]]\n\t"
      "cmpeq %[res],#1\n\t"
      "beq repeat_%=\n\t"
      "dmb ish"
      : [old] "=&r" (old_value),
        [res] "=&r" (store_result)
      : [exch] "r" (exchange_value),
        [cmpr] "r" (compare_value),
        [addr] "r" (dest)
      : "memory");
    return old_value;
#else
    return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
#endif
  }
}

#endif // OS_CPU_LINUX_AARCH32_VM_ATOMIC_LINUX_AARCH32_INLINE_HPP