1 /*
   2  * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #ifndef OS_CPU_LINUX_ARM_VM_ATOMIC_LINUX_ARM_HPP
  26 #define OS_CPU_LINUX_ARM_VM_ATOMIC_LINUX_ARM_HPP
  27 
  28 #include "runtime/os.hpp"
  29 #include "vm_version_arm.hpp"
  30 
  31 // Implementation of class atomic
  32 
  33 /*
  34  * Atomic long operations on 32-bit ARM
  35  * ARM v7 supports LDREXD/STREXD synchronization instructions so no problem.
 * ARM < v7 does not have explicit 64-bit atomic load/store capability.
  37  * However, gcc emits LDRD/STRD instructions on v5te and LDM/STM on v5t
  38  * when loading/storing 64 bits.
  39  * For non-MP machines (which is all we support for ARM < v7)
  40  * under current Linux distros these instructions appear atomic.
  41  * See section A3.5.3 of ARM Architecture Reference Manual for ARM v7.
  42  * Also, for cmpxchg64, if ARM < v7 we check for cmpxchg64 support in the
  43  * Linux kernel using _kuser_helper_version. See entry-armv.S in the Linux
  44  * kernel source or kernel_user_helpers.txt in Linux Doc.
  45  */
  46 
  47 template <>
  48 inline void Atomic::specialized_store<int64_t>(int64_t value, volatile int64_t* dest) {
  49   assert(((intx)dest & (sizeof(int64_t)-1)) == 0, "Atomic 64 bit store mis-aligned");
  50 #ifdef AARCH64
  51   *dest = value;
  52 #else
  53   (*os::atomic_store_long_func)(value, dest);
  54 #endif
  55 }
  56 
  57 template <>
  58 inline int64_t Atomic::specialized_load<int64_t>(const volatile int64_t* src) {
  59   assert(((intx)src & (sizeof(int64_t)-1)) == 0, "Atomic 64 bit load mis-aligned");
  60 #ifdef AARCH64
  61   return *src;
  62 #else
  63   return (*os::atomic_load_long_func)(src);
  64 #endif
  65 }
  66 
// As per atomic.hpp all read-modify-write operations have to provide two-way
// barrier semantics. For AARCH64 we are using load-acquire-with-reservation and
  69 // store-release-with-reservation. While load-acquire combined with store-release
  70 // do not generally form two-way barriers, their use with reservations does - the
  71 // ARMv8 architecture manual Section F "Barrier Litmus Tests" indicates they
  72 // provide sequentially consistent semantics. All we need to add is an explicit
  73 // barrier in the failure path of the cmpxchg operations (as these don't execute
  74 // the store) - arguably this may be overly cautious as there is a very low
  75 // likelihood that the hardware would pull loads/stores into the region guarded
  76 // by the reservation.
  77 //
  78 // For ARMv7 we add explicit barriers in the stubs.
  79 
  80 template <>
  81 inline int32_t Atomic::specialized_add<int32_t>(int32_t add_value, volatile int32_t* dest) {
  82 #ifdef AARCH64
  83   int32_t val;
  84   int tmp;
  85   __asm__ volatile(
  86     "1:\n\t"
  87     " ldaxr %w[val], [%[dest]]\n\t"
  88     " add %w[val], %w[val], %w[add_val]\n\t"
  89     " stlxr %w[tmp], %w[val], [%[dest]]\n\t"
  90     " cbnz %w[tmp], 1b\n\t"
  91     : [val] "=&r" (val), [tmp] "=&r" (tmp)
  92     : [add_val] "r" (add_value), [dest] "r" (dest)
  93     : "memory");
  94   return val;
  95 #else
  96   return (*os::atomic_add_func)(add_value, dest);
  97 #endif
  98 }
  99 
 100 template <>
 101 inline int32_t Atomic::specialized_xchg<int32_t>(int32_t exchange_value, volatile int32_t* dest) {
 102 #ifdef AARCH64
 103   int32_t old_val;
 104   int tmp;
 105   __asm__ volatile(
 106     "1:\n\t"
 107     " ldaxr %w[old_val], [%[dest]]\n\t"
 108     " stlxr %w[tmp], %w[new_val], [%[dest]]\n\t"
 109     " cbnz %w[tmp], 1b\n\t"
 110     : [old_val] "=&r" (old_val), [tmp] "=&r" (tmp)
 111     : [new_val] "r" (exchange_value), [dest] "r" (dest)
 112     : "memory");
 113   return old_val;
 114 #else
 115   return (*os::atomic_xchg_func)(exchange_value, dest);
 116 #endif
 117 }
 118 
 119 // The memory_order parameter is ignored - we always provide the strongest/most-conservative ordering
 120 
 121 template <>
 122 inline int32_t Atomic::specialized_cmpxchg<int32_t>(int32_t exchange_value, volatile int32_t* dest, int32_t compare_value, cmpxchg_memory_order order) {
 123 #ifdef AARCH64
 124   int32_t rv;
 125   int tmp;
 126   __asm__ volatile(
 127     "1:\n\t"
 128     " ldaxr %w[rv], [%[dest]]\n\t"
 129     " cmp %w[rv], %w[cv]\n\t"
 130     " b.ne 2f\n\t"
 131     " stlxr %w[tmp], %w[ev], [%[dest]]\n\t"
 132     " cbnz %w[tmp], 1b\n\t"
 133     " b 3f\n\t"
 134     "2:\n\t"
 135     " dmb sy\n\t"
 136     "3:\n\t"
 137     : [rv] "=&r" (rv), [tmp] "=&r" (tmp)
 138     : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value)
 139     : "memory");
 140   return rv;
 141 #else
 142   // Warning:  Arguments are swapped to avoid moving them for kernel call
 143   return (*os::atomic_cmpxchg_func)(compare_value, exchange_value, dest);
 144 #endif
 145 }
 146 
 147 template <>
 148 inline int64_t Atomic::specialized_cmpxchg<int64_t>(int64_t exchange_value, volatile int64_t* dest, int64_t compare_value, cmpxchg_memory_order order) {
 149 #ifdef AARCH64
 150   int64_t rv;
 151   int tmp;
 152   __asm__ volatile(
 153     "1:\n\t"
 154     " ldaxr %[rv], [%[dest]]\n\t"
 155     " cmp %[rv], %[cv]\n\t"
 156     " b.ne 2f\n\t"
 157     " stlxr %w[tmp], %[ev], [%[dest]]\n\t"
 158     " cbnz %w[tmp], 1b\n\t"
 159     " b 3f\n\t"
 160     "2:\n\t"
 161     " dmb sy\n\t"
 162     "3:\n\t"
 163     : [rv] "=&r" (rv), [tmp] "=&r" (tmp)
 164     : [ev] "r" (exchange_value), [dest] "r" (dest), [cv] "r" (compare_value)
 165     : "memory");
 166   return rv;
 167 #else
 168   assert(VM_Version::supports_cx8(), "64 bit atomic compare and exchange not supported on this architecture!");
 169   return (*os::atomic_cmpxchg_long_func)(compare_value, exchange_value, dest);
 170 #endif
 171 }
 172 
 173 #ifdef AARCH64
 174 template <>
 175 inline int64_t Atomic::specialized_add<int64_t>(int64_t add_value, volatile int64_t* dest) {
 176   int64_t val;
 177   int tmp;
 178   __asm__ volatile(
 179     "1:\n\t"
 180     " ldaxr %[val], [%[dest]]\n\t"
 181     " add %[val], %[val], %[add_val]\n\t"
 182     " stlxr %w[tmp], %[val], [%[dest]]\n\t"
 183     " cbnz %w[tmp], 1b\n\t"
 184     : [val] "=&r" (val), [tmp] "=&r" (tmp)
 185     : [add_val] "r" (add_value), [dest] "r" (dest)
 186     : "memory");
 187   return val;
 188 }
 189 
 190 template <>
 191 inline int64_t Atomic::specialized_xchg<int64_t>(int64_t exchange_value, volatile int64_t* dest) {
 192   int64_t old_val;
 193   int tmp;
 194   __asm__ volatile(
 195     "1:\n\t"
 196     " ldaxr %[old_val], [%[dest]]\n\t"
 197     " stlxr %w[tmp], %[new_val], [%[dest]]\n\t"
 198     " cbnz %w[tmp], 1b\n\t"
 199     : [old_val] "=&r" (old_val), [tmp] "=&r" (tmp)
 200     : [new_val] "r" (exchange_value), [dest] "r" (dest)
 201     : "memory");
 202   return old_val;
 203 }
 204 
 205 #endif
 206 
 207 #endif // OS_CPU_LINUX_ARM_VM_ATOMIC_LINUX_ARM_HPP