/*
 * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2019 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP
#define OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

#include "orderAccess_linux_ppc.hpp"
#include "utilities/debug.hpp"

// Implementation of class atomic

//
// machine barrier instructions:
//
// - sync            two-way memory barrier, aka fence
// - lwsync          orders  Store|Store,
//                            Load|Store,
//                            Load|Load,
//                   but not Store|Load
// - eieio           orders memory accesses for device memory (only)
// - isync           invalidates speculatively executed instructions
//                   From the POWER ISA 2.06 documentation:
//                    "[...] an isync instruction prevents the execution of
//                   instructions following the isync until instructions
//                   preceding the isync have completed, [...]"
//                   From IBM's AIX assembler reference:
//                    "The isync [...] instructions causes the processor to
//                   refetch any instructions that might have been fetched
//                   prior to the isync instruction. The instruction isync
//                   causes the processor to wait for all previous instructions
//                   to complete. Then any instructions already fetched are
//                   discarded and instruction processing continues in the
//                   environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release         orders Store|Store,       (maps to lwsync)
//                           Load|Store
// - acquire         orders  Load|Store,       (maps to lwsync)
//                           Load|Load
// - fence           orders Store|Store,       (maps to sync)
//                           Load|Store,
//                           Load|Load,
//                          Store|Load
//

inline void pre_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_acquire: break;
    case memory_order_release:
    case memory_order_acq_rel: __asm__ __volatile__ ("lwsync" : : : "memory"); break;
    default /*conservative*/ : __asm__ __volatile__ ("sync"   : : : "memory"); break;
  }
}

inline void post_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_release: break;
    case memory_order_acquire:
    case memory_order_acq_rel: __asm__ __volatile__ ("isync"  : : : "memory"); break;
    default /*conservative*/ : __asm__ __volatile__ ("sync"   : : : "memory"); break;
  }
}

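// The pre_membar()/post_membar() helpers above bracket the load-reserve/
// store-conditional loops below.  Summarizing the two switch statements
// (a reading aid only, no additional semantics):
//
//   order                        before the loop    after the loop
//   memory_order_relaxed         (nothing)          (nothing)
//   memory_order_acquire         (nothing)          isync
//   memory_order_release         lwsync             (nothing)
//   memory_order_acq_rel         lwsync             isync
//   default (conservative)       sync               sync
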
template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename D, typename I>
  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
};

template<>
template<typename D, typename I>
inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}


template<>
template<typename D, typename I>
inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}

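// Usage sketch (illustrative only, not part of the original sources): callers
// reach the PlatformAdd specializations above through the generic Atomic::add
// front end in runtime/atomic.hpp.  The variable names are hypothetical and the
// argument order mirrors add_and_fetch above (dest first); consult
// runtime/atomic.hpp for the exact front-end signature.
//
//   volatile int32_t _counter = 0;
//   int32_t after = Atomic::add(&_counter, (int32_t)1);  // 'after' is the new value
//
// The lwarx/stwcx. (ldarx/stdcx.) pair is PPC's load-reserve/store-conditional:
// the store fails, and bne- retries the loop, if the reservation taken by the
// load was lost to another writer in between.
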
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

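// Note on the xchg loops above (descriptive only): the new value is stored
// unconditionally, so the only reason to branch back to the lwarx/ldarx is a
// failed stwcx./stdcx. (lost reservation).  The cmpxchg variants below, in
// contrast, first guard the loop with a plain load and compare, and branch to
// label 2 without storing when the compare fails.
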
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;
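
  // Worked example (illustrative only): for a byte at (uintptr_t)dest & 3 == 2,
  // the enclosing word is at dest & ~3 and the byte occupies bits 16..23 of the
  // loaded word on little endian (shift_amount == 2 * 8 == 16) and bits 8..15 on
  // big endian (shift_amount == ((~2) & 3) * 8 == 8); srd/clrldi below extract
  // exactly those 8 bits.  Because the loop only stores after verifying that the
  // current byte equals compare_value, xor-ing value32 with xor_value rewrites
  // that byte lane from compare_value to exchange_value and leaves the other
  // three bytes of the word unchanged.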

  unsigned int old_value, value32;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

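// Usage sketch (illustrative only, not part of the original sources): callers
// reach the PlatformCmpxchg specializations through the generic Atomic::cmpxchg
// front end in runtime/atomic.hpp.  The variable names are hypothetical and the
// argument order mirrors the platform operator above (exchange_value, dest,
// compare_value); check runtime/atomic.hpp for the exact front-end signature.
//
//   volatile int32_t _state = 0;
//   // Try the 0 -> 1 transition; cmpxchg returns the value it observed, so
//   // the transition succeeded iff 'observed == 0'.
//   int32_t observed = Atomic::cmpxchg((int32_t)1, &_state, (int32_t)0);
//   // With no explicit atomic_memory_order argument the conservative default
//   // applies: sync before and after the ll/sc loop, as noted in the comment
//   // at the top of each operator.
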
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

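// Note on the load_acquire implementation below (descriptive only): "twi 0,%0,0"
// is a trap-word-immediate with an empty trap condition, so it never traps, but
// it consumes the loaded value and therefore cannot complete before the load
// does; the following isync then discards any instructions fetched ahead of it.
// Together this keeps later memory accesses from being performed before the
// load, which yields the acquire ordering without a full lwsync.
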
template<size_t byte_size>
struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE>
{
  template <typename T>
  T operator()(const volatile T* p) const {
    T t = Atomic::load(p);
    // Use twi-isync for load_acquire (faster than lwsync).
    __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (t) : "memory");
    return t;
  }
};

#endif // OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP