/*
 * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP
#define OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

// Implementation of class atomic

//
// machine barrier instructions:
//
// - sync            two-way memory barrier, aka fence
// - lwsync          orders  Store|Store,
//                            Load|Store,
//                            Load|Load,
//                   but not Store|Load
// - eieio           orders memory accesses for device memory (only)
// - isync           invalidates speculatively executed instructions
//                   From the POWER ISA 2.06 documentation:
//                    "[...] an isync instruction prevents the execution of
//                   instructions following the isync until instructions
//                   preceding the isync have completed, [...]"
//                   From IBM's AIX assembler reference:
//                    "The isync [...] instructions causes the processor to
//                   refetch any instructions that might have been fetched
//                   prior to the isync instruction. The instruction isync
//                   causes the processor to wait for all previous instructions
//                   to complete. Then any instructions already fetched are
//                   discarded and instruction processing continues in the
//                   environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release         orders Store|Store,       (maps to lwsync)
//                           Load|Store
// - acquire         orders  Load|Store,       (maps to lwsync)
//                           Load|Load
// - fence           orders Store|Store,       (maps to sync)
//                           Load|Store,
//                           Load|Load,
//                          Store|Load
//

#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""
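
// For illustration only (not used by this file): the strasm_* macros are plain
// string literals, so they can be concatenated directly into an asm template.
// A hypothetical release store written with them might look like
//
//   __asm__ __volatile__ (strasm_release
//                         "  stw  %1, 0(%0)  \n"
//                         : : /*%0*/"b" (dest), /*%1*/"r" (value) : "memory");
//
// The pre_membar()/post_membar() helpers below emit the same barriers, but
// selected at run time from the requested atomic_memory_order.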

inline void pre_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_acquire: break;
    case memory_order_release:
    case memory_order_acq_rel: __asm__ __volatile__ (strasm_lwsync); break;
    default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
  }
}

inline void post_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_release: break;
    case memory_order_acquire:
    case memory_order_acq_rel: __asm__ __volatile__ (strasm_isync); break;
    default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
  }
}
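
// Sketch of the pattern used by every operation below (see the actual
// specializations that follow): the requested atomic_memory_order is mapped
// to machine barriers around a load-reserve/store-conditional retry loop,
//
//   pre_membar(order);   // lwsync for release/acq_rel, sync for the default (conservative)
//   // 1: l[wd]arx ... ; st[wd]cx. ... ; bne- 1b
//   post_membar(order);  // isync for acquire/acq_rel, sync for the default (conservative)
//
// so memory_order_relaxed emits no barrier at all on either side.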


template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}
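
// For illustration (hypothetical caller, not part of this file): the 4-byte
// specialization above backs calls such as
//
//   volatile int32_t counter = 0;
//   int32_t now = Atomic::add(1, &counter);
//
// and returns the value *after* the addition, matching add_and_fetch.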


template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}
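
// The 8-byte variant above is the same retry loop with ldarx/stdcx. instead of
// lwarx/stwcx.; on PPC64 it also serves 8-byte integral updates such as a
// volatile intptr_t counter (hypothetical usage, for illustration only).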

template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;
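  // 'zero' is loaded into a GPR and used as the index operand of lwarx/stwcx.
  // (EA = dest + 0); the "b" constraint on dest below keeps it out of r0,
  // where the hardware would treat the base as the literal value 0 instead.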

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
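
// For illustration (hypothetical caller, not part of this file): the 4-byte
// exchange above backs calls such as
//
//   volatile int32_t flag = 0;
//   int32_t prev = Atomic::xchg((int32_t)1, &flag);  // returns the previous value
//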

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally: the reservation is taken on the aligned word
  // that contains the byte, and only that byte is replaced on store.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
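
  // Worked example (for illustration): if (uintptr_t)dest % 4 == 2, the target
  // byte occupies bits 16..23 of the containing word on little-endian
  // (shift_amount == 16), but bits 8..15 on big-endian ((~2 & 3) * 8 == 8).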
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  pre_membar(order);

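  // The plain lbz/cmpw "simple guard" below lets a cmpxchg whose compare value
  // already mismatches fail fast, without taking a reservation on the line.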
  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
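
// For illustration (hypothetical caller, not part of this file): the 1-byte
// specialization above allows code such as
//
//   volatile unsigned char state = 0;
//   unsigned char prev = Atomic::cmpxchg((unsigned char)1, &state, (unsigned char)0);
//
// to update a single byte; the other bytes of the containing word are written
// back unchanged because only the xor-selected byte differs.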

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
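
// For illustration (hypothetical caller, not part of this file): the 4-byte
// specialization above is the common case, e.g.
//
//   volatile int32_t lock = 0;
//   bool acquired = (Atomic::cmpxchg((int32_t)1, &lock, (int32_t)0) == 0);
//
// with the conservative (sync) barriers on both sides unless a weaker order
// is requested, as noted in the comment inside the function.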

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP