/*
 * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2019 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP
#define OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

#include "utilities/debug.hpp"

// Implementation of class atomic

//
// machine barrier instructions:
//
// - sync            two-way memory barrier, aka fence
// - lwsync          orders  Store|Store,
//                            Load|Store,
//                            Load|Load,
//                   but not Store|Load
// - eieio           orders memory accesses for device memory (only)
// - isync           invalidates speculatively executed instructions
//                   From the POWER ISA 2.06 documentation:
//                    "[...] an isync instruction prevents the execution of
//                   instructions following the isync until instructions
//                   preceding the isync have completed, [...]"
//                   From IBM's AIX assembler reference:
//                    "The isync [...] instructions causes the processor to
//                   refetch any instructions that might have been fetched
//                   prior to the isync instruction. The instruction isync
//                   causes the processor to wait for all previous instructions
//                   to complete. Then any instructions already fetched are
//                   discarded and instruction processing continues in the
//                   environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release         orders Store|Store,       (maps to lwsync)
//                           Load|Store
// - acquire         orders  Load|Store,       (maps to lwsync)
//                           Load|Load
// - fence           orders Store|Store,       (maps to sync)
//                           Load|Store,
//                           Load|Load,
//                          Store|Load
//

inline void pre_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_acquire: break;
    case memory_order_release:
    case memory_order_acq_rel: __asm__ __volatile__ ("lwsync" : : : "memory"); break;
    default /*conservative*/ : __asm__ __volatile__ ("sync"   : : : "memory"); break;
  }
}

inline void post_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_release: break;
    case memory_order_acquire:
    case memory_order_acq_rel: __asm__ __volatile__ ("isync"  : : : "memory"); break;
    default /*conservative*/ : __asm__ __volatile__ ("sync"   : : : "memory"); break;
  }
}
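
// Summary of the barriers these helpers emit around the ll/sc sequences below,
// derived directly from the two switches above:
//
//   order                    pre_membar   post_membar
//   memory_order_relaxed     (none)       (none)
//   memory_order_acquire     (none)       isync
//   memory_order_release     lwsync       (none)
//   memory_order_acq_rel     lwsync       isync
//   conservative (default)   sync         sync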


template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}


template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}
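
// Illustrative usage sketch (an assumption about the front end, which is not
// defined in this file): the Atomic::add() wrapper in atomic.hpp is expected
// to forward its arguments and memory order to these specializations, and
// add_and_fetch() returns the updated value:
//
//   volatile int32_t n = 41;
//   int32_t updated = Atomic::add(1, &n);      // updated == 42; bracketed by sync ... sync
//   Atomic::add(1, &n, memory_order_relaxed);  // same ll/sc loop, no barriers emitted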

template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using a 32-bit word internally: lwarx/stwcx. operate on aligned words,
  // so the byte is compared and swapped within its containing word.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;
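
  // Worked example of the xor trick below (illustrative values): with
  // compare_value 0x11, exchange_value 0x22 and the target byte at
  // shift_amount 8, xor_value is (0x11 ^ 0x22) << 8 = 0x3300.  The xor in the
  // loop is only reached after the target byte of value32 has been checked to
  // equal masked_compare_val, so value32 ^ 0x3300 rewrites that byte to 0x22
  // while leaving the other three bytes of the word untouched.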

  unsigned int old_value, value32;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
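
// Illustrative usage sketch (an assumption about the front end, which is not
// defined in this file): the Atomic::cmpxchg() wrapper in atomic.hpp is
// expected to pass (exchange_value, dest, compare_value) through to these
// specializations and to return the value observed at *dest, so success is
// detected by comparing the return value with compare_value:
//
//   volatile int64_t v = 0;
//   int64_t witnessed = Atomic::cmpxchg((int64_t)1, &v, (int64_t)0);
//   bool swapped = (witnessed == 0);   // true iff *dest held compare_value
//
// With the default conservative order the exchange is bracketed by two sync
// instructions, as emitted by pre_membar()/post_membar() above.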

#endif // OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP