/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
#define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP

#ifndef _LP64
#error "Atomic currently only implemented for PPC64"
#endif

#include "utilities/debug.hpp"

// Implementation of class atomic

inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }

inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }

inline jlong Atomic::load(const volatile jlong* src) { return *src; }

//
//   machine barrier instructions:
//
//   - ppc_sync            two-way memory barrier, aka fence
//   - ppc_lwsync          orders  Store|Store,
//                                  Load|Store,
//                                  Load|Load,
//                         but not Store|Load
//   - ppc_eieio           orders memory accesses for device memory (only)
//   - ppc_isync           invalidates speculatively executed instructions
//                         From the POWER ISA 2.06 documentation:
//                          "[...] an isync instruction prevents the execution of
//                         instructions following the isync until instructions
//                         preceding the isync have completed, [...]"
//                         From IBM's AIX assembler reference:
//                          "The isync [...] instructions causes the processor to
//                         refetch any instructions that might have been fetched
//                         prior to the isync instruction. The instruction isync
//                         causes the processor to wait for all previous instructions
//                         to complete. Then any instructions already fetched are
//                         discarded and instruction processing continues in the
//                         environment established by the previous instructions."
//
//   semantic barrier instructions:
//   (as defined in orderAccess.hpp)
//
//   - ppc_release         orders Store|Store,       (maps to ppc_lwsync)
//                                 Load|Store
//   - ppc_acquire         orders  Load|Store,       (maps to ppc_lwsync)
//                                 Load|Load
//   - ppc_fence           orders Store|Store,       (maps to ppc_sync)
//                                 Load|Store,
//                                 Load|Load,
//                                Store|Load
//

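// Illustrative sketch (never compiled) of how the semantic barriers above
// pair up in practice: a writer publishes data with a release barrier before
// setting a flag; the reader issues an acquire barrier after observing the
// flag.  On PPC64 both sides map to lwsync.  All names below are
// hypothetical.
#if 0
static volatile int example_data = 0;
static volatile int example_flag = 0;

static void example_publish() {
  example_data = 42;
  __asm__ __volatile__ ("lwsync" : : : "memory");  // release: orders Store|Store
  example_flag = 1;
}

static int example_consume() {
  while (example_flag == 0) {}
  __asm__ __volatile__ ("lwsync" : : : "memory");  // acquire: orders Load|Load
  return example_data;                             // observes 42 once flag is seen
}
#endif
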
#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""

template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest) const;
};

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}

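// Usage sketch (never compiled): these specializations are reached through
// the generic Atomic::add front end declared in atomic.hpp.  The lwsync
// before and isync after the ll/sc loop give the update release and acquire
// semantics, respectively.  The helper name below is hypothetical.
#if 0
static void example_increment(volatile jint* counter) {
  jint updated = Atomic::add(1, counter);  // add_and_fetch: returns the post-add value
}
#endif
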
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}

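// Usage sketch (never compiled): callers reach these specializations through
// Atomic::xchg in atomic.hpp, e.g. to swap in a new value and learn the
// previous one in a single atomic step.  The helper name below is
// hypothetical.
#if 0
static void example_swap(volatile intptr_t* slot, intptr_t mine) {
  intptr_t previous = Atomic::xchg(mine, slot);  // returns the old contents of *slot
}
#endif
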
inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

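// Sketch (never compiled): the order argument is threaded through from
// Atomic::cmpxchg.  Under the default conservative order both helpers above
// emit a sync, yielding the fence_cmpxchg_fence shape; a caller that manages
// ordering itself can pass memory_order_relaxed to elide both barriers.  The
// helper name below is hypothetical.
#if 0
static void example_relaxed_cas(volatile jint* v, jint ov, jint nv) {
  Atomic::cmpxchg(nv, v, ov, memory_order_relaxed);  // no pre/post sync emitted
}
#endif
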
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}

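// Worked example (never compiled) of the byte-lane arithmetic above.  On a
// big-endian layout with dest == dest_base + 1,
// shift_amount = ((~1) & 3) * 8 = 16, so the addressed byte occupies bits
// 16..23 of the loaded word.  Since xor_value is non-zero only in that lane,
// and the lane is known to hold compare_value when the xor executes, the xor
// swaps in exchange_value and leaves the other three bytes untouched.  The
// helper name below is hypothetical.
#if 0
static unsigned int example_lane_swap(unsigned int value32,
                                      unsigned char cmp, unsigned char xchg) {
  const unsigned int shift     = 16;  // lane of the addressed byte in this example
  const unsigned int xor_value = ((unsigned int)cmp ^ (unsigned int)xchg) << shift;
  return value32 ^ xor_value;         // cmp ^ (cmp ^ xchg) == xchg in that lane
}
#endif
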
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}

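// Usage sketch (never compiled): a typical compare-and-swap retry loop over
// the 4-byte specialization above, via the generic Atomic::cmpxchg front end
// in atomic.hpp.  The helper below (a monotonic maximum) is hypothetical.
#if 0
static void example_store_max(volatile jint* v, jint candidate) {
  jint old;
  do {
    old = *v;
    if (candidate <= old) return;  // current value already >= candidate
  } while (Atomic::cmpxchg(candidate, v, old) != old);
}
#endif
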
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP