/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
#define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP

#ifndef _LP64
#error "Atomic currently only implemented for PPC64"
#endif

// Implementation of class atomic

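// The specializations below back the platform-independent Atomic API
// declared in atomic.hpp (add, inc, dec, xchg, cmpxchg); this file only
// provides the PPC64/AIX inline-assembly implementations.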
//
//   machine barrier instructions:
//
//   - ppc_sync            two-way memory barrier, aka fence
//   - ppc_lwsync          orders  Store|Store,
//                                  Load|Store,
//                                  Load|Load,
//                         but not Store|Load
//   - ppc_eieio           orders memory accesses for device memory (only)
//   - ppc_isync           invalidates speculatively executed instructions
//                         From the POWER ISA 2.06 documentation:
//                          "[...] an isync instruction prevents the execution of
//                         instructions following the isync until instructions
//                         preceding the isync have completed, [...]"
//                         From IBM's AIX assembler reference:
//                          "The isync [...] instructions causes the processor to
//                         refetch any instructions that might have been fetched
//                         prior to the isync instruction. The instruction isync
//                         causes the processor to wait for all previous instructions
//                         to complete. Then any instructions already fetched are
//                         discarded and instruction processing continues in the
//                         environment established by the previous instructions."
//
//   semantic barrier instructions:
//   (as defined in orderAccess.hpp)
//
//   - ppc_release         orders Store|Store,       (maps to ppc_lwsync)
//                                 Load|Store
//   - ppc_acquire         orders  Load|Store,       (maps to ppc_lwsync)
//                                 Load|Load
//   - ppc_fence           orders Store|Store,       (maps to ppc_sync)
//                                 Load|Store,
//                                 Load|Load,
//                                Store|Load
//

#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""
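// The strasm_* macros are string literals (or empty strings) that are
// concatenated directly with the assembler templates below.  The operations
// in this file compose them in the following patterns:
//
//   strasm_lwsync     <ll/sc loop>  strasm_isync      (add)
//   strasm_lwsync     <ll/sc loop>  strasm_sync       (xchg)
//   strasm_sync       <ll/sc loop>  strasm_sync       (cmpxchg, via cmpxchg_{pre,post}_membar)
//   strasm_nobarrier  <ll/sc loop>  strasm_nobarrier  (inc/dec, no ordering)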

template <>
inline int32_t Atomic::specialized_add<int32_t>(int32_t add_value, volatile int32_t* dest) {
  unsigned int result;

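  // Fetch-and-add via a load-reserved/store-conditional retry loop: lwarx
  // loads *dest and takes a reservation, stwcx. stores the sum only if the
  // reservation still holds, and bne- retries otherwise.  The leading lwsync
  // and trailing isync order the update against surrounding accesses; the
  // 64-bit variant below differs only in using ldarx/stdcx.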
  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (int32_t) result;
}


template <>
inline int64_t Atomic::specialized_add<int64_t>(int64_t add_value, volatile int64_t* dest) {
  long result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (int64_t) result;
}

template <>
inline void Atomic::specialized_inc<int32_t>(volatile int32_t* dest) {
  unsigned int temp;

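  // Same lwarx/stwcx. retry pattern, but without barrier instructions:
  // strasm_nobarrier expands to the empty string, so the inc/dec variants
  // (32- and 64-bit) provide atomicity only, with no ordering guarantees.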
  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

template <>
inline void Atomic::specialized_inc<int64_t>(volatile int64_t* dest) {
  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

template <>
inline void Atomic::specialized_dec<int32_t>(volatile int32_t* dest) {
  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

template <>
inline void Atomic::specialized_dec<int64_t>(volatile int64_t* dest) {
  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

template <>
inline int32_t Atomic::specialized_xchg<int32_t>(int32_t exchange_value, volatile int32_t* dest) {
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  unsigned int old_value;
  const uint64_t zero = 0;

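  // Unconditional swap: lwarx reads the old value, stwcx. attempts to store
  // exchange_value, and bne- retries until the store-conditional succeeds.
  // The exchange is preceded by lwsync and followed by sync.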
  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (int32_t) old_value;
}

template <>
inline int64_t Atomic::specialized_xchg<int64_t>(int64_t exchange_value, volatile int64_t* dest) {
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  long old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (int64_t) old_value;
}

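// Barriers emitted around cmpxchg: a full sync on both sides unless the
// caller requested memory_order_relaxed (see the 'fence_cmpxchg_fence'
// comments in the specializations below).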
inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
template <>
inline int8_t Atomic::specialized_cmpxchg<int8_t>(int8_t exchange_value, volatile int8_t* dest, int8_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;
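  // xor_value holds (compare ^ exchange) shifted to the byte's position in
  // the containing word; if the loaded byte still equals compare_value,
  // XOR-ing value32 with xor_value rewrites exactly that byte to
  // exchange_value while leaving the other three bytes unchanged.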

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int8_t)(unsigned char)old_value;
}

template <>
inline int32_t Atomic::specialized_cmpxchg<int32_t>(int32_t exchange_value, volatile int32_t* dest, int32_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

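  // The non-reserving 'simple guard' load lets the common failure case bail
  // out to label 2 without taking a reservation; only on a tentative match
  // does the lwarx/stwcx. loop run.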
  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int32_t) old_value;
}

template <>
inline int64_t Atomic::specialized_cmpxchg<int64_t>(int64_t exchange_value, volatile int64_t* dest, int64_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  long old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int64_t) old_value;
}

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP