/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP
#define OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

// Implementation of class atomic

//
// machine barrier instructions:
//
// - sync            two-way memory barrier, aka fence
// - lwsync          orders  Store|Store,
//                            Load|Store,
//                            Load|Load,
//                   but not Store|Load
// - eieio           orders memory accesses for device memory (only)
// - isync           invalidates speculatively executed instructions
//                   From the POWER ISA 2.06 documentation:
//                    "[...] an isync instruction prevents the execution of
//                   instructions following the isync until instructions
//                   preceding the isync have completed, [...]"
//                   From IBM's AIX assembler reference:
//                    "The isync [...] instructions causes the processor to
//                   refetch any instructions that might have been fetched
//                   prior to the isync instruction. The instruction isync
//                   causes the processor to wait for all previous instructions
//                   to complete. Then any instructions already fetched are
//                   discarded and instruction processing continues in the
//                   environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release         orders Store|Store,       (maps to lwsync)
//                           Load|Store
// - acquire         orders  Load|Store,       (maps to lwsync)
//                           Load|Load
// - fence           orders Store|Store,       (maps to sync)
//                           Load|Store,
//                           Load|Load,
//                          Store|Load
//

#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""

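// Illustrative sketch only (no additional code): every read-modify-write below
// is a load-reserve/store-conditional retry loop of the form
//
//   1: lwarx/ldarx   tmp, 0, dest      // load and reserve
//      <update tmp>
//      stwcx./stdcx. tmp, 0, dest      // store iff the reservation still holds
//      bne-  1b                        // reservation lost -> retry
//
// bracketed by the strasm_* barriers above as required: lwsync before and
// isync/sync after for add and xchg, a full sync on both sides of cmpxchg
// (via cmpxchg_pre_membar/cmpxchg_post_membar), and strasm_nobarrier for the
// unordered inc/dec.
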
template <>
inline int32_t Atomic::specialized_add<int32_t>(int32_t add_value, volatile int32_t* dest) {
  unsigned int result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (int32_t) result;
}


template <>
inline int64_t Atomic::specialized_add<int64_t>(int64_t add_value, volatile int64_t* dest) {
  long result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (int64_t) result;
}


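// The inc/dec specializations below apply no memory ordering:
// strasm_nobarrier expands to the empty string, so these are plain atomic
// updates of the counter.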
template <>
inline void Atomic::specialized_inc<int32_t>(volatile int32_t* dest) {
  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

template <>
inline void Atomic::specialized_inc<int64_t>(volatile int64_t* dest) {
  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}


template <>
inline void Atomic::specialized_dec<int32_t>(volatile int32_t* dest) {
  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}


template <>
inline void Atomic::specialized_dec<int64_t>(volatile int64_t* dest) {
  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}


template <>
inline int32_t Atomic::specialized_xchg<int32_t>(int32_t exchange_value, volatile int32_t* dest) {
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).
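  // The implementation below nevertheless brackets the lwarx/stwcx. loop with
  // lwsync before and a full sync after.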

  unsigned int old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (int32_t) old_value;
}


template <>
inline int64_t Atomic::specialized_xchg<int64_t>(int64_t exchange_value, volatile int64_t* dest) {
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  long old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (int64_t) old_value;
}


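// For any order other than memory_order_relaxed, the helpers below emit a full
// sync before and after the compare-and-exchange, giving the conservative
// 'fence_cmpxchg_fence' behavior described in the specializations that follow.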
inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
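// VM_HAS_SPECIALIZED_CMPXCHG_BYTE signals to the shared Atomic code that this
// platform provides its own byte-wide cmpxchg; the specialization below
// emulates it on the aligned 32-bit word containing the byte.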
template <>
inline int8_t Atomic::specialized_cmpxchg<int8_t>(int8_t exchange_value, volatile int8_t* dest, int8_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;
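
  // Worked example (assuming little endian): for a byte at word offset 2,
  // shift_amount is 16 and xor_value holds (compare ^ exchange) in bits 16..23,
  // all other bits zero.  If the byte currently at that position still equals
  // compare_value, then value32 ^ xor_value rewrites exactly that byte to
  // exchange_value and leaves the other three bytes of the word unchanged.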

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int8_t)(unsigned char)old_value;
}

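// The int32_t and int64_t cmpxchg specializations below share one shape: a
// plain "guard" load bails out to label 2 without taking a reservation when
// the current value already differs from compare_value; otherwise the
// lwarx/stwcx. (ldarx/stdcx.) loop retries until the conditional store succeeds.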
template <>
inline int32_t Atomic::specialized_cmpxchg<int32_t>(int32_t exchange_value, volatile int32_t* dest, int32_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int32_t) old_value;
}


template <>
inline int64_t Atomic::specialized_cmpxchg<int64_t>(int64_t exchange_value, volatile int64_t* dest, int64_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  long old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int64_t) old_value;
}


#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP