/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP
#define OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

// Implementation of class Atomic

inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }

inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }

inline jlong Atomic::load(const volatile jlong* src) { return *src; }

//
// machine barrier instructions:
//
// - sync            two-way memory barrier, aka fence
// - lwsync          orders  Store|Store,
//                            Load|Store,
//                            Load|Load,
//                   but not Store|Load
// - eieio           orders memory accesses for device memory (only)
// - isync           invalidates speculatively executed instructions
//                   From the POWER ISA 2.06 documentation:
//                    "[...] an isync instruction prevents the execution of
//                   instructions following the isync until instructions
//                   preceding the isync have completed, [...]"
//                   From IBM's AIX assembler reference:
//                    "The isync [...] instructions causes the processor to
//                   refetch any instructions that might have been fetched
//                   prior to the isync instruction. The instruction isync
//                   causes the processor to wait for all previous instructions
//                   to complete. Then any instructions already fetched are
//                   discarded and instruction processing continues in the
//                   environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release         orders Store|Store,       (maps to lwsync)
//                           Load|Store
// - acquire         orders  Load|Store,       (maps to lwsync)
//                           Load|Load
// - fence           orders Store|Store,       (maps to sync)
//                           Load|Store,
//                           Load|Load,
//                          Store|Load
//

#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""
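
// Illustrative note (not from the original file): these string fragments are
// spliced directly into the inline-assembly templates below via string
// concatenation.  For example, an acquiring 32-bit load could hypothetically
// be written as
//
//   __asm__ __volatile__ ("lwz %0, 0(%1)" strasm_acquire
//                         : "=r"(value) : "b"(src) : "memory");
//
// where 'value' and 'src' are placeholder names.  The real acquire/release
// primitives live in orderAccess.hpp and its platform implementation.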

template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest) const;
};

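// Both add_and_fetch specializations below follow the same pattern: an lwsync
// ahead of a load-reserve/store-conditional retry loop (lwarx/stwcx. for 4
// bytes, ldarx/stdcx. for 8 bytes) and an isync after it, i.e. release-style
// ordering before the update and acquire-style ordering after it.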
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}


template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}

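// Note: the inc/dec operations below use strasm_nobarrier on both sides of
// the lwarx/stwcx. (or ldarx/stdcx.) loop, so they are atomic but impose no
// memory ordering.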
inline void Atomic::inc    (volatile jint*     dest) {

  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile intptr_t* dest) {

  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile void*     dest) {
  inc_ptr((volatile intptr_t*)dest);
}


inline void Atomic::dec    (volatile jint*     dest) {

  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {

  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile void*     dest) {
  dec_ptr((volatile intptr_t*)dest);
}

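// xchg and xchg_ptr below wrap the load-reserve/store-conditional loop with
// an lwsync before the loop and a full sync after it.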
inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {

  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (jint) old_value;
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {

  // Note that xchg_ptr doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  long old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (intptr_t) old_value;
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}

inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}
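
// Both helpers above emit a full sync unless the caller asks for
// memory_order_relaxed, which is why the cmpxchg variants below behave as
// 'fence_cmpxchg_fence' by default.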

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;
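
  // The reservation granule here is the containing 32-bit word: the loop below
  // loads that word, checks the addressed byte against compare_value, and, if
  // it matches, xors the word with (compare_value ^ exchange_value) shifted to
  // the byte's position.  Because the byte is known to equal compare_value at
  // that point, the xor rewrites exactly that byte to exchange_value and
  // leaves the other three bytes unchanged.  Illustrative values (not from the
  // source): compare_value 0x11, exchange_value 0x22, shift_amount 8 give
  // xor_value 0x3300, turning a word 0x00AA11BB into 0x00AA22BB.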

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}
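
// Illustrative use of the specializations above (hypothetical caller, not
// part of this file): the generic front end in atomic.hpp dispatches by
// operand size, so e.g.
//
//   jint observed = Atomic::cmpxchg(new_value, &counter, expected);
//
// ends up in PlatformCmpxchg<4> with the default conservative (fully fenced)
// ordering, while passing memory_order_relaxed skips both sync barriers.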

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP