1 /*
2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
27 #define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
28
29 #ifndef _LP64
#error "Atomic currently only implemented for PPC64"
31 #endif
32
33 #include "utilities/debug.hpp"
34
35 // Implementation of class atomic
36
37 //
38 // machine barrier instructions:
39 //
40 // - ppc_sync two-way memory barrier, aka fence
41 // - ppc_lwsync orders Store|Store,
42 // Load|Store,
43 // Load|Load,
44 // but not Store|Load
45 // - ppc_eieio orders memory accesses for device memory (only)
46 // - ppc_isync invalidates speculatively executed instructions
47 // From the POWER ISA 2.06 documentation:
48 // "[...] an isync instruction prevents the execution of
49 // instructions following the isync until instructions
50 // preceding the isync have completed, [...]"
51 // From IBM's AIX assembler reference:
52 // "The isync [...] instructions causes the processor to
53 // refetch any instructions that might have been fetched
54 // prior to the isync instruction. The instruction isync
55 // causes the processor to wait for all previous instructions
56 // to complete. Then any instructions already fetched are
57 // discarded and instruction processing continues in the
58 // environment established by the previous instructions."
59 //
60 // semantic barrier instructions:
61 // (as defined in orderAccess.hpp)
62 //
63 // - ppc_release orders Store|Store, (maps to ppc_lwsync)
64 // Load|Store
65 // - ppc_acquire orders Load|Store, (maps to ppc_lwsync)
66 // Load|Load
67 // - ppc_fence orders Store|Store, (maps to ppc_sync)
68 // Load|Store,
69 // Load|Load,
70 // Store|Load
71 //
72
73 #define strasm_sync "\n sync \n"
74 #define strasm_lwsync "\n lwsync \n"
75 #define strasm_isync "\n isync \n"
76 #define strasm_release strasm_lwsync
77 #define strasm_acquire strasm_lwsync
78 #define strasm_fence strasm_sync
79 #define strasm_nobarrier ""
80 #define strasm_nobarrier_clobber_memory ""
81
// Platform hook for atomic add on AIX/PPC64.  The CRTP base
// Atomic::AddAndFetch presumably derives the rest of the add interface
// from the add_and_fetch() primitive supplied by the 4- and 8-byte
// specializations below — TODO confirm against atomic.hpp.
template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  // Atomically performs *dest += add_value and returns the new value.
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest) const;
};
89
// 32-bit atomic add-and-return.
// lwarx reserves the word at dest, add computes the new value, and
// stwcx. stores it only if the reservation still holds; bne- retries
// on failure.  lwsync before and isync after bracket the operation
// (see the barrier notes at the top of this file).
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx %0, 0, %2 \n"
    " add %0, %0, %1 \n"
    " stwcx. %0, 0, %2 \n"
    " bne- 1b \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}
111
112
// 64-bit atomic add-and-return; identical to the 4-byte variant but
// using the doubleword reservation pair ldarx/stdcx.
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx %0, 0, %2 \n"
    " add %0, %0, %1 \n"
    " stdcx. %0, 0, %2 \n"
    " bne- 1b \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}
134
// 32-bit atomic exchange: stores exchange_value into *dest and returns
// the previous value.  The zero register operand supplies the index for
// the lwarx/stwcx. RA,RB addressing form.
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1: \n"
    " lwarx %[old_value], %[dest], %[zero] \n"
    " stwcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* trailing barrier: note this is a full sync, not just isync */
    strasm_sync
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}
173
// 64-bit atomic exchange; same structure as the 4-byte variant with
// doubleword ldarx/stdcx.
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1: \n"
    " ldarx %[old_value], %[dest], %[zero] \n"
    " stdcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* trailing barrier: note this is a full sync, not just isync */
    strasm_sync
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}
212
213 inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
214 if (order != memory_order_relaxed) {
215 __asm__ __volatile__ (
216 /* fence */
217 strasm_sync
218 );
219 }
220 }
221
222 inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
223 if (order != memory_order_relaxed) {
224 __asm__ __volatile__ (
225 /* fence */
226 strasm_sync
227 );
228 }
229 }
230
// 1-byte compare-and-exchange.  PPC has no byte-sized ll/sc pair here,
// so the byte is updated via a 32-bit lwarx/stwcx. on the aligned word
// containing it: the byte is extracted by shift+mask for the compare,
// and replaced by XOR-ing in (compare ^ exchange) << shift, which
// flips exactly the target byte and leaves the other three unchanged.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard: cheap non-reserved load; bail early on mismatch */
    " lbz %[old_value], 0(%[dest]) \n"
    " cmpw %[masked_compare_val], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " lwarx %[value32], 0, %[dest_base] \n"
    /* extract byte and compare */
    " srd %[old_value], %[value32], %[shift_amount] \n"
    " clrldi %[old_value], %[old_value], 56 \n"
    " cmpw %[masked_compare_val], %[old_value] \n"
    " bne- 2f \n"
    /* replace byte and try to store */
    " xor %[value32], %[xor_value], %[value32] \n"
    " stwcx. %[value32], 0, %[dest_base] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      [value32] "=&r" (value32),
      "=m" (*dest),
      "=m" (*dest_base)
    /* in */
    : [dest] "b" (dest),
      [dest_base] "b" (dest_base),
      [shift_amount] "r" (shift_amount),
      [masked_compare_val] "r" (masked_compare_val),
      [xor_value] "r" (xor_value),
      "m" (*dest),
      "m" (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
300
// 32-bit compare-and-exchange: if *dest == compare_value, store
// exchange_value; always returns the observed previous value.
// A plain lwz guard avoids taking a reservation when the compare
// would fail anyway.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    " lwz %[old_value], 0(%[dest]) \n"
    " cmpw %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " lwarx %[old_value], %[dest], %[zero] \n"
    " cmpw %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    " stwcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [compare_value] "r" (compare_value),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}
350
// 64-bit compare-and-exchange; same structure as the 4-byte variant
// with doubleword ld/ldarx/stdcx. and signed doubleword compare (cmpd).
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    " ld %[old_value], 0(%[dest]) \n"
    " cmpd %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " ldarx %[old_value], %[dest], %[zero] \n"
    " cmpd %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    " stdcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [compare_value] "r" (compare_value),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}
400
401 #undef strasm_sync
402 #undef strasm_lwsync
403 #undef strasm_isync
404 #undef strasm_release
405 #undef strasm_acquire
406 #undef strasm_fence
407 #undef strasm_nobarrier
408 #undef strasm_nobarrier_clobber_memory
409
410 #endif // OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
|
1 /*
2 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2012, 2018 SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
27 #define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
28
29 #ifndef PPC64
30 #error "Atomic currently only implemented for PPC64"
31 #endif
32
33 #include "utilities/debug.hpp"
34
35 // Implementation of class atomic
36
37 //
38 // machine barrier instructions:
39 //
40 // - sync two-way memory barrier, aka fence
41 // - lwsync orders Store|Store,
42 // Load|Store,
43 // Load|Load,
44 // but not Store|Load
45 // - eieio orders memory accesses for device memory (only)
46 // - isync invalidates speculatively executed instructions
47 // From the POWER ISA 2.06 documentation:
48 // "[...] an isync instruction prevents the execution of
49 // instructions following the isync until instructions
50 // preceding the isync have completed, [...]"
51 // From IBM's AIX assembler reference:
52 // "The isync [...] instructions causes the processor to
53 // refetch any instructions that might have been fetched
54 // prior to the isync instruction. The instruction isync
55 // causes the processor to wait for all previous instructions
56 // to complete. Then any instructions already fetched are
57 // discarded and instruction processing continues in the
58 // environment established by the previous instructions."
59 //
60 // semantic barrier instructions:
61 // (as defined in orderAccess.hpp)
62 //
63 // - release orders Store|Store, (maps to lwsync)
64 // Load|Store
65 // - acquire orders Load|Store, (maps to lwsync)
66 // Load|Load
67 // - fence orders Store|Store, (maps to sync)
68 // Load|Store,
69 // Load|Load,
70 // Store|Load
71 //
72
73 #define strasm_sync "\n sync \n"
74 #define strasm_lwsync "\n lwsync \n"
75 #define strasm_isync "\n isync \n"
76 #define strasm_release strasm_lwsync
77 #define strasm_acquire strasm_lwsync
78 #define strasm_fence strasm_sync
79 #define strasm_nobarrier ""
80 #define strasm_nobarrier_clobber_memory ""
81
82 inline void pre_membar(atomic_memory_order order) {
83 switch (order) {
84 case memory_order_relaxed:
85 case memory_order_consume:
86 case memory_order_acquire: break;
87 case memory_order_release:
88 case memory_order_acq_rel: __asm__ __volatile__ (strasm_lwsync); break;
89 default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
90 }
91 }
92
93 inline void post_membar(atomic_memory_order order) {
94 switch (order) {
95 case memory_order_relaxed:
96 case memory_order_consume:
97 case memory_order_release: break;
98 case memory_order_acquire:
99 case memory_order_acq_rel: __asm__ __volatile__ (strasm_isync); break;
100 default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
101 }
102 }
103
104
// Platform hook for atomic add on AIX/PPC64.  The CRTP base
// Atomic::AddAndFetch presumably derives the rest of the add interface
// from the add_and_fetch() primitive supplied by the 4- and 8-byte
// specializations below — TODO confirm against atomic.hpp.
template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  // Atomically performs *dest += add_value and returns the new value,
  // with barriers selected by 'order' (see pre_membar/post_membar).
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};
112
// 32-bit atomic add-and-return.  lwarx reserves the word at dest,
// add computes the new value, and stwcx. stores it only if the
// reservation still holds; bne- retries on failure.  Ordering barriers
// are emitted around the loop per 'order'.
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: lwarx %0, 0, %2 \n"
    " add %0, %0, %1 \n"
    " stwcx. %0, 0, %2 \n"
    " bne- 1b \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}
137
138
// 64-bit atomic add-and-return; identical to the 4-byte variant but
// using the doubleword reservation pair ldarx/stdcx.
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: ldarx %0, 0, %2 \n"
    " add %0, %0, %1 \n"
    " stdcx. %0, 0, %2 \n"
    " bne- 1b \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}
163
164 template<>
165 template<typename T>
166 inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
167 T volatile* dest,
168 atomic_memory_order order) const {
169 // Note that xchg doesn't necessarily do an acquire
170 // (see synchronizer.cpp).
171
172 T old_value;
173 const uint64_t zero = 0;
174
175 pre_membar(order);
176
177 __asm__ __volatile__ (
178 /* atomic loop */
179 "1: \n"
180 " lwarx %[old_value], %[dest], %[zero] \n"
181 " stwcx. %[exchange_value], %[dest], %[zero] \n"
182 " bne- 1b \n"
183 /* exit */
184 "2: \n"
185 /* out */
186 : [old_value] "=&r" (old_value),
187 "=m" (*dest)
188 /* in */
189 : [dest] "b" (dest),
190 [zero] "r" (zero),
191 [exchange_value] "r" (exchange_value),
192 "m" (*dest)
193 /* clobber */
194 : "cc",
195 "memory"
196 );
197
198 post_membar(order);
199
200 return old_value;
201 }
202
// 64-bit atomic exchange; same structure as the 4-byte variant with
// doubleword ldarx/stdcx.
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1: \n"
    " ldarx %[old_value], %[dest], %[zero] \n"
    " stdcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
242
// 1-byte compare-and-exchange.  PPC has no byte-sized ll/sc pair here,
// so the byte is updated via a 32-bit lwarx/stwcx. on the aligned word
// containing it: the byte is extracted by shift+mask for the compare,
// and replaced by XOR-ing in (compare ^ exchange) << shift, which
// flips exactly the target byte and leaves the other three unchanged.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard: cheap non-reserved load; bail early on mismatch */
    " lbz %[old_value], 0(%[dest]) \n"
    " cmpw %[masked_compare_val], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " lwarx %[value32], 0, %[dest_base] \n"
    /* extract byte and compare */
    " srd %[old_value], %[value32], %[shift_amount] \n"
    " clrldi %[old_value], %[old_value], 56 \n"
    " cmpw %[masked_compare_val], %[old_value] \n"
    " bne- 2f \n"
    /* replace byte and try to store */
    " xor %[value32], %[xor_value], %[value32] \n"
    " stwcx. %[value32], 0, %[dest_base] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      [value32] "=&r" (value32),
      "=m" (*dest),
      "=m" (*dest_base)
    /* in */
    : [dest] "b" (dest),
      [dest_base] "b" (dest_base),
      [shift_amount] "r" (shift_amount),
      [masked_compare_val] "r" (masked_compare_val),
      [xor_value] "r" (xor_value),
      "m" (*dest),
      "m" (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
312
// 32-bit compare-and-exchange: if *dest == compare_value, store
// exchange_value; always returns the observed previous value.
// A plain lwz guard avoids taking a reservation when the compare
// would fail anyway.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    " lwz %[old_value], 0(%[dest]) \n"
    " cmpw %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " lwarx %[old_value], %[dest], %[zero] \n"
    " cmpw %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    " stwcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [compare_value] "r" (compare_value),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
362
// 64-bit compare-and-exchange; same structure as the 4-byte variant
// with doubleword ld/ldarx/stdcx. and signed doubleword compare (cmpd).
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    " ld %[old_value], 0(%[dest]) \n"
    " cmpd %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " ldarx %[old_value], %[dest], %[zero] \n"
    " cmpd %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    " stdcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [compare_value] "r" (compare_value),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
412
413 #undef strasm_sync
414 #undef strasm_lwsync
415 #undef strasm_isync
416 #undef strasm_release
417 #undef strasm_acquire
418 #undef strasm_fence
419 #undef strasm_nobarrier
420 #undef strasm_nobarrier_clobber_memory
421
422 #endif // OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
|