/*
 * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP
#define OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

#include "utilities/debug.hpp"

// Implementation of class atomic

//
// machine barrier instructions:
//
// - sync            two-way memory barrier, aka fence
// - lwsync          orders  Store|Store,
//                            Load|Store,
//                            Load|Load,
//                   but not Store|Load
// - eieio           orders memory accesses for device memory (only)
// - isync           invalidates speculatively executed instructions
//                   From the POWER ISA 2.06 documentation:
//                    "[...] an isync instruction prevents the execution of
//                   instructions following the isync until instructions
//                   preceding the isync have completed, [...]"
//                   From IBM's AIX assembler reference:
//                    "The isync [...] instructions causes the processor to
//                   refetch any instructions that might have been fetched
//                   prior to the isync instruction. The instruction isync
//                   causes the processor to wait for all previous instructions
//                   to complete. Then any instructions already fetched are
//                   discarded and instruction processing continues in the
//                   environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release         orders Store|Store,       (maps to lwsync)
//                           Load|Store
// - acquire         orders  Load|Store,       (maps to lwsync)
//                           Load|Load
// - fence           orders Store|Store,       (maps to sync)
//                           Load|Store,
//                           Load|Load,
//                          Store|Load
//

#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""

inline void pre_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_acquire: break;
    case memory_order_release:
    case memory_order_acq_rel: __asm__ __volatile__ (strasm_lwsync); break;
    default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
  }
}

inline void post_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_release: break;
    case memory_order_acquire:
    case memory_order_acq_rel: __asm__ __volatile__ (strasm_isync); break;
    default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
  }
}
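
// Illustration (a sketch, not part of this file's API): how the pre/post
// barrier pair brackets a load-reserve/store-conditional loop for
// memory_order_acq_rel. 'acq_rel_increment' is a hypothetical helper and
// assumes <cstdint>; HotSpot itself goes through the Atomic templates below.
//
//   inline int32_t acq_rel_increment(volatile int32_t* counter) {
//     int32_t result;
//     __asm__ __volatile__ ("lwsync");     // pre_membar(memory_order_acq_rel)
//     __asm__ __volatile__ (
//       "1: lwarx  %0, 0, %1  \n"
//       "   addi   %0, %0, 1  \n"
//       "   stwcx. %0, 0, %1  \n"
//       "   bne-   1b         \n"
//       : "=&r" (result) : "r" (counter) : "cc", "memory");
//     __asm__ __volatile__ ("isync");      // post_membar(memory_order_acq_rel)
//     return result;
//   }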

template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}


template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}
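
// Usage sketch (illustrative only; real call sites live elsewhere in
// HotSpot, and '_counter' is a hypothetical variable assuming <cstdint>):
//
//   volatile int32_t _counter = 0;
//   // Default ordering is conservative: full sync before and after.
//   int32_t now = Atomic::add(1, &_counter);
//   // A relaxed add skips both barriers (see pre_membar/post_membar above):
//   Atomic::add(1, &_counter, memory_order_relaxed);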

template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[masked_compare_val], %[old_value]     \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[value32], 0, %[dest_base]             \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56          \n"
    "   cmpw    %[masked_compare_val], %[old_value]     \n"
    "   bne-    2f                                      \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]    \n"
    "   stwcx.  %[value32], 0, %[dest_base]             \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
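
// The byte-wide cmpxchg above emulates a 1-byte CAS with a 4-byte
// lwarx/stwcx. on the enclosing aligned word. Restated as plain C++ (a
// sketch for illustration only; 'byte_shift' is a hypothetical name and
// assumes <cstdint>):
//
//   // Distance, in bits, of the addressed byte from bit 0 of its aligned
//   // 32-bit word; this matches shift_amount above. On big endian the
//   // byte offset within the word is inverted, which the ~dest form computes.
//   inline unsigned byte_shift(uintptr_t dest, bool little_endian) {
//     return (little_endian ? (dest & 3) : (~dest & 3)) * 8;
//   }
//
// The xor step then flips only the bits that differ between the compare and
// exchange bytes, so the other three bytes of value32 are preserved.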

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP