1 /* 2 * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2012, 2018 SAP SE. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #ifndef OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP 27 #define OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP 28 29 #ifndef PPC64 30 #error "Atomic currently only implemented for PPC64" 31 #endif 32 33 // Implementation of class atomic 34 35 // 36 // machine barrier instructions: 37 // 38 // - sync two-way memory barrier, aka fence 39 // - lwsync orders Store|Store, 40 // Load|Store, 41 // Load|Load, 42 // but not Store|Load 43 // - eieio orders memory accesses for device memory (only) 44 // - isync invalidates speculatively executed instructions 45 // From the POWER ISA 2.06 documentation: 46 // "[...] an isync instruction prevents the execution of 47 // instructions following the isync until instructions 48 // preceding the isync have completed, [...]" 49 // From IBM's AIX assembler reference: 50 // "The isync [...] instructions causes the processor to 51 // refetch any instructions that might have been fetched 52 // prior to the isync instruction. The instruction isync 53 // causes the processor to wait for all previous instructions 54 // to complete. Then any instructions already fetched are 55 // discarded and instruction processing continues in the 56 // environment established by the previous instructions." 57 // 58 // semantic barrier instructions: 59 // (as defined in orderAccess.hpp) 60 // 61 // - release orders Store|Store, (maps to lwsync) 62 // Load|Store 63 // - acquire orders Load|Store, (maps to lwsync) 64 // Load|Load 65 // - fence orders Store|Store, (maps to sync) 66 // Load|Store, 67 // Load|Load, 68 // Store|Load 69 // 70 71 #define strasm_sync "\n sync \n" 72 #define strasm_lwsync "\n lwsync \n" 73 #define strasm_isync "\n isync \n" 74 #define strasm_release strasm_lwsync 75 #define strasm_acquire strasm_lwsync 76 #define strasm_fence strasm_sync 77 #define strasm_nobarrier "" 78 #define strasm_nobarrier_clobber_memory "" 79 80 inline void pre_membar(atomic_memory_order order) { 81 switch (order) { 82 case memory_order_relaxed: 83 case memory_order_acquire: break; 84 case memory_order_release: 85 case memory_order_acq_rel: __asm__ __volatile__ (strasm_lwsync); break; 86 default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break; 87 } 88 } 89 90 inline void post_membar(atomic_memory_order order) { 91 switch (order) { 92 case memory_order_relaxed: 93 case memory_order_release: break; 94 case memory_order_acquire: 95 case memory_order_acq_rel: __asm__ __volatile__ (strasm_isync); break; 96 default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break; 97 } 98 } 99 100 101 template<size_t byte_size> 102 struct Atomic::PlatformAdd 103 : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > 104 { 105 template<typename I, typename D> 106 D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const; 107 }; 108 109 template<> 110 template<typename I, typename D> 111 inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest, 112 atomic_memory_order order) const { 113 STATIC_ASSERT(4 == sizeof(I)); 114 STATIC_ASSERT(4 == sizeof(D)); 115 116 D result; 117 118 pre_membar(order); 119 120 __asm__ __volatile__ ( 121 "1: lwarx %0, 0, %2 \n" 122 " add %0, %0, %1 \n" 123 " stwcx. %0, 0, %2 \n" 124 " bne- 1b \n" 125 : /*%0*/"=&r" (result) 126 : /*%1*/"r" (add_value), /*%2*/"r" (dest) 127 : "cc", "memory" ); 128 129 post_membar(order); 130 131 return result; 132 } 133 134 135 template<> 136 template<typename I, typename D> 137 inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest, 138 atomic_memory_order order) const { 139 STATIC_ASSERT(8 == sizeof(I)); 140 STATIC_ASSERT(8 == sizeof(D)); 141 142 D result; 143 144 pre_membar(order); 145 146 __asm__ __volatile__ ( 147 "1: ldarx %0, 0, %2 \n" 148 " add %0, %0, %1 \n" 149 " stdcx. %0, 0, %2 \n" 150 " bne- 1b \n" 151 : /*%0*/"=&r" (result) 152 : /*%1*/"r" (add_value), /*%2*/"r" (dest) 153 : "cc", "memory" ); 154 155 post_membar(order); 156 157 return result; 158 } 159 160 template<> 161 template<typename T> 162 inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, 163 T volatile* dest, 164 atomic_memory_order order) const { 165 // Note that xchg doesn't necessarily do an acquire 166 // (see synchronizer.cpp). 167 168 T old_value; 169 const uint64_t zero = 0; 170 171 pre_membar(order); 172 173 __asm__ __volatile__ ( 174 /* atomic loop */ 175 "1: \n" 176 " lwarx %[old_value], %[dest], %[zero] \n" 177 " stwcx. %[exchange_value], %[dest], %[zero] \n" 178 " bne- 1b \n" 179 /* exit */ 180 "2: \n" 181 /* out */ 182 : [old_value] "=&r" (old_value), 183 "=m" (*dest) 184 /* in */ 185 : [dest] "b" (dest), 186 [zero] "r" (zero), 187 [exchange_value] "r" (exchange_value), 188 "m" (*dest) 189 /* clobber */ 190 : "cc", 191 "memory" 192 ); 193 194 post_membar(order); 195 196 return old_value; 197 } 198 199 template<> 200 template<typename T> 201 inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, 202 T volatile* dest, 203 atomic_memory_order order) const { 204 STATIC_ASSERT(8 == sizeof(T)); 205 // Note that xchg doesn't necessarily do an acquire 206 // (see synchronizer.cpp). 207 208 T old_value; 209 const uint64_t zero = 0; 210 211 pre_membar(order); 212 213 __asm__ __volatile__ ( 214 /* atomic loop */ 215 "1: \n" 216 " ldarx %[old_value], %[dest], %[zero] \n" 217 " stdcx. %[exchange_value], %[dest], %[zero] \n" 218 " bne- 1b \n" 219 /* exit */ 220 "2: \n" 221 /* out */ 222 : [old_value] "=&r" (old_value), 223 "=m" (*dest) 224 /* in */ 225 : [dest] "b" (dest), 226 [zero] "r" (zero), 227 [exchange_value] "r" (exchange_value), 228 "m" (*dest) 229 /* clobber */ 230 : "cc", 231 "memory" 232 ); 233 234 post_membar(order); 235 236 return old_value; 237 } 238 239 template<> 240 template<typename T> 241 inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, 242 T volatile* dest, 243 T compare_value, 244 atomic_memory_order order) const { 245 STATIC_ASSERT(1 == sizeof(T)); 246 247 // Note that cmpxchg guarantees a two-way memory barrier across 248 // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not 249 // specified otherwise (see atomic.hpp). 250 251 // Using 32 bit internally. 252 volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3); 253 254 #ifdef VM_LITTLE_ENDIAN 255 const unsigned int shift_amount = ((uintptr_t)dest & 3) * 8; 256 #else 257 const unsigned int shift_amount = ((~(uintptr_t)dest) & 3) * 8; 258 #endif 259 const unsigned int masked_compare_val = ((unsigned int)(unsigned char)compare_value), 260 masked_exchange_val = ((unsigned int)(unsigned char)exchange_value), 261 xor_value = (masked_compare_val ^ masked_exchange_val) << shift_amount; 262 263 unsigned int old_value, value32; 264 265 pre_membar(order); 266 267 __asm__ __volatile__ ( 268 /* simple guard */ 269 " lbz %[old_value], 0(%[dest]) \n" 270 " cmpw %[masked_compare_val], %[old_value] \n" 271 " bne- 2f \n" 272 /* atomic loop */ 273 "1: \n" 274 " lwarx %[value32], 0, %[dest_base] \n" 275 /* extract byte and compare */ 276 " srd %[old_value], %[value32], %[shift_amount] \n" 277 " clrldi %[old_value], %[old_value], 56 \n" 278 " cmpw %[masked_compare_val], %[old_value] \n" 279 " bne- 2f \n" 280 /* replace byte and try to store */ 281 " xor %[value32], %[xor_value], %[value32] \n" 282 " stwcx. %[value32], 0, %[dest_base] \n" 283 " bne- 1b \n" 284 /* exit */ 285 "2: \n" 286 /* out */ 287 : [old_value] "=&r" (old_value), 288 [value32] "=&r" (value32), 289 "=m" (*dest), 290 "=m" (*dest_base) 291 /* in */ 292 : [dest] "b" (dest), 293 [dest_base] "b" (dest_base), 294 [shift_amount] "r" (shift_amount), 295 [masked_compare_val] "r" (masked_compare_val), 296 [xor_value] "r" (xor_value), 297 "m" (*dest), 298 "m" (*dest_base) 299 /* clobber */ 300 : "cc", 301 "memory" 302 ); 303 304 post_membar(order); 305 306 return PrimitiveConversions::cast<T>((unsigned char)old_value); 307 } 308 309 template<> 310 template<typename T> 311 inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, 312 T volatile* dest, 313 T compare_value, 314 atomic_memory_order order) const { 315 STATIC_ASSERT(4 == sizeof(T)); 316 317 // Note that cmpxchg guarantees a two-way memory barrier across 318 // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not 319 // specified otherwise (see atomic.hpp). 320 321 T old_value; 322 const uint64_t zero = 0; 323 324 pre_membar(order); 325 326 __asm__ __volatile__ ( 327 /* simple guard */ 328 " lwz %[old_value], 0(%[dest]) \n" 329 " cmpw %[compare_value], %[old_value] \n" 330 " bne- 2f \n" 331 /* atomic loop */ 332 "1: \n" 333 " lwarx %[old_value], %[dest], %[zero] \n" 334 " cmpw %[compare_value], %[old_value] \n" 335 " bne- 2f \n" 336 " stwcx. %[exchange_value], %[dest], %[zero] \n" 337 " bne- 1b \n" 338 /* exit */ 339 "2: \n" 340 /* out */ 341 : [old_value] "=&r" (old_value), 342 "=m" (*dest) 343 /* in */ 344 : [dest] "b" (dest), 345 [zero] "r" (zero), 346 [compare_value] "r" (compare_value), 347 [exchange_value] "r" (exchange_value), 348 "m" (*dest) 349 /* clobber */ 350 : "cc", 351 "memory" 352 ); 353 354 post_membar(order); 355 356 return old_value; 357 } 358 359 template<> 360 template<typename T> 361 inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, 362 T volatile* dest, 363 T compare_value, 364 atomic_memory_order order) const { 365 STATIC_ASSERT(8 == sizeof(T)); 366 367 // Note that cmpxchg guarantees a two-way memory barrier across 368 // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not 369 // specified otherwise (see atomic.hpp). 370 371 T old_value; 372 const uint64_t zero = 0; 373 374 pre_membar(order); 375 376 __asm__ __volatile__ ( 377 /* simple guard */ 378 " ld %[old_value], 0(%[dest]) \n" 379 " cmpd %[compare_value], %[old_value] \n" 380 " bne- 2f \n" 381 /* atomic loop */ 382 "1: \n" 383 " ldarx %[old_value], %[dest], %[zero] \n" 384 " cmpd %[compare_value], %[old_value] \n" 385 " bne- 2f \n" 386 " stdcx. %[exchange_value], %[dest], %[zero] \n" 387 " bne- 1b \n" 388 /* exit */ 389 "2: \n" 390 /* out */ 391 : [old_value] "=&r" (old_value), 392 "=m" (*dest) 393 /* in */ 394 : [dest] "b" (dest), 395 [zero] "r" (zero), 396 [compare_value] "r" (compare_value), 397 [exchange_value] "r" (exchange_value), 398 "m" (*dest) 399 /* clobber */ 400 : "cc", 401 "memory" 402 ); 403 404 post_membar(order); 405 406 return old_value; 407 } 408 409 #undef strasm_sync 410 #undef strasm_lwsync 411 #undef strasm_isync 412 #undef strasm_release 413 #undef strasm_acquire 414 #undef strasm_fence 415 #undef strasm_nobarrier 416 #undef strasm_nobarrier_clobber_memory 417 418 #endif // OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP