/*
 * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2019 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP
#define OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

#include "orderAccess_linux_ppc.hpp"
#include "utilities/debug.hpp"

// Implementation of class atomic

//
// machine barrier instructions:
//
//   - sync            two-way memory barrier, aka fence
//   - lwsync          orders  Store|Store,
//                              Load|Store,
//                              Load|Load,
//                     but not Store|Load
//   - eieio           orders memory accesses for device memory (only)
//   - isync           invalidates speculatively executed instructions
//                     From the POWER ISA 2.06 documentation:
//                      "[...] an isync instruction prevents the execution of
//                     instructions following the isync until instructions
//                     preceding the isync have completed, [...]"
//                     From IBM's AIX assembler reference:
//                      "The isync [...] instructions causes the processor to
//                     refetch any instructions that might have been fetched
//                     prior to the isync instruction. The instruction isync
//                     causes the processor to wait for all previous instructions
//                     to complete. Then any instructions already fetched are
//                     discarded and instruction processing continues in the
//                     environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
//   - release         orders Store|Store,       (maps to lwsync)
//                             Load|Store
//   - acquire         orders  Load|Store,       (maps to lwsync)
//                             Load|Load
//   - fence           orders Store|Store,       (maps to sync)
//                             Load|Store,
//                             Load|Load,
//                            Store|Load
//
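// Illustrative summary (added for exposition, not part of the original
// documentation): with pre_membar()/post_membar() as defined below, an
// atomic read-modify-write expands roughly as follows for each supported
// memory order, where <ll/sc loop> stands for a lwarx/stwcx. (or
// ldarx/stdcx.) retry loop:
//
//   memory_order_relaxed:            <ll/sc loop>
//   memory_order_acquire:            <ll/sc loop>; isync
//   memory_order_release:    lwsync; <ll/sc loop>
//   memory_order_acq_rel:    lwsync; <ll/sc loop>; isync
//   default (conservative):  sync;   <ll/sc loop>; sync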
inline void pre_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_acquire: break;
    case memory_order_release:
    case memory_order_acq_rel: __asm__ __volatile__ ("lwsync" : : : "memory"); break;
    default /*conservative*/ : __asm__ __volatile__ ("sync"   : : : "memory"); break;
  }
}

inline void post_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_release: break;
    case memory_order_acquire:
    case memory_order_acq_rel: __asm__ __volatile__ ("isync"  : : : "memory"); break;
    default /*conservative*/ : __asm__ __volatile__ ("sync"   : : : "memory"); break;
  }
}


template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename D, typename I>
  D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
};

template<>
template<typename D, typename I>
inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}


template<>
template<typename D, typename I>
inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
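// Illustrative sketch (added for exposition; not HotSpot code): the
// lwarx/stwcx. loops above behave like the following pseudo-C, where the
// hypothetical try_store_conditional() fails iff the reservation taken by
// the load was lost to a competing writer:
//
//   T xchg_sketch(T volatile* dest, T exchange_value) {
//     T old_value;
//     do {
//       old_value = *dest;                                   // lwarx/ldarx
//     } while (!try_store_conditional(dest, exchange_value)); // stwcx./stdcx.
//     return old_value;
//   }
//
// pre_membar(order) and post_membar(order) bracket the loop to provide the
// requested ordering; as noted above, xchg itself implies no acquire.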
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
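// Worked example (added for exposition): on a little-endian machine, for a
// byte at address dest with (uintptr_t)dest & 3 == 2, the containing word is
// at dest_base = dest & ~3 and shift_amount = 2 * 8 = 16, so the byte
// occupies bits 16..23 of value32. If that byte equals compare_value, then
//
//   value32 ^ ((compare_value ^ exchange_value) << 16)
//
// replaces exactly that byte with exchange_value (byte ^ compare ^ exchange
// == exchange when byte == compare) and leaves the other three bytes
// unchanged, which is why a single xor suffices before the stwcx.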
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<size_t byte_size>
struct Atomic::PlatformOrderedLoad<byte_size, X_ACQUIRE>
{
  template <typename T>
  T operator()(const volatile T* p) const {
    T t = Atomic::load(p);
    // Use twi-isync for load_acquire (faster than lwsync).
    __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (t) : "memory");
    return t;
  }
};

#endif // OS_CPU_LINUX_PPC_ATOMIC_LINUX_PPC_HPP
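// Note on the twi-isync idiom in PlatformOrderedLoad above (added for
// exposition): "twi 0,%0,0" is a trap-word-immediate whose trap-condition
// field is empty, so it never traps, but it consumes the loaded value and
// therefore cannot complete before the load has. The following isync then
// discards any speculatively fetched instructions, so no subsequent load can
// be satisfied ahead of the acquiring load. This load / dependent-trap /
// isync sequence is a classic PowerPC acquire pattern and, as the comment in
// the code notes, is cheaper than an lwsync.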