/*
 * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2019 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP
#define OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

#include "utilities/debug.hpp"

// Implementation of class atomic

//
// machine barrier instructions:
//
// - sync    two-way memory barrier, aka fence
// - lwsync  orders  Store|Store,
//                    Load|Store,
//                    Load|Load,
//           but not Store|Load
// - eieio   orders memory accesses for device memory (only)
// - isync   invalidates speculatively executed instructions
//           From the POWER ISA 2.06 documentation:
//            "[...] an isync instruction prevents the execution of
//           instructions following the isync until instructions
//           preceding the isync have completed, [...]"
//           From IBM's AIX assembler reference:
//            "The isync [...] instruction causes the processor to
//           refetch any instructions that might have been fetched
//           prior to the isync instruction. The instruction isync
//           causes the processor to wait for all previous instructions
//           to complete. Then any instructions already fetched are
//           discarded and instruction processing continues in the
//           environment established by the previous instructions."
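//
// Illustrative only (not part of the original header): these machine
// barriers are emitted from C++ as inline assembly with a "memory"
// clobber, so the compiler cannot reorder memory accesses across them:
//
//   __asm__ __volatile__ ("sync"   : : : "memory");  // full two-way fence
//   __asm__ __volatile__ ("lwsync" : : : "memory");  // orders all but Store|Load
//   __asm__ __volatile__ ("isync"  : : : "memory");  // discard prefetched instructions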
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release  orders Store|Store,       (maps to lwsync)
//                    Load|Store
// - acquire  orders  Load|Store,       (maps to lwsync)
//                    Load|Load
// - fence    orders Store|Store,       (maps to sync)
//                    Load|Store,
//                    Load|Load,
//                   Store|Load
//

inline void pre_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_acquire: break;
    case memory_order_release:
    case memory_order_acq_rel: __asm__ __volatile__ ("lwsync" : : : "memory"); break;
    default /*conservative*/ : __asm__ __volatile__ ("sync"   : : : "memory"); break;
  }
}

inline void post_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_release: break;
    case memory_order_acquire:
    case memory_order_acq_rel: __asm__ __volatile__ ("isync"  : : : "memory"); break;
    default /*conservative*/ : __asm__ __volatile__ ("sync"   : : : "memory"); break;
  }
}


template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}


template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}
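
// Usage sketch (illustrative, not part of this header): callers reach the
// PlatformAdd specializations above through the generic Atomic::add front
// end declared in runtime/atomic.hpp. With the default conservative
// ordering, the ll/sc loop is bracketed by sync barriers:
//
//   volatile int32_t counter = 0;
//   int32_t after = Atomic::add((int32_t)2, &counter);  // after == 2, counter == 2
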
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
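
// The byte-wide cmpxchg below has no one-byte ll/sc primitive to build on,
// so it operates on the aligned 32-bit word containing the byte. A worked
// example of the index arithmetic (illustrative, not from the original
// header): for dest == 0x1003, dest_base == 0x1000; on big-endian,
// shift_amount = ((~0x1003) & 3) * 8 = 0, the byte being the least
// significant one in the word, while on little-endian, shift_amount =
// (0x1003 & 3) * 8 = 24, so srd by 24 moves the byte into the low bits.
//
// Usage sketch through the generic Atomic::cmpxchg front end declared in
// runtime/atomic.hpp (illustrative, not part of this header):
//
//   volatile int32_t flag = 0;
//   int32_t witness = Atomic::cmpxchg((int32_t)1, &flag, (int32_t)0);
//   // witness == 0 iff the swap succeeded; fence semantics by default.
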
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

#endif // OS_CPU_AIX_PPC_ATOMIC_AIX_PPC_HPP