/*
 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2018 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
#define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

#include "utilities/debug.hpp"

// Implementation of class atomic

//
// machine barrier instructions:
//
// - sync    two-way memory barrier, aka fence
// - lwsync  orders  Store|Store,
//                    Load|Store,
//                    Load|Load,
//           but not Store|Load
// - eieio   orders  memory accesses for device memory (only)
// - isync   invalidates speculatively executed instructions
//           From the POWER ISA 2.06 documentation:
//            "[...] an isync instruction prevents the execution of
//           instructions following the isync until instructions
//           preceding the isync have completed, [...]"
//           From IBM's AIX assembler reference:
//            "The isync [...] instructions causes the processor to
//           refetch any instructions that might have been fetched
//           prior to the isync instruction. The instruction isync
//           causes the processor to wait for all previous instructions
//           to complete. Then any instructions already fetched are
//           discarded and instruction processing continues in the
//           environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release  orders Store|Store,       (maps to lwsync)
//                    Load|Store
// - acquire  orders  Load|Store,       (maps to lwsync)
//                    Load|Load
// - fence    orders Store|Store,       (maps to sync)
//                    Load|Store,
//                    Load|Load,
//                   Store|Load
//
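// Illustrative example (not part of this file's interface): the
// Store|Load ordering that only 'fence'/sync provides matters for
// Dekker-style handshakes. With both flags initially zero and
// rFlag1/rFlag2 as placeholder registers holding the flag addresses,
//
//   Thread 1:                      Thread 2:
//     li   r1, 1                     li   r1, 1
//     stw  r1, 0(rFlag1)             stw  r1, 0(rFlag2)
//     sync                           sync
//     lwz  r2, 0(rFlag2)             lwz  r2, 0(rFlag1)
//
// at least one thread must observe the other's store. With lwsync in
// place of sync, the store and the following load may be reordered and
// both loads could still see zero.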
#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""

inline void pre_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_consume:
    case memory_order_acquire: break;
    case memory_order_release:
    case memory_order_acq_rel: __asm__ __volatile__ (strasm_lwsync); break;
    default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
  }
}

inline void post_membar(atomic_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_consume:
    case memory_order_release: break;
    case memory_order_acquire:
    case memory_order_acq_rel: __asm__ __volatile__ (strasm_isync); break;
    default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
  }
}


template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}


template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
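// For exposition only: disregarding the explicit barrier placement, the
// lwarx/stwcx. reservation loop above behaves like the GCC builtin
//
//   T old_value = __atomic_exchange_n(dest, exchange_value,
//                                     __ATOMIC_RELAXED);
//
// (a sketch; the builtin may not be available with every AIX
// toolchain). The hand-written asm is kept so that pre_membar() and
// post_membar() pin down exactly which PPC barrier (lwsync, isync or
// sync) surrounds the exchange for each atomic_memory_order.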
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
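// Worked example for the byte-lane arithmetic above (illustrative):
// for a byte at word offset (uintptr_t)dest & 3 == 2,
//   little-endian: shift_amount = 2 * 8                  = 16
//   big-endian:    shift_amount = ((~dest) & 3) * 8 = 1*8 =  8
// i.e. the shift moves the addressed byte into the low 8 bits, which
// srd/clrldi then extract for the compare. xor_value holds
// (compare ^ exchange) in exactly that lane and zero elsewhere, so
// 'value32 ^ xor_value' rewrites only the target byte, turning
// compare_value into exchange_value; the preceding cmpw guarantees the
// xor is applied only when the loaded byte equaled compare_value.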
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP