/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
#define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP

#ifndef _LP64
#error "Atomic currently only implemented for PPC64"
#endif

#include "utilities/debug.hpp"

// Implementation of class atomic

//
// machine barrier instructions:
//
// - ppc_sync    two-way memory barrier, aka fence
// - ppc_lwsync  orders  Store|Store,
//                        Load|Store,
//                        Load|Load,
//               but not Store|Load
// - ppc_eieio   orders memory accesses for device memory (only)
// - ppc_isync   invalidates speculatively executed instructions
//               From the POWER ISA 2.06 documentation:
//                "[...] an isync instruction prevents the execution of
//               instructions following the isync until instructions
//               preceding the isync have completed, [...]"
//               From IBM's AIX assembler reference:
//                "The isync [...] instructions causes the processor to
//               refetch any instructions that might have been fetched
//               prior to the isync instruction. The instruction isync
//               causes the processor to wait for all previous instructions
//               to complete. Then any instructions already fetched are
//               discarded and instruction processing continues in the
//               environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - ppc_release  orders Store|Store,       (maps to ppc_lwsync)
//                        Load|Store
// - ppc_acquire  orders  Load|Store,       (maps to ppc_lwsync)
//                        Load|Load
// - ppc_fence    orders Store|Store,       (maps to ppc_sync)
//                        Load|Store,
//                        Load|Load,
//                       Store|Load
//
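// Illustration (editorial sketch, not part of the original code): why
// ppc_fence must map to sync rather than lwsync. lwsync does not order
// Store|Load, so a Dekker-style handshake needs the full two-way barrier.
// The flag variables below are made-up names for the example:
//
//   // Thread 1:
//   flag1 = 1;
//   __asm__ __volatile__("sync" : : : "memory");
//   bool t1_may_enter = (flag2 == 0);
//
//   // Thread 2:
//   flag2 = 1;
//   __asm__ __volatile__("sync" : : : "memory");
//   bool t2_may_enter = (flag1 == 0);
//
// With sync, at most one of t1_may_enter/t2_may_enter can be true. With
// only lwsync, each store may still sit in its store buffer when the other
// thread's load executes, so both can read 0 and both threads could enter
// (the classic store-buffering outcome).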
#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""

template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest) const;
};

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}


template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg_ptr doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg_ptr doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}
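// Usage sketch (editorial, illustrative): these specializations are not
// called directly; they are reached through the generic Atomic front end
// declared in shared code (see atomic.hpp). 'counter' is a made-up variable:
//
//   volatile int counter = 0;
//   int v = Atomic::add(1, &counter);    // lwsync; lwarx/add/stwcx. loop; isync
//   int o = Atomic::xchg(42, &counter);  // lwsync; lwarx/stwcx. loop; sync
//
// Both calls are bracketed by the barrier sequences shown in the
// implementations above, and both retry via bne- until the stwcx./stdcx.
// reservation succeeds.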
inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
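// Worked example (editorial, illustrative) for the one-byte cmpxchg above,
// assuming big-endian AIX: for a byte at an address with low bits 0x6,
// dest_base points at the enclosing word (address & ~3) and
// shift_amount = ((~6) & 3) * 8 = 8, so the byte occupies bits 8..15 of the
// 32-bit word. 'srd' by 8 followed by 'clrldi ..., 56' isolates exactly that
// byte for the compare. Because the store is only attempted after the byte
// was found equal to compare_value, XOR-ing the word with
// (masked_compare_val ^ masked_exchange_val) << 8 rewrites just that byte to
// exchange_value while leaving the other three bytes of the word unchanged.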
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP