/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
23 * 24 */ 25 26 #ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP 27 #define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP 28 29 #ifndef _LP64 30 #error "Atomic currently only impleneted for PPC64" 31 #endif 32 33 #include "utilities/debug.hpp" 34 35 // Implementation of class atomic 36 37 inline void Atomic::store (jbyte store_value, jbyte* dest) { *dest = store_value; } 38 inline void Atomic::store (jshort store_value, jshort* dest) { *dest = store_value; } 39 inline void Atomic::store (jint store_value, jint* dest) { *dest = store_value; } 40 inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; } 41 inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; } 42 inline void Atomic::store_ptr(void* store_value, void* dest) { *(void**)dest = store_value; } 43 44 inline void Atomic::store (jbyte store_value, volatile jbyte* dest) { *dest = store_value; } 45 inline void Atomic::store (jshort store_value, volatile jshort* dest) { *dest = store_value; } 46 inline void Atomic::store (jint store_value, volatile jint* dest) { *dest = store_value; } 47 inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; } 48 inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; } 49 inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; } 50 51 inline jlong Atomic::load(const volatile jlong* src) { return *src; } 52 53 // 54 // machine barrier instructions: 55 // 56 // - ppc_sync two-way memory barrier, aka fence 57 // - ppc_lwsync orders Store|Store, 58 // Load|Store, 59 // Load|Load, 60 // but not Store|Load 61 // - ppc_eieio orders memory accesses for device memory (only) 62 // - ppc_isync invalidates speculatively executed instructions 63 // From the POWER ISA 2.06 documentation: 64 // "[...] 
an isync instruction prevents the execution of 65 // instructions following the isync until instructions 66 // preceding the isync have completed, [...]" 67 // From IBM's AIX assembler reference: 68 // "The isync [...] instructions causes the processor to 69 // refetch any instructions that might have been fetched 70 // prior to the isync instruction. The instruction isync 71 // causes the processor to wait for all previous instructions 72 // to complete. Then any instructions already fetched are 73 // discarded and instruction processing continues in the 74 // environment established by the previous instructions." 75 // 76 // semantic barrier instructions: 77 // (as defined in orderAccess.hpp) 78 // 79 // - ppc_release orders Store|Store, (maps to ppc_lwsync) 80 // Load|Store 81 // - ppc_acquire orders Load|Store, (maps to ppc_lwsync) 82 // Load|Load 83 // - ppc_fence orders Store|Store, (maps to ppc_sync) 84 // Load|Store, 85 // Load|Load, 86 // Store|Load 87 // 88 89 #define strasm_sync "\n sync \n" 90 #define strasm_lwsync "\n lwsync \n" 91 #define strasm_isync "\n isync \n" 92 #define strasm_release strasm_lwsync 93 #define strasm_acquire strasm_lwsync 94 #define strasm_fence strasm_sync 95 #define strasm_nobarrier "" 96 #define strasm_nobarrier_clobber_memory "" 97 98 template<size_t byte_size> 99 struct Atomic::PlatformAdd 100 : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> > 101 { 102 template<typename I, typename D> 103 D add_and_fetch(I add_value, D volatile* dest) const; 104 }; 105 106 template<> 107 template<typename I, typename D> 108 inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const { 109 STATIC_ASSERT(4 == sizeof(I)); 110 STATIC_ASSERT(4 == sizeof(D)); 111 112 D result; 113 114 __asm__ __volatile__ ( 115 strasm_lwsync 116 "1: lwarx %0, 0, %2 \n" 117 " add %0, %0, %1 \n" 118 " stwcx. 
%0, 0, %2 \n" 119 " bne- 1b \n" 120 strasm_isync 121 : /*%0*/"=&r" (result) 122 : /*%1*/"r" (add_value), /*%2*/"r" (dest) 123 : "cc", "memory" ); 124 125 return result; 126 } 127 128 129 template<> 130 template<typename I, typename D> 131 inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) const { 132 STATIC_ASSERT(8 == sizeof(I)); 133 STATIC_ASSERT(8 == sizeof(D)); 134 135 D result; 136 137 __asm__ __volatile__ ( 138 strasm_lwsync 139 "1: ldarx %0, 0, %2 \n" 140 " add %0, %0, %1 \n" 141 " stdcx. %0, 0, %2 \n" 142 " bne- 1b \n" 143 strasm_isync 144 : /*%0*/"=&r" (result) 145 : /*%1*/"r" (add_value), /*%2*/"r" (dest) 146 : "cc", "memory" ); 147 148 return result; 149 } 150 151 template<> 152 template<typename T> 153 inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, 154 T volatile* dest) const { 155 STATIC_ASSERT(4 == sizeof(T)); 156 // Note that xchg_ptr doesn't necessarily do an acquire 157 // (see synchronizer.cpp). 158 159 T old_value; 160 const uint64_t zero = 0; 161 162 __asm__ __volatile__ ( 163 /* lwsync */ 164 strasm_lwsync 165 /* atomic loop */ 166 "1: \n" 167 " lwarx %[old_value], %[dest], %[zero] \n" 168 " stwcx. %[exchange_value], %[dest], %[zero] \n" 169 " bne- 1b \n" 170 /* isync */ 171 strasm_sync 172 /* exit */ 173 "2: \n" 174 /* out */ 175 : [old_value] "=&r" (old_value), 176 "=m" (*dest) 177 /* in */ 178 : [dest] "b" (dest), 179 [zero] "r" (zero), 180 [exchange_value] "r" (exchange_value), 181 "m" (*dest) 182 /* clobber */ 183 : "cc", 184 "memory" 185 ); 186 187 return old_value; 188 } 189 190 template<> 191 template<typename T> 192 inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, 193 T volatile* dest) const { 194 STATIC_ASSERT(8 == sizeof(T)); 195 // Note that xchg_ptr doesn't necessarily do an acquire 196 // (see synchronizer.cpp). 
197 198 T old_value; 199 const uint64_t zero = 0; 200 201 __asm__ __volatile__ ( 202 /* lwsync */ 203 strasm_lwsync 204 /* atomic loop */ 205 "1: \n" 206 " ldarx %[old_value], %[dest], %[zero] \n" 207 " stdcx. %[exchange_value], %[dest], %[zero] \n" 208 " bne- 1b \n" 209 /* isync */ 210 strasm_sync 211 /* exit */ 212 "2: \n" 213 /* out */ 214 : [old_value] "=&r" (old_value), 215 "=m" (*dest) 216 /* in */ 217 : [dest] "b" (dest), 218 [zero] "r" (zero), 219 [exchange_value] "r" (exchange_value), 220 "m" (*dest) 221 /* clobber */ 222 : "cc", 223 "memory" 224 ); 225 226 return old_value; 227 } 228 229 inline void cmpxchg_pre_membar(cmpxchg_memory_order order) { 230 if (order != memory_order_relaxed) { 231 __asm__ __volatile__ ( 232 /* fence */ 233 strasm_sync 234 ); 235 } 236 } 237 238 inline void cmpxchg_post_membar(cmpxchg_memory_order order) { 239 if (order != memory_order_relaxed) { 240 __asm__ __volatile__ ( 241 /* fence */ 242 strasm_sync 243 ); 244 } 245 } 246 247 template<> 248 template<typename T> 249 inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, 250 T volatile* dest, 251 T compare_value, 252 cmpxchg_memory_order order) const { 253 STATIC_ASSERT(1 == sizeof(T)); 254 255 // Note that cmpxchg guarantees a two-way memory barrier across 256 // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not 257 // specified otherwise (see atomic.hpp). 258 259 // Using 32 bit internally. 
260 volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3); 261 262 #ifdef VM_LITTLE_ENDIAN 263 const unsigned int shift_amount = ((uintptr_t)dest & 3) * 8; 264 #else 265 const unsigned int shift_amount = ((~(uintptr_t)dest) & 3) * 8; 266 #endif 267 const unsigned int masked_compare_val = ((unsigned int)(unsigned char)compare_value), 268 masked_exchange_val = ((unsigned int)(unsigned char)exchange_value), 269 xor_value = (masked_compare_val ^ masked_exchange_val) << shift_amount; 270 271 unsigned int old_value, value32; 272 273 cmpxchg_pre_membar(order); 274 275 __asm__ __volatile__ ( 276 /* simple guard */ 277 " lbz %[old_value], 0(%[dest]) \n" 278 " cmpw %[masked_compare_val], %[old_value] \n" 279 " bne- 2f \n" 280 /* atomic loop */ 281 "1: \n" 282 " lwarx %[value32], 0, %[dest_base] \n" 283 /* extract byte and compare */ 284 " srd %[old_value], %[value32], %[shift_amount] \n" 285 " clrldi %[old_value], %[old_value], 56 \n" 286 " cmpw %[masked_compare_val], %[old_value] \n" 287 " bne- 2f \n" 288 /* replace byte and try to store */ 289 " xor %[value32], %[xor_value], %[value32] \n" 290 " stwcx. 
%[value32], 0, %[dest_base] \n" 291 " bne- 1b \n" 292 /* exit */ 293 "2: \n" 294 /* out */ 295 : [old_value] "=&r" (old_value), 296 [value32] "=&r" (value32), 297 "=m" (*dest), 298 "=m" (*dest_base) 299 /* in */ 300 : [dest] "b" (dest), 301 [dest_base] "b" (dest_base), 302 [shift_amount] "r" (shift_amount), 303 [masked_compare_val] "r" (masked_compare_val), 304 [xor_value] "r" (xor_value), 305 "m" (*dest), 306 "m" (*dest_base) 307 /* clobber */ 308 : "cc", 309 "memory" 310 ); 311 312 cmpxchg_post_membar(order); 313 314 return PrimitiveConversions::cast<T>((unsigned char)old_value); 315 } 316 317 template<> 318 template<typename T> 319 inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, 320 T volatile* dest, 321 T compare_value, 322 cmpxchg_memory_order order) const { 323 STATIC_ASSERT(4 == sizeof(T)); 324 325 // Note that cmpxchg guarantees a two-way memory barrier across 326 // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not 327 // specified otherwise (see atomic.hpp). 328 329 T old_value; 330 const uint64_t zero = 0; 331 332 cmpxchg_pre_membar(order); 333 334 __asm__ __volatile__ ( 335 /* simple guard */ 336 " lwz %[old_value], 0(%[dest]) \n" 337 " cmpw %[compare_value], %[old_value] \n" 338 " bne- 2f \n" 339 /* atomic loop */ 340 "1: \n" 341 " lwarx %[old_value], %[dest], %[zero] \n" 342 " cmpw %[compare_value], %[old_value] \n" 343 " bne- 2f \n" 344 " stwcx. 
%[exchange_value], %[dest], %[zero] \n" 345 " bne- 1b \n" 346 /* exit */ 347 "2: \n" 348 /* out */ 349 : [old_value] "=&r" (old_value), 350 "=m" (*dest) 351 /* in */ 352 : [dest] "b" (dest), 353 [zero] "r" (zero), 354 [compare_value] "r" (compare_value), 355 [exchange_value] "r" (exchange_value), 356 "m" (*dest) 357 /* clobber */ 358 : "cc", 359 "memory" 360 ); 361 362 cmpxchg_post_membar(order); 363 364 return old_value; 365 } 366 367 template<> 368 template<typename T> 369 inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, 370 T volatile* dest, 371 T compare_value, 372 cmpxchg_memory_order order) const { 373 STATIC_ASSERT(8 == sizeof(T)); 374 375 // Note that cmpxchg guarantees a two-way memory barrier across 376 // the cmpxchg, so it's really a a 'fence_cmpxchg_fence' if not 377 // specified otherwise (see atomic.hpp). 378 379 T old_value; 380 const uint64_t zero = 0; 381 382 cmpxchg_pre_membar(order); 383 384 __asm__ __volatile__ ( 385 /* simple guard */ 386 " ld %[old_value], 0(%[dest]) \n" 387 " cmpd %[compare_value], %[old_value] \n" 388 " bne- 2f \n" 389 /* atomic loop */ 390 "1: \n" 391 " ldarx %[old_value], %[dest], %[zero] \n" 392 " cmpd %[compare_value], %[old_value] \n" 393 " bne- 2f \n" 394 " stdcx. %[exchange_value], %[dest], %[zero] \n" 395 " bne- 1b \n" 396 /* exit */ 397 "2: \n" 398 /* out */ 399 : [old_value] "=&r" (old_value), 400 "=m" (*dest) 401 /* in */ 402 : [dest] "b" (dest), 403 [zero] "r" (zero), 404 [compare_value] "r" (compare_value), 405 [exchange_value] "r" (exchange_value), 406 "m" (*dest) 407 /* clobber */ 408 : "cc", 409 "memory" 410 ); 411 412 cmpxchg_post_membar(order); 413 414 return old_value; 415 } 416 417 #undef strasm_sync 418 #undef strasm_lwsync 419 #undef strasm_isync 420 #undef strasm_release 421 #undef strasm_acquire 422 #undef strasm_fence 423 #undef strasm_nobarrier 424 #undef strasm_nobarrier_clobber_memory 425 426 #endif // OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP