/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP
#define OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

// Implementation of class atomic

//
// machine barrier instructions:
//
// - sync            two-way memory barrier, aka fence
// - lwsync          orders  Store|Store,
//                            Load|Store,
//                            Load|Load,
//                   but not Store|Load
// - eieio           orders memory accesses for device memory (only)
// - isync           invalidates speculatively executed instructions
//                   From the POWER ISA 2.06 documentation:
//                    "[...] an isync instruction prevents the execution of
//                   instructions following the isync until instructions
//                   preceding the isync have completed, [...]"
//                   From IBM's AIX assembler reference:
//                    "The isync [...] instructions causes the processor to
//                   refetch any instructions that might have been fetched
//                   prior to the isync instruction. The instruction isync
//                   causes the processor to wait for all previous instructions
//                   to complete. Then any instructions already fetched are
//                   discarded and instruction processing continues in the
//                   environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release         orders Store|Store,       (maps to lwsync)
//                           Load|Store
// - acquire         orders  Load|Store,       (maps to lwsync)
//                           Load|Load
// - fence           orders Store|Store,       (maps to sync)
//                           Load|Store,
//                           Load|Load,
//                          Store|Load
//
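// For illustration only: a minimal sketch of how the semantic barriers above
// pair with a load or a store. The two helper functions below are
// hypothetical (the real implementations live in the orderAccess platform
// files), but the barrier placement follows the mapping given above:
//
//   inline int load_acquire_example(const volatile int* p) {
//     int v = *p;
//     __asm__ __volatile__ ("lwsync" : : : "memory"); // acquire: Load|Load, Load|Store
//     return v;
//   }
//
//   inline void release_store_example(volatile int* p, int v) {
//     __asm__ __volatile__ ("lwsync" : : : "memory"); // release: Store|Store, Load|Store
//     *p = v;
//   }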
#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""

inline void pre_membar(cmpxchg_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_acquire: break;
    case memory_order_release:
    case memory_order_acq_rel: __asm__ __volatile__ (strasm_lwsync); break;
    default                  : __asm__ __volatile__ (strasm_sync); break;
  }
}

inline void post_membar(cmpxchg_memory_order order) {
  switch (order) {
    case memory_order_relaxed:
    case memory_order_release: break;
    case memory_order_acquire:
    case memory_order_acq_rel: __asm__ __volatile__ (strasm_isync); break;
    default                  : __asm__ __volatile__ (strasm_sync); break;
  }
}


template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, cmpxchg_memory_order order) const;
};

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
                                               cmpxchg_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}


template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
                                               cmpxchg_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}
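// For illustration: a hedged sketch of how the PlatformAdd specializations
// above are typically reached. The Atomic::add front end is declared in
// atomic.hpp; the counter and increment here are made up for exposition,
// and the comment assumes the conservative (two-way fence) default ordering:
//
//   volatile int32_t counter = 0;
//   int32_t v = Atomic::add(1, &counter);  // lwarx/add/stwcx. retry loop,
//                                          // sync before and after by default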
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}
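// For illustration: the 1-byte cmpxchg below has no lbarx/stbcx. pair to use,
// so it operates on the aligned 32-bit word containing the byte. A worked
// example with made-up numbers: for a byte at address 0x1002 on big-endian,
// dest_base = 0x1000 and shift_amount = ((~0x1002) & 3) * 8 = 8, so the srd
// and clrldi below move the target byte into the low 8 bits for the compare.
//
// A hedged usage sketch (Atomic::cmpxchg is declared in atomic.hpp; the flag
// variable and values are made up):
//
//   volatile jbyte flag = 0;
//   jbyte prev = Atomic::cmpxchg((jbyte)1, &flag, (jbyte)0); // PlatformCmpxchg<1>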
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP