/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP
#define OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

// Implementation of class atomic

//
// machine barrier instructions:
//
// - sync    two-way memory barrier, aka fence
// - lwsync  orders  Store|Store,
//                    Load|Store,
//                    Load|Load,
//           but not Store|Load
// - eieio   orders memory accesses for device memory (only)
// - isync   invalidates speculatively executed instructions
//           From the POWER ISA 2.06 documentation:
//            "[...] an isync instruction prevents the execution of
//           instructions following the isync until instructions
//           preceding the isync have completed, [...]"
//           From IBM's AIX assembler reference:
//            "The isync [...] instructions causes the processor to
//           refetch any instructions that might have been fetched
//           prior to the isync instruction. The instruction isync
//           causes the processor to wait for all previous instructions
//           to complete. Then any instructions already fetched are
//           discarded and instruction processing continues in the
//           environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release  orders Store|Store,       (maps to lwsync)
//                    Load|Store
// - acquire  orders  Load|Store,       (maps to lwsync)
//                    Load|Load
// - fence    orders Store|Store,       (maps to sync)
//                    Load|Store,
//                    Load|Load,
//                   Store|Load
//

#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""
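// Illustration only, not part of this file's interface: a minimal sketch of
// how the semantic barriers above combine with plain accesses. The names
// example_release_store and example_acquire_load are hypothetical.
inline void example_release_store(volatile int* p, int v) {
  // lwsync orders all prior stores and loads before the store below,
  // which is exactly the release property.
  __asm__ __volatile__ (strasm_release : : : "memory");
  *p = v;
}

inline int example_acquire_load(const volatile int* p) {
  int v = *p;
  // lwsync after the load orders it before all subsequent loads and
  // stores, which is exactly the acquire property.
  __asm__ __volatile__ (strasm_acquire : : : "memory");
  return v;
}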
template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest) const;
};

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}


template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}
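// Illustration only (hypothetical helper, not used by HotSpot): the
// lwarx/stwcx. pair above is PPC's load-reserved/store-conditional.
// stwcx. fails, and the loop retries, if another processor wrote the
// reservation granule after the lwarx, so the read-modify-write as a
// whole is atomic. A rough equivalent built on the GCC __atomic builtins
// would look like this; the hand-written asm is kept so the exact barrier
// placement (lwsync before, isync after) stays explicit.
template<typename I, typename D>
inline D example_add_and_fetch(I add_value, D volatile* dest) {
  // __ATOMIC_SEQ_CST approximates, but does not exactly match, the
  // lwsync/isync bracketing used in the asm versions above.
  return __atomic_add_fetch(dest, add_value, __ATOMIC_SEQ_CST);
}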
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* fence */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* fence */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}

inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
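// Illustration only (hypothetical helper): what the xor trick above computes.
// Because (compare ^ exchange) << shift flips exactly the bits that differ
// between the two byte values, replacing the target byte inside the 32-bit
// word is a single xor, provided the byte currently in that lane equals
// masked_compare_val (which the preceding cmpw has just verified).
inline unsigned int example_replace_byte(unsigned int value32,
                                         unsigned int masked_compare_val,
                                         unsigned int masked_exchange_val,
                                         unsigned int shift_amount) {
  unsigned int xor_value = (masked_compare_val ^ masked_exchange_val) << shift_amount;
  // value32 has masked_compare_val in the target lane; xor-ing yields
  // masked_exchange_val there and leaves the other three bytes unchanged.
  return value32 ^ xor_value;
}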
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP
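// Usage sketch (illustration only; the public interface lives in atomic.hpp).
// Callers go through Atomic::cmpxchg, which dispatches to the PlatformCmpxchg
// specializations above. With the default conservative ordering both sync
// fences around the compare-and-swap are executed; passing
// memory_order_relaxed skips them:
//
//   jint old = Atomic::cmpxchg(new_val, &flag, expected);  // fenced both ways
//   jint old = Atomic::cmpxchg(new_val, &flag, expected,
//                              memory_order_relaxed);      // no extra fences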