/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP
#define OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

// Implementation of class atomic

//
// machine barrier instructions:
//
// - sync    two-way memory barrier, aka fence
// - lwsync  orders  Store|Store,
//                    Load|Store,
//                    Load|Load,
//           but not Store|Load
// - eieio   orders memory accesses for device memory (only)
// - isync   invalidates speculatively executed instructions
//           From the POWER ISA 2.06 documentation:
//            "[...] an isync instruction prevents the execution of
//           instructions following the isync until instructions
//           preceding the isync have completed, [...]"
//           From IBM's AIX assembler reference:
//            "The isync [...] instructions cause the processor to
//           refetch any instructions that might have been fetched
//           prior to the isync instruction. The instruction isync
//           causes the processor to wait for all previous instructions
//           to complete. Then any instructions already fetched are
//           discarded and instruction processing continues in the
//           environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release  orders Store|Store,  (maps to lwsync)
//                    Load|Store
// - acquire  orders  Load|Store,  (maps to lwsync)
//                    Load|Load
// - fence    orders Store|Store,  (maps to sync)
//                    Load|Store,
//                    Load|Load,
//                   Store|Load
//

#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""

template <>
inline int32_t Atomic::specialized_add<int32_t>(int32_t add_value, volatile int32_t* dest) {
  unsigned int result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (int32_t) result;
}
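
// A sketch of what the lwarx/stwcx. retry loop above computes, in C-like
// pseudocode (store_conditional is a hypothetical stand-in for stwcx.,
// which only succeeds if the reservation taken by lwarx still holds):
//
//   do {
//     result = *dest;               // lwarx: load word and reserve
//     result = result + add_value;  // add
//   } while (!store_conditional(dest, result));  // stwcx. + bne- 1b
//
// The leading lwsync and trailing isync bracket the loop so the update
// acts as a two-way barrier, matching the semantics described in the
// comment block at the top of this file. The inc/dec variants below use
// the same loop shape but without barriers (strasm_nobarrier is empty).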

template <>
inline int64_t Atomic::specialized_add<int64_t>(int64_t add_value, volatile int64_t* dest) {
  long result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (int64_t) result;
}


template <>
inline void Atomic::specialized_inc<int32_t>(volatile int32_t* dest) {
  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

template <>
inline void Atomic::specialized_inc<int64_t>(volatile int64_t* dest) {
  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}


template <>
inline void Atomic::specialized_dec<int32_t>(volatile int32_t* dest) {
  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}


template <>
inline void Atomic::specialized_dec<int64_t>(volatile int64_t* dest) {
  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}


template <>
inline int32_t Atomic::specialized_xchg<int32_t>(int32_t exchange_value, volatile int32_t* dest) {
  // Note that xchg_ptr doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (int32_t) old_value;
}
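
// Note on the operand encoding above: lwarx/stwcx. exist only in indexed
// form (EA = RA + RB), so a zero register is passed as the index, and the
// "b" constraint keeps dest out of r0, where the hardware would read RA
// as a literal 0. Roughly, the loop performs (store_conditional again
// being a hypothetical stand-in for stwcx.):
//
//   do {
//     old_value = *dest;                                 // lwarx
//   } while (!store_conditional(dest, exchange_value));  // stwcx. + bne-
//
// Unlike add, the trailing barrier here is a full sync rather than isync,
// so the exchange is also ordered against subsequent loads (Store|Load).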

template <>
inline int64_t Atomic::specialized_xchg<int64_t>(int64_t exchange_value, volatile int64_t* dest) {
  // Note that xchg_ptr doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  long old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (int64_t) old_value;
}


inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
template <>
inline int8_t Atomic::specialized_cmpxchg<int8_t>(int8_t exchange_value, volatile int8_t* dest, int8_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int8_t)(unsigned char)old_value;
}
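
// How the byte cmpxchg above works, given that lwarx/stwcx. operate on
// words: the LL/SC loop runs on the aligned 32-bit word containing the
// byte (dest_base), with shift_amount selecting the byte lane in an
// endian-aware way. A rough sketch (store_conditional is hypothetical):
//
//   word = *dest_base;                      // lwarx
//   byte = (word >> shift_amount) & 0xff;   // srd + clrldi
//   if (byte != masked_compare_val) goto exit;
//   word ^= xor_value;                      // flips exactly the bits that
//                                           // differ between compare and
//                                           // exchange value in that lane,
//                                           // turning one into the other
//   store_conditional(dest_base, word);     // stwcx., retry on failure
//
// The "simple guard" (a plain lbz/lwz/ld plus compare before the loop in
// these cmpxchg variants) bails out early on an obvious mismatch without
// taking a reservation, which keeps the failing path cheap.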

template <>
inline int32_t Atomic::specialized_cmpxchg<int32_t>(int32_t exchange_value, volatile int32_t* dest, int32_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int32_t) old_value;
}


template <>
inline int64_t Atomic::specialized_cmpxchg<int64_t>(int64_t exchange_value, volatile int64_t* dest, int64_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  long old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int64_t) old_value;
}


#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP