/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
#define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP

#ifndef _LP64
#error "Atomic currently only implemented for PPC64"
#endif

// Implementation of class Atomic

//
// machine barrier instructions:
//
// - ppc_sync    two-way memory barrier, aka fence
// - ppc_lwsync  orders  Store|Store,
//                        Load|Store,
//                        Load|Load,
//               but not Store|Load
// - ppc_eieio   orders memory accesses for device memory (only)
// - ppc_isync   invalidates speculatively executed instructions
//               From the POWER ISA 2.06 documentation:
//                "[...] an isync instruction prevents the execution of
//                instructions following the isync until instructions
//                preceding the isync have completed, [...]"
//               From IBM's AIX assembler reference:
//                "The isync [...] instructions causes the processor to
//                refetch any instructions that might have been fetched
//                prior to the isync instruction. The instruction isync
//                causes the processor to wait for all previous instructions
//                to complete. Then any instructions already fetched are
//                discarded and instruction processing continues in the
//                environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - ppc_release  orders Store|Store,  (maps to ppc_lwsync)
//                        Load|Store
// - ppc_acquire  orders  Load|Store,  (maps to ppc_lwsync)
//                        Load|Load
// - ppc_fence    orders Store|Store,  (maps to ppc_sync)
//                        Load|Store,
//                        Load|Load,
//                       Store|Load
//

#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""
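
// Illustrative sketch only (exposition, not part of the build): the
// fragments above compose into the canonical PPC fenced
// read-modify-write pattern used by the operations below. Register
// names are placeholders:
//
//   lwsync                    // release: prior accesses complete first
//   1: lwarx   Rt, 0, Rdest   // load word and set reservation
//      add     Rt, Rt, Rval   // compute the updated value
//      stwcx.  Rt, 0, Rdest   // store only if the reservation still holds
//      bne-    1b             // reservation lost: retry the whole update
//   isync                     // acquire: discard speculatively fetched
//                             // instructions (see the ISA quote above)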
template <>
inline int32_t Atomic::specialized_add<int32_t>(int32_t add_value, volatile int32_t* dest) {
  unsigned int result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (int32_t) result;
}


template <>
inline int64_t Atomic::specialized_add<int64_t>(int64_t add_value, volatile int64_t* dest) {
  long result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (int64_t) result;
}

template <>
inline void Atomic::specialized_inc<int32_t>(volatile int32_t* dest) {
  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);
}

template <>
inline void Atomic::specialized_inc<int64_t>(volatile int64_t* dest) {
  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);
}

template <>
inline void Atomic::specialized_dec<int32_t>(volatile int32_t* dest) {
  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);
}

template <>
inline void Atomic::specialized_dec<int64_t>(volatile int64_t* dest) {
  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);
}
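
// Hypothetical usage sketch (illustrative only): callers go through the
// generic front end in atomic.hpp, which dispatches to the
// specializations in this file, e.g.:
//
//   volatile int32_t counter = 0;
//   int32_t v = Atomic::add(1, &counter);  // -> specialized_add<int32_t>,
//                                          //    fully fenced
//   Atomic::inc(&counter);                 // -> specialized_inc<int32_t>,
//                                          //    no ordering guarantees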
template <>
inline int32_t Atomic::specialized_xchg<int32_t>(int32_t exchange_value, volatile int32_t* dest) {
  // Note that xchg_ptr doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (int32_t) old_value;
}

template <>
inline int64_t Atomic::specialized_xchg<int64_t>(int64_t exchange_value, volatile int64_t* dest) {
  // Note that xchg_ptr doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  long old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (int64_t) old_value;
}

inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
template <>
inline int8_t Atomic::specialized_cmpxchg<int8_t>(int8_t exchange_value, volatile int8_t* dest, int8_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int8_t)(unsigned char)old_value;
}
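
// Worked example for the byte cmpxchg above (illustrative; the address
// is hypothetical): on a big-endian machine with dest == 0x1006,
//   dest_base    = 0x1006 & ~3         = 0x1004
//   shift_amount = ((~0x1006) & 3) * 8 = 1 * 8 = 8
// so the addressed byte occupies bits 8..15 of the containing word.
// srd by 8 followed by clrldi ..., 56 isolates exactly that byte for
// the comparison, and since xor_value differs from zero only in that
// byte lane, the xor rewrites it to exchange_value inside value32
// without disturbing the three neighbouring bytes.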
template <>
inline int32_t Atomic::specialized_cmpxchg<int32_t>(int32_t exchange_value, volatile int32_t* dest, int32_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int32_t) old_value;
}

template <>
inline int64_t Atomic::specialized_cmpxchg<int64_t>(int64_t exchange_value, volatile int64_t* dest, int64_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  long old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int64_t) old_value;
}

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP