/*
 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP
#define OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

// Implementation of class atomic

inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }

inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }

inline jlong Atomic::load(const volatile jlong* src) { return *src; }

//
// machine barrier instructions:
//
// - sync            two-way memory barrier, aka fence
// - lwsync          orders  Store|Store,
//                            Load|Store,
//                            Load|Load,
//                   but not Store|Load
// - eieio           orders memory accesses for device memory (only)
// - isync           invalidates speculatively executed instructions
//                   From the POWER ISA 2.06 documentation:
//                    "[...] an isync instruction prevents the execution of
//                   instructions following the isync until instructions
//                   preceding the isync have completed, [...]"
//                   From IBM's AIX assembler reference:
//                    "The isync [...] instructions causes the processor to
//                   refetch any instructions that might have been fetched
//                   prior to the isync instruction. The instruction isync
//                   causes the processor to wait for all previous instructions
//                   to complete. Then any instructions already fetched are
//                   discarded and instruction processing continues in the
//                   environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release         orders Store|Store,       (maps to lwsync)
//                           Load|Store
// - acquire         orders  Load|Store,       (maps to lwsync)
//                           Load|Load
// - fence           orders Store|Store,       (maps to sync)
//                           Load|Store,
//                           Load|Load,
//                          Store|Load
//

#define strasm_sync                       "\n  sync    \n"
#define strasm_lwsync                     "\n  lwsync  \n"
#define strasm_isync                      "\n  isync   \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""
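
// Illustrative sketch of how the semantic barriers above pair up ('ready'
// and 'payload' are hypothetical variables, not part of this file): release
// places its barrier *before* the publishing store, acquire places its
// barrier *after* the consuming load.
//
//   payload = 42;
//   __asm__ __volatile__ (strasm_release : : : "memory");  // lwsync
//   ready = 1;                                             // publish
//   ...
//   while (ready == 0) { }                                 // consume
//   __asm__ __volatile__ (strasm_acquire : : : "memory");  // lwsync
//   // payload is guaranteed to read as 42 here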

template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest) const;
};

template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}


template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}

template<>
struct Atomic::PlatformAdd<2>: Atomic::AddShortUsingInt {};
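
// The lwarx/stwcx. (and ldarx/stdcx.) pairs above form PPC's
// load-reserve/store-conditional retry loop; in C-like pseudocode
// (illustrative only, 'store_conditional' is a hypothetical helper):
//
//   do {
//     old = *dest;                             // lwarx: load and reserve
//     tmp = old + add_value;
//   } while (!store_conditional(dest, tmp));   // stwcx. fails, and bne- 1b
//                                              // retries, if the reservation
//                                              // was lost to another CPU
//
// The leading lwsync gives the operation release semantics; the trailing
// bne-/isync idiom is the usual PPC acquire sequence after such a loop.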

inline void Atomic::inc    (volatile jint* dest) {

  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile intptr_t* dest) {

  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile void* dest) {
  inc_ptr((volatile intptr_t*)dest);
}


inline void Atomic::dec    (volatile jint* dest) {

  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {

  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile void* dest) {
  dec_ptr((volatile intptr_t*)dest);
}

inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {

  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (jint) old_value;
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {

  // Note that xchg_ptr doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  long old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (intptr_t) old_value;
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}

inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}
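
// With the default conservative ordering, each cmpxchg below is therefore
// bracketed as
//
//   sync                       // cmpxchg_pre_membar(order)
//   <compare-and-swap loop>
//   sync                       // cmpxchg_post_membar(order)
//
// i.e. the 'fence_cmpxchg_fence' shape described in the comments that
// follow, while memory_order_relaxed elides both barriers.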

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r"   (old_value),
      [value32]             "=&r"   (value32),
                            "=m"    (*dest),
                            "=m"    (*dest_base)
    /* in */
    : [dest]                "b"     (dest),
      [dest_base]           "b"     (dest_base),
      [shift_amount]        "r"     (shift_amount),
      [masked_compare_val]  "r"     (masked_compare_val),
      [xor_value]           "r"     (xor_value),
                            "m"     (*dest),
                            "m"     (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
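
// Worked example for the byte trick above (little-endian, hypothetical
// address): for dest == dest_base + 2, shift_amount is 2 * 8 == 16, so
// srd/clrldi extract bits [23:16] of value32, i.e. exactly the byte at
// dest. Because xor_value == (masked_compare_val ^ masked_exchange_val)
// << 16, the xor rewrites that byte from compare_value to exchange_value
// and leaves the other three bytes of the word untouched, so the stwcx.
// publishes a word that differs from the loaded one only in the target
// byte.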

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}

template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r"   (old_value),
                        "=m"    (*dest)
    /* in */
    : [dest]            "b"     (dest),
      [zero]            "r"     (zero),
      [compare_value]   "r"     (compare_value),
      [exchange_value]  "r"     (exchange_value),
                        "m"     (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP