 79      case memory_order_acq_rel: __asm__ __volatile__ ("lwsync" : : : "memory"); break;
 80      default /*conservative*/ : __asm__ __volatile__ ("sync"   : : : "memory"); break;
 81    }
 82  }
 83
 84  inline void post_membar(atomic_memory_order order) {
 85    switch (order) {
 86      case memory_order_relaxed:
 87      case memory_order_release: break;
 88      case memory_order_acquire:
 89      case memory_order_acq_rel: __asm__ __volatile__ ("isync" : : : "memory"); break;
 90      default /*conservative*/ : __asm__ __volatile__ ("sync"  : : : "memory"); break;
 91    }
 92  }
 93
 94
 95  template<size_t byte_size>
 96  struct Atomic::PlatformAdd
 97    : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
 98  {
 99    template<typename I, typename D>
100    D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
101  };
102
103  template<>
104  template<typename I, typename D>
105  inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
106                                                 atomic_memory_order order) const {
107    STATIC_ASSERT(4 == sizeof(I));
108    STATIC_ASSERT(4 == sizeof(D));
109
110    D result;
111
112    pre_membar(order);
113
114    __asm__ __volatile__ (
115      "1: lwarx  %0, 0, %2    \n"
116      "   add    %0, %0, %1   \n"
117      "   stwcx. %0, 0, %2    \n"
118      "   bne-   1b           \n"
119      : /*%0*/"=&r" (result)
120      : /*%1*/"r" (add_value), /*%2*/"r" (dest)
121      : "cc", "memory" );
122
123    post_membar(order);
124
125    return result;
126  }
127
128
129  template<>
130  template<typename I, typename D>
131  inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
132                                                 atomic_memory_order order) const {
133    STATIC_ASSERT(8 == sizeof(I));
134    STATIC_ASSERT(8 == sizeof(D));
135
136    D result;
137
138    pre_membar(order);
139
140    __asm__ __volatile__ (
141      "1: ldarx  %0, 0, %2    \n"
142      "   add    %0, %0, %1   \n"
143      "   stdcx. %0, 0, %2    \n"
144      "   bne-   1b           \n"
145      : /*%0*/"=&r" (result)
146      : /*%1*/"r" (add_value), /*%2*/"r" (dest)
147      : "cc", "memory" );
148
149    post_membar(order);
150
151    return result;
152  }
153
154  template<>
155  template<typename T>
156  inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
157                                               T volatile* dest,
158                                               atomic_memory_order order) const {
159    // Note that xchg doesn't necessarily do an acquire
160    // (see synchronizer.cpp).
161
162    T old_value;
163    const uint64_t zero = 0;
164
165    pre_membar(order);
166
167    __asm__ __volatile__ (
168      /* atomic loop */
169      "1:                                               \n"
170      "   lwarx  %[old_value], %[dest], %[zero]         \n"
171      "   stwcx. %[exchange_value], %[dest], %[zero]    \n"
172      "   bne-   1b                                     \n"
173      /* exit */
174      "2:                                               \n"
175      /* out */
176      : [old_value]       "=&r" (old_value),
177                          "=m"  (*dest)
178      /* in */
179      : [dest]            "b"   (dest),
180        [zero]            "r"   (zero),
181        [exchange_value]  "r"   (exchange_value),
182                          "m"   (*dest)
183      /* clobber */
184      : "cc",
185        "memory"
186      );
187
188    post_membar(order);
189
190    return old_value;
191  }
192
193  template<>
194  template<typename T>
195  inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
196                                               T volatile* dest,
197                                               atomic_memory_order order) const {
198    STATIC_ASSERT(8 == sizeof(T));
199    // Note that xchg doesn't necessarily do an acquire
200    // (see synchronizer.cpp).
201
202    T old_value;
203    const uint64_t zero = 0;
204
205    pre_membar(order);
206
207    __asm__ __volatile__ (
208      /* atomic loop */
209      "1:                                               \n"
210      "   ldarx  %[old_value], %[dest], %[zero]         \n"
211      "   stdcx. %[exchange_value], %[dest], %[zero]    \n"
212      "   bne-   1b                                     \n"
213      /* exit */
214      "2:                                               \n"
215      /* out */
216      : [old_value]       "=&r" (old_value),
217                          "=m"  (*dest)
218      /* in */
219      : [dest]            "b"   (dest),
220        [zero]            "r"   (zero),
221        [exchange_value]  "r"   (exchange_value),
222                          "m"   (*dest)
223      /* clobber */
224      : "cc",
225        "memory"
226      );
227
228    post_membar(order);
229
230    return old_value;
231  }
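// The exchanges above show the pattern used by every operation in this header:
// pre_membar(order), a lwarx/stwcx. (or ldarx/stdcx.) retry loop, then
// post_membar(order).  Below is a minimal standalone sketch of that pattern for
// a 4-byte exchange with conservative ordering (full "sync" on both sides),
// assuming GCC inline asm on 64-bit PPC; the function name is illustrative and
// not a HotSpot API.
#include <stdint.h>

inline int32_t xchg_int32_conservative(int32_t exchange_value,
                                       int32_t volatile* dest) {
  int32_t old_value;
  const uint64_t zero = 0;
  __asm__ __volatile__ ("sync" : : : "memory");    // pre_membar(conservative)
  __asm__ __volatile__ (
    "1:                                            \n"
    "   lwarx  %[old_value], %[dest], %[zero]      \n"  // load word, take reservation
    "   stwcx. %[exchange_value], %[dest], %[zero] \n"  // store only if reservation still held
    "   bne-   1b                                  \n"  // reservation lost: retry
    : [old_value] "=&r" (old_value), "=m" (*dest)
    : [dest] "b" (dest), [zero] "r" (zero),
      [exchange_value] "r" (exchange_value), "m" (*dest)
    : "cc", "memory");
  __asm__ __volatile__ ("sync" : : : "memory");    // post_membar(conservative)
  return old_value;
}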
232
233  template<>
234  template<typename T>
235  inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
236                                                  T volatile* dest,
237                                                  T compare_value,
238                                                  atomic_memory_order order) const {
239    STATIC_ASSERT(1 == sizeof(T));
240
241    // Note that cmpxchg guarantees a two-way memory barrier across
242    // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
243    // specified otherwise (see atomic.hpp).
244
245    // Using 32 bit internally.
246    volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);
247
248  #ifdef VM_LITTLE_ENDIAN
249    const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
250  #else
251    const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
252  #endif
253    const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
254                       masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
255                       xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;
256
257    unsigned int old_value, value32;
285      /* in */
286      : [dest]                "b" (dest),
287        [dest_base]           "b" (dest_base),
288        [shift_amount]        "r" (shift_amount),
289        [masked_compare_val]  "r" (masked_compare_val),
290        [xor_value]           "r" (xor_value),
291                              "m" (*dest),
292                              "m" (*dest_base)
293      /* clobber */
294      : "cc",
295        "memory"
296      );
297
298    post_membar(order);
299
300    return PrimitiveConversions::cast<T>((unsigned char)old_value);
301  }
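// The 1-byte cmpxchg above works on the aligned 32-bit word containing the
// byte: shift_amount selects the byte lane (the #ifdef flips the lane for big
// endian), and because the selected byte is known to equal masked_compare_val
// when the store is attempted, XOR-ing the whole word with xor_value rewrites
// exactly that byte to masked_exchange_val.  A plain C++ sketch of that
// arithmetic (illustration only, no atomicity; names mirror the code above):
#include <stdint.h>

inline uint32_t replace_matched_byte(uint32_t value32,        // aligned word read from dest_base
                                     unsigned int shift_amount,
                                     unsigned char compare_value,
                                     unsigned char exchange_value) {
  const unsigned int masked_compare_val  = compare_value;
  const unsigned int masked_exchange_val = exchange_value;
  const unsigned int xor_value =
      (masked_compare_val ^ masked_exchange_val) << shift_amount;
  // Precondition established by the asm loop before storing:
  // ((value32 >> shift_amount) & 0xff) == masked_compare_val.
  return value32 ^ xor_value;   // selected byte becomes masked_exchange_val; others unchanged
}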
302
303  template<>
304  template<typename T>
305  inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
306                                                  T volatile* dest,
307                                                  T compare_value,
308                                                  atomic_memory_order order) const {
309    STATIC_ASSERT(4 == sizeof(T));
310
311    // Note that cmpxchg guarantees a two-way memory barrier across
312    // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
313    // specified otherwise (see atomic.hpp).
314
315    T old_value;
316    const uint64_t zero = 0;
317
318    pre_membar(order);
319
320    __asm__ __volatile__ (
321      /* simple guard */
322      "   lwz    %[old_value], 0(%[dest])               \n"
323      "   cmpw   %[compare_value], %[old_value]         \n"
324      "   bne-   2f                                     \n"
325      /* atomic loop */
326      "1:                                               \n"
327      "   lwarx  %[old_value], %[dest], %[zero]         \n"
335      : [old_value]       "=&r" (old_value),
336                          "=m"  (*dest)
337      /* in */
338      : [dest]            "b"   (dest),
339        [zero]            "r"   (zero),
340        [compare_value]   "r"   (compare_value),
341        [exchange_value]  "r"   (exchange_value),
342                          "m"   (*dest)
343      /* clobber */
344      : "cc",
345        "memory"
346      );
347
348    post_membar(order);
349
350    return old_value;
351  }
352
353  template<>
354  template<typename T>
355  inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
356                                                  T volatile* dest,
357                                                  T compare_value,
358                                                  atomic_memory_order order) const {
359    STATIC_ASSERT(8 == sizeof(T));
360
361    // Note that cmpxchg guarantees a two-way memory barrier across
362    // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
363    // specified otherwise (see atomic.hpp).
364
365    T old_value;
366    const uint64_t zero = 0;
367
368    pre_membar(order);
369
370    __asm__ __volatile__ (
371      /* simple guard */
372      "   ld     %[old_value], 0(%[dest])               \n"
373      "   cmpd   %[compare_value], %[old_value]         \n"
374      "   bne-   2f                                     \n"
375      /* atomic loop */
376      "1:                                               \n"
377      "   ldarx  %[old_value], %[dest], %[zero]         \n"

 79      case memory_order_acq_rel: __asm__ __volatile__ ("lwsync" : : : "memory"); break;
 80      default /*conservative*/ : __asm__ __volatile__ ("sync"   : : : "memory"); break;
 81    }
 82  }
 83
 84  inline void post_membar(atomic_memory_order order) {
 85    switch (order) {
 86      case memory_order_relaxed:
 87      case memory_order_release: break;
 88      case memory_order_acquire:
 89      case memory_order_acq_rel: __asm__ __volatile__ ("isync" : : : "memory"); break;
 90      default /*conservative*/ : __asm__ __volatile__ ("sync"  : : : "memory"); break;
 91    }
 92  }
 93
 94
 95  template<size_t byte_size>
 96  struct Atomic::PlatformAdd
 97    : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
 98  {
 99    template<typename D, typename I>
100    D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
101  };
102
103  template<>
104  template<typename D, typename I>
105  inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
106                                                 atomic_memory_order order) const {
107    STATIC_ASSERT(4 == sizeof(I));
108    STATIC_ASSERT(4 == sizeof(D));
109
110    D result;
111
112    pre_membar(order);
113
114    __asm__ __volatile__ (
115      "1: lwarx  %0, 0, %2    \n"
116      "   add    %0, %0, %1   \n"
117      "   stwcx. %0, 0, %2    \n"
118      "   bne-   1b           \n"
119      : /*%0*/"=&r" (result)
120      : /*%1*/"r" (add_value), /*%2*/"r" (dest)
121      : "cc", "memory" );
122
123    post_membar(order);
124
125    return result;
126  }
127
128
129  template<>
130  template<typename D, typename I>
131  inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
132                                                 atomic_memory_order order) const {
133    STATIC_ASSERT(8 == sizeof(I));
134    STATIC_ASSERT(8 == sizeof(D));
135
136    D result;
137
138    pre_membar(order);
139
140    __asm__ __volatile__ (
141      "1: ldarx  %0, 0, %2    \n"
142      "   add    %0, %0, %1   \n"
143      "   stdcx. %0, 0, %2    \n"
144      "   bne-   1b           \n"
145      : /*%0*/"=&r" (result)
146      : /*%1*/"r" (add_value), /*%2*/"r" (dest)
147      : "cc", "memory" );
148
149    post_membar(order);
150
151    return result;
152  }
153
154  template<>
155  template<typename T>
156  inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
157                                               T exchange_value,
158                                               atomic_memory_order order) const {
159    // Note that xchg doesn't necessarily do an acquire
160    // (see synchronizer.cpp).
161
162    T old_value;
163    const uint64_t zero = 0;
164
165    pre_membar(order);
166
167    __asm__ __volatile__ (
168      /* atomic loop */
169      "1:                                               \n"
170      "   lwarx  %[old_value], %[dest], %[zero]         \n"
171      "   stwcx. %[exchange_value], %[dest], %[zero]    \n"
172      "   bne-   1b                                     \n"
173      /* exit */
174      "2:                                               \n"
175      /* out */
176      : [old_value]       "=&r" (old_value),
177                          "=m"  (*dest)
178      /* in */
179      : [dest]            "b"   (dest),
180        [zero]            "r"   (zero),
181        [exchange_value]  "r"   (exchange_value),
182                          "m"   (*dest)
183      /* clobber */
184      : "cc",
185        "memory"
186      );
187
188    post_membar(order);
189
190    return old_value;
191  }
192
193  template<>
194  template<typename T>
195  inline T Atomic::PlatformXchg<8>::operator()(T volatile* dest,
196                                               T exchange_value,
197                                               atomic_memory_order order) const {
198    STATIC_ASSERT(8 == sizeof(T));
199    // Note that xchg doesn't necessarily do an acquire
200    // (see synchronizer.cpp).
201
202    T old_value;
203    const uint64_t zero = 0;
204
205    pre_membar(order);
206
207    __asm__ __volatile__ (
208      /* atomic loop */
209      "1:                                               \n"
210      "   ldarx  %[old_value], %[dest], %[zero]         \n"
211      "   stdcx. %[exchange_value], %[dest], %[zero]    \n"
212      "   bne-   1b                                     \n"
213      /* exit */
214      "2:                                               \n"
215      /* out */
216      : [old_value]       "=&r" (old_value),
217                          "=m"  (*dest)
218      /* in */
219      : [dest]            "b"   (dest),
220        [zero]            "r"   (zero),
221        [exchange_value]  "r"   (exchange_value),
222                          "m"   (*dest)
223      /* clobber */
224      : "cc",
225        "memory"
226      );
227
228    post_membar(order);
229
230    return old_value;
231  }
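// For comparison only: the value-returning semantics of the PlatformCmpxchg
// specializations below (return the old value; two-way barriers unless a weaker
// order is requested) expressed with the GCC __atomic builtin.  This is an
// illustrative rough equivalent, not what HotSpot uses -- the hand-written asm
// below keeps explicit control over the barriers and the ll/sc loop.
#include <stdint.h>

inline int32_t cmpxchg_int32_seq_cst(int32_t volatile* dest,
                                     int32_t compare_value,
                                     int32_t exchange_value) {
  int32_t expected = compare_value;
  __atomic_compare_exchange_n(dest, &expected, exchange_value,
                              /*weak=*/false,
                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
  return expected;   // old value: equals compare_value on success, current value on failure
}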
232
233  template<>
234  template<typename T>
235  inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest,
236                                                  T compare_value,
237                                                  T exchange_value,
238                                                  atomic_memory_order order) const {
239    STATIC_ASSERT(1 == sizeof(T));
240
241    // Note that cmpxchg guarantees a two-way memory barrier across
242    // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
243    // specified otherwise (see atomic.hpp).
244
245    // Using 32 bit internally.
246    volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);
247
248  #ifdef VM_LITTLE_ENDIAN
249    const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
250  #else
251    const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
252  #endif
253    const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
254                       masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
255                       xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;
256
257    unsigned int old_value, value32;
285      /* in */
286      : [dest]                "b" (dest),
287        [dest_base]           "b" (dest_base),
288        [shift_amount]        "r" (shift_amount),
289        [masked_compare_val]  "r" (masked_compare_val),
290        [xor_value]           "r" (xor_value),
291                              "m" (*dest),
292                              "m" (*dest_base)
293      /* clobber */
294      : "cc",
295        "memory"
296      );
297
298    post_membar(order);
299
300    return PrimitiveConversions::cast<T>((unsigned char)old_value);
301  }
302
303  template<>
304  template<typename T>
305  inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
306                                                  T compare_value,
307                                                  T exchange_value,
308                                                  atomic_memory_order order) const {
309    STATIC_ASSERT(4 == sizeof(T));
310
311    // Note that cmpxchg guarantees a two-way memory barrier across
312    // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
313    // specified otherwise (see atomic.hpp).
314
315    T old_value;
316    const uint64_t zero = 0;
317
318    pre_membar(order);
319
320    __asm__ __volatile__ (
321      /* simple guard */
322      "   lwz    %[old_value], 0(%[dest])               \n"
323      "   cmpw   %[compare_value], %[old_value]         \n"
324      "   bne-   2f                                     \n"
325      /* atomic loop */
326      "1:                                               \n"
327      "   lwarx  %[old_value], %[dest], %[zero]         \n"
335      : [old_value]       "=&r" (old_value),
336                          "=m"  (*dest)
337      /* in */
338      : [dest]            "b"   (dest),
339        [zero]            "r"   (zero),
340        [compare_value]   "r"   (compare_value),
341        [exchange_value]  "r"   (exchange_value),
342                          "m"   (*dest)
343      /* clobber */
344      : "cc",
345        "memory"
346      );
347
348    post_membar(order);
349
350    return old_value;
351  }
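// A restatement of the guarded compare-and-exchange above in plain C++
// (illustration only -- this version is NOT atomic).  load_reserve() and
// store_conditional() are hypothetical stand-ins for lwarx/ldarx and
// stwcx./stdcx.; the "simple guard" is the plain load that rejects a
// mismatching value before any reservation is taken.
template<typename T> T    load_reserve(T volatile* dest);               // stands for lwarx/ldarx
template<typename T> bool store_conditional(T volatile* dest, T value); // stands for stwcx./stdcx.

template<typename T>
T cmpxchg_flow(T volatile* dest, T compare_value, T exchange_value) {
  T old_value = *dest;                       // simple guard: plain lwz/ld
  if (old_value != compare_value) {
    return old_value;                        // bne- 2f: exit without a reservation
  }
  do {
    old_value = load_reserve(dest);          // 1: lwarx/ldarx
    if (old_value != compare_value) {
      return old_value;                      // cmpw/cmpd + bne- 2f
    }
  } while (!store_conditional(dest, exchange_value));   // stwcx./stdcx., bne- 1b on failure
  return old_value;                          // 2: stored; old_value == compare_value
}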
352
353  template<>
354  template<typename T>
355  inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
356                                                  T compare_value,
357                                                  T exchange_value,
358                                                  atomic_memory_order order) const {
359    STATIC_ASSERT(8 == sizeof(T));
360
361    // Note that cmpxchg guarantees a two-way memory barrier across
362    // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
363    // specified otherwise (see atomic.hpp).
364
365    T old_value;
366    const uint64_t zero = 0;
367
368    pre_membar(order);
369
370    __asm__ __volatile__ (
371      /* simple guard */
372      "   ld     %[old_value], 0(%[dest])               \n"
373      "   cmpd   %[compare_value], %[old_value]         \n"
374      "   bne-   2f                                     \n"
375      /* atomic loop */
376      "1:                                               \n"
377      "   ldarx  %[old_value], %[dest], %[zero]         \n"