 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP
#define OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

// Implementation of class Atomic

inline void Atomic::store    (jbyte    store_value, jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, jint*     dest) { *dest = store_value; }
inline void Atomic::store    (jlong    store_value, jlong*    dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void*    store_value, void*     dest) { *(void**)dest = store_value; }

inline void Atomic::store    (jbyte    store_value, volatile jbyte*    dest) { *dest = store_value; }
inline void Atomic::store    (jshort   store_value, volatile jshort*   dest) { *dest = store_value; }
inline void Atomic::store    (jint     store_value, volatile jint*     dest) { *dest = store_value; }
inline void Atomic::store    (jlong    store_value, volatile jlong*    dest) { *dest = store_value; }
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void*    store_value, volatile void*     dest) { *(void* volatile *)dest = store_value; }

inline jlong Atomic::load(const volatile jlong* src) { return *src; }

//
// machine barrier instructions:
//
// - sync            two-way memory barrier, aka fence
// - lwsync          orders  Store|Store,
//                            Load|Store,
//                            Load|Load,
//                   but not Store|Load
// - eieio           orders memory accesses for device memory (only)
// - isync           invalidates speculatively executed instructions
//                   From the POWER ISA 2.06 documentation:
//                    "[...] an isync instruction prevents the execution of
//                   instructions following the isync until instructions
//                   preceding the isync have completed, [...]"
//                   From IBM's AIX assembler reference:
//                    "The isync [...] instructions causes the processor to
//                   refetch any instructions that might have been fetched
//                   prior to the isync instruction. The instruction isync
//                   causes the processor to wait for all previous instructions
//                   to complete. Then any instructions already fetched are
//                   discarded and instruction processing continues in the
//                   environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release         orders Store|Store,       (maps to lwsync)
//                           Load|Store
// - acquire         orders  Load|Store,       (maps to lwsync)
//                           Load|Load
// - fence           orders Store|Store,       (maps to sync)
//                           Load|Store,
//                           Load|Load,
//                          Store|Load
//

#define strasm_sync                       "\n sync \n"
#define strasm_lwsync                     "\n lwsync \n"
#define strasm_isync                      "\n isync \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""
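
// Illustrative sketch (not part of this file's interface): these string
// fragments are concatenated into inline-assembler templates below; a
// standalone two-way fence could likewise be written as
//   __asm__ __volatile__ (strasm_fence : : : "memory");
// which expands to a single "sync" instruction.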

inline jint Atomic::add    (jint add_value, volatile jint* dest) {

  unsigned int result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (jint) result;
}
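
// The lwarx/stwcx. pair above is PPC's load-reserve/store-conditional
// idiom: lwarx places a reservation on the word addressed by %2, and
// stwcx. only succeeds if that reservation still holds, so a racing
// writer forces the bne- to retry the loop. The leading lwsync orders
// earlier accesses before the update; the trailing isync, following the
// conditional branch on the stwcx. result, gives the completed update
// acquire semantics.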


inline intptr_t Atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest) {

  long result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (intptr_t) result;
}

inline void* Atomic::add_ptr(intptr_t add_value, volatile void* dest) {
  return (void*)add_ptr(add_value, (volatile intptr_t*)dest);
}


inline void Atomic::inc    (volatile jint* dest) {

  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile intptr_t* dest) {

  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::inc_ptr(volatile void* dest) {
  inc_ptr((volatile intptr_t*)dest);
}


inline void Atomic::dec    (volatile jint* dest) {

  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile intptr_t* dest) {

  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

inline void Atomic::dec_ptr(volatile void* dest) {
  dec_ptr((volatile intptr_t*)dest);
}

inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {

  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r" (old_value),
                        "=m"  (*dest)
    /* in */
    : [dest]            "b"   (dest),
      [zero]            "r"   (zero),
      [exchange_value]  "r"   (exchange_value),
                        "m"   (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (jint) old_value;
}

inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {

  // Note that xchg_ptr doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  long old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r" (old_value),
                        "=m"  (*dest)
    /* in */
    : [dest]            "b"   (dest),
      [zero]            "r"   (zero),
      [exchange_value]  "r"   (exchange_value),
                        "m"   (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (intptr_t) old_value;
}

inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
  return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}

inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}
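
// Sketch of the resulting fencing (from the two helpers above): for any
// order other than memory_order_relaxed, a cmpxchg below executes as
//   sync; <compare-and-exchange loop>; sync
// i.e. a conservative two-way fence on each side, while
// memory_order_relaxed elides both fences.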

#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
inline jbyte Atomic::cmpxchg(jbyte exchange_value, volatile jbyte* dest, jbyte compare_value, cmpxchg_memory_order order) {

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r" (old_value),
      [value32]             "=&r" (value32),
                            "=m"  (*dest),
                            "=m"  (*dest_base)
    /* in */
    : [dest]                "b"   (dest),
      [dest_base]           "b"   (dest_base),
      [shift_amount]        "r"   (shift_amount),
      [masked_compare_val]  "r"   (masked_compare_val),
      [xor_value]           "r"   (xor_value),
                            "m"   (*dest),
                            "m"   (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (jbyte)(unsigned char)old_value;
}
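
// Worked example (hypothetical little-endian address): if the low two bits
// of dest are 0b10, shift_amount = 2 * 8 = 16, so the target byte occupies
// bits 16..23 of the aligned word loaded from dest_base. When the guarded
// compare succeeds, xor-ing value32 with xor_value rewrites exactly those
// eight bits from compare_value to exchange_value, leaving the other three
// bytes of *dest_base untouched by the stwcx..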

inline jint Atomic::cmpxchg(jint exchange_value, volatile jint* dest, jint compare_value, cmpxchg_memory_order order) {

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r" (old_value),
                        "=m"  (*dest)
    /* in */
    : [dest]            "b"   (dest),
      [zero]            "r"   (zero),
      [compare_value]   "r"   (compare_value),
      [exchange_value]  "r"   (exchange_value),
                        "m"   (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (jint) old_value;
}
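
// The "simple guard" above is a contention optimization: the plain lwz and
// cmpw reject a stale compare_value without taking a reservation, so a CAS
// that would fail anyway does not bounce the cache line through lwarx.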

inline jlong Atomic::cmpxchg(jlong exchange_value, volatile jlong* dest, jlong compare_value, cmpxchg_memory_order order) {

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  long old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r" (old_value),
                        "=m"  (*dest)
    /* in */
    : [dest]            "b"   (dest),
      [zero]            "r"   (zero),
      [compare_value]   "r"   (compare_value),
      [exchange_value]  "r"   (exchange_value),
                        "m"   (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (jlong) old_value;
}

inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value, cmpxchg_memory_order order) {
  return (intptr_t)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
}

inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value, cmpxchg_memory_order order) {
  return (void*)cmpxchg((jlong)exchange_value, (volatile jlong*)dest, (jlong)compare_value, order);
}
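
// Both cmpxchg_ptr variants above funnel through the 8-byte (jlong)
// cmpxchg, which is safe here because this file is PPC64-only (see the
// #ifndef PPC64 guard at the top), so pointers are 64 bits wide.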

#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP
#define OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP

#ifndef PPC64
#error "Atomic currently only implemented for PPC64"
#endif

// Implementation of class Atomic

//
// machine barrier instructions:
//
// - sync            two-way memory barrier, aka fence
// - lwsync          orders  Store|Store,
//                            Load|Store,
//                            Load|Load,
//                   but not Store|Load
// - eieio           orders memory accesses for device memory (only)
// - isync           invalidates speculatively executed instructions
//                   From the POWER ISA 2.06 documentation:
//                    "[...] an isync instruction prevents the execution of
//                   instructions following the isync until instructions
//                   preceding the isync have completed, [...]"
//                   From IBM's AIX assembler reference:
//                    "The isync [...] instructions causes the processor to
//                   refetch any instructions that might have been fetched
//                   prior to the isync instruction. The instruction isync
//                   causes the processor to wait for all previous instructions
//                   to complete. Then any instructions already fetched are
//                   discarded and instruction processing continues in the
//                   environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
// - release         orders Store|Store,       (maps to lwsync)
//                           Load|Store
// - acquire         orders  Load|Store,       (maps to lwsync)
//                           Load|Load
// - fence           orders Store|Store,       (maps to sync)
//                           Load|Store,
//                           Load|Load,
//                          Store|Load
//

#define strasm_sync                       "\n sync \n"
#define strasm_lwsync                     "\n lwsync \n"
#define strasm_isync                      "\n isync \n"
#define strasm_release                    strasm_lwsync
#define strasm_acquire                    strasm_lwsync
#define strasm_fence                      strasm_sync
#define strasm_nobarrier                  ""
#define strasm_nobarrier_clobber_memory   ""

template <>
inline int32_t Atomic::specialized_add<int32_t>(int32_t add_value, volatile int32_t* dest) {
  unsigned int result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (int32_t) result;
}
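
// Minimal usage sketch, assuming the generic Atomic::add front end in
// atomic.hpp dispatches to this 4-byte specialization:
//   volatile int32_t counter = 0;
//   int32_t now = Atomic::add((int32_t)1, &counter);  // now == 1, fully fenced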


template <>
inline int64_t Atomic::specialized_add<int64_t>(int64_t add_value, volatile int64_t* dest) {
  long result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx   %0,  0, %2    \n"
    "   add     %0, %0, %1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return (int64_t) result;
}


template <>
inline void Atomic::specialized_inc<int32_t>(volatile int32_t* dest) {
  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}

template <>
inline void Atomic::specialized_inc<int64_t>(volatile int64_t* dest) {
  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0,  1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}


template <>
inline void Atomic::specialized_dec<int32_t>(volatile int32_t* dest) {
  unsigned int temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: lwarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stwcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}


template <>
inline void Atomic::specialized_dec<int64_t>(volatile int64_t* dest) {
  long temp;

  __asm__ __volatile__ (
    strasm_nobarrier
    "1: ldarx   %0,  0, %2    \n"
    "   addic   %0, %0, -1    \n"
    "   stdcx.  %0,  0, %2    \n"
    "   bne-    1b            \n"
    strasm_nobarrier
    : /*%0*/"=&r" (temp), "=m" (*dest)
    : /*%2*/"r" (dest), "m" (*dest)
    : "cc" strasm_nobarrier_clobber_memory);

}
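
// Note: unlike add and xchg, the inc/dec specializations above are
// bracketed with strasm_nobarrier on both sides, so they provide
// atomicity only and impose no ordering; any required ordering must come
// from the caller (see the semantic barrier mapping at the top of this
// file).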


template <>
inline int32_t Atomic::specialized_xchg<int32_t>(int32_t exchange_value, volatile int32_t* dest) {
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r" (old_value),
                        "=m"  (*dest)
    /* in */
    : [dest]            "b"   (dest),
      [zero]            "r"   (zero),
      [exchange_value]  "r"   (exchange_value),
                        "m"   (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (int32_t) old_value;
}


template <>
inline int64_t Atomic::specialized_xchg<int64_t>(int64_t exchange_value, volatile int64_t* dest) {
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  long old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* sync */
    strasm_sync
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r" (old_value),
                        "=m"  (*dest)
    /* in */
    : [dest]            "b"   (dest),
      [zero]            "r"   (zero),
      [exchange_value]  "r"   (exchange_value),
                        "m"   (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return (int64_t) old_value;
}


inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
  if (order != memory_order_relaxed) {
    __asm__ __volatile__ (
      /* fence */
      strasm_sync
      );
  }
}

#define VM_HAS_SPECIALIZED_CMPXCHG_BYTE
template <>
inline int8_t Atomic::specialized_cmpxchg<int8_t>(int8_t exchange_value, volatile int8_t* dest, int8_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount        = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount        = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val  = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value           = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lbz     %[old_value], 0(%[dest])                  \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* atomic loop */
    "1:                                                   \n"
    "   lwarx   %[value32], 0, %[dest_base]               \n"
    /* extract byte and compare */
    "   srd     %[old_value], %[value32], %[shift_amount] \n"
    "   clrldi  %[old_value], %[old_value], 56            \n"
    "   cmpw    %[masked_compare_val], %[old_value]       \n"
    "   bne-    2f                                        \n"
    /* replace byte and try to store */
    "   xor     %[value32], %[xor_value], %[value32]      \n"
    "   stwcx.  %[value32], 0, %[dest_base]               \n"
    "   bne-    1b                                        \n"
    /* exit */
    "2:                                                   \n"
    /* out */
    : [old_value]           "=&r" (old_value),
      [value32]             "=&r" (value32),
                            "=m"  (*dest),
                            "=m"  (*dest_base)
    /* in */
    : [dest]                "b"   (dest),
      [dest_base]           "b"   (dest_base),
      [shift_amount]        "r"   (shift_amount),
      [masked_compare_val]  "r"   (masked_compare_val),
      [xor_value]           "r"   (xor_value),
                            "m"   (*dest),
                            "m"   (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int8_t)(unsigned char)old_value;
}

template <>
inline int32_t Atomic::specialized_cmpxchg<int32_t>(int32_t exchange_value, volatile int32_t* dest, int32_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  unsigned int old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   lwz     %[old_value], 0(%[dest])                \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   lwarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpw    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stwcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r" (old_value),
                        "=m"  (*dest)
    /* in */
    : [dest]            "b"   (dest),
      [zero]            "r"   (zero),
      [compare_value]   "r"   (compare_value),
      [exchange_value]  "r"   (exchange_value),
                        "m"   (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int32_t) old_value;
}


template <>
inline int64_t Atomic::specialized_cmpxchg<int64_t>(int64_t exchange_value, volatile int64_t* dest, int64_t compare_value, cmpxchg_memory_order order) {
  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  long old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    "   ld      %[old_value], 0(%[dest])                \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    /* atomic loop */
    "1:                                                 \n"
    "   ldarx   %[old_value], %[dest], %[zero]          \n"
    "   cmpd    %[compare_value], %[old_value]          \n"
    "   bne-    2f                                      \n"
    "   stdcx.  %[exchange_value], %[dest], %[zero]     \n"
    "   bne-    1b                                      \n"
    /* exit */
    "2:                                                 \n"
    /* out */
    : [old_value]       "=&r" (old_value),
                        "=m"  (*dest)
    /* in */
    : [dest]            "b"   (dest),
      [zero]            "r"   (zero),
      [compare_value]   "r"   (compare_value),
      [exchange_value]  "r"   (exchange_value),
                        "m"   (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return (int64_t) old_value;
}


#undef strasm_sync
#undef strasm_lwsync
#undef strasm_isync
#undef strasm_release
#undef strasm_acquire
#undef strasm_fence
#undef strasm_nobarrier
#undef strasm_nobarrier_clobber_memory

#endif // OS_CPU_LINUX_PPC_VM_ATOMIC_LINUX_PPC_HPP