1 /*
2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
27 #define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
28
29 #ifndef _LP64
#error "Atomic currently only implemented for PPC64"
31 #endif
32
33 #include "utilities/debug.hpp"
34
35 // Implementation of class atomic
36
37 //
38 // machine barrier instructions:
39 //
40 // - ppc_sync two-way memory barrier, aka fence
41 // - ppc_lwsync orders Store|Store,
42 // Load|Store,
43 // Load|Load,
44 // but not Store|Load
45 // - ppc_eieio orders memory accesses for device memory (only)
46 // - ppc_isync invalidates speculatively executed instructions
47 // From the POWER ISA 2.06 documentation:
48 // "[...] an isync instruction prevents the execution of
49 // instructions following the isync until instructions
50 // preceding the isync have completed, [...]"
51 // From IBM's AIX assembler reference:
52 // "The isync [...] instructions causes the processor to
53 // refetch any instructions that might have been fetched
54 // prior to the isync instruction. The instruction isync
55 // causes the processor to wait for all previous instructions
56 // to complete. Then any instructions already fetched are
57 // discarded and instruction processing continues in the
58 // environment established by the previous instructions."
59 //
60 // semantic barrier instructions:
61 // (as defined in orderAccess.hpp)
62 //
63 // - ppc_release orders Store|Store, (maps to ppc_lwsync)
64 // Load|Store
65 // - ppc_acquire orders Load|Store, (maps to ppc_lwsync)
66 // Load|Load
67 // - ppc_fence orders Store|Store, (maps to ppc_sync)
68 // Load|Store,
69 // Load|Load,
70 // Store|Load
71 //
72
73 #define strasm_sync "\n sync \n"
74 #define strasm_lwsync "\n lwsync \n"
75 #define strasm_isync "\n isync \n"
76 #define strasm_release strasm_lwsync
77 #define strasm_acquire strasm_lwsync
78 #define strasm_fence strasm_sync
79 #define strasm_nobarrier ""
80 #define strasm_nobarrier_clobber_memory ""
81
// Platform hook for atomic add.  Only add_and_fetch is implemented here;
// the AddAndFetch CRTP base (see atomic.hpp) derives fetch_and_add from it.
// Specializations below exist for 4- and 8-byte operands.
template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest) const;
};
89
// 32-bit atomic add: performs *dest += add_value atomically and returns
// the new value.  The lwarx/stwcx. loop is bracketed by lwsync before and
// isync after (see the barrier notes at the top of this file), giving the
// two-way ordering the Atomic interface promises by default.
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync                       // leading release-style barrier
    "1: lwarx %0, 0, %2 \n"             // load word from *dest with reservation
    " add %0, %0, %1 \n"                // result = old value + add_value
    " stwcx. %0, 0, %2 \n"              // store conditional on the reservation
    " bne- 1b \n"                       // reservation lost -> retry
    strasm_isync                        // trailing isync barrier
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}
111
112
// 64-bit atomic add: same structure as the 4-byte version, using the
// doubleword forms ldarx/stdcx.  Returns the value after the addition.
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync                       // leading release-style barrier
    "1: ldarx %0, 0, %2 \n"             // load doubleword with reservation
    " add %0, %0, %1 \n"                // result = old value + add_value
    " stdcx. %0, 0, %2 \n"              // store conditional on the reservation
    " bne- 1b \n"                       // reservation lost -> retry
    strasm_isync                        // trailing isync barrier
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}
134
// 32-bit atomic exchange: atomically stores exchange_value into *dest and
// returns the previous contents.  lwsync precedes the loop; a full sync
// follows it.
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;  // index register operand for lwarx/stwcx.

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1: \n"
    " lwarx %[old_value], %[dest], %[zero] \n"
    " stwcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* trailing barrier: a full sync is emitted here (the old comment
       claimed isync, but strasm_sync expands to "sync") */
    strasm_sync
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}
173
// 64-bit atomic exchange: atomically stores exchange_value into *dest and
// returns the previous contents, using the doubleword forms ldarx/stdcx.
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;  // index register operand for ldarx/stdcx.

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1: \n"
    " ldarx %[old_value], %[dest], %[zero] \n"
    " stdcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* trailing barrier: a full sync is emitted here (the old comment
       claimed isync, but strasm_sync expands to "sync") */
    strasm_sync
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}
212
213 inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
214 if (order != memory_order_relaxed) {
215 __asm__ __volatile__ (
216 /* fence */
217 strasm_sync
218 );
219 }
220 }
221
222 inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
223 if (order != memory_order_relaxed) {
224 __asm__ __volatile__ (
225 /* fence */
226 strasm_sync
227 );
228 }
229 }
230
// 1-byte compare-and-exchange.  PPC has no byte-sized lwarx, so the byte is
// updated via a 32-bit lwarx/stwcx. on the enclosing aligned word: the
// target byte is extracted by shift+mask for the compare, and replaced by
// XOR-ing in (compare ^ exchange) << shift, which flips exactly the target
// byte when it currently equals compare_value.  Returns the old byte value.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  // dest_base = enclosing word, aligned down to a 4-byte boundary.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  // Little endian: byte offset within the word maps directly to bit shift.
  const unsigned int shift_amount = ((uintptr_t)dest & 3) * 8;
#else
  // Big endian: byte order within the word is reversed, so invert the offset.
  const unsigned int shift_amount = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val = ((unsigned int)(unsigned char)compare_value),
    masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
    // XOR-ing this into the word swaps compare_value for exchange_value
    // in the target byte, leaving the other three bytes untouched.
    xor_value = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard: cheap non-reserving load; bail out early on mismatch */
    " lbz %[old_value], 0(%[dest]) \n"
    " cmpw %[masked_compare_val], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " lwarx %[value32], 0, %[dest_base] \n"
    /* extract byte and compare */
    " srd %[old_value], %[value32], %[shift_amount] \n"
    " clrldi %[old_value], %[old_value], 56 \n"
    " cmpw %[masked_compare_val], %[old_value] \n"
    " bne- 2f \n"
    /* replace byte and try to store */
    " xor %[value32], %[xor_value], %[value32] \n"
    " stwcx. %[value32], 0, %[dest_base] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      [value32] "=&r" (value32),
      "=m" (*dest),
      "=m" (*dest_base)
    /* in */
    : [dest] "b" (dest),
      [dest_base] "b" (dest_base),
      [shift_amount] "r" (shift_amount),
      [masked_compare_val] "r" (masked_compare_val),
      [xor_value] "r" (xor_value),
      "m" (*dest),
      "m" (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
300
// 32-bit compare-and-exchange: if *dest == compare_value, store
// exchange_value; always returns the value observed in *dest.  A plain
// load guards the reserving loop so a mismatch costs no reservation.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;  // index register operand for lwarx/stwcx.

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard: cheap non-reserving load; bail out early on mismatch */
    " lwz %[old_value], 0(%[dest]) \n"
    " cmpw %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " lwarx %[old_value], %[dest], %[zero] \n"
    " cmpw %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    " stwcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [compare_value] "r" (compare_value),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}
350
// 64-bit compare-and-exchange: doubleword twin of the 4-byte version,
// using ld/cmpd/ldarx/stdcx.  Returns the value observed in *dest.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;  // index register operand for ldarx/stdcx.

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard: cheap non-reserving load; bail out early on mismatch */
    " ld %[old_value], 0(%[dest]) \n"
    " cmpd %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " ldarx %[old_value], %[dest], %[zero] \n"
    " cmpd %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    " stdcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [compare_value] "r" (compare_value),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}
400
401 #undef strasm_sync
402 #undef strasm_lwsync
403 #undef strasm_isync
404 #undef strasm_release
405 #undef strasm_acquire
406 #undef strasm_fence
407 #undef strasm_nobarrier
408 #undef strasm_nobarrier_clobber_memory
409
410 #endif // OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
|
1 /*
2 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2012, 2018 SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
27 #define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
28
29 #ifndef PPC64
30 #error "Atomic currently only implemented for PPC64"
31 #endif
32
33 #include "utilities/debug.hpp"
34
35 // Implementation of class atomic
36
37 //
38 // machine barrier instructions:
39 //
40 // - sync two-way memory barrier, aka fence
41 // - lwsync orders Store|Store,
42 // Load|Store,
43 // Load|Load,
44 // but not Store|Load
45 // - eieio orders memory accesses for device memory (only)
46 // - isync invalidates speculatively executed instructions
47 // From the POWER ISA 2.06 documentation:
48 // "[...] an isync instruction prevents the execution of
49 // instructions following the isync until instructions
50 // preceding the isync have completed, [...]"
51 // From IBM's AIX assembler reference:
52 // "The isync [...] instructions causes the processor to
53 // refetch any instructions that might have been fetched
54 // prior to the isync instruction. The instruction isync
55 // causes the processor to wait for all previous instructions
56 // to complete. Then any instructions already fetched are
57 // discarded and instruction processing continues in the
58 // environment established by the previous instructions."
59 //
60 // semantic barrier instructions:
61 // (as defined in orderAccess.hpp)
62 //
63 // - release orders Store|Store, (maps to lwsync)
64 // Load|Store
65 // - acquire orders Load|Store, (maps to lwsync)
66 // Load|Load
67 // - fence orders Store|Store, (maps to sync)
68 // Load|Store,
69 // Load|Load,
70 // Store|Load
71 //
72
73 #define strasm_sync "\n sync \n"
74 #define strasm_lwsync "\n lwsync \n"
75 #define strasm_isync "\n isync \n"
76 #define strasm_release strasm_lwsync
77 #define strasm_acquire strasm_lwsync
78 #define strasm_fence strasm_sync
79 #define strasm_nobarrier ""
80 #define strasm_nobarrier_clobber_memory ""
81
82 inline void pre_membar(atomic_memory_order order) {
83 switch (order) {
84 case memory_order_relaxed:
85 case memory_order_acquire: break;
86 case memory_order_release:
87 case memory_order_acq_rel: __asm__ __volatile__ (strasm_lwsync); break;
88 default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
89 }
90 }
91
92 inline void post_membar(atomic_memory_order order) {
93 switch (order) {
94 case memory_order_relaxed:
95 case memory_order_release: break;
96 case memory_order_acquire:
97 case memory_order_acq_rel: __asm__ __volatile__ (strasm_isync); break;
98 default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
99 }
100 }
101
102
// Platform hook for atomic add.  Only add_and_fetch is implemented here;
// the AddAndFetch CRTP base (see atomic.hpp) derives fetch_and_add from it.
// Barrier strength is selected per call from the atomic_memory_order.
template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};
110
// 32-bit atomic add: performs *dest += add_value atomically and returns
// the new value.  Barriers around the lwarx/stwcx. loop are chosen from
// 'order' by pre_membar()/post_membar().
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: lwarx %0, 0, %2 \n"             // load word from *dest with reservation
    " add %0, %0, %1 \n"                // result = old value + add_value
    " stwcx. %0, 0, %2 \n"              // store conditional on the reservation
    " bne- 1b \n"                       // reservation lost -> retry
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}
135
136
// 64-bit atomic add: same structure as the 4-byte version, using the
// doubleword forms ldarx/stdcx.  Returns the value after the addition.
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: ldarx %0, 0, %2 \n"             // load doubleword with reservation
    " add %0, %0, %1 \n"                // result = old value + add_value
    " stdcx. %0, 0, %2 \n"              // store conditional on the reservation
    " bne- 1b \n"                       // reservation lost -> retry
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}
161
162 template<>
163 template<typename T>
164 inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
165 T volatile* dest,
166 atomic_memory_order order) const {
167 // Note that xchg doesn't necessarily do an acquire
168 // (see synchronizer.cpp).
169
170 T old_value;
171 const uint64_t zero = 0;
172
173 pre_membar(order);
174
175 __asm__ __volatile__ (
176 /* atomic loop */
177 "1: \n"
178 " lwarx %[old_value], %[dest], %[zero] \n"
179 " stwcx. %[exchange_value], %[dest], %[zero] \n"
180 " bne- 1b \n"
181 /* exit */
182 "2: \n"
183 /* out */
184 : [old_value] "=&r" (old_value),
185 "=m" (*dest)
186 /* in */
187 : [dest] "b" (dest),
188 [zero] "r" (zero),
189 [exchange_value] "r" (exchange_value),
190 "m" (*dest)
191 /* clobber */
192 : "cc",
193 "memory"
194 );
195
196 post_membar(order);
197
198 return old_value;
199 }
200
// 64-bit atomic exchange: atomically stores exchange_value into *dest and
// returns the previous contents, using the doubleword forms ldarx/stdcx.
// Barriers are chosen from 'order' by pre_membar()/post_membar().
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;  // index register operand for ldarx/stdcx.

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1: \n"
    " ldarx %[old_value], %[dest], %[zero] \n"
    " stdcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
240
// 1-byte compare-and-exchange.  PPC has no byte-sized lwarx, so the byte is
// updated via a 32-bit lwarx/stwcx. on the enclosing aligned word: the
// target byte is extracted by shift+mask for the compare, and replaced by
// XOR-ing in (compare ^ exchange) << shift, which flips exactly the target
// byte when it currently equals compare_value.  Returns the old byte value.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  // dest_base = enclosing word, aligned down to a 4-byte boundary.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  // Little endian: byte offset within the word maps directly to bit shift.
  const unsigned int shift_amount = ((uintptr_t)dest & 3) * 8;
#else
  // Big endian: byte order within the word is reversed, so invert the offset.
  const unsigned int shift_amount = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val = ((unsigned int)(unsigned char)compare_value),
    masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
    // XOR-ing this into the word swaps compare_value for exchange_value
    // in the target byte, leaving the other three bytes untouched.
    xor_value = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard: cheap non-reserving load; bail out early on mismatch */
    " lbz %[old_value], 0(%[dest]) \n"
    " cmpw %[masked_compare_val], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " lwarx %[value32], 0, %[dest_base] \n"
    /* extract byte and compare */
    " srd %[old_value], %[value32], %[shift_amount] \n"
    " clrldi %[old_value], %[old_value], 56 \n"
    " cmpw %[masked_compare_val], %[old_value] \n"
    " bne- 2f \n"
    /* replace byte and try to store */
    " xor %[value32], %[xor_value], %[value32] \n"
    " stwcx. %[value32], 0, %[dest_base] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      [value32] "=&r" (value32),
      "=m" (*dest),
      "=m" (*dest_base)
    /* in */
    : [dest] "b" (dest),
      [dest_base] "b" (dest_base),
      [shift_amount] "r" (shift_amount),
      [masked_compare_val] "r" (masked_compare_val),
      [xor_value] "r" (xor_value),
      "m" (*dest),
      "m" (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
310
// 32-bit compare-and-exchange: if *dest == compare_value, store
// exchange_value; always returns the value observed in *dest.  A plain
// load guards the reserving loop so a mismatch costs no reservation.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;  // index register operand for lwarx/stwcx.

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard: cheap non-reserving load; bail out early on mismatch */
    " lwz %[old_value], 0(%[dest]) \n"
    " cmpw %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " lwarx %[old_value], %[dest], %[zero] \n"
    " cmpw %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    " stwcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [compare_value] "r" (compare_value),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
360
// 64-bit compare-and-exchange: doubleword twin of the 4-byte version,
// using ld/cmpd/ldarx/stdcx.  Returns the value observed in *dest.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;  // index register operand for ldarx/stdcx.

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard: cheap non-reserving load; bail out early on mismatch */
    " ld %[old_value], 0(%[dest]) \n"
    " cmpd %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " ldarx %[old_value], %[dest], %[zero] \n"
    " cmpd %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    " stdcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [compare_value] "r" (compare_value),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
410
411 #undef strasm_sync
412 #undef strasm_lwsync
413 #undef strasm_isync
414 #undef strasm_release
415 #undef strasm_acquire
416 #undef strasm_fence
417 #undef strasm_nobarrier
418 #undef strasm_nobarrier_clobber_memory
419
420 #endif // OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
|