1 /*
2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
27 #define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
28
29 #ifndef _LP64
#error "Atomic currently only implemented for PPC64"
31 #endif
32
33 #include "utilities/debug.hpp"
34
35 // Implementation of class atomic
36
37 //
38 // machine barrier instructions:
39 //
40 // - ppc_sync two-way memory barrier, aka fence
41 // - ppc_lwsync orders Store|Store,
42 // Load|Store,
43 // Load|Load,
44 // but not Store|Load
45 // - ppc_eieio orders memory accesses for device memory (only)
46 // - ppc_isync invalidates speculatively executed instructions
47 // From the POWER ISA 2.06 documentation:
48 // "[...] an isync instruction prevents the execution of
49 // instructions following the isync until instructions
50 // preceding the isync have completed, [...]"
51 // From IBM's AIX assembler reference:
52 // "The isync [...] instructions causes the processor to
53 // refetch any instructions that might have been fetched
54 // prior to the isync instruction. The instruction isync
55 // causes the processor to wait for all previous instructions
56 // to complete. Then any instructions already fetched are
57 // discarded and instruction processing continues in the
58 // environment established by the previous instructions."
59 //
60 // semantic barrier instructions:
61 // (as defined in orderAccess.hpp)
62 //
63 // - ppc_release orders Store|Store, (maps to ppc_lwsync)
64 // Load|Store
65 // - ppc_acquire orders Load|Store, (maps to ppc_lwsync)
66 // Load|Load
67 // - ppc_fence orders Store|Store, (maps to ppc_sync)
68 // Load|Store,
69 // Load|Load,
70 // Store|Load
71 //
72
73 #define strasm_sync "\n sync \n"
74 #define strasm_lwsync "\n lwsync \n"
75 #define strasm_isync "\n isync \n"
76 #define strasm_release strasm_lwsync
77 #define strasm_acquire strasm_lwsync
78 #define strasm_fence strasm_sync
79 #define strasm_nobarrier ""
80 #define strasm_nobarrier_clobber_memory ""
81
// Platform hook for atomic add on AIX/PPC64.  The CRTP base
// Atomic::AddAndFetch presumably derives the rest of the add interface
// from the add_and_fetch() primitive supplied by the 4- and 8-byte
// specializations below — TODO confirm against atomic.hpp.
template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  // Atomically performs *dest += add_value and returns the new value.
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest) const;
};
89
// 32-bit atomic add-and-return.
// lwarx reserves the word at dest, add computes the new value, and
// stwcx. stores it only if the reservation still holds; bne- retries
// on failure.  lwsync before and isync after bracket the operation
// (see the barrier notes at the top of this file).
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: lwarx %0, 0, %2 \n"
    " add %0, %0, %1 \n"
    " stwcx. %0, 0, %2 \n"
    " bne- 1b \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}
111
112
// 64-bit atomic add-and-return; identical to the 4-byte variant but
// using the doubleword reservation pair ldarx/stdcx.
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  __asm__ __volatile__ (
    strasm_lwsync
    "1: ldarx %0, 0, %2 \n"
    " add %0, %0, %1 \n"
    " stdcx. %0, 0, %2 \n"
    " bne- 1b \n"
    strasm_isync
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  return result;
}
134
// 32-bit atomic exchange: stores exchange_value into *dest and returns
// the previous value.  The zero register operand supplies the index for
// the lwarx/stwcx. RA,RB addressing form.
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(4 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1: \n"
    " lwarx %[old_value], %[dest], %[zero] \n"
    " stwcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* trailing barrier: note this is a full sync, not just isync */
    strasm_sync
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}
173
// 64-bit atomic exchange; same structure as the 4-byte variant with
// doubleword ldarx/stdcx.
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  __asm__ __volatile__ (
    /* lwsync */
    strasm_lwsync
    /* atomic loop */
    "1: \n"
    " ldarx %[old_value], %[dest], %[zero] \n"
    " stdcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* trailing barrier: note this is a full sync, not just isync */
    strasm_sync
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  return old_value;
}
212
213 inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
214 if (order != memory_order_relaxed) {
215 __asm__ __volatile__ (
216 /* fence */
217 strasm_sync
218 );
219 }
220 }
221
222 inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
223 if (order != memory_order_relaxed) {
224 __asm__ __volatile__ (
225 /* fence */
226 strasm_sync
227 );
228 }
229 }
230
// 1-byte compare-and-exchange.  PPC has no byte-sized ll/sc pair here,
// so the byte is updated via a 32-bit lwarx/stwcx. on the aligned word
// containing it: the byte is extracted by shift+mask for the compare,
// and replaced by XOR-ing in (compare ^ exchange) << shift, which
// flips exactly the target byte and leaves the other three unchanged.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard: cheap non-reserved load; bail early on mismatch */
    " lbz %[old_value], 0(%[dest]) \n"
    " cmpw %[masked_compare_val], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " lwarx %[value32], 0, %[dest_base] \n"
    /* extract byte and compare */
    " srd %[old_value], %[value32], %[shift_amount] \n"
    " clrldi %[old_value], %[old_value], 56 \n"
    " cmpw %[masked_compare_val], %[old_value] \n"
    " bne- 2f \n"
    /* replace byte and try to store */
    " xor %[value32], %[xor_value], %[value32] \n"
    " stwcx. %[value32], 0, %[dest_base] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      [value32] "=&r" (value32),
      "=m" (*dest),
      "=m" (*dest_base)
    /* in */
    : [dest] "b" (dest),
      [dest_base] "b" (dest_base),
      [shift_amount] "r" (shift_amount),
      [masked_compare_val] "r" (masked_compare_val),
      [xor_value] "r" (xor_value),
      "m" (*dest),
      "m" (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
300
// 32-bit compare-and-exchange: if *dest == compare_value, store
// exchange_value; always returns the observed previous value.
// A plain lwz guard avoids taking a reservation when the compare
// would fail anyway.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    " lwz %[old_value], 0(%[dest]) \n"
    " cmpw %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " lwarx %[old_value], %[dest], %[zero] \n"
    " cmpw %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    " stwcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [compare_value] "r" (compare_value),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}
350
// 64-bit compare-and-exchange; same structure as the 4-byte variant
// with doubleword ld/ldarx/stdcx. and signed doubleword compare (cmpd).
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                cmpxchg_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  cmpxchg_pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    " ld %[old_value], 0(%[dest]) \n"
    " cmpd %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " ldarx %[old_value], %[dest], %[zero] \n"
    " cmpd %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    " stdcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [compare_value] "r" (compare_value),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  cmpxchg_post_membar(order);

  return old_value;
}
400
401 #undef strasm_sync
402 #undef strasm_lwsync
403 #undef strasm_isync
404 #undef strasm_release
405 #undef strasm_acquire
406 #undef strasm_fence
407 #undef strasm_nobarrier
408 #undef strasm_nobarrier_clobber_memory
409
410 #endif // OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
|
1 /*
2 * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2012, 2018 SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
27 #define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
28
29 #ifndef PPC64
30 #error "Atomic currently only implemented for PPC64"
31 #endif
32
33 #include "utilities/debug.hpp"
34
35 // Implementation of class atomic
36
37 //
38 // machine barrier instructions:
39 //
40 // - sync two-way memory barrier, aka fence
41 // - lwsync orders Store|Store,
42 // Load|Store,
43 // Load|Load,
44 // but not Store|Load
45 // - eieio orders memory accesses for device memory (only)
46 // - isync invalidates speculatively executed instructions
47 // From the POWER ISA 2.06 documentation:
48 // "[...] an isync instruction prevents the execution of
49 // instructions following the isync until instructions
50 // preceding the isync have completed, [...]"
51 // From IBM's AIX assembler reference:
52 // "The isync [...] instructions causes the processor to
53 // refetch any instructions that might have been fetched
54 // prior to the isync instruction. The instruction isync
55 // causes the processor to wait for all previous instructions
56 // to complete. Then any instructions already fetched are
57 // discarded and instruction processing continues in the
58 // environment established by the previous instructions."
59 //
60 // semantic barrier instructions:
61 // (as defined in orderAccess.hpp)
62 //
63 // - release orders Store|Store, (maps to lwsync)
64 // Load|Store
65 // - acquire orders Load|Store, (maps to lwsync)
66 // Load|Load
67 // - fence orders Store|Store, (maps to sync)
68 // Load|Store,
69 // Load|Load,
70 // Store|Load
71 //
72
73 #define strasm_sync "\n sync \n"
74 #define strasm_lwsync "\n lwsync \n"
75 #define strasm_isync "\n isync \n"
76 #define strasm_release strasm_lwsync
77 #define strasm_acquire strasm_lwsync
78 #define strasm_fence strasm_sync
79 #define strasm_nobarrier ""
80 #define strasm_nobarrier_clobber_memory ""
81
82 inline void pre_membar(atomic_memory_order order) {
83 switch (order) {
84 case memory_order_relaxed:
85 case memory_order_consume:
86 case memory_order_acquire: break;
87 case memory_order_release:
88 case memory_order_acq_rel: __asm__ __volatile__ (strasm_lwsync); break;
89 default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
90 }
91 }
92
93 inline void post_membar(atomic_memory_order order) {
94 switch (order) {
95 case memory_order_relaxed:
96 case memory_order_consume:
97 case memory_order_release: break;
98 case memory_order_acquire:
99 case memory_order_acq_rel: __asm__ __volatile__ (strasm_isync); break;
100 default /*conservative*/ : __asm__ __volatile__ (strasm_sync); break;
101 }
102 }
103
104
// Platform hook for atomic add on AIX/PPC64.  The CRTP base
// Atomic::AddAndFetch presumably derives the rest of the add interface
// from the add_and_fetch() primitive supplied by the 4- and 8-byte
// specializations below — TODO confirm against atomic.hpp.
template<size_t byte_size>
struct Atomic::PlatformAdd
  : Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
  // Atomically performs *dest += add_value and returns the new value,
  // with barriers selected by 'order' (see pre_membar/post_membar).
  template<typename I, typename D>
  D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};
112
// 32-bit atomic add-and-return.  lwarx reserves the word at dest,
// add computes the new value, and stwcx. stores it only if the
// reservation still holds; bne- retries on failure.  Ordering barriers
// are emitted around the loop per 'order'.
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(I));
  STATIC_ASSERT(4 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: lwarx %0, 0, %2 \n"
    " add %0, %0, %1 \n"
    " stwcx. %0, 0, %2 \n"
    " bne- 1b \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}
137
138
// 64-bit atomic add-and-return; identical to the 4-byte variant but
// using the doubleword reservation pair ldarx/stdcx.
template<>
template<typename I, typename D>
inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
                                               atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(I));
  STATIC_ASSERT(8 == sizeof(D));

  D result;

  pre_membar(order);

  __asm__ __volatile__ (
    "1: ldarx %0, 0, %2 \n"
    " add %0, %0, %1 \n"
    " stdcx. %0, 0, %2 \n"
    " bne- 1b \n"
    : /*%0*/"=&r" (result)
    : /*%1*/"r" (add_value), /*%2*/"r" (dest)
    : "cc", "memory" );

  post_membar(order);

  return result;
}
163
164 template<>
165 template<typename T>
166 inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
167 T volatile* dest,
168 atomic_memory_order order) const {
169 // Note that xchg doesn't necessarily do an acquire
170 // (see synchronizer.cpp).
171
172 T old_value;
173 const uint64_t zero = 0;
174
175 pre_membar(order);
176
177 __asm__ __volatile__ (
178 /* atomic loop */
179 "1: \n"
180 " lwarx %[old_value], %[dest], %[zero] \n"
181 " stwcx. %[exchange_value], %[dest], %[zero] \n"
182 " bne- 1b \n"
183 /* exit */
184 "2: \n"
185 /* out */
186 : [old_value] "=&r" (old_value),
187 "=m" (*dest)
188 /* in */
189 : [dest] "b" (dest),
190 [zero] "r" (zero),
191 [exchange_value] "r" (exchange_value),
192 "m" (*dest)
193 /* clobber */
194 : "cc",
195 "memory"
196 );
197
198 post_membar(order);
199
200 return old_value;
201 }
202
// 64-bit atomic exchange; same structure as the 4-byte variant with
// doubleword ldarx/stdcx.
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest,
                                             atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Note that xchg doesn't necessarily do an acquire
  // (see synchronizer.cpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* atomic loop */
    "1: \n"
    " ldarx %[old_value], %[dest], %[zero] \n"
    " stdcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
242
// 1-byte compare-and-exchange.  PPC has no byte-sized ll/sc pair here,
// so the byte is updated via a 32-bit lwarx/stwcx. on the aligned word
// containing it: the byte is extracted by shift+mask for the compare,
// and replaced by XOR-ing in (compare ^ exchange) << shift, which
// flips exactly the target byte and leaves the other three unchanged.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(1 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  // Using 32 bit internally.
  volatile int *dest_base = (volatile int*)((uintptr_t)dest & ~3);

#ifdef VM_LITTLE_ENDIAN
  const unsigned int shift_amount = ((uintptr_t)dest & 3) * 8;
#else
  const unsigned int shift_amount = ((~(uintptr_t)dest) & 3) * 8;
#endif
  const unsigned int masked_compare_val = ((unsigned int)(unsigned char)compare_value),
                     masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
                     xor_value = (masked_compare_val ^ masked_exchange_val) << shift_amount;

  unsigned int old_value, value32;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard: cheap non-reserved load; bail early on mismatch */
    " lbz %[old_value], 0(%[dest]) \n"
    " cmpw %[masked_compare_val], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " lwarx %[value32], 0, %[dest_base] \n"
    /* extract byte and compare */
    " srd %[old_value], %[value32], %[shift_amount] \n"
    " clrldi %[old_value], %[old_value], 56 \n"
    " cmpw %[masked_compare_val], %[old_value] \n"
    " bne- 2f \n"
    /* replace byte and try to store */
    " xor %[value32], %[xor_value], %[value32] \n"
    " stwcx. %[value32], 0, %[dest_base] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      [value32] "=&r" (value32),
      "=m" (*dest),
      "=m" (*dest_base)
    /* in */
    : [dest] "b" (dest),
      [dest_base] "b" (dest_base),
      [shift_amount] "r" (shift_amount),
      [masked_compare_val] "r" (masked_compare_val),
      [xor_value] "r" (xor_value),
      "m" (*dest),
      "m" (*dest_base)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
312
// 32-bit compare-and-exchange: if *dest == compare_value, store
// exchange_value; always returns the observed previous value.
// A plain lwz guard avoids taking a reservation when the compare
// would fail anyway.
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(4 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    " lwz %[old_value], 0(%[dest]) \n"
    " cmpw %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " lwarx %[old_value], %[dest], %[zero] \n"
    " cmpw %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    " stwcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [compare_value] "r" (compare_value),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
362
// 64-bit compare-and-exchange; same structure as the 4-byte variant
// with doubleword ld/ldarx/stdcx. and signed doubleword compare (cmpd).
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
                                                T volatile* dest,
                                                T compare_value,
                                                atomic_memory_order order) const {
  STATIC_ASSERT(8 == sizeof(T));

  // Note that cmpxchg guarantees a two-way memory barrier across
  // the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
  // specified otherwise (see atomic.hpp).

  T old_value;
  const uint64_t zero = 0;

  pre_membar(order);

  __asm__ __volatile__ (
    /* simple guard */
    " ld %[old_value], 0(%[dest]) \n"
    " cmpd %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    /* atomic loop */
    "1: \n"
    " ldarx %[old_value], %[dest], %[zero] \n"
    " cmpd %[compare_value], %[old_value] \n"
    " bne- 2f \n"
    " stdcx. %[exchange_value], %[dest], %[zero] \n"
    " bne- 1b \n"
    /* exit */
    "2: \n"
    /* out */
    : [old_value] "=&r" (old_value),
      "=m" (*dest)
    /* in */
    : [dest] "b" (dest),
      [zero] "r" (zero),
      [compare_value] "r" (compare_value),
      [exchange_value] "r" (exchange_value),
      "m" (*dest)
    /* clobber */
    : "cc",
      "memory"
    );

  post_membar(order);

  return old_value;
}
412
413 #undef strasm_sync
414 #undef strasm_lwsync
415 #undef strasm_isync
416 #undef strasm_release
417 #undef strasm_acquire
418 #undef strasm_fence
419 #undef strasm_nobarrier
420 #undef strasm_nobarrier_clobber_memory
421
422 #endif // OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
|