< prev index next >
src/hotspot/os_cpu/aix_ppc/atomic_aix_ppc.hpp
Print this page
rev 49986 : 8202080: Introduce ordering semantics for Atomic::add and other RMW atomics
Reviewed-by: lucy, rehn, dholmes
*** 1,8 ****
/*
! * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
! * Copyright (c) 2012, 2014 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
--- 1,8 ----
/*
! * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
! * Copyright (c) 2012, 2018 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*** 24,51 ****
*/
#ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
#define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
! #ifndef _LP64
! #error "Atomic currently only impleneted for PPC64"
#endif
#include "utilities/debug.hpp"
// Implementation of class atomic
//
// machine barrier instructions:
//
! // - ppc_sync two-way memory barrier, aka fence
! // - ppc_lwsync orders Store|Store,
// Load|Store,
// Load|Load,
// but not Store|Load
! // - ppc_eieio orders memory accesses for device memory (only)
! // - ppc_isync invalidates speculatively executed instructions
// From the POWER ISA 2.06 documentation:
// "[...] an isync instruction prevents the execution of
// instructions following the isync until instructions
// preceding the isync have completed, [...]"
// From IBM's AIX assembler reference:
--- 24,51 ----
*/
#ifndef OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
#define OS_CPU_AIX_OJDKPPC_VM_ATOMIC_AIX_PPC_HPP
! #ifndef PPC64
! #error "Atomic currently only implemented for PPC64"
#endif
#include "utilities/debug.hpp"
// Implementation of class atomic
//
// machine barrier instructions:
//
! // - sync two-way memory barrier, aka fence
! // - lwsync orders Store|Store,
// Load|Store,
// Load|Load,
// but not Store|Load
! // - eieio orders memory accesses for device memory (only)
! // - isync invalidates speculatively executed instructions
// From the POWER ISA 2.06 documentation:
// "[...] an isync instruction prevents the execution of
// instructions following the isync until instructions
// preceding the isync have completed, [...]"
// From IBM's AIX assembler reference:
*** 58,72 ****
// environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
! // - ppc_release orders Store|Store, (maps to ppc_lwsync)
// Load|Store
! // - ppc_acquire orders Load|Store, (maps to ppc_lwsync)
// Load|Load
! // - ppc_fence orders Store|Store, (maps to ppc_sync)
// Load|Store,
// Load|Load,
// Store|Load
//
--- 58,72 ----
// environment established by the previous instructions."
//
// semantic barrier instructions:
// (as defined in orderAccess.hpp)
//
! // - release orders Store|Store, (maps to lwsync)
// Load|Store
! // - acquire orders Load|Store, (maps to lwsync)
// Load|Load
! // - fence orders Store|Store, (maps to sync)
// Load|Store,
// Load|Load,
// Store|Load
//
*** 77,160 ****
#define strasm_acquire strasm_lwsync
#define strasm_fence strasm_sync
#define strasm_nobarrier ""
#define strasm_nobarrier_clobber_memory ""
template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
template<typename I, typename D>
! D add_and_fetch(I add_value, D volatile* dest) const;
};
template<>
template<typename I, typename D>
! inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) const {
STATIC_ASSERT(4 == sizeof(I));
STATIC_ASSERT(4 == sizeof(D));
D result;
__asm__ __volatile__ (
- strasm_lwsync
"1: lwarx %0, 0, %2 \n"
" add %0, %0, %1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
- strasm_isync
: /*%0*/"=&r" (result)
: /*%1*/"r" (add_value), /*%2*/"r" (dest)
: "cc", "memory" );
return result;
}
template<>
template<typename I, typename D>
! inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) const {
STATIC_ASSERT(8 == sizeof(I));
STATIC_ASSERT(8 == sizeof(D));
D result;
__asm__ __volatile__ (
- strasm_lwsync
"1: ldarx %0, 0, %2 \n"
" add %0, %0, %1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
- strasm_isync
: /*%0*/"=&r" (result)
: /*%1*/"r" (add_value), /*%2*/"r" (dest)
: "cc", "memory" );
return result;
}
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
! T volatile* dest) const {
! STATIC_ASSERT(4 == sizeof(T));
// Note that xchg doesn't necessarily do an acquire
// (see synchronizer.cpp).
T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
- /* lwsync */
- strasm_lwsync
/* atomic loop */
"1: \n"
" lwarx %[old_value], %[dest], %[zero] \n"
" stwcx. %[exchange_value], %[dest], %[zero] \n"
" bne- 1b \n"
- /* isync */
- strasm_sync
/* exit */
"2: \n"
/* out */
: [old_value] "=&r" (old_value),
"=m" (*dest)
--- 77,185 ----
#define strasm_acquire strasm_lwsync
#define strasm_fence strasm_sync
#define strasm_nobarrier ""
#define strasm_nobarrier_clobber_memory ""
+ // Emits the barrier that has to precede an atomic read-modify-write
+ // sequence for the requested ordering:
+ //   relaxed, acquire -> no leading barrier
+ //   release, acq_rel -> lwsync
+ //   anything else    -> sync (conservative, two-way fence)
+ // Each barrier carries a "memory" clobber so that it is a compiler-level
+ // barrier too: without it the compiler is free to move earlier loads and
+ // stores past the barrier instruction, which would defeat the ordering
+ // even though the instruction itself is emitted.
+ inline void pre_membar(atomic_memory_order order) {
+   switch (order) {
+     case memory_order_relaxed:
+     case memory_order_acquire: break;
+     case memory_order_release:
+     case memory_order_acq_rel: __asm__ __volatile__ (strasm_lwsync : : : "memory"); break;
+     default /*conservative*/ : __asm__ __volatile__ (strasm_sync : : : "memory"); break;
+   }
+ }
+
+ // Emits the barrier that has to follow an atomic read-modify-write
+ // sequence for the requested ordering:
+ //   relaxed, release -> no trailing barrier
+ //   acquire, acq_rel -> isync
+ //   anything else    -> sync (conservative, two-way fence)
+ // Each barrier carries a "memory" clobber so that it is a compiler-level
+ // barrier too: without it the compiler is free to hoist later loads and
+ // stores above the barrier instruction, which would defeat the ordering
+ // even though the instruction itself is emitted.
+ inline void post_membar(atomic_memory_order order) {
+   switch (order) {
+     case memory_order_relaxed:
+     case memory_order_release: break;
+     case memory_order_acquire:
+     case memory_order_acq_rel: __asm__ __volatile__ (strasm_isync : : : "memory"); break;
+     default /*conservative*/ : __asm__ __volatile__ (strasm_sync : : : "memory"); break;
+   }
+ }
+
+
// Platform-specific add for operands that are byte_size bytes wide.
// Derives from Atomic::AddAndFetch, parameterized on this type itself, and
// only declares add_and_fetch here; the definitions are supplied as explicit
// specializations per operand size.
template<size_t byte_size>
struct Atomic::PlatformAdd
: Atomic::AddAndFetch<Atomic::PlatformAdd<byte_size> >
{
// add_value: amount to add; dest: location to update;
// order: requested memory ordering for the operation.
template<typename I, typename D>
! D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const;
};
// 4-byte atomic add: adds add_value to *dest and returns the updated value.
// The barriers placed before and after the update are selected from 'order'
// by pre_membar() and post_membar().
template<>
template<typename I, typename D>
! inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest,
! atomic_memory_order order) const {
// Both the addend and the destination type must be exactly 4 bytes wide.
STATIC_ASSERT(4 == sizeof(I));
STATIC_ASSERT(4 == sizeof(D));
D result;
+ pre_membar(order);
+
// Load-reserve / store-conditional retry loop: load the word at dest,
// add add_value, try to store the sum, and branch back to label 1 if the
// conditional store did not succeed.
__asm__ __volatile__ (
"1: lwarx %0, 0, %2 \n"
" add %0, %0, %1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
// "=&r": early-clobber output, so result never shares a register with
// one of the inputs.
: /*%0*/"=&r" (result)
: /*%1*/"r" (add_value), /*%2*/"r" (dest)
// The sequence changes the condition register and writes memory.
: "cc", "memory" );
+ post_membar(order);
+
return result;
}
// 8-byte atomic add: adds add_value to *dest and returns the updated value.
// The barriers placed before and after the update are selected from 'order'
// by pre_membar() and post_membar().
template<>
template<typename I, typename D>
! inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest,
! atomic_memory_order order) const {
// Both the addend and the destination type must be exactly 8 bytes wide.
STATIC_ASSERT(8 == sizeof(I));
STATIC_ASSERT(8 == sizeof(D));
D result;
+ pre_membar(order);
+
// Load-reserve / store-conditional retry loop: load the doubleword at dest,
// add add_value, try to store the sum, and branch back to label 1 if the
// conditional store did not succeed.
__asm__ __volatile__ (
"1: ldarx %0, 0, %2 \n"
" add %0, %0, %1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
// "=&r": early-clobber output, so result never shares a register with
// one of the inputs.
: /*%0*/"=&r" (result)
: /*%1*/"r" (add_value), /*%2*/"r" (dest)
// The sequence changes the condition register and writes memory.
: "cc", "memory" );
+ post_membar(order);
+
return result;
}
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
! T volatile* dest,
! atomic_memory_order order) const {
// Note that xchg doesn't necessarily do an acquire
// (see synchronizer.cpp).
T old_value;
const uint64_t zero = 0;
+ pre_membar(order);
+
__asm__ __volatile__ (
/* atomic loop */
"1: \n"
" lwarx %[old_value], %[dest], %[zero] \n"
" stwcx. %[exchange_value], %[dest], %[zero] \n"
" bne- 1b \n"
/* exit */
"2: \n"
/* out */
: [old_value] "=&r" (old_value),
"=m" (*dest)
*** 166,199 ****
/* clobber */
: "cc",
"memory"
);
return old_value;
}
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
! T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
// Note that xchg doesn't necessarily do an acquire
// (see synchronizer.cpp).
T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
- /* lwsync */
- strasm_lwsync
/* atomic loop */
"1: \n"
" ldarx %[old_value], %[dest], %[zero] \n"
" stdcx. %[exchange_value], %[dest], %[zero] \n"
" bne- 1b \n"
- /* isync */
- strasm_sync
/* exit */
"2: \n"
/* out */
: [old_value] "=&r" (old_value),
"=m" (*dest)
--- 191,225 ----
/* clobber */
: "cc",
"memory"
);
+ post_membar(order);
+
return old_value;
}
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
! T volatile* dest,
! atomic_memory_order order) const {
STATIC_ASSERT(8 == sizeof(T));
// Note that xchg doesn't necessarily do an acquire
// (see synchronizer.cpp).
T old_value;
const uint64_t zero = 0;
+ pre_membar(order);
+
__asm__ __volatile__ (
/* atomic loop */
"1: \n"
" ldarx %[old_value], %[dest], %[zero] \n"
" stdcx. %[exchange_value], %[dest], %[zero] \n"
" bne- 1b \n"
/* exit */
"2: \n"
/* out */
: [old_value] "=&r" (old_value),
"=m" (*dest)
*** 205,241 ****
/* clobber */
: "cc",
"memory"
);
! return old_value;
! }
!
! inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {
! if (order != memory_order_relaxed) {
! __asm__ __volatile__ (
! /* fence */
! strasm_sync
! );
! }
! }
! inline void cmpxchg_post_membar(cmpxchg_memory_order order) {
! if (order != memory_order_relaxed) {
! __asm__ __volatile__ (
! /* fence */
! strasm_sync
! );
! }
}
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
T volatile* dest,
T compare_value,
! cmpxchg_memory_order order) const {
STATIC_ASSERT(1 == sizeof(T));
// Note that cmpxchg guarantees a two-way memory barrier across
// the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
// specified otherwise (see atomic.hpp).
--- 231,251 ----
/* clobber */
: "cc",
"memory"
);
! post_membar(order);
! return old_value;
}
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
T volatile* dest,
T compare_value,
! atomic_memory_order order) const {
STATIC_ASSERT(1 == sizeof(T));
// Note that cmpxchg guarantees a two-way memory barrier across
// the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
// specified otherwise (see atomic.hpp).
*** 252,262 ****
masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
xor_value = (masked_compare_val ^ masked_exchange_val) << shift_amount;
unsigned int old_value, value32;
! cmpxchg_pre_membar(order);
__asm__ __volatile__ (
/* simple guard */
" lbz %[old_value], 0(%[dest]) \n"
" cmpw %[masked_compare_val], %[old_value] \n"
--- 262,272 ----
masked_exchange_val = ((unsigned int)(unsigned char)exchange_value),
xor_value = (masked_compare_val ^ masked_exchange_val) << shift_amount;
unsigned int old_value, value32;
! pre_membar(order);
__asm__ __volatile__ (
/* simple guard */
" lbz %[old_value], 0(%[dest]) \n"
" cmpw %[masked_compare_val], %[old_value] \n"
*** 291,321 ****
/* clobber */
: "cc",
"memory"
);
! cmpxchg_post_membar(order);
return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
T volatile* dest,
T compare_value,
! cmpxchg_memory_order order) const {
STATIC_ASSERT(4 == sizeof(T));
// Note that cmpxchg guarantees a two-way memory barrier across
// the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
// specified otherwise (see atomic.hpp).
T old_value;
const uint64_t zero = 0;
! cmpxchg_pre_membar(order);
__asm__ __volatile__ (
/* simple guard */
" lwz %[old_value], 0(%[dest]) \n"
" cmpw %[compare_value], %[old_value] \n"
--- 301,331 ----
/* clobber */
: "cc",
"memory"
);
! post_membar(order);
return PrimitiveConversions::cast<T>((unsigned char)old_value);
}
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
T volatile* dest,
T compare_value,
! atomic_memory_order order) const {
STATIC_ASSERT(4 == sizeof(T));
// Note that cmpxchg guarantees a two-way memory barrier across
// the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
// specified otherwise (see atomic.hpp).
T old_value;
const uint64_t zero = 0;
! pre_membar(order);
__asm__ __volatile__ (
/* simple guard */
" lwz %[old_value], 0(%[dest]) \n"
" cmpw %[compare_value], %[old_value] \n"
*** 341,371 ****
/* clobber */
: "cc",
"memory"
);
! cmpxchg_post_membar(order);
return old_value;
}
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
T volatile* dest,
T compare_value,
! cmpxchg_memory_order order) const {
STATIC_ASSERT(8 == sizeof(T));
// Note that cmpxchg guarantees a two-way memory barrier across
// the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
// specified otherwise (see atomic.hpp).
T old_value;
const uint64_t zero = 0;
! cmpxchg_pre_membar(order);
__asm__ __volatile__ (
/* simple guard */
" ld %[old_value], 0(%[dest]) \n"
" cmpd %[compare_value], %[old_value] \n"
--- 351,381 ----
/* clobber */
: "cc",
"memory"
);
! post_membar(order);
return old_value;
}
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
T volatile* dest,
T compare_value,
! atomic_memory_order order) const {
STATIC_ASSERT(8 == sizeof(T));
// Note that cmpxchg guarantees a two-way memory barrier across
// the cmpxchg, so it's really a 'fence_cmpxchg_fence' if not
// specified otherwise (see atomic.hpp).
T old_value;
const uint64_t zero = 0;
! pre_membar(order);
__asm__ __volatile__ (
/* simple guard */
" ld %[old_value], 0(%[dest]) \n"
" cmpd %[compare_value], %[old_value] \n"
*** 391,401 ****
/* clobber */
: "cc",
"memory"
);
! cmpxchg_post_membar(order);
return old_value;
}
#undef strasm_sync
--- 401,411 ----
/* clobber */
: "cc",
"memory"
);
! post_membar(order);
return old_value;
}
#undef strasm_sync
< prev index next >