--- old/src/os_cpu/aix_ppc/vm/orderAccess_aix_ppc.inline.hpp 2015-01-22 17:48:31.112061835 -0500 +++ new/src/os_cpu/aix_ppc/vm/orderAccess_aix_ppc.inline.hpp 2015-01-22 17:48:29.815986756 -0500 @@ -61,86 +61,30 @@ #define inlasm_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory"); #define inlasm_eieio() __asm__ __volatile__ ("eieio" : : : "memory"); #define inlasm_isync() __asm__ __volatile__ ("isync" : : : "memory"); -#define inlasm_release() inlasm_lwsync(); -#define inlasm_acquire() inlasm_lwsync(); // Use twi-isync for load_acquire (faster than lwsync). // ATTENTION: seems like xlC 10.1 has problems with this inline assembler macro (VerifyMethodHandles found "bad vminfo in AMH.conv"): // #define inlasm_acquire_reg(X) __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (X) : "memory"); #define inlasm_acquire_reg(X) inlasm_lwsync(); -#define inlasm_fence() inlasm_sync(); -inline void OrderAccess::loadload() { inlasm_lwsync(); } -inline void OrderAccess::storestore() { inlasm_lwsync(); } -inline void OrderAccess::loadstore() { inlasm_lwsync(); } -inline void OrderAccess::storeload() { inlasm_fence(); } - -inline void OrderAccess::acquire() { inlasm_acquire(); } -inline void OrderAccess::release() { inlasm_release(); } -inline void OrderAccess::fence() { inlasm_fence(); } - -inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { register jbyte t = *p; inlasm_acquire_reg(t); return t; } -inline jshort OrderAccess::load_acquire(volatile jshort* p) { register jshort t = *p; inlasm_acquire_reg(t); return t; } -inline jint OrderAccess::load_acquire(volatile jint* p) { register jint t = *p; inlasm_acquire_reg(t); return t; } -inline jlong OrderAccess::load_acquire(volatile jlong* p) { register jlong t = *p; inlasm_acquire_reg(t); return t; } -inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { register jubyte t = *p; inlasm_acquire_reg(t); return t; } -inline jushort OrderAccess::load_acquire(volatile jushort* p) { register jushort t = *p; 
inlasm_acquire_reg(t); return t; } -inline juint OrderAccess::load_acquire(volatile juint* p) { register juint t = *p; inlasm_acquire_reg(t); return t; } -inline julong OrderAccess::load_acquire(volatile julong* p) { return (julong)load_acquire((volatile jlong*)p); } -inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { register jfloat t = *p; inlasm_acquire(); return t; } -inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { register jdouble t = *p; inlasm_acquire(); return t; } - -inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { return (intptr_t)load_acquire((volatile jlong*)p); } -inline void* OrderAccess::load_ptr_acquire(volatile void* p) { return (void*) load_acquire((volatile jlong*)p); } -inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { return (void*) load_acquire((volatile jlong*)p); } - -inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jshort* p, jshort v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jint* p, jint v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jlong* p, jlong v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jushort* p, jushort v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile juint* p, juint v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile julong* p, julong v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { inlasm_release(); *p = v; } - -inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { inlasm_release(); *p = v; } -inline void 
OrderAccess::release_store_ptr(volatile void* p, void* v) { inlasm_release(); *(void* volatile *)p = v; } - -inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; inlasm_fence(); } - -inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; inlasm_fence(); } - -inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { inlasm_release(); *p = v; 
inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { inlasm_release(); *p = v; inlasm_fence(); } - -inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { inlasm_release(); *(void* volatile *)p = v; inlasm_fence(); } +inline void OrderAccess::loadload() { inlasm_lwsync(); } +inline void OrderAccess::storestore() { inlasm_lwsync(); } +inline void OrderAccess::loadstore() { inlasm_lwsync(); } +inline void OrderAccess::storeload() { inlasm_sync(); } + +inline void OrderAccess::acquire() { inlasm_lwsync(); } +inline void OrderAccess::release() { inlasm_lwsync(); } +inline void OrderAccess::fence() { inlasm_sync(); } + +template<> inline jbyte OrderAccess::specialized_load_acquire (volatile jbyte* p) { register jbyte t = load(p); inlasm_acquire_reg(t); return t; } +template<> inline jshort OrderAccess::specialized_load_acquire(volatile jshort* p) { register jshort t = load(p); inlasm_acquire_reg(t); return t; } +template<> inline jint OrderAccess::specialized_load_acquire (volatile jint* p) { register jint t = load(p); inlasm_acquire_reg(t); return t; } +template<> inline jlong OrderAccess::specialized_load_acquire (volatile jlong* p) { register jlong t = load(p); inlasm_acquire_reg(t); return t; } #undef inlasm_sync #undef inlasm_lwsync #undef inlasm_eieio #undef inlasm_isync -#undef inlasm_release -#undef inlasm_acquire -#undef inlasm_fence + +#define VM_HAS_GENERALIZED_ORDER_ACCESS 1 #endif // OS_CPU_AIX_OJDKPPC_VM_ORDERACCESS_AIX_PPC_INLINE_HPP --- old/src/os_cpu/bsd_x86/vm/orderAccess_bsd_x86.inline.hpp 2015-01-22 
17:48:35.240300981 -0500 +++ new/src/os_cpu/bsd_x86/vm/orderAccess_bsd_x86.inline.hpp 2015-01-22 17:48:33.940225669 -0500 @@ -29,27 +29,24 @@ #include "runtime/orderAccess.hpp" #include "runtime/os.hpp" -// Implementation of class OrderAccess. +// A compiler barrier, forcing the C++ compiler to invalidate all memory assumptions +static inline void compiler_barrier() { + __asm__ volatile ("" : : : "memory"); +} -inline void OrderAccess::loadload() { acquire(); } -inline void OrderAccess::storestore() { release(); } -inline void OrderAccess::loadstore() { acquire(); } -inline void OrderAccess::storeload() { fence(); } +// x86 is TSO and hence only needs a fence for storeload +// However, a compiler barrier is still needed to prevent reordering +// between volatile and non-volatile memory accesses. -inline void OrderAccess::acquire() { - volatile intptr_t local_dummy; -#ifdef AMD64 - __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (local_dummy) : : "memory"); -#else - __asm__ volatile ("movl 0(%%esp),%0" : "=r" (local_dummy) : : "memory"); -#endif // AMD64 -} +// Implementation of class OrderAccess. -inline void OrderAccess::release() { - // Avoid hitting the same cache-line from - // different threads. 
- volatile jint local_dummy = 0; -} +inline void OrderAccess::loadload() { compiler_barrier(); } +inline void OrderAccess::storestore() { compiler_barrier(); } +inline void OrderAccess::loadstore() { compiler_barrier(); } +inline void OrderAccess::storeload() { fence(); } + +inline void OrderAccess::acquire() { compiler_barrier(); } +inline void OrderAccess::release() { compiler_barrier(); } inline void OrderAccess::fence() { if (os::is_MP()) { @@ -60,156 +57,50 @@ __asm__ volatile ("lock; addl $0,0(%%esp)" : : : "cc", "memory"); #endif } + compiler_barrier(); } -inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { return *p; } -inline jshort OrderAccess::load_acquire(volatile jshort* p) { return *p; } -inline jint OrderAccess::load_acquire(volatile jint* p) { return *p; } -inline jlong OrderAccess::load_acquire(volatile jlong* p) { return Atomic::load(p); } -inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { return *p; } -inline jushort OrderAccess::load_acquire(volatile jushort* p) { return *p; } -inline juint OrderAccess::load_acquire(volatile juint* p) { return *p; } -inline julong OrderAccess::load_acquire(volatile julong* p) { return Atomic::load((volatile jlong*)p); } -inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { return *p; } -inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { return jdouble_cast(Atomic::load((volatile jlong*)p)); } - -inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { return *p; } -inline void* OrderAccess::load_ptr_acquire(volatile void* p) { return *(void* volatile *)p; } -inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { return *(void* const volatile *)p; } - -inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { *p = v; } -inline void OrderAccess::release_store(volatile jshort* p, jshort v) { *p = v; } -inline void OrderAccess::release_store(volatile jint* p, jint v) { *p = v; } -inline void OrderAccess::release_store(volatile jlong* p, 
jlong v) { Atomic::store(v, p); } -inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { *p = v; } -inline void OrderAccess::release_store(volatile jushort* p, jushort v) { *p = v; } -inline void OrderAccess::release_store(volatile juint* p, juint v) { *p = v; } -inline void OrderAccess::release_store(volatile julong* p, julong v) { Atomic::store((jlong)v, (volatile jlong*)p); } -inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { *p = v; } -inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { release_store((volatile jlong*)p, jlong_cast(v)); } - -inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { *p = v; } -inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { *(void* volatile *)p = v; } - -inline void OrderAccess::store_fence(jbyte* p, jbyte v) { +template<> +inline void OrderAccess::specialized_release_store_fence (volatile jbyte* p, jbyte v) { __asm__ volatile ( "xchgb (%2),%0" : "=q" (v) : "0" (v), "r" (p) : "memory"); } -inline void OrderAccess::store_fence(jshort* p, jshort v) { +template<> +inline void OrderAccess::specialized_release_store_fence(volatile jshort* p, jshort v) { __asm__ volatile ( "xchgw (%2),%0" : "=r" (v) : "0" (v), "r" (p) : "memory"); } -inline void OrderAccess::store_fence(jint* p, jint v) { +template<> +inline void OrderAccess::specialized_release_store_fence (volatile jint* p, jint v) { __asm__ volatile ( "xchgl (%2),%0" : "=r" (v) : "0" (v), "r" (p) : "memory"); } -inline void OrderAccess::store_fence(jlong* p, jlong v) { #ifdef AMD64 - __asm__ __volatile__ ("xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -#else - *p = v; fence(); -#endif // AMD64 -} - -// AMD64 copied the bodies for the the signed version. 32bit did this. As long as the -// compiler does the inlining this is simpler. 
-inline void OrderAccess::store_fence(jubyte* p, jubyte v) { store_fence((jbyte*)p, (jbyte)v); } -inline void OrderAccess::store_fence(jushort* p, jushort v) { store_fence((jshort*)p, (jshort)v); } -inline void OrderAccess::store_fence(juint* p, juint v) { store_fence((jint*)p, (jint)v); } -inline void OrderAccess::store_fence(julong* p, julong v) { store_fence((jlong*)p, (jlong)v); } -inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); } - -inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { -#ifdef AMD64 - __asm__ __volatile__ ("xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -#else - store_fence((jint*)p, (jint)v); -#endif // AMD64 -} - -inline void OrderAccess::store_ptr_fence(void** p, void* v) { -#ifdef AMD64 - __asm__ __volatile__ ("xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -#else - store_fence((jint*)p, (jint)v); -#endif // AMD64 -} - -// Must duplicate definitions instead of calling store_fence because we don't want to cast away volatile. 
-inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { - __asm__ volatile ( "xchgb (%2),%0" - : "=q" (v) - : "0" (v), "r" (p) - : "memory"); -} -inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { - __asm__ volatile ( "xchgw (%2),%0" +template<> +inline void OrderAccess::specialized_release_store_fence (volatile jlong* p, jlong v) { + __asm__ volatile ( "xchgq (%2), %0" : "=r" (v) : "0" (v), "r" (p) : "memory"); } -inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { - __asm__ volatile ( "xchgl (%2),%0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -} - -inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { -#ifdef AMD64 - __asm__ __volatile__ ( "xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -#else - release_store(p, v); fence(); #endif // AMD64 -} -inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store_fence((volatile jbyte*)p, (jbyte)v); } -inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store_fence((volatile jshort*)p, (jshort)v); } -inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store_fence((volatile jint*)p, (jint)v); } -inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store_fence((volatile jlong*)p, (jlong)v); } - -inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store_fence((volatile jlong*)p, jlong_cast(v)); } - -inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { -#ifdef AMD64 - __asm__ __volatile__ ( "xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -#else - release_store_fence((volatile jint*)p, (jint)v); -#endif // AMD64 +template<> +inline void OrderAccess::specialized_release_store_fence (volatile jfloat* p, jfloat v) { 
+ release_store_fence((volatile jint*)p, jint_cast(v)); } -inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { -#ifdef AMD64 - __asm__ __volatile__ ( "xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -#else - release_store_fence((volatile jint*)p, (jint)v); -#endif // AMD64 +template<> +inline void OrderAccess::specialized_release_store_fence(volatile jdouble* p, jdouble v) { + release_store_fence((volatile jlong*)p, jlong_cast(v)); } +#define VM_HAS_GENERALIZED_ORDER_ACCESS 1 + #endif // OS_CPU_BSD_X86_VM_ORDERACCESS_BSD_X86_INLINE_HPP --- old/src/os_cpu/bsd_zero/vm/orderAccess_bsd_zero.inline.hpp 2015-01-22 17:48:39.188529700 -0500 +++ new/src/os_cpu/bsd_zero/vm/orderAccess_bsd_zero.inline.hpp 2015-01-22 17:48:37.888454388 -0500 @@ -40,8 +40,7 @@ #define __kernel_dmb (*(__kernel_dmb_t *) 0xffff0fa0) #define FULL_MEM_BARRIER __kernel_dmb() -#define READ_MEM_BARRIER __kernel_dmb() -#define WRITE_MEM_BARRIER __kernel_dmb() +#define LIGHT_MEM_BARRIER __kernel_dmb() #else // ARM @@ -50,126 +49,31 @@ #ifdef PPC #ifdef __NO_LWSYNC__ -#define READ_MEM_BARRIER __asm __volatile ("sync":::"memory") -#define WRITE_MEM_BARRIER __asm __volatile ("sync":::"memory") +#define LIGHT_MEM_BARRIER __asm __volatile ("sync":::"memory") #else -#define READ_MEM_BARRIER __asm __volatile ("lwsync":::"memory") -#define WRITE_MEM_BARRIER __asm __volatile ("lwsync":::"memory") +#define LIGHT_MEM_BARRIER __asm __volatile ("lwsync":::"memory") #endif #else // PPC -#define READ_MEM_BARRIER __asm __volatile ("":::"memory") -#define WRITE_MEM_BARRIER __asm __volatile ("":::"memory") +#define LIGHT_MEM_BARRIER __asm __volatile ("":::"memory") #endif // PPC #endif // ARM +// Note: What is meant by LIGHT_MEM_BARRIER is a barrier which is sufficient +// to provide TSO semantics, i.e. StoreStore | LoadLoad | LoadStore. 
-inline void OrderAccess::loadload() { acquire(); } -inline void OrderAccess::storestore() { release(); } -inline void OrderAccess::loadstore() { acquire(); } -inline void OrderAccess::storeload() { fence(); } - -inline void OrderAccess::acquire() { - READ_MEM_BARRIER; -} - -inline void OrderAccess::release() { - WRITE_MEM_BARRIER; -} - -inline void OrderAccess::fence() { - FULL_MEM_BARRIER; -} - -inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { jbyte data = *p; acquire(); return data; } -inline jshort OrderAccess::load_acquire(volatile jshort* p) { jshort data = *p; acquire(); return data; } -inline jint OrderAccess::load_acquire(volatile jint* p) { jint data = *p; acquire(); return data; } -inline jlong OrderAccess::load_acquire(volatile jlong* p) { - jlong tmp; - os::atomic_copy64(p, &tmp); - acquire(); - return tmp; -} -inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { jubyte data = *p; acquire(); return data; } -inline jushort OrderAccess::load_acquire(volatile jushort* p) { jushort data = *p; acquire(); return data; } -inline juint OrderAccess::load_acquire(volatile juint* p) { juint data = *p; acquire(); return data; } -inline julong OrderAccess::load_acquire(volatile julong* p) { - julong tmp; - os::atomic_copy64(p, &tmp); - acquire(); - return tmp; -} -inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { jfloat data = *p; acquire(); return data; } -inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { - jdouble tmp; - os::atomic_copy64(p, &tmp); - acquire(); - return tmp; -} - -inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { - intptr_t data = *p; - acquire(); - return data; -} -inline void* OrderAccess::load_ptr_acquire(volatile void* p) { - void *data = *(void* volatile *)p; - acquire(); - return data; -} -inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { - void *data = *(void* const volatile *)p; - acquire(); - return data; -} - -inline void 
OrderAccess::release_store(volatile jbyte* p, jbyte v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile jshort* p, jshort v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile jint* p, jint v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile jlong* p, jlong v) -{ release(); os::atomic_copy64(&v, p); } -inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile jushort* p, jushort v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile juint* p, juint v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile julong* p, julong v) -{ release(); os::atomic_copy64(&v, p); } -inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) -{ release(); os::atomic_copy64(&v, p); } - -inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { release(); *p = v; } -inline void OrderAccess::release_store_ptr(volatile void* p, void* v) -{ release(); *(void* volatile *)p = v; } - -inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jlong* p, jlong v) { os::atomic_copy64(&v, p); fence(); } -inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); } -inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); } -inline void OrderAccess::store_fence(julong* p, julong v) { os::atomic_copy64(&v, p); fence(); } -inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jdouble* p, jdouble v) { 
os::atomic_copy64(&v, p); fence(); } - -inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); } -inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); } - -inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); } +inline void OrderAccess::loadload() { LIGHT_MEM_BARRIER; } +inline void OrderAccess::storestore() { LIGHT_MEM_BARRIER; } +inline void OrderAccess::loadstore() { LIGHT_MEM_BARRIER; } +inline void OrderAccess::storeload() { FULL_MEM_BARRIER; } + +inline void OrderAccess::acquire() { LIGHT_MEM_BARRIER; } +inline void OrderAccess::release() { LIGHT_MEM_BARRIER; } +inline void OrderAccess::fence() { FULL_MEM_BARRIER; } -inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); } -inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); } +#define 
VM_HAS_GENERALIZED_ORDER_ACCESS 1 #endif // OS_CPU_BSD_ZERO_VM_ORDERACCESS_BSD_ZERO_INLINE_HPP --- old/src/os_cpu/linux_ppc/vm/orderAccess_linux_ppc.inline.hpp 2015-01-22 17:48:43.136758413 -0500 +++ new/src/os_cpu/linux_ppc/vm/orderAccess_linux_ppc.inline.hpp 2015-01-22 17:48:41.836683102 -0500 @@ -65,84 +65,29 @@ #define inlasm_lwsync() __asm__ __volatile__ ("lwsync" : : : "memory"); #define inlasm_eieio() __asm__ __volatile__ ("eieio" : : : "memory"); #define inlasm_isync() __asm__ __volatile__ ("isync" : : : "memory"); -#define inlasm_release() inlasm_lwsync(); -#define inlasm_acquire() inlasm_lwsync(); // Use twi-isync for load_acquire (faster than lwsync). #define inlasm_acquire_reg(X) __asm__ __volatile__ ("twi 0,%0,0\n isync\n" : : "r" (X) : "memory"); -#define inlasm_fence() inlasm_sync(); -inline void OrderAccess::loadload() { inlasm_lwsync(); } -inline void OrderAccess::storestore() { inlasm_lwsync(); } -inline void OrderAccess::loadstore() { inlasm_lwsync(); } -inline void OrderAccess::storeload() { inlasm_fence(); } - -inline void OrderAccess::acquire() { inlasm_acquire(); } -inline void OrderAccess::release() { inlasm_release(); } -inline void OrderAccess::fence() { inlasm_fence(); } - -inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { register jbyte t = *p; inlasm_acquire_reg(t); return t; } -inline jshort OrderAccess::load_acquire(volatile jshort* p) { register jshort t = *p; inlasm_acquire_reg(t); return t; } -inline jint OrderAccess::load_acquire(volatile jint* p) { register jint t = *p; inlasm_acquire_reg(t); return t; } -inline jlong OrderAccess::load_acquire(volatile jlong* p) { register jlong t = *p; inlasm_acquire_reg(t); return t; } -inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { register jubyte t = *p; inlasm_acquire_reg(t); return t; } -inline jushort OrderAccess::load_acquire(volatile jushort* p) { register jushort t = *p; inlasm_acquire_reg(t); return t; } -inline juint OrderAccess::load_acquire(volatile juint* 
p) { register juint t = *p; inlasm_acquire_reg(t); return t; } -inline julong OrderAccess::load_acquire(volatile julong* p) { return (julong)load_acquire((volatile jlong*)p); } -inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { register jfloat t = *p; inlasm_acquire(); return t; } -inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { register jdouble t = *p; inlasm_acquire(); return t; } - -inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { return (intptr_t)load_acquire((volatile jlong*)p); } -inline void* OrderAccess::load_ptr_acquire(volatile void* p) { return (void*) load_acquire((volatile jlong*)p); } -inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { return (void*) load_acquire((volatile jlong*)p); } - -inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jshort* p, jshort v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jint* p, jint v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jlong* p, jlong v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jushort* p, jushort v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile juint* p, juint v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile julong* p, julong v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { inlasm_release(); *p = v; } - -inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { inlasm_release(); *p = v; } -inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { inlasm_release(); *(void* volatile *)p = v; 
} - -inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; inlasm_fence(); } - -inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; inlasm_fence(); } -inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; inlasm_fence(); } - -inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { inlasm_release(); 
*p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { inlasm_release(); *p = v; inlasm_fence(); } - -inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { inlasm_release(); *p = v; inlasm_fence(); } -inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { inlasm_release(); *(void* volatile *)p = v; inlasm_fence(); } +inline void OrderAccess::loadload() { inlasm_lwsync(); } +inline void OrderAccess::storestore() { inlasm_lwsync(); } +inline void OrderAccess::loadstore() { inlasm_lwsync(); } +inline void OrderAccess::storeload() { inlasm_sync(); } + +inline void OrderAccess::acquire() { inlasm_lwsync(); } +inline void OrderAccess::release() { inlasm_lwsync(); } +inline void OrderAccess::fence() { inlasm_sync(); } + +template<> inline jbyte OrderAccess::specialized_load_acquire (volatile jbyte* p) { register jbyte t = load(p); inlasm_acquire_reg(t); return t; } +template<> inline jshort OrderAccess::specialized_load_acquire(volatile jshort* p) { register jshort t = load(p); inlasm_acquire_reg(t); return t; } +template<> inline jint OrderAccess::specialized_load_acquire (volatile jint* p) { register jint t = load(p); inlasm_acquire_reg(t); return t; } +template<> inline jlong OrderAccess::specialized_load_acquire (volatile jlong* p) { register jlong t = load(p); inlasm_acquire_reg(t); return t; } #undef inlasm_sync #undef inlasm_lwsync #undef inlasm_eieio #undef inlasm_isync -#undef inlasm_release -#undef inlasm_acquire -#undef inlasm_fence +#undef inlasm_acquire_reg + +#define VM_HAS_GENERALIZED_ORDER_ACCESS 1 #endif // OS_CPU_LINUX_PPC_VM_ORDERACCESS_LINUX_PPC_INLINE_HPP --- old/src/os_cpu/linux_sparc/vm/orderAccess_linux_sparc.inline.hpp 2015-01-22 17:48:47.168991993 -0500 +++ new/src/os_cpu/linux_sparc/vm/orderAccess_linux_sparc.inline.hpp 
2015-01-22 17:48:45.784911818 -0500 @@ -29,81 +29,25 @@ // Implementation of class OrderAccess. -// Assume TSO. +// A compiler barrier, forcing the C++ compiler to invalidate all memory assumptions +static inline void compiler_barrier() { + __asm__ volatile ("" : : : "memory"); +} -inline void OrderAccess::loadload() { acquire(); } -inline void OrderAccess::storestore() { release(); } -inline void OrderAccess::loadstore() { acquire(); } -inline void OrderAccess::storeload() { fence(); } +// Assume TSO. -inline void OrderAccess::acquire() { - __asm__ volatile ("nop" : : :); -} +inline void OrderAccess::loadload() { compiler_barrier(); } +inline void OrderAccess::storestore() { compiler_barrier(); } +inline void OrderAccess::loadstore() { compiler_barrier(); } +inline void OrderAccess::storeload() { fence(); } -inline void OrderAccess::release() { - jint* local_dummy = (jint*)&local_dummy; - __asm__ volatile("stw %%g0, [%0]" : : "r" (local_dummy) : "memory"); -} +inline void OrderAccess::acquire() { compiler_barrier(); } +inline void OrderAccess::release() { compiler_barrier(); } inline void OrderAccess::fence() { - __asm__ volatile ("membar #StoreLoad" : : :); + __asm__ volatile ("membar #StoreLoad" : : : "memory"); } -inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { return *p; } -inline jshort OrderAccess::load_acquire(volatile jshort* p) { return *p; } -inline jint OrderAccess::load_acquire(volatile jint* p) { return *p; } -inline jlong OrderAccess::load_acquire(volatile jlong* p) { return *p; } -inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { return *p; } -inline jushort OrderAccess::load_acquire(volatile jushort* p) { return *p; } -inline juint OrderAccess::load_acquire(volatile juint* p) { return *p; } -inline julong OrderAccess::load_acquire(volatile julong* p) { return *p; } -inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { return *p; } -inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { return *p; } - 
-inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { return *p; } -inline void* OrderAccess::load_ptr_acquire(volatile void* p) { return *(void* volatile *)p; } -inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { return *(void* const volatile *)p; } - -inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { *p = v; } -inline void OrderAccess::release_store(volatile jshort* p, jshort v) { *p = v; } -inline void OrderAccess::release_store(volatile jint* p, jint v) { *p = v; } -inline void OrderAccess::release_store(volatile jlong* p, jlong v) { *p = v; } -inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { *p = v; } -inline void OrderAccess::release_store(volatile jushort* p, jushort v) { *p = v; } -inline void OrderAccess::release_store(volatile juint* p, juint v) { *p = v; } -inline void OrderAccess::release_store(volatile julong* p, julong v) { *p = v; } -inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { *p = v; } -inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { *p = v; } - -inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { *p = v; } -inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { *(void* volatile *)p = v; } - -inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); } -inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); } -inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; 
fence(); } -inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); } - -inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); } -inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); } - -inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { *p = v; fence(); } - -inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { *p = v; fence(); } -inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { *(void* volatile *)p = v; fence(); } +#define VM_HAS_GENERALIZED_ORDER_ACCESS 1 #endif // OS_CPU_LINUX_SPARC_VM_ORDERACCESS_LINUX_SPARC_INLINE_HPP --- old/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp 2015-01-22 17:48:51.113220476 -0500 +++ new/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp 2015-01-22 17:48:49.813145165 -0500 @@ -36,23 +36,13 @@ __asm__ volatile ("" : : : "memory"); } -inline void OrderAccess::loadload() { acquire(); } -inline void OrderAccess::storestore() { release(); } -inline void OrderAccess::loadstore() { acquire(); } -inline void 
OrderAccess::storeload() { fence(); } +inline void OrderAccess::loadload() { compiler_barrier(); } +inline void OrderAccess::storestore() { compiler_barrier(); } +inline void OrderAccess::loadstore() { compiler_barrier(); } +inline void OrderAccess::storeload() { fence(); } -inline void OrderAccess::acquire() { - volatile intptr_t local_dummy; -#ifdef AMD64 - __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (local_dummy) : : "memory"); -#else - __asm__ volatile ("movl 0(%%esp),%0" : "=r" (local_dummy) : : "memory"); -#endif // AMD64 -} - -inline void OrderAccess::release() { - compiler_barrier(); -} +inline void OrderAccess::acquire() { compiler_barrier(); } +inline void OrderAccess::release() { compiler_barrier(); } inline void OrderAccess::fence() { if (os::is_MP()) { @@ -63,156 +53,50 @@ __asm__ volatile ("lock; addl $0,0(%%esp)" : : : "cc", "memory"); #endif } + compiler_barrier(); } -inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { jbyte v = *p; compiler_barrier(); return v; } -inline jshort OrderAccess::load_acquire(volatile jshort* p) { jshort v = *p; compiler_barrier(); return v; } -inline jint OrderAccess::load_acquire(volatile jint* p) { jint v = *p; compiler_barrier(); return v; } -inline jlong OrderAccess::load_acquire(volatile jlong* p) { jlong v = Atomic::load(p); compiler_barrier(); return v; } -inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { jubyte v = *p; compiler_barrier(); return v; } -inline jushort OrderAccess::load_acquire(volatile jushort* p) { jushort v = *p; compiler_barrier(); return v; } -inline juint OrderAccess::load_acquire(volatile juint* p) { juint v = *p; compiler_barrier(); return v; } -inline julong OrderAccess::load_acquire(volatile julong* p) { julong v = Atomic::load((volatile jlong*)p); compiler_barrier(); return v; } -inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { jfloat v = *p; compiler_barrier(); return v; } -inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { jdouble v = 
jdouble_cast(Atomic::load((volatile jlong*)p)); compiler_barrier(); return v; } - -inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { intptr_t v = *p; compiler_barrier(); return v; } -inline void* OrderAccess::load_ptr_acquire(volatile void* p) { void* v = *(void* volatile *)p; compiler_barrier(); return v; } -inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { void* v = *(void* const volatile *)p; compiler_barrier(); return v; } - -inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { compiler_barrier(); *p = v; } -inline void OrderAccess::release_store(volatile jshort* p, jshort v) { compiler_barrier(); *p = v; } -inline void OrderAccess::release_store(volatile jint* p, jint v) { compiler_barrier(); *p = v; } -inline void OrderAccess::release_store(volatile jlong* p, jlong v) { compiler_barrier(); Atomic::store(v, p); } -inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { compiler_barrier(); *p = v; } -inline void OrderAccess::release_store(volatile jushort* p, jushort v) { compiler_barrier(); *p = v; } -inline void OrderAccess::release_store(volatile juint* p, juint v) { compiler_barrier(); *p = v; } -inline void OrderAccess::release_store(volatile julong* p, julong v) { compiler_barrier(); Atomic::store((jlong)v, (volatile jlong*)p); } -inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { compiler_barrier(); *p = v; } -inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { release_store((volatile jlong *)p, jlong_cast(v)); } - -inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { compiler_barrier(); *p = v; } -inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { compiler_barrier(); *(void* volatile *)p = v; } - -inline void OrderAccess::store_fence(jbyte* p, jbyte v) { +template<> +inline void OrderAccess::specialized_release_store_fence (volatile jbyte* p, jbyte v) { __asm__ volatile ( "xchgb (%2),%0" : "=q" (v) 
: "0" (v), "r" (p) : "memory"); } -inline void OrderAccess::store_fence(jshort* p, jshort v) { +template<> +inline void OrderAccess::specialized_release_store_fence(volatile jshort* p, jshort v) { __asm__ volatile ( "xchgw (%2),%0" : "=r" (v) : "0" (v), "r" (p) : "memory"); } -inline void OrderAccess::store_fence(jint* p, jint v) { +template<> +inline void OrderAccess::specialized_release_store_fence (volatile jint* p, jint v) { __asm__ volatile ( "xchgl (%2),%0" : "=r" (v) : "0" (v), "r" (p) : "memory"); } -inline void OrderAccess::store_fence(jlong* p, jlong v) { #ifdef AMD64 - __asm__ __volatile__ ("xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -#else - *p = v; fence(); -#endif // AMD64 -} - -// AMD64 copied the bodies for the the signed version. 32bit did this. As long as the -// compiler does the inlining this is simpler. -inline void OrderAccess::store_fence(jubyte* p, jubyte v) { store_fence((jbyte*)p, (jbyte)v); } -inline void OrderAccess::store_fence(jushort* p, jushort v) { store_fence((jshort*)p, (jshort)v); } -inline void OrderAccess::store_fence(juint* p, juint v) { store_fence((jint*)p, (jint)v); } -inline void OrderAccess::store_fence(julong* p, julong v) { store_fence((jlong*)p, (jlong)v); } -inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jdouble* p, jdouble v) { store_fence((jlong*)p, jlong_cast(v)); } - -inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { -#ifdef AMD64 - __asm__ __volatile__ ("xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -#else - store_fence((jint*)p, (jint)v); -#endif // AMD64 -} - -inline void OrderAccess::store_ptr_fence(void** p, void* v) { -#ifdef AMD64 - __asm__ __volatile__ ("xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -#else - store_fence((jint*)p, (jint)v); -#endif // AMD64 -} - -// Must duplicate definitions instead of calling store_fence because we don't want to cast away 
volatile. -inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { - __asm__ volatile ( "xchgb (%2),%0" - : "=q" (v) - : "0" (v), "r" (p) - : "memory"); -} -inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { - __asm__ volatile ( "xchgw (%2),%0" +template<> +inline void OrderAccess::specialized_release_store_fence (volatile jlong* p, jlong v) { + __asm__ volatile ( "xchgq (%2), %0" : "=r" (v) : "0" (v), "r" (p) : "memory"); } -inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { - __asm__ volatile ( "xchgl (%2),%0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -} - -inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { -#ifdef AMD64 - __asm__ __volatile__ ( "xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -#else - release_store(p, v); fence(); #endif // AMD64 -} -inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store_fence((volatile jbyte*)p, (jbyte)v); } -inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store_fence((volatile jshort*)p, (jshort)v); } -inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store_fence((volatile jint*)p, (jint)v); } -inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store_fence((volatile jlong*)p, (jlong)v); } - -inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store_fence((volatile jlong*)p, jlong_cast(v)); } - -inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { -#ifdef AMD64 - __asm__ __volatile__ ( "xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -#else - release_store_fence((volatile jint*)p, (jint)v); -#endif // AMD64 +template<> +inline void OrderAccess::specialized_release_store_fence (volatile jfloat* p, 
jfloat v) { + release_store_fence((volatile jint*)p, jint_cast(v)); } -inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { -#ifdef AMD64 - __asm__ __volatile__ ( "xchgq (%2), %0" - : "=r" (v) - : "0" (v), "r" (p) - : "memory"); -#else - release_store_fence((volatile jint*)p, (jint)v); -#endif // AMD64 +template<> +inline void OrderAccess::specialized_release_store_fence(volatile jdouble* p, jdouble v) { + release_store_fence((volatile jlong*)p, jlong_cast(v)); } +#define VM_HAS_GENERALIZED_ORDER_ACCESS 1 + #endif // OS_CPU_LINUX_X86_VM_ORDERACCESS_LINUX_X86_INLINE_HPP --- old/src/os_cpu/linux_zero/vm/orderAccess_linux_zero.inline.hpp 2015-01-22 17:48:55.061449187 -0500 +++ new/src/os_cpu/linux_zero/vm/orderAccess_linux_zero.inline.hpp 2015-01-22 17:48:53.765374109 -0500 @@ -40,8 +40,7 @@ #define __kernel_dmb (*(__kernel_dmb_t *) 0xffff0fa0) #define FULL_MEM_BARRIER __kernel_dmb() -#define READ_MEM_BARRIER __kernel_dmb() -#define WRITE_MEM_BARRIER __kernel_dmb() +#define LIGHT_MEM_BARRIER __kernel_dmb() #else // ARM @@ -49,126 +48,33 @@ #ifdef PPC -#define READ_MEM_BARRIER __asm __volatile ("isync":::"memory") #ifdef __NO_LWSYNC__ -#define WRITE_MEM_BARRIER __asm __volatile ("sync":::"memory") +#define LIGHT_MEM_BARRIER __asm __volatile ("sync":::"memory") #else -#define WRITE_MEM_BARRIER __asm __volatile ("lwsync":::"memory") +#define LIGHT_MEM_BARRIER __asm __volatile ("lwsync":::"memory") #endif #else // PPC -#define READ_MEM_BARRIER __asm __volatile ("":::"memory") -#define WRITE_MEM_BARRIER __asm __volatile ("":::"memory") +#define LIGHT_MEM_BARRIER __asm __volatile ("":::"memory") #endif // PPC #endif // ARM +// Note: What is meant by LIGHT_MEM_BARRIER is a barrier which is sufficient +// to provide TSO semantics, i.e. StoreStore | LoadLoad | LoadStore. 
-inline void OrderAccess::loadload() { acquire(); } -inline void OrderAccess::storestore() { release(); } -inline void OrderAccess::loadstore() { acquire(); } -inline void OrderAccess::storeload() { fence(); } - -inline void OrderAccess::acquire() { - READ_MEM_BARRIER; -} - -inline void OrderAccess::release() { - WRITE_MEM_BARRIER; -} - -inline void OrderAccess::fence() { - FULL_MEM_BARRIER; -} - -inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { jbyte data = *p; acquire(); return data; } -inline jshort OrderAccess::load_acquire(volatile jshort* p) { jshort data = *p; acquire(); return data; } -inline jint OrderAccess::load_acquire(volatile jint* p) { jint data = *p; acquire(); return data; } -inline jlong OrderAccess::load_acquire(volatile jlong* p) { - jlong tmp; - os::atomic_copy64(p, &tmp); - acquire(); - return tmp; -} -inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { jubyte data = *p; acquire(); return data; } -inline jushort OrderAccess::load_acquire(volatile jushort* p) { jushort data = *p; acquire(); return data; } -inline juint OrderAccess::load_acquire(volatile juint* p) { juint data = *p; acquire(); return data; } -inline julong OrderAccess::load_acquire(volatile julong* p) { - julong tmp; - os::atomic_copy64(p, &tmp); - acquire(); - return tmp; -} -inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { jfloat data = *p; acquire(); return data; } -inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { - jdouble tmp; - os::atomic_copy64(p, &tmp); - acquire(); - return tmp; -} - -inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { - intptr_t data = *p; - acquire(); - return data; -} -inline void* OrderAccess::load_ptr_acquire(volatile void* p) { - void *data = *(void* volatile *)p; - acquire(); - return data; -} -inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { - void *data = *(void* const volatile *)p; - acquire(); - return data; -} - -inline void 
OrderAccess::release_store(volatile jbyte* p, jbyte v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile jshort* p, jshort v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile jint* p, jint v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile jlong* p, jlong v) -{ release(); os::atomic_copy64(&v, p); } -inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile jushort* p, jushort v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile juint* p, juint v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile julong* p, julong v) -{ release(); os::atomic_copy64(&v, p); } -inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { release(); *p = v; } -inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) -{ release(); os::atomic_copy64(&v, p); } - -inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { release(); *p = v; } -inline void OrderAccess::release_store_ptr(volatile void* p, void* v) -{ release(); *(void* volatile *)p = v; } - -inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jlong* p, jlong v) { os::atomic_copy64(&v, p); fence(); } -inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); } -inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); } -inline void OrderAccess::store_fence(julong* p, julong v) { os::atomic_copy64(&v, p); fence(); } -inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jdouble* p, jdouble v) { 
os::atomic_copy64(&v, p); fence(); } - -inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); } -inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); } - -inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store(p, v); fence(); } +inline void OrderAccess::loadload() { LIGHT_MEM_BARRIER; } +inline void OrderAccess::storestore() { LIGHT_MEM_BARRIER; } +inline void OrderAccess::loadstore() { LIGHT_MEM_BARRIER; } +inline void OrderAccess::storeload() { FULL_MEM_BARRIER; } -inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { release_store_ptr(p, v); fence(); } -inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { release_store_ptr(p, v); fence(); } +inline void OrderAccess::acquire() { LIGHT_MEM_BARRIER; } +inline void OrderAccess::release() { LIGHT_MEM_BARRIER; } + +inline void OrderAccess::fence() { FULL_MEM_BARRIER; } + +#define 
VM_HAS_GENERALIZED_ORDER_ACCESS 1 #endif // OS_CPU_LINUX_ZERO_VM_ORDERACCESS_LINUX_ZERO_INLINE_HPP --- old/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp 2015-01-22 17:48:59.185688092 -0500 +++ new/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp 2015-01-22 17:48:57.885612783 -0500 @@ -32,103 +32,23 @@ // Assume TSO. -// In solaris_sparc.il -extern "C" void _OrderAccess_acquire(); -extern "C" void _OrderAccess_fence(); - -inline void OrderAccess::loadload() { acquire(); } -inline void OrderAccess::storestore() { release(); } -inline void OrderAccess::loadstore() { acquire(); } -inline void OrderAccess::storeload() { fence(); } - -#ifdef _GNU_SOURCE - -inline void OrderAccess::acquire() { - __asm__ volatile ("nop" : : :); -} - -inline void OrderAccess::release() { - jint* local_dummy = (jint*)&local_dummy; - __asm__ volatile("stw %%g0, [%0]" : : "r" (local_dummy) : "memory"); -} - -inline void OrderAccess::fence() { - __asm__ volatile ("membar #StoreLoad" : : :); +// A compiler barrier, forcing the C++ compiler to invalidate all memory assumptions +inline void compiler_barrier() { + __asm__ volatile ("" : : : "memory"); } -#else // _GNU_SOURCE - -inline void OrderAccess::acquire() { - _OrderAccess_acquire(); -} +inline void OrderAccess::loadload() { compiler_barrier(); } +inline void OrderAccess::storestore() { compiler_barrier(); } +inline void OrderAccess::loadstore() { compiler_barrier(); } +inline void OrderAccess::storeload() { fence(); } -inline void OrderAccess::release() { - // Avoid hitting the same cache-line from - // different threads. 
- volatile jint local_dummy = 0; -} +inline void OrderAccess::acquire() { compiler_barrier(); } +inline void OrderAccess::release() { compiler_barrier(); } inline void OrderAccess::fence() { - _OrderAccess_fence(); + __asm__ volatile ("membar #StoreLoad" : : : "memory"); } -#endif // _GNU_SOURCE - -inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { return *p; } -inline jshort OrderAccess::load_acquire(volatile jshort* p) { return *p; } -inline jint OrderAccess::load_acquire(volatile jint* p) { return *p; } -inline jlong OrderAccess::load_acquire(volatile jlong* p) { return Atomic::load(p); } -inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { return *p; } -inline jushort OrderAccess::load_acquire(volatile jushort* p) { return *p; } -inline juint OrderAccess::load_acquire(volatile juint* p) { return *p; } -inline julong OrderAccess::load_acquire(volatile julong* p) { return Atomic::load((volatile jlong*)p); } -inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { return *p; } -inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { return *p; } - -inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { return *p; } -inline void* OrderAccess::load_ptr_acquire(volatile void* p) { return *(void* volatile *)p; } -inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { return *(void* const volatile *)p; } - -inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { *p = v; } -inline void OrderAccess::release_store(volatile jshort* p, jshort v) { *p = v; } -inline void OrderAccess::release_store(volatile jint* p, jint v) { *p = v; } -inline void OrderAccess::release_store(volatile jlong* p, jlong v) { Atomic::store(v, p); } -inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { *p = v; } -inline void OrderAccess::release_store(volatile jushort* p, jushort v) { *p = v; } -inline void OrderAccess::release_store(volatile juint* p, juint v) { *p = v; } -inline void 
OrderAccess::release_store(volatile julong* p, julong v) { Atomic::store((jlong)v, (volatile jlong*)p); } -inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { *p = v; } -inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { *p = v; } - -inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { *p = v; } -inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { *(void* volatile *)p = v; } - -inline void OrderAccess::store_fence(jbyte* p, jbyte v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); } -inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); } -inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); } - -inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); } -inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); } - -inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jushort* p, jushort 
v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { *p = v; fence(); } - -inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { *p = v; fence(); } -inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { *(void* volatile *)p = v; fence(); } +#define VM_HAS_GENERALIZED_ORDER_ACCESS 1 #endif // OS_CPU_SOLARIS_SPARC_VM_ORDERACCESS_SOLARIS_SPARC_INLINE_HPP --- old/src/os_cpu/solaris_sparc/vm/solaris_sparc.il 2015-01-22 17:49:03.249923521 -0500 +++ new/src/os_cpu/solaris_sparc/vm/solaris_sparc.il 2015-01-22 17:49:01.953848443 -0500 @@ -19,7 +19,7 @@ // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA // or visit www.oracle.com if you need additional information or have any // questions. -// +// // // Get the raw thread ID from %g7 @@ -35,11 +35,11 @@ // Clear SPARC fprs.FEF DU and DL bits -- // allows the kernel to avoid saving FPU state at context-switch time. // Use for state-transition points (into _thread_blocked) or when - // parking. - + // parking. 
+ .inline _mark_fpu_nosave, 0 .volatile - wr %g0, 0, %fprs + wr %g0, 0, %fprs .nonvolatile .end @@ -85,7 +85,7 @@ // Support for jint Atomic::cmpxchg(jint exchange_value, - // volatile jint* dest, + // volatile jint* dest, // jint compare_value) // // Arguments: @@ -103,8 +103,8 @@ .end - // Support for intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, - // volatile intptr_t* dest, + // Support for intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, + // volatile intptr_t* dest, // intptr_t compare_value) // // 64-bit @@ -124,8 +124,8 @@ .end - // Support for jlong Atomic::cmpxchg(jlong exchange_value, - // volatile jlong* dest, + // Support for jlong Atomic::cmpxchg(jlong exchange_value, + // volatile jlong* dest, // jlong compare_value) // // 32-bit calling conventions @@ -220,27 +220,6 @@ .nonvolatile .end - - // Support for void OrderAccess::acquire() - // The method is intentionally empty. - // It exists for the sole purpose of generating - // a C/C++ sequence point over which the compiler won't - // reorder code. - - .inline _OrderAccess_acquire,0 - .volatile - .nonvolatile - .end - - - // Support for void OrderAccess::fence() - - .inline _OrderAccess_fence,0 - .volatile - membar #StoreLoad - .nonvolatile - .end - // Support for void Prefetch::read(void *loc, intx interval) // --- old/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp 2015-01-22 17:49:07.198152228 -0500 +++ new/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp 2015-01-22 17:49:05.902077151 -0500 @@ -31,108 +31,30 @@ // Implementation of class OrderAccess. -// For Sun Studio - implementation is in solaris_i486.il. -// For gcc - implementation is just below. 
-extern "C" void _OrderAccess_acquire(); -extern "C" void _OrderAccess_fence(); - -inline void OrderAccess::loadload() { acquire(); } -inline void OrderAccess::storestore() { release(); } -inline void OrderAccess::loadstore() { acquire(); } -inline void OrderAccess::storeload() { fence(); } - -inline void OrderAccess::acquire() { - _OrderAccess_acquire(); - +// A compiler barrier, forcing the C++ compiler to invalidate all memory assumptions +inline void compiler_barrier() { + __asm__ volatile ("" : : : "memory"); } -inline void OrderAccess::release() { - // Avoid hitting the same cache-line from - // different threads. - volatile jint local_dummy = 0; -} +inline void OrderAccess::loadload() { compiler_barrier(); } +inline void OrderAccess::storestore() { compiler_barrier(); } +inline void OrderAccess::loadstore() { compiler_barrier(); } +inline void OrderAccess::storeload() { fence(); } + +inline void OrderAccess::acquire() { compiler_barrier(); } +inline void OrderAccess::release() { compiler_barrier(); } inline void OrderAccess::fence() { if (os::is_MP()) { - _OrderAccess_fence(); - } -} - -#ifdef _GNU_SOURCE - -extern "C" { - inline void _OrderAccess_acquire() { - volatile intptr_t local_dummy; #ifdef AMD64 - __asm__ volatile ("movq 0(%%rsp), %0" : "=r" (local_dummy) : : "memory"); + __asm__ volatile ("lock; addl $0,0(%%rsp)" : : : "cc", "memory"); #else - __asm__ volatile ("movl 0(%%esp),%0" : "=r" (local_dummy) : : "memory"); -#endif // AMD64 - } - inline void _OrderAccess_fence() { - // Always use locked addl since mfence is sometimes expensive __asm__ volatile ("lock; addl $0,0(%%esp)" : : : "cc", "memory"); +#endif } - + compiler_barrier(); } -#endif // GNU_SOURCE - -inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { return *p; } -inline jshort OrderAccess::load_acquire(volatile jshort* p) { return *p; } -inline jint OrderAccess::load_acquire(volatile jint* p) { return *p; } -inline jlong OrderAccess::load_acquire(volatile jlong* p) { return 
Atomic::load(p); } -inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { return *p; } -inline jushort OrderAccess::load_acquire(volatile jushort* p) { return *p; } -inline juint OrderAccess::load_acquire(volatile juint* p) { return *p; } -inline julong OrderAccess::load_acquire(volatile julong* p) { return Atomic::load((volatile jlong*)p); } -inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { return *p; } -inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { return jdouble_cast(Atomic::load((volatile jlong*)p)); } - -inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { return *p; } -inline void* OrderAccess::load_ptr_acquire(volatile void* p) { return *(void* volatile *)p; } -inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { return *(void* const volatile *)p; } - -inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { *p = v; } -inline void OrderAccess::release_store(volatile jshort* p, jshort v) { *p = v; } -inline void OrderAccess::release_store(volatile jint* p, jint v) { *p = v; } -inline void OrderAccess::release_store(volatile jlong* p, jlong v) { Atomic::store(v, p); } -inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { *p = v; } -inline void OrderAccess::release_store(volatile jushort* p, jushort v) { *p = v; } -inline void OrderAccess::release_store(volatile juint* p, juint v) { *p = v; } -inline void OrderAccess::release_store(volatile julong* p, julong v) { Atomic::store((jlong)v, (volatile jlong*)p); } -inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { *p = v; } -inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { release_store((volatile jlong*)p, jlong_cast(v)); } - -inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { *p = v; } -inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { *(void* volatile *)p = v; } - -inline void OrderAccess::store_fence(jbyte* p, jbyte v) 
{ *p = v; fence(); } -inline void OrderAccess::store_fence(jshort* p, jshort v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jint* p, jint v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jubyte* p, jubyte v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jushort* p, jushort v) { *p = v; fence(); } -inline void OrderAccess::store_fence(juint* p, juint v) { *p = v; fence(); } -inline void OrderAccess::store_fence(julong* p, julong v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); } - -inline void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { *p = v; fence(); } -inline void OrderAccess::store_ptr_fence(void** p, void* v) { *p = v; fence(); } - -inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } -inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store((jlong *)p, (jlong)v); fence(); } -inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { release_store_fence((volatile jlong*)p, jlong_cast(v)); } - -inline void 
OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { *p = v; fence(); } -inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { *(void* volatile *)p = v; fence(); } +#define VM_HAS_GENERALIZED_ORDER_ACCESS 1 #endif // OS_CPU_SOLARIS_X86_VM_ORDERACCESS_SOLARIS_X86_INLINE_HPP --- old/src/os_cpu/solaris_x86/vm/solaris_x86_32.il 2015-01-22 17:49:11.150381288 -0500 +++ new/src/os_cpu/solaris_x86/vm/solaris_x86_32.il 2015-01-22 17:49:09.854306138 -0500 @@ -19,7 +19,7 @@ // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA // or visit www.oracle.com if you need additional information or have any // questions. -// +// // @@ -34,19 +34,19 @@ // Get the raw thread ID from %gs:0 .inline _raw_thread_id,0 - movl %gs:0, %eax + movl %gs:0, %eax .end // Get current sp .inline _get_current_sp,0 .volatile - movl %esp, %eax + movl %esp, %eax .end // Get current fp .inline _get_current_fp,0 .volatile - movl %ebp, %eax + movl %ebp, %eax .end // Support for os::rdtsc() @@ -76,8 +76,8 @@ xchgl (%ecx), %eax .end - // Support for jbyte Atomic::cmpxchg(jbyte exchange_value, - // volatile jbyte *dest, + // Support for jbyte Atomic::cmpxchg(jbyte exchange_value, + // volatile jbyte *dest, // jbyte compare_value) // An additional bool (os::is_MP()) is passed as the last argument. .inline _Atomic_cmpxchg_byte,4 @@ -93,8 +93,8 @@ 2: .end - // Support for jint Atomic::cmpxchg(jint exchange_value, - // volatile jint *dest, + // Support for jint Atomic::cmpxchg(jint exchange_value, + // volatile jint *dest, // jint compare_value) // An additional bool (os::is_MP()) is passed as the last argument. 
.inline _Atomic_cmpxchg,4 @@ -141,17 +141,6 @@ fistpll (%eax) .end - // Support for OrderAccess::acquire() - .inline _OrderAccess_acquire,0 - movl 0(%esp), %eax - .end - - // Support for OrderAccess::fence() - .inline _OrderAccess_fence,0 - lock - addl $0, (%esp) - .end - // Support for u2 Bytes::swap_u2(u2 x) .inline _raw_swap_u2,1 movl 0(%esp), %eax --- old/src/os_cpu/solaris_x86/vm/solaris_x86_64.il 2015-01-22 17:49:15.418628766 -0500 +++ new/src/os_cpu/solaris_x86/vm/solaris_x86_64.il 2015-01-22 17:49:14.030548284 -0500 @@ -19,7 +19,7 @@ // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA // or visit www.oracle.com if you need additional information or have any // questions. -// +// // // The argument size of each inline directive is ignored by the compiler @@ -27,19 +27,19 @@ // Get the raw thread ID from %gs:0 .inline _raw_thread_id,0 - movq %fs:0, %rax + movq %fs:0, %rax .end // Get current sp .inline _get_current_sp,0 .volatile - movq %rsp, %rax + movq %rsp, %rax .end // Get current fp .inline _get_current_fp,0 .volatile - movq %rbp, %rax + movq %rbp, %rax .end // Support for os::rdtsc() @@ -77,8 +77,8 @@ movq %rdi, %rax .end - // Support for jbyte Atomic::cmpxchg(jbyte exchange_value, - // volatile jbyte *dest, + // Support for jbyte Atomic::cmpxchg(jbyte exchange_value, + // volatile jbyte *dest, // jbyte compare_value) .inline _Atomic_cmpxchg_byte,3 movb %dl, %al // compare_value @@ -86,8 +86,8 @@ cmpxchgb %dil, (%rsi) .end - // Support for jint Atomic::cmpxchg(jint exchange_value, - // volatile jint *dest, + // Support for jint Atomic::cmpxchg(jint exchange_value, + // volatile jint *dest, // jint compare_value) .inline _Atomic_cmpxchg,3 movl %edx, %eax // compare_value @@ -104,17 +104,6 @@ cmpxchgq %rdi, (%rsi) .end - // Support for OrderAccess::acquire() - .inline _OrderAccess_acquire,0 - movl 0(%rsp), %eax - .end - - // Support for OrderAccess::fence() - .inline _OrderAccess_fence,0 - lock - addl $0, (%rsp) - .end - // Support 
for u2 Bytes::swap_u2(u2 x) .inline _raw_swap_u2,1 movw %di, %ax --- old/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp 2015-01-22 17:49:20.942949071 -0500 +++ new/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp 2015-01-22 17:49:19.642873691 -0500 @@ -25,29 +25,34 @@ #ifndef OS_CPU_WINDOWS_X86_VM_ORDERACCESS_WINDOWS_X86_INLINE_HPP #define OS_CPU_WINDOWS_X86_VM_ORDERACCESS_WINDOWS_X86_INLINE_HPP +#include #include "runtime/atomic.inline.hpp" #include "runtime/orderAccess.hpp" #include "runtime/os.hpp" // Implementation of class OrderAccess. -inline void OrderAccess::loadload() { acquire(); } -inline void OrderAccess::storestore() { release(); } -inline void OrderAccess::loadstore() { acquire(); } +// A compiler barrier, forcing the C++ compiler to invalidate all memory assumptions +inline void compiler_barrier() { + _ReadWriteBarrier(); +} + +// Note that in MSVC, volatile memory accesses are explicitly +// guaranteed to have acquire release semantics (w.r.t. compiler +// reordering) and therefore does not even need a compiler barrier +// for normal acquire release accesses. +template<> inline void ScopedFence::postfix() { } +template<> inline void ScopedFence::prefix() { } +template<> inline void ScopedFence::prefix() { } +template<> inline void ScopedFence::postfix() { OrderAccess::fence(); } + +inline void OrderAccess::loadload() { compiler_barrier(); } +inline void OrderAccess::storestore() { compiler_barrier(); } +inline void OrderAccess::loadstore() { compiler_barrier(); } inline void OrderAccess::storeload() { fence(); } -inline void OrderAccess::acquire() { -#ifndef AMD64 - __asm { - mov eax, dword ptr [esp]; - } -#endif // !AMD64 -} - -inline void OrderAccess::release() { - // A volatile store has release semantics. 
- volatile jint local_dummy = 0; -} +inline void OrderAccess::acquire() { compiler_barrier(); } +inline void OrderAccess::release() { compiler_barrier(); } inline void OrderAccess::fence() { #ifdef AMD64 @@ -59,157 +64,47 @@ } } #endif // AMD64 + compiler_barrier(); } -inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { return *p; } -inline jshort OrderAccess::load_acquire(volatile jshort* p) { return *p; } -inline jint OrderAccess::load_acquire(volatile jint* p) { return *p; } -inline jlong OrderAccess::load_acquire(volatile jlong* p) { return Atomic::load(p); } -inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { return *p; } -inline jushort OrderAccess::load_acquire(volatile jushort* p) { return *p; } -inline juint OrderAccess::load_acquire(volatile juint* p) { return *p; } -inline julong OrderAccess::load_acquire(volatile julong* p) { return Atomic::load((volatile jlong*)p); } -inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { return *p; } -inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { return jdouble_cast(Atomic::load((volatile jlong*)p)); } - -inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { return *p; } -inline void* OrderAccess::load_ptr_acquire(volatile void* p) { return *(void* volatile *)p; } -inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { return *(void* const volatile *)p; } - -inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { *p = v; } -inline void OrderAccess::release_store(volatile jshort* p, jshort v) { *p = v; } -inline void OrderAccess::release_store(volatile jint* p, jint v) { *p = v; } -inline void OrderAccess::release_store(volatile jlong* p, jlong v) { Atomic::store(v, p); } -inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { *p = v; } -inline void OrderAccess::release_store(volatile jushort* p, jushort v) { *p = v; } -inline void OrderAccess::release_store(volatile juint* p, juint v) { *p = v; } -inline void 
OrderAccess::release_store(volatile julong* p, julong v) { Atomic::store((jlong)v, (volatile jlong*)p); } -inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { *p = v; } -inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { release_store((volatile jlong*)p, jlong_cast(v)); } - -inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { *p = v; } -inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { *(void* volatile *)p = v; } - -inline void OrderAccess::store_fence(jbyte* p, jbyte v) { -#ifdef AMD64 - *p = v; fence(); -#else +#ifndef AMD64 +template<> +inline void OrderAccess::specialized_release_store_fence (volatile jbyte* p, jbyte v) { __asm { mov edx, p; mov al, v; xchg al, byte ptr [edx]; } -#endif // AMD64 } -inline void OrderAccess::store_fence(jshort* p, jshort v) { -#ifdef AMD64 - *p = v; fence(); -#else +template<> +inline void OrderAccess::specialized_release_store_fence(volatile jshort* p, jshort v) { __asm { mov edx, p; mov ax, v; xchg ax, word ptr [edx]; } -#endif // AMD64 } -inline void OrderAccess::store_fence(jint* p, jint v) { -#ifdef AMD64 - *p = v; fence(); -#else +template<> +inline void OrderAccess::specialized_release_store_fence (volatile jint* p, jint v) { __asm { mov edx, p; mov eax, v; xchg eax, dword ptr [edx]; } -#endif // AMD64 -} - -inline void OrderAccess::store_fence(jlong* p, jlong v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jubyte* p, jubyte v) { store_fence((jbyte*)p, (jbyte)v); } -inline void OrderAccess::store_fence(jushort* p, jushort v) { store_fence((jshort*)p, (jshort)v); } -inline void OrderAccess::store_fence(juint* p, juint v) { store_fence((jint*)p, (jint)v); } -inline void OrderAccess::store_fence(julong* p, julong v) { store_fence((jlong*)p, (jlong)v); } -inline void OrderAccess::store_fence(jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::store_fence(jdouble* p, jdouble v) { *p = v; fence(); } - -inline 
void OrderAccess::store_ptr_fence(intptr_t* p, intptr_t v) { -#ifdef AMD64 - *p = v; fence(); -#else - store_fence((jint*)p, (jint)v); -#endif // AMD64 -} - -inline void OrderAccess::store_ptr_fence(void** p, void* v) { -#ifdef AMD64 - *p = v; fence(); -#else - store_fence((jint*)p, (jint)v); -#endif // AMD64 -} - -// Must duplicate definitions instead of calling store_fence because we don't want to cast away volatile. -inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { -#ifdef AMD64 - *p = v; fence(); -#else - __asm { - mov edx, p; - mov al, v; - xchg al, byte ptr [edx]; - } -#endif // AMD64 } - -inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { -#ifdef AMD64 - *p = v; fence(); -#else - __asm { - mov edx, p; - mov ax, v; - xchg ax, word ptr [edx]; - } #endif // AMD64 -} -inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { -#ifdef AMD64 - *p = v; fence(); -#else - __asm { - mov edx, p; - mov eax, v; - xchg eax, dword ptr [edx]; - } -#endif // AMD64 +template<> +inline void OrderAccess::specialized_release_store_fence(volatile jfloat* p, jfloat v) { + release_store_fence((volatile jint*)p, jint_cast(v)); } - -inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { release_store(p, v); fence(); } - -inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { release_store_fence((volatile jbyte*)p, (jbyte)v); } -inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { release_store_fence((volatile jshort*)p, (jshort)v); } -inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { release_store_fence((volatile jint*)p, (jint)v); } -inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { release_store_fence((volatile jlong*)p, (jlong)v); } -inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { *p = v; fence(); } -inline void OrderAccess::release_store_fence(volatile jdouble* p, 
jdouble v) { release_store_fence((volatile jlong*)p, jlong_cast(v)); } - -inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* p, intptr_t v) { -#ifdef AMD64 - *p = v; fence(); -#else - release_store_fence((volatile jint*)p, (jint)v); -#endif // AMD64 +template<> +inline void OrderAccess::specialized_release_store_fence(volatile jdouble* p, jdouble v) { + release_store_fence((volatile jlong*)p, jlong_cast(v)); } -inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { -#ifdef AMD64 - *(void* volatile *)p = v; fence(); -#else - release_store_fence((volatile jint*)p, (jint)v); -#endif // AMD64 -} +#define VM_HAS_GENERALIZED_ORDER_ACCESS 1 #endif // OS_CPU_WINDOWS_X86_VM_ORDERACCESS_WINDOWS_X86_INLINE_HPP --- old/src/share/vm/runtime/orderAccess.hpp 2015-01-22 17:49:24.915179382 -0500 +++ new/src/share/vm/runtime/orderAccess.hpp 2015-01-22 17:49:23.591102611 -0500 @@ -29,11 +29,7 @@ // Memory Access Ordering Model // -// This interface is based on the JSR-133 Cookbook for Compiler Writers -// and on the IA64 memory model. It is the dynamic equivalent of the -// C/C++ volatile specifier. I.e., volatility restricts compile-time -// memory access reordering in a way similar to what we want to occur -// at runtime. +// This interface is based on the JSR-133 Cookbook for Compiler Writers. // // In the following, the terms 'previous', 'subsequent', 'before', // 'after', 'preceding' and 'succeeding' refer to program order. The @@ -69,85 +65,93 @@ // operations. Stores before Store1 may *not* float below Load2 and any // subsequent load operations. // +// We define two further barriers: acquire and release. // -// We define two further operations, 'release' and 'acquire'. They are -// mirror images of each other. +// Conceptually, acquire/release semantics form unidirectional and +// asynchronous barriers w.r.t. a synchronizing load(X) and store(X) pair. 
+// They should always be used in pairs to publish (release store) and +// access (load acquire) some implicitly understood shared data between +// threads in a relatively cheap fashion not requiring storeload. If not +// used in such a pair, it is advised to use a membar instead: +// acquire/release only make sense as pairs. +// +// T1: access_shared_data +// T1: ]release +// T1: (...) +// T1: store(X) +// +// T2: load(X) +// T2: (...) +// T2: acquire[ +// T2: access_shared_data +// +// It is guaranteed that if T2: load(X) synchronizes with (observes the +// value written by) T1: store(X), then the memory accesses before the T1: +// ]release happen before the memory accesses after the T2: acquire[. +// +// Total Store Order (TSO) machines can be seen as machines issuing a +// release store for each store and a load acquire for each load. Therefore +// there is an inherent resemblance between TSO and acquire/release +// semantics. TSO can be seen as an abstract machine where loads are +// executed immediately when encountered (hence loadload reordering not +// happening) but enqueues stores in a FIFO queue +// for asynchronous serialization (neither storestore nor loadstore +// reordering happening). The only reordering happening is storeload due to +// the queue asynchronously serializing stores (yet in order). +// +// Acquire/release semantics essentially exploits this asynchronicity: when +// the load(X) acquire[ observes the store of ]release store(X), the +// accesses before the release must have happened before the accesses after +// acquire. +// +// The API offers both stand-alone acquire() and release() as well as joined +// load_acquire() and release_store(). It is guaranteed that these are +// semantically equivalent w.r.t. the defined model. However, since +// stand-alone acquire()/release() does not know which previous +// load/subsequent store is considered the synchronizing load/store, they +// may be more conservative in implementations.
We advise using the joined +// variants whenever possible. +// +// Finally, we define a "fence" operation, as a bidirectional barrier. +// It guarantees that any memory access preceding the fence is not +// reordered w.r.t. any memory accesses subsequent to the fence in program +// order. This may be used to prevent sequences of loads from floating up +// above sequences of stores. +// +// The following table shows the implementations on some architectures: +// +// Constraint x86 sparc ppc +// --------------------------------------------------------------------------- +// fence LoadStore | lock membar #StoreLoad sync +// StoreStore | addl 0,(sp) +// LoadLoad | +// StoreLoad +// +// release LoadStore | lwsync +// StoreStore +// +// acquire LoadLoad | lwsync +// LoadStore +// +// release_store lwsync +// +// +// release_store_fence xchg lwsync +// membar #StoreLoad +// sync // -// Execution by a processor of release makes the effect of all memory -// accesses issued by it previous to the release visible to all -// processors *before* the release completes. The effect of subsequent -// memory accesses issued by it *may* be made visible *before* the -// release. I.e., subsequent memory accesses may float above the -// release, but prior ones may not float below it. -// -// Execution by a processor of acquire makes the effect of all memory -// accesses issued by it subsequent to the acquire visible to all -// processors *after* the acquire completes. The effect of prior memory -// accesses issued by it *may* be made visible *after* the acquire. -// I.e., prior memory accesses may float below the acquire, but -// subsequent ones may not float above it. -// -// Finally, we define a 'fence' operation, which conceptually is a -// release combined with an acquire. In the real world these operations -// require one or more machine instructions which can float above and -// below the release or acquire, so we usually can't just issue the -// release-acquire back-to-back.
All machines we know of implement some -// sort of memory fence instruction. -// -// -// The standalone implementations of release and acquire need an associated -// dummy volatile store or load respectively. To avoid redundant operations, -// we can define the composite operators: 'release_store', 'store_fence' and -// 'load_acquire'. Here's a summary of the machine instructions corresponding -// to each operation. -// -// sparc RMO ia64 x86 -// --------------------------------------------------------------------- -// fence membar #LoadStore | mf lock addl 0,(sp) -// #StoreStore | -// #LoadLoad | -// #StoreLoad -// -// release membar #LoadStore | st.rel [sp]=r0 movl $0, -// #StoreStore -// st %g0,[] -// -// acquire ld [%sp],%g0 ld.acq =[sp] movl (sp), -// membar #LoadLoad | -// #LoadStore -// -// release_store membar #LoadStore | st.rel -// #StoreStore -// st -// -// store_fence st st lock xchg -// fence mf -// -// load_acquire ld ld.acq -// membar #LoadLoad | -// #LoadStore -// -// Using only release_store and load_acquire, we can implement the -// following ordered sequences. -// -// 1. load, load == load_acquire, load -// or load_acquire, load_acquire -// 2. load, store == load, release_store -// or load_acquire, store -// or load_acquire, release_store -// 3. store, store == store, release_store -// or release_store, release_store // -// These require no membar instructions for sparc-TSO and no extra -// instructions for ia64. +// load_acquire +// lwsync // -// Ordering a load relative to preceding stores requires a store_fence, +// Ordering a load relative to preceding stores requires a fence, // which implies a membar #StoreLoad between the store and load under -// sparc-TSO. A fence is required by ia64. On x86, we use locked xchg. +// sparc-TSO. A fence is required by x86. On x86, we use explicitly +// locked add. // -// 4. store, load == store_fence, load +// 4. 
store, load <= is constrained by => store, fence, load // -// Use store_fence to make sure all stores done in an 'interesting' +// Use store, fence to make sure all stores done in an 'interesting' // region are made visible prior to both subsequent loads and stores. // // Conventional usage is to issue a load_acquire for ordered loads. Use @@ -157,26 +161,7 @@ // release_store_fence to update values like the thread state, where we // don't want the current thread to continue until all our prior memory // accesses (including the new thread state) are visible to other threads. -// -// -// C++ Volatility -// -// C++ guarantees ordering at operations termed 'sequence points' (defined -// to be volatile accesses and calls to library I/O functions). 'Side -// effects' (defined as volatile accesses, calls to library I/O functions -// and object modification) previous to a sequence point must be visible -// at that sequence point. See the C++ standard, section 1.9, titled -// "Program Execution". This means that all barrier implementations, -// including standalone loadload, storestore, loadstore, storeload, acquire -// and release must include a sequence point, usually via a volatile memory -// access. Other ways to guarantee a sequence point are, e.g., use of -// indirect calls and linux's __asm__ volatile. -// Note: as of 6973570, we have replaced the originally static "dummy" field -// (see above) by a volatile store to the stack. All of the versions of the -// compilers that we currently use (SunStudio, gcc and VC++) respect the -// semantics of volatile here. If you build HotSpot using other -// compilers, you may need to verify that no compiler reordering occurs -// across the sequence point represented by the volatile access. +// This is equivalent to the volatile semantics of the Java Memory Model. // // // os::is_MP Considered Redundant @@ -240,8 +225,34 @@ // order. 
If their implementations change such that these assumptions // are violated, a whole lot of code will break. +enum ScopedFenceType { + X_ACQUIRE + , RELEASE_X + , RELEASE_X_FENCE +}; + +template +class ScopedFenceGeneral: public StackObj { + public: + void prefix() {} + void postfix() {} +}; + +template +class ScopedFence : public ScopedFenceGeneral { + void *const _field; + public: + ScopedFence(void *const field) : _field(field) { prefix(); } + ~ScopedFence() { postfix(); } + void prefix() { ScopedFenceGeneral::prefix(); } + void postfix() { ScopedFenceGeneral::postfix(); } +}; + +// This class implements some fences for different platforms and specializes +// the methods of its superclass using template specialization for improved performance. class OrderAccess : AllStatic { public: + // barriers static void loadload(); static void storestore(); static void loadstore(); @@ -280,20 +291,6 @@ static void release_store_ptr(volatile intptr_t* p, intptr_t v); static void release_store_ptr(volatile void* p, void* v); - static void store_fence(jbyte* p, jbyte v); - static void store_fence(jshort* p, jshort v); - static void store_fence(jint* p, jint v); - static void store_fence(jlong* p, jlong v); - static void store_fence(jubyte* p, jubyte v); - static void store_fence(jushort* p, jushort v); - static void store_fence(juint* p, juint v); - static void store_fence(julong* p, julong v); - static void store_fence(jfloat* p, jfloat v); - static void store_fence(jdouble* p, jdouble v); - - static void store_ptr_fence(intptr_t* p, intptr_t v); - static void store_ptr_fence(void** p, void* v); - static void release_store_fence(volatile jbyte* p, jbyte v); static void release_store_fence(volatile jshort* p, jshort v); static void release_store_fence(volatile jint* p, jint v); @@ -313,6 +310,47 @@ // routine if it exists, It should only be used by platforms that // don't have another way to do the inline assembly. 
static void StubRoutines_fence(); + + // Give platforms a variation point to specialize. + template static T specialized_load_acquire (volatile T* p ); + template static void specialized_release_store (volatile T* p, T v); + template static void specialized_release_store_fence(volatile T* p, T v); + + template + static void ordered_store(volatile FieldType* p, FieldType v); + + template + static FieldType ordered_load(volatile FieldType* p); + + static void store(volatile jbyte* p, jbyte v); + static void store(volatile jshort* p, jshort v); + static void store(volatile jint* p, jint v); + static void store(volatile jlong* p, jlong v); + static void store(volatile jdouble* p, jdouble v); + static void store(volatile jfloat* p, jfloat v); + + static jbyte load (volatile jbyte* p); + static jshort load (volatile jshort* p); + static jint load (volatile jint* p); + static jlong load (volatile jlong* p); + static jdouble load (volatile jdouble* p); + static jfloat load (volatile jfloat* p); + + // The following store_fence methods are deprecated and will be removed + // when all repos conform to the new generalized OrderAccess.
+ static void store_fence(jbyte* p, jbyte v); + static void store_fence(jshort* p, jshort v); + static void store_fence(jint* p, jint v); + static void store_fence(jlong* p, jlong v); + static void store_fence(jubyte* p, jubyte v); + static void store_fence(jushort* p, jushort v); + static void store_fence(juint* p, juint v); + static void store_fence(julong* p, julong v); + static void store_fence(jfloat* p, jfloat v); + static void store_fence(jdouble* p, jdouble v); + + static void store_ptr_fence(intptr_t* p, intptr_t v); + static void store_ptr_fence(void** p, void* v); }; #endif // SHARE_VM_RUNTIME_ORDERACCESS_HPP --- old/src/share/vm/runtime/orderAccess.inline.hpp 2015-01-22 17:49:28.875408996 -0500 +++ new/src/share/vm/runtime/orderAccess.inline.hpp 2015-01-22 17:49:27.563332922 -0500 @@ -26,6 +26,7 @@ #ifndef SHARE_VM_RUNTIME_ORDERACCESS_INLINE_HPP #define SHARE_VM_RUNTIME_ORDERACCESS_INLINE_HPP +#include "runtime/atomic.inline.hpp" #include "runtime/orderAccess.hpp" // Linux @@ -71,4 +72,92 @@ # include "orderAccess_bsd_zero.inline.hpp" #endif +#ifdef VM_HAS_GENERALIZED_ORDER_ACCESS + +template<> inline void ScopedFenceGeneral::postfix() { OrderAccess::acquire(); } +template<> inline void ScopedFenceGeneral::prefix() { OrderAccess::release(); } +template<> inline void ScopedFenceGeneral::prefix() { OrderAccess::release(); } +template<> inline void ScopedFenceGeneral::postfix() { OrderAccess::fence(); } + + +template +inline void OrderAccess::ordered_store(volatile FieldType* p, FieldType v) { + ScopedFence f((void*)p); + store(p, v); +} + +template +inline FieldType OrderAccess::ordered_load(volatile FieldType* p) { + ScopedFence f((void*)p); + return load(p); +} + +inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { return specialized_load_acquire(p); } +inline jshort OrderAccess::load_acquire(volatile jshort* p) { return specialized_load_acquire(p); } +inline jint OrderAccess::load_acquire(volatile jint* p) { return specialized_load_acquire(p); 
} +inline jlong OrderAccess::load_acquire(volatile jlong* p) { return specialized_load_acquire(p); } +inline jfloat OrderAccess::load_acquire(volatile jfloat* p) { return specialized_load_acquire(p); } +inline jdouble OrderAccess::load_acquire(volatile jdouble* p) { return specialized_load_acquire(p); } +inline jubyte OrderAccess::load_acquire(volatile jubyte* p) { return (jubyte) specialized_load_acquire((volatile jbyte*)p); } +inline jushort OrderAccess::load_acquire(volatile jushort* p) { return (jushort)specialized_load_acquire((volatile jshort*)p); } +inline juint OrderAccess::load_acquire(volatile juint* p) { return (juint) specialized_load_acquire((volatile jint*)p); } +inline julong OrderAccess::load_acquire(volatile julong* p) { return (julong) specialized_load_acquire((volatile jlong*)p); } + +inline intptr_t OrderAccess::load_ptr_acquire(volatile intptr_t* p) { return (intptr_t)specialized_load_acquire(p); } +inline void* OrderAccess::load_ptr_acquire(volatile void* p) { return (void*)specialized_load_acquire((volatile intptr_t*)p); } +inline void* OrderAccess::load_ptr_acquire(const volatile void* p) { return (void*)specialized_load_acquire((volatile intptr_t*)p); } + +inline void OrderAccess::release_store(volatile jbyte* p, jbyte v) { specialized_release_store(p, v); } +inline void OrderAccess::release_store(volatile jshort* p, jshort v) { specialized_release_store(p, v); } +inline void OrderAccess::release_store(volatile jint* p, jint v) { specialized_release_store(p, v); } +inline void OrderAccess::release_store(volatile jlong* p, jlong v) { specialized_release_store(p, v); } +inline void OrderAccess::release_store(volatile jfloat* p, jfloat v) { specialized_release_store(p, v); } +inline void OrderAccess::release_store(volatile jdouble* p, jdouble v) { specialized_release_store(p, v); } +inline void OrderAccess::release_store(volatile jubyte* p, jubyte v) { specialized_release_store((volatile jbyte*) p, (jbyte) v); } +inline void 
OrderAccess::release_store(volatile jushort* p, jushort v) { specialized_release_store((volatile jshort*)p, (jshort)v); } +inline void OrderAccess::release_store(volatile juint* p, juint v) { specialized_release_store((volatile jint*) p, (jint) v); } +inline void OrderAccess::release_store(volatile julong* p, julong v) { specialized_release_store((volatile jlong*) p, (jlong) v); } + +inline void OrderAccess::release_store_ptr(volatile intptr_t* p, intptr_t v) { specialized_release_store(p, v); } +inline void OrderAccess::release_store_ptr(volatile void* p, void* v) { specialized_release_store((volatile intptr_t*)p, (intptr_t)v); } + +inline void OrderAccess::release_store_fence(volatile jbyte* p, jbyte v) { specialized_release_store_fence(p, v); } +inline void OrderAccess::release_store_fence(volatile jshort* p, jshort v) { specialized_release_store_fence(p, v); } +inline void OrderAccess::release_store_fence(volatile jint* p, jint v) { specialized_release_store_fence(p, v); } +inline void OrderAccess::release_store_fence(volatile jlong* p, jlong v) { specialized_release_store_fence(p, v); } +inline void OrderAccess::release_store_fence(volatile jfloat* p, jfloat v) { specialized_release_store_fence(p, v); } +inline void OrderAccess::release_store_fence(volatile jdouble* p, jdouble v) { specialized_release_store_fence(p, v); } +inline void OrderAccess::release_store_fence(volatile jubyte* p, jubyte v) { specialized_release_store_fence((volatile jbyte*) p, (jbyte) v); } +inline void OrderAccess::release_store_fence(volatile jushort* p, jushort v) { specialized_release_store_fence((volatile jshort*)p, (jshort)v); } +inline void OrderAccess::release_store_fence(volatile juint* p, juint v) { specialized_release_store_fence((volatile jint*) p, (jint) v); } +inline void OrderAccess::release_store_fence(volatile julong* p, julong v) { specialized_release_store_fence((volatile jlong*) p, (jlong) v); } + +inline void OrderAccess::release_store_ptr_fence(volatile intptr_t* 
p, intptr_t v) { specialized_release_store_fence(p, v); } +inline void OrderAccess::release_store_ptr_fence(volatile void* p, void* v) { specialized_release_store_fence((volatile intptr_t*)p, (intptr_t)v); } + +// The following methods can be specialized using simple template specialization +// in the platform specific files for optimization purposes. Otherwise the +// generalized variant is used. +template inline T OrderAccess::specialized_load_acquire (volatile T* p) { return ordered_load(p); } +template inline void OrderAccess::specialized_release_store (volatile T* p, T v) { ordered_store(p, v); } +template inline void OrderAccess::specialized_release_store_fence(volatile T* p, T v) { ordered_store(p, v); } + +// Generalized atomic volatile accesses valid in OrderAccess +// All other types can be expressed in terms of these. +inline void OrderAccess::store(volatile jbyte* p, jbyte v) { *p = v; } +inline void OrderAccess::store(volatile jshort* p, jshort v) { *p = v; } +inline void OrderAccess::store(volatile jint* p, jint v) { *p = v; } +inline void OrderAccess::store(volatile jlong* p, jlong v) { Atomic::store(v, p); } +inline void OrderAccess::store(volatile jdouble* p, jdouble v) { Atomic::store(jlong_cast(v), (volatile jlong*)p); } +inline void OrderAccess::store(volatile jfloat* p, jfloat v) { *p = v; } + +inline jbyte OrderAccess::load(volatile jbyte* p) { return *p; } +inline jshort OrderAccess::load(volatile jshort* p) { return *p; } +inline jint OrderAccess::load(volatile jint* p) { return *p; } +inline jlong OrderAccess::load(volatile jlong* p) { return Atomic::load(p); } +inline jdouble OrderAccess::load(volatile jdouble* p) { return jdouble_cast(Atomic::load((volatile jlong*)p)); } +inline jfloat OrderAccess::load(volatile jfloat* p) { return *p; } + +#endif // VM_HAS_GENERALIZED_ORDER_ACCESS + #endif // SHARE_VM_RUNTIME_ORDERACCESS_INLINE_HPP