diff a/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp b/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp
--- a/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp
+++ b/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp
@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.
@@ -23,10 +23,11 @@
  */
 
 #ifndef OS_CPU_WINDOWS_X86_ATOMIC_WINDOWS_X86_HPP
 #define OS_CPU_WINDOWS_X86_ATOMIC_WINDOWS_X86_HPP
 
+#include <intrin.h>
 #include "runtime/os.hpp"
 
 // Note that in MSVC, volatile memory accesses are explicitly
 // guaranteed to have acquire release semantics (w.r.t. compiler
 // reordering) and therefore does not even need a compiler barrier
@@ -36,25 +37,10 @@
 
 template<> inline void ScopedFence<X_ACQUIRE>::postfix()       { }
 template<> inline void ScopedFence<RELEASE_X>::prefix()        { }
 template<> inline void ScopedFence<RELEASE_X_FENCE>::prefix()  { }
 template<> inline void ScopedFence<RELEASE_X_FENCE>::postfix() { OrderAccess::fence(); }
 
-// The following alternative implementations are needed because
-// Windows 95 doesn't support (some of) the corresponding Windows NT
-// calls. Furthermore, these versions allow inlining in the caller.
-// (More precisely: The documentation for InterlockedExchange says
-// it is supported for Windows 95. However, when single-stepping
-// through the assembly code we cannot step into the routine and
-// when looking at the routine address we see only garbage code.
-// Better safe then sorry!). Was bug 7/31/98 (gri).
-//
-// Performance note: On uniprocessors, the 'lock' prefixes are not
-// necessary (and expensive). We should generate separate cases if
-// this becomes a performance problem.
-
-#pragma warning(disable: 4035) // Disables warnings reporting missing return statement
-
 template<size_t byte_size>
 struct Atomic::PlatformAdd {
   template<typename D, typename I>
   D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
@@ -62,144 +48,74 @@
   D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const {
     return add_and_fetch(dest, add_value, order) - add_value;
   }
 };
 
-#ifdef AMD64
-template<>
-template<typename D, typename I>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
-                                               atomic_memory_order order) const {
-  return add_using_helper<int32_t>(os::atomic_add_func, dest, add_value);
-}
-
-template<>
-template<typename D, typename I>
-inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
-                                               atomic_memory_order order) const {
-  return add_using_helper<int64_t>(os::atomic_add_long_func, dest, add_value);
-}
-
-#define DEFINE_STUB_XCHG(ByteSize, StubType, StubName)                  \
-  template<>                                                            \
-  template<typename T>                                                  \
-  inline T Atomic::PlatformXchg<ByteSize>::operator()(T volatile* dest, \
-                                                      T exchange_value, \
-                                                      atomic_memory_order order) const { \
-    STATIC_ASSERT(ByteSize == sizeof(T));                               \
-    return xchg_using_helper<StubType>(StubName, dest, exchange_value); \
+// The Interlocked* APIs only take long and will not accept __int32. That is
+// acceptable on Windows, since long is a 32-bit integer type.
+
+#define DEFINE_INTRINSIC_ADD(IntrinsicName, IntrinsicType)               \
+  template<>                                                              \
+  template<typename D, typename I>                                        \
+  inline D Atomic::PlatformAdd<sizeof(IntrinsicType)>::add_and_fetch(D volatile* dest, \
+                                                                      I add_value, \
+                                                                      atomic_memory_order order) const { \
+    STATIC_ASSERT(sizeof(IntrinsicType) == sizeof(D));                    \
+    return PrimitiveConversions::cast<D>(                                 \
+      IntrinsicName(reinterpret_cast<IntrinsicType volatile*>(dest),      \
+                    PrimitiveConversions::cast<IntrinsicType>(add_value))); \
   }
 
-DEFINE_STUB_XCHG(4, int32_t, os::atomic_xchg_func)
-DEFINE_STUB_XCHG(8, int64_t, os::atomic_xchg_long_func)
-
-#undef DEFINE_STUB_XCHG
-
-#define DEFINE_STUB_CMPXCHG(ByteSize, StubType, StubName)                   \
-  template<>                                                                \
-  template<typename T>                                                      \
-  inline T Atomic::PlatformCmpxchg<ByteSize>::operator()(T volatile* dest,  \
-                                                          T compare_value,  \
-                                                          T exchange_value, \
-                                                          atomic_memory_order order) const { \
-    STATIC_ASSERT(ByteSize == sizeof(T));                                   \
-    return cmpxchg_using_helper<StubType>(StubName, dest, compare_value, exchange_value); \
+DEFINE_INTRINSIC_ADD(InterlockedAdd, long)
+DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)
+
+#undef DEFINE_INTRINSIC_ADD
+
+#define DEFINE_INTRINSIC_XCHG(IntrinsicName, IntrinsicType)               \
+  template<>                                                               \
+  template<typename T>                                                     \
+  inline T Atomic::PlatformXchg<sizeof(IntrinsicType)>::operator()(T volatile* dest, \
+                                                                    T exchange_value, \
+                                                                    atomic_memory_order order) const { \
+    STATIC_ASSERT(sizeof(IntrinsicType) == sizeof(T));                     \
+    return PrimitiveConversions::cast<T>(                                  \
+      IntrinsicName(reinterpret_cast<IntrinsicType volatile*>(dest),       \
+                    PrimitiveConversions::cast<IntrinsicType>(exchange_value))); \
   }
 
-DEFINE_STUB_CMPXCHG(1, int8_t, os::atomic_cmpxchg_byte_func)
-DEFINE_STUB_CMPXCHG(4, int32_t, os::atomic_cmpxchg_func)
-DEFINE_STUB_CMPXCHG(8, int64_t, os::atomic_cmpxchg_long_func)
-
-#undef DEFINE_STUB_CMPXCHG
-
-#else // !AMD64
-
-template<>
-template<typename D, typename I>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
-                                               atomic_memory_order order) const {
-  STATIC_ASSERT(4 == sizeof(I));
-  STATIC_ASSERT(4 == sizeof(D));
-  __asm {
-    mov edx, dest;
-    mov eax, add_value;
-    mov ecx, eax;
-    lock xadd dword ptr [edx], eax;
-    add eax, ecx;
+DEFINE_INTRINSIC_XCHG(InterlockedExchange, long)
+DEFINE_INTRINSIC_XCHG(InterlockedExchange64, __int64)
+
+#undef DEFINE_INTRINSIC_XCHG
+
+// Note: the order of the parameters is different between
+// Atomic::PlatformCmpxchg<*>::operator() and the
+// InterlockedCompareExchange* API.
+
+#define DEFINE_INTRINSIC_CMPXCHG(IntrinsicName, IntrinsicType)            \
+  template<>                                                               \
+  template<typename T>                                                     \
+  inline T Atomic::PlatformCmpxchg<sizeof(IntrinsicType)>::operator()(T volatile* dest, \
+                                                                       T compare_value, \
+                                                                       T exchange_value, \
+                                                                       atomic_memory_order order) const { \
+    STATIC_ASSERT(sizeof(IntrinsicType) == sizeof(T));                     \
+    return PrimitiveConversions::cast<T>(                                  \
+      IntrinsicName(reinterpret_cast<IntrinsicType volatile*>(dest),       \
+                    PrimitiveConversions::cast<IntrinsicType>(exchange_value), \
+                    PrimitiveConversions::cast<IntrinsicType>(compare_value))); \
   }
-}
 
-template<>
-template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
-                                             T exchange_value,
-                                             atomic_memory_order order) const {
-  STATIC_ASSERT(4 == sizeof(T));
-  // alternative for InterlockedExchange
-  __asm {
-    mov eax, exchange_value;
-    mov ecx, dest;
-    xchg eax, dword ptr [ecx];
-  }
-}
+DEFINE_INTRINSIC_CMPXCHG(_InterlockedCompareExchange8, char) // Use the intrinsic as InterlockedCompareExchange8 does not exist
+DEFINE_INTRINSIC_CMPXCHG(InterlockedCompareExchange, long)
+DEFINE_INTRINSIC_CMPXCHG(InterlockedCompareExchange64, __int64)
 
-template<>
-template<typename T>
-inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest,
-                                                T compare_value,
-                                                T exchange_value,
-                                                atomic_memory_order order) const {
-  STATIC_ASSERT(1 == sizeof(T));
-  // alternative for InterlockedCompareExchange
-  __asm {
-    mov edx, dest
-    mov cl, exchange_value
-    mov al, compare_value
-    lock cmpxchg byte ptr [edx], cl
-  }
-}
+#undef DEFINE_INTRINSIC_CMPXCHG
 
-template<>
-template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
-                                                T compare_value,
-                                                T exchange_value,
-                                                atomic_memory_order order) const {
-  STATIC_ASSERT(4 == sizeof(T));
-  // alternative for InterlockedCompareExchange
-  __asm {
-    mov edx, dest
-    mov ecx, exchange_value
-    mov eax, compare_value
-    lock cmpxchg dword ptr [edx], ecx
-  }
-}
+#ifndef AMD64
 
-template<>
-template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
-                                                T compare_value,
-                                                T exchange_value,
-                                                atomic_memory_order order) const {
-  STATIC_ASSERT(8 == sizeof(T));
-  int32_t ex_lo  = (int32_t)exchange_value;
-  int32_t ex_hi  = *( ((int32_t*)&exchange_value) + 1 );
-  int32_t cmp_lo = (int32_t)compare_value;
-  int32_t cmp_hi = *( ((int32_t*)&compare_value) + 1 );
-  __asm {
-    push ebx
-    push edi
-    mov eax, cmp_lo
-    mov edx, cmp_hi
-    mov edi, dest
-    mov ebx, ex_lo
-    mov ecx, ex_hi
-    lock cmpxchg8b qword ptr [edi]
-    pop edi
-    pop ebx
-  }
-}
+#pragma warning(disable: 4035) // Disables warnings reporting missing return statement
 
 template<>
 template<typename T>
 inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
   STATIC_ASSERT(8 == sizeof(T));
@@ -226,15 +142,12 @@
     mov eax, dest
    fistp qword ptr [eax]
   }
 }
 
-#endif // AMD64
-
 #pragma warning(default: 4035) // Enables warnings reporting missing return statement
 
-#ifndef AMD64
 template<>
 struct Atomic::PlatformOrderedStore<1, RELEASE_X_FENCE>
 {
   template <typename T>
   void operator()(volatile T* p, T v) const {