diff a/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp b/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp
--- a/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp
+++ b/src/hotspot/os_cpu/windows_x86/atomic_windows_x86.hpp
@@ -1,7 +1,7 @@
 /*
- * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 only, as
  * published by the Free Software Foundation.
@@ -23,10 +23,11 @@
  */
 
 #ifndef OS_CPU_WINDOWS_X86_ATOMIC_WINDOWS_X86_HPP
 #define OS_CPU_WINDOWS_X86_ATOMIC_WINDOWS_X86_HPP
 
+#include <intrin.h>
 #include "runtime/os.hpp"
 
 // Note that in MSVC, volatile memory accesses are explicitly
 // guaranteed to have acquire release semantics (w.r.t. compiler
 // reordering) and therefore does not even need a compiler barrier
@@ -36,25 +37,10 @@
 
 template<> inline void ScopedFence<X_ACQUIRE>::postfix()       { }
 template<> inline void ScopedFence<RELEASE_X>::prefix()        { }
 template<> inline void ScopedFence<RELEASE_X_FENCE>::prefix()  { }
 template<> inline void ScopedFence<RELEASE_X_FENCE>::postfix() { OrderAccess::fence(); }
 
-// The following alternative implementations are needed because
-// Windows 95 doesn't support (some of) the corresponding Windows NT
-// calls. Furthermore, these versions allow inlining in the caller.
-// (More precisely: The documentation for InterlockedExchange says
-// it is supported for Windows 95. However, when single-stepping
-// through the assembly code we cannot step into the routine and
-// when looking at the routine address we see only garbage code.
-// Better safe then sorry!). Was bug 7/31/98 (gri).
-//
-// Performance note: On uniprocessors, the 'lock' prefixes are not
-// necessary (and expensive). We should generate separate cases if
-// this becomes a performance problem.
-
-#pragma warning(disable: 4035) // Disables warnings reporting missing return statement
-
 template<size_t byte_size>
 struct Atomic::PlatformAdd {
   template<typename D, typename I>
   D add_and_fetch(D volatile* dest, I add_value, atomic_memory_order order) const;
@@ -62,144 +48,74 @@
   D fetch_and_add(D volatile* dest, I add_value, atomic_memory_order order) const {
     return add_and_fetch(dest, add_value, order) - add_value;
   }
 };
 
-#ifdef AMD64
-template<>
-template<typename D, typename I>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
-                                               atomic_memory_order order) const {
-  return add_using_helper<int32_t>(os::atomic_add_func, dest, add_value);
-}
-
-template<>
-template<typename D, typename I>
-inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
-                                               atomic_memory_order order) const {
-  return add_using_helper<int64_t>(os::atomic_add_long_func, dest, add_value);
-}
-
-#define DEFINE_STUB_XCHG(ByteSize, StubType, StubName)                  \
-  template<>                                                            \
-  template<typename T>                                                  \
-  inline T Atomic::PlatformXchg<ByteSize>::operator()(T volatile* dest, \
-                                                      T exchange_value, \
-                                                      atomic_memory_order order) const { \
-    STATIC_ASSERT(ByteSize == sizeof(T));                               \
-    return xchg_using_helper<StubType>(StubName, dest, exchange_value); \
+// The Interlocked* APIs only take long and will not accept __int32. That is
+// acceptable on Windows, since long is a 32-bit integer type.
+
+#define DEFINE_INTRINSIC_ADD(IntrinsicName, IntrinsicType)               \
+  template<>                                                              \
+  template<typename D, typename I>                                        \
+  inline D Atomic::PlatformAdd<sizeof(IntrinsicType)>::add_and_fetch(D volatile* dest, \
+                                                                      I add_value, \
+                                                                      atomic_memory_order order) const { \
+    STATIC_ASSERT(sizeof(IntrinsicType) == sizeof(D));                    \
+    return PrimitiveConversions::cast<D>(                                 \
+      IntrinsicName(reinterpret_cast<IntrinsicType volatile*>(dest),      \
+                    PrimitiveConversions::cast<IntrinsicType>(add_value))); \
   }
 
-DEFINE_STUB_XCHG(4, int32_t, os::atomic_xchg_func)
-DEFINE_STUB_XCHG(8, int64_t, os::atomic_xchg_long_func)
-
-#undef DEFINE_STUB_XCHG
-
-#define DEFINE_STUB_CMPXCHG(ByteSize, StubType, StubName)                   \
-  template<>                                                                \
-  template<typename T>                                                      \
-  inline T Atomic::PlatformCmpxchg<ByteSize>::operator()(T volatile* dest,  \
-                                                          T compare_value,  \
-                                                          T exchange_value, \
-                                                          atomic_memory_order order) const { \
-    STATIC_ASSERT(ByteSize == sizeof(T));                                   \
-    return cmpxchg_using_helper<StubType>(StubName, dest, compare_value, exchange_value); \
+DEFINE_INTRINSIC_ADD(InterlockedAdd, long)
+DEFINE_INTRINSIC_ADD(InterlockedAdd64, __int64)
+
+#undef DEFINE_INTRINSIC_ADD
+
+#define DEFINE_INTRINSIC_XCHG(IntrinsicName, IntrinsicType)               \
+  template<>                                                               \
+  template<typename T>                                                     \
+  inline T Atomic::PlatformXchg<sizeof(IntrinsicType)>::operator()(T volatile* dest, \
+                                                                    T exchange_value, \
+                                                                    atomic_memory_order order) const { \
+    STATIC_ASSERT(sizeof(IntrinsicType) == sizeof(T));                     \
+    return PrimitiveConversions::cast<T>(                                  \
+      IntrinsicName(reinterpret_cast<IntrinsicType volatile*>(dest),       \
+                    PrimitiveConversions::cast<IntrinsicType>(exchange_value))); \
   }
 
-DEFINE_STUB_CMPXCHG(1, int8_t, os::atomic_cmpxchg_byte_func)
-DEFINE_STUB_CMPXCHG(4, int32_t, os::atomic_cmpxchg_func)
-DEFINE_STUB_CMPXCHG(8, int64_t, os::atomic_cmpxchg_long_func)
-
-#undef DEFINE_STUB_CMPXCHG
-
-#else // !AMD64
-
-template<>
-template<typename D, typename I>
-inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
-                                               atomic_memory_order order) const {
-  STATIC_ASSERT(4 == sizeof(I));
-  STATIC_ASSERT(4 == sizeof(D));
-  __asm {
-    mov edx, dest;
-    mov eax, add_value;
-    mov ecx, eax;
-    lock xadd dword ptr [edx], eax;
-    add eax, ecx;
+DEFINE_INTRINSIC_XCHG(InterlockedExchange, long)
+DEFINE_INTRINSIC_XCHG(InterlockedExchange64, __int64)
+
+#undef DEFINE_INTRINSIC_XCHG
+
+// Note: the order of the parameters is different between
+// Atomic::PlatformCmpxchg<*>::operator() and the
+// InterlockedCompareExchange* API.
+
+#define DEFINE_INTRINSIC_CMPXCHG(IntrinsicName, IntrinsicType)            \
+  template<>                                                               \
+  template<typename T>                                                     \
+  inline T Atomic::PlatformCmpxchg<sizeof(IntrinsicType)>::operator()(T volatile* dest, \
+                                                                       T compare_value, \
+                                                                       T exchange_value, \
+                                                                       atomic_memory_order order) const { \
+    STATIC_ASSERT(sizeof(IntrinsicType) == sizeof(T));                     \
+    return PrimitiveConversions::cast<T>(                                  \
+      IntrinsicName(reinterpret_cast<IntrinsicType volatile*>(dest),       \
+                    PrimitiveConversions::cast<IntrinsicType>(exchange_value), \
+                    PrimitiveConversions::cast<IntrinsicType>(compare_value))); \
   }
-}
 
-template<>
-template<typename T>
-inline T Atomic::PlatformXchg<4>::operator()(T volatile* dest,
-                                             T exchange_value,
-                                             atomic_memory_order order) const {
-  STATIC_ASSERT(4 == sizeof(T));
-  // alternative for InterlockedExchange
-  __asm {
-    mov eax, exchange_value;
-    mov ecx, dest;
-    xchg eax, dword ptr [ecx];
-  }
-}
+DEFINE_INTRINSIC_CMPXCHG(_InterlockedCompareExchange8, char) // Use the intrinsic as InterlockedCompareExchange8 does not exist
+DEFINE_INTRINSIC_CMPXCHG(InterlockedCompareExchange, long)
+DEFINE_INTRINSIC_CMPXCHG(InterlockedCompareExchange64, __int64)
 
-template<>
-template<typename T>
-inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest,
-                                                T compare_value,
-                                                T exchange_value,
-                                                atomic_memory_order order) const {
-  STATIC_ASSERT(1 == sizeof(T));
-  // alternative for InterlockedCompareExchange
-  __asm {
-    mov edx, dest
-    mov cl, exchange_value
-    mov al, compare_value
-    lock cmpxchg byte ptr [edx], cl
-  }
-}
+#undef DEFINE_INTRINSIC_CMPXCHG
 
-template<>
-template<typename T>
-inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
-                                                T compare_value,
-                                                T exchange_value,
-                                                atomic_memory_order order) const {
-  STATIC_ASSERT(4 == sizeof(T));
-  // alternative for InterlockedCompareExchange
-  __asm {
-    mov edx, dest
-    mov ecx, exchange_value
-    mov eax, compare_value
-    lock cmpxchg dword ptr [edx], ecx
-  }
-}
+#ifndef AMD64
 
-template<>
-template<typename T>
-inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
-                                                T compare_value,
-                                                T exchange_value,
-                                                atomic_memory_order order) const {
-  STATIC_ASSERT(8 == sizeof(T));
-  int32_t ex_lo  = (int32_t)exchange_value;
-  int32_t ex_hi  = *( ((int32_t*)&exchange_value) + 1 );
-  int32_t cmp_lo = (int32_t)compare_value;
-  int32_t cmp_hi = *( ((int32_t*)&compare_value) + 1 );
-  __asm {
-    push ebx
-    push edi
-    mov eax, cmp_lo
-    mov edx, cmp_hi
-    mov edi, dest
-    mov ebx, ex_lo
-    mov ecx, ex_hi
-    lock cmpxchg8b qword ptr [edi]
-    pop edi
-    pop ebx
-  }
-}
+#pragma warning(disable: 4035) // Disables warnings reporting missing return statement
 
 template<>
 template<typename T>
 inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const {
   STATIC_ASSERT(8 == sizeof(T));
@@ -226,15 +142,12 @@
     mov eax, dest
    fistp qword ptr [eax]
   }
 }
 
-#endif // AMD64
-
 #pragma warning(default: 4035) // Enables warnings reporting missing return statement
 
-#ifndef AMD64
 template<>
 struct Atomic::PlatformOrderedStore<1, RELEASE_X_FENCE>
 {
   template <typename T>
   void operator()(volatile T* p, T v) const {