diff --git a/include/boost/atomic/detail/interlocked.hpp b/include/boost/atomic/detail/interlocked.hpp index 774354f..fff93e7 100644 --- a/include/boost/atomic/detail/interlocked.hpp +++ b/include/boost/atomic/detail/interlocked.hpp @@ -18,23 +18,31 @@ #if _WIN32_WCE >= 0x600 -extern "C" long __cdecl _InterlockedCompareExchange( long volatile *, long, long ); -extern "C" long __cdecl _InterlockedExchangeAdd( long volatile *, long ); -extern "C" long __cdecl _InterlockedExchange( long volatile *, long ); +extern "C" long __cdecl _InterlockedCompareExchange(long volatile*, long, long); +extern "C" long __cdecl _InterlockedExchangeAdd(long volatile*, long); +extern "C" long __cdecl _InterlockedExchange(long volatile*, long); +extern "C" long __cdecl _InterlockedIncrement(long volatile*); +extern "C" long __cdecl _InterlockedDecrement(long volatile*); #define BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE(dest, exchange, compare) _InterlockedCompareExchange((long*)(dest), exchange, compare) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE_ADD(dest, addend) _InterlockedExchangeAdd((long*)(dest), (long)(addend)) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE(dest, newval) _InterlockedExchange((long*)(dest), (long)(newval)) +#define BOOST_ATOMIC_INTERLOCKED_INCREMENT(dest) _InterlockedIncrement((long*)(dest)) +#define BOOST_ATOMIC_INTERLOCKED_DECREMENT(dest) _InterlockedDecrement((long*)(dest)) #else // _WIN32_WCE >= 0x600 -extern "C" long __cdecl InterlockedCompareExchange( long*, long, long ); -extern "C" long __cdecl InterlockedExchangeAdd( long*, long ); -extern "C" long __cdecl InterlockedExchange( long*, long ); +extern "C" long __cdecl InterlockedCompareExchange(long*, long, long); +extern "C" long __cdecl InterlockedExchangeAdd(long*, long); +extern "C" long __cdecl InterlockedExchange(long*, long); +extern "C" long __cdecl InterlockedIncrement(long*); +extern "C" long __cdecl InterlockedDecrement(long*); #define BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE(dest, exchange, compare) InterlockedCompareExchange((long*)(dest), exchange, compare) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE_ADD(dest, addend) InterlockedExchangeAdd((long*)(dest), (long)(addend)) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE(dest, newval) InterlockedExchange((long*)(dest), (long)(newval)) +#define BOOST_ATOMIC_INTERLOCKED_INCREMENT(dest) InterlockedIncrement((long*)(dest)) +#define BOOST_ATOMIC_INTERLOCKED_DECREMENT(dest) InterlockedDecrement((long*)(dest)) #endif // _WIN32_WCE >= 0x600 @@ -45,19 +53,25 @@ extern "C" long __cdecl InterlockedExchange( long*, long ); #if _MSC_VER < 1400 -extern "C" long __cdecl _InterlockedCompareExchange( long volatile *, long, long ); -extern "C" long __cdecl _InterlockedExchangeAdd( long volatile *, long ); -extern "C" long __cdecl _InterlockedExchange( long volatile *, long ); +extern "C" long __cdecl _InterlockedCompareExchange(long volatile*, long, long); +extern "C" long __cdecl _InterlockedExchangeAdd(long volatile*, long); +extern "C" long __cdecl _InterlockedExchange(long volatile*, long); +extern "C" long __cdecl _InterlockedIncrement(long volatile*); +extern "C" long __cdecl _InterlockedDecrement(long volatile*); #if defined(BOOST_MSVC) #pragma intrinsic(_InterlockedCompareExchange) #pragma intrinsic(_InterlockedExchangeAdd) #pragma intrinsic(_InterlockedExchange) +#pragma intrinsic(_InterlockedIncrement) +#pragma intrinsic(_InterlockedDecrement) #endif #define BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE(dest, exchange, compare) _InterlockedCompareExchange((long*)(dest), exchange, compare) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE_ADD(dest, addend) _InterlockedExchangeAdd((long*)(dest), (long)(addend)) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE(dest, newval) _InterlockedExchange((long*)(dest), (long)(newval)) +#define BOOST_ATOMIC_INTERLOCKED_INCREMENT(dest) _InterlockedIncrement((long*)(dest)) +#define BOOST_ATOMIC_INTERLOCKED_DECREMENT(dest) _InterlockedDecrement((long*)(dest)) #define BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE_POINTER(dest, exchange, compare) ((void*)BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE((long*)(dest), (long)(exchange), (long)(compare))) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE_POINTER(dest, exchange) ((void*)BOOST_ATOMIC_INTERLOCKED_EXCHANGE((long*)(dest), (long)(exchange))) @@ -70,6 +84,8 @@ extern "C" long __cdecl _InterlockedExchange( long volatile *, long ); #pragma intrinsic(_InterlockedCompareExchange) #pragma intrinsic(_InterlockedExchangeAdd) #pragma intrinsic(_InterlockedExchange) +#pragma intrinsic(_InterlockedIncrement) +#pragma intrinsic(_InterlockedDecrement) #pragma intrinsic(_InterlockedAnd) #pragma intrinsic(_InterlockedOr) #pragma intrinsic(_InterlockedXor) @@ -80,6 +96,8 @@ extern "C" long __cdecl _InterlockedExchange( long volatile *, long ); #define BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE(dest, exchange, compare) _InterlockedCompareExchange((long*)(dest), (long)(exchange), (long)(compare)) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE_ADD(dest, addend) _InterlockedExchangeAdd((long*)(dest), (long)(addend)) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE(dest, newval) _InterlockedExchange((long*)(dest), (long)(newval)) +#define BOOST_ATOMIC_INTERLOCKED_INCREMENT(dest) _InterlockedIncrement((long*)(dest)) +#define BOOST_ATOMIC_INTERLOCKED_DECREMENT(dest) _InterlockedDecrement((long*)(dest)) #define BOOST_ATOMIC_INTERLOCKED_AND(dest, arg) _InterlockedAnd((long*)(dest), (long)(arg)) #define BOOST_ATOMIC_INTERLOCKED_OR(dest, arg) _InterlockedOr((long*)(dest), (long)(arg)) #define BOOST_ATOMIC_INTERLOCKED_XOR(dest, arg) _InterlockedXor((long*)(dest), (long)(arg)) @@ -441,6 +459,8 @@ extern "C" long __cdecl _InterlockedExchange( long volatile *, long ); #define BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE(dest, exchange, compare) InterlockedCompareExchange((long*)(dest), (long)(exchange), (long)(compare)) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE(dest, newval) InterlockedExchange((long*)(dest), (long)(newval)) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE_ADD(dest, addend) InterlockedExchangeAdd((long*)(dest), (long)(addend)) +#define BOOST_ATOMIC_INTERLOCKED_INCREMENT(dest) InterlockedIncrement((long*)(dest)) +#define BOOST_ATOMIC_INTERLOCKED_DECREMENT(dest) InterlockedDecrement((long*)(dest)) #if defined(_WIN64) @@ -477,10 +497,14 @@ extern "C" { BOOST_ATOMIC_INTERLOCKED_IMPORT long __stdcall InterlockedCompareExchange(long volatile*, long, long); BOOST_ATOMIC_INTERLOCKED_IMPORT long __stdcall InterlockedExchange(long volatile*, long); BOOST_ATOMIC_INTERLOCKED_IMPORT long __stdcall InterlockedExchangeAdd(long volatile*, long); +BOOST_ATOMIC_INTERLOCKED_IMPORT long __stdcall InterlockedIncrement(long volatile*, long); +BOOST_ATOMIC_INTERLOCKED_IMPORT long __stdcall InterlockedDecrement(long volatile*, long); #define BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE(dest, exchange, compare) boost::atomics::detail::InterlockedCompareExchange((long*)(dest), (long)(exchange), (long)(compare)) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE(dest, newval) boost::atomics::detail::InterlockedExchange((long*)(dest), (long)(newval)) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE_ADD(dest, addend) boost::atomics::detail::InterlockedExchangeAdd((long*)(dest), (long)(addend)) +#define BOOST_ATOMIC_INTERLOCKED_INCREMENT(dest) boost::atomics::detail::InterlockedIncrement((long*)(dest)) +#define BOOST_ATOMIC_INTERLOCKED_DECREMENT(dest) boost::atomics::detail::InterlockedDecrement((long*)(dest)) #if defined(_WIN64) @@ -488,8 +512,8 @@ BOOST_ATOMIC_INTERLOCKED_IMPORT __int64 __stdcall InterlockedCompareExchange64(_ BOOST_ATOMIC_INTERLOCKED_IMPORT __int64 __stdcall InterlockedExchange64(__int64 volatile*, __int64); BOOST_ATOMIC_INTERLOCKED_IMPORT __int64 __stdcall InterlockedExchangeAdd64(__int64 volatile*, __int64); -BOOST_ATOMIC_INTERLOCKED_IMPORT void* __stdcall InterlockedCompareExchangePointer(void* volatile *, void*, void*); -BOOST_ATOMIC_INTERLOCKED_IMPORT void* __stdcall InterlockedExchangePointer(void* volatile *, void*); +BOOST_ATOMIC_INTERLOCKED_IMPORT void* __stdcall InterlockedCompareExchangePointer(void* volatile*, void*, void*); +BOOST_ATOMIC_INTERLOCKED_IMPORT void* __stdcall InterlockedExchangePointer(void* volatile*, void*); #define BOOST_ATOMIC_INTERLOCKED_COMPARE_EXCHANGE64(dest, exchange, compare) boost::atomics::detail::InterlockedCompareExchange64((__int64*)(dest), (__int64)(exchange), (__int64)(compare)) #define BOOST_ATOMIC_INTERLOCKED_EXCHANGE64(dest, newval) boost::atomics::detail::InterlockedExchange64((__int64*)(dest), (__int64)(newval)) diff --git a/include/boost/atomic/detail/ops_gcc_atomic.hpp b/include/boost/atomic/detail/ops_gcc_atomic.hpp index a56aed3..ce1b8bd 100644 --- a/include/boost/atomic/detail/ops_gcc_atomic.hpp +++ b/include/boost/atomic/detail/ops_gcc_atomic.hpp @@ -15,6 +15,7 @@ #define BOOST_ATOMIC_DETAIL_OPS_GCC_ATOMIC_HPP_INCLUDED_ #include +#include #include #include #include @@ -393,8 +394,8 @@ BOOST_FORCEINLINE void thread_fence(memory_order order) BOOST_NOEXCEPT { // gcc, clang, icc and probably other compilers generate mfence for a seq_cst fence, // while a dummy lock-prefixed instruction would be enough and faster. See the comment in ops_gcc_x86.hpp. - unsigned int dummy; - __asm__ __volatile__ ("lock; orl $0, %0" : "=m" (dummy) : : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"); + boost::uint32_t dummy; + __asm__ __volatile__ ("lock; notl %0" : "=m" (dummy) : : "memory"); } #else __atomic_thread_fence(atomics::detail::convert_memory_order_to_gcc(order)); diff --git a/include/boost/atomic/detail/ops_gcc_x86.hpp b/include/boost/atomic/detail/ops_gcc_x86.hpp index b107cf9..0d2eadf 100644 --- a/include/boost/atomic/detail/ops_gcc_x86.hpp +++ b/include/boost/atomic/detail/ops_gcc_x86.hpp @@ -17,6 +17,7 @@ #define BOOST_ATOMIC_DETAIL_OPS_GCC_X86_HPP_INCLUDED_ #include +#include #include #include #include @@ -523,11 +524,11 @@ BOOST_FORCEINLINE void thread_fence(memory_order order) BOOST_NOEXCEPT // and is faster than mfence on most modern x86 CPUs (as of 2020). // Note that we want to apply the atomic operation on any location so that: // - It is not shared with other threads. A variable on the stack suits this well. - // - It is likely in cache. Being close to the stack top fits this well. + // - It is likely in cache. Being close to the top of the stack fits this well. // - It does not alias existing data on the stack, so that we don't introduce a false data dependency. - // See here: https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ - unsigned int dummy; - __asm__ __volatile__ ("lock; orl $0, %0" : "=m" (dummy) : : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"); + // See some performance data here: https://shipilev.net/blog/2014/on-the-fence-with-dependencies/ + boost::uint32_t dummy; + __asm__ __volatile__ ("lock; notl %0" : "=m" (dummy) : : "memory"); } else if ((static_cast< unsigned int >(order) & (static_cast< unsigned int >(memory_order_acquire) | static_cast< unsigned int >(memory_order_release))) != 0u) { diff --git a/include/boost/atomic/detail/ops_msvc_x86.hpp b/include/boost/atomic/detail/ops_msvc_x86.hpp index d38ec1c..ffc86b9 100644 --- a/include/boost/atomic/detail/ops_msvc_x86.hpp +++ b/include/boost/atomic/detail/ops_msvc_x86.hpp @@ -17,6 +17,7 @@ #define BOOST_ATOMIC_DETAIL_OPS_MSVC_X86_HPP_INCLUDED_ #include +#include #include #include #include @@ -877,13 +878,8 @@ BOOST_FORCEINLINE void thread_fence(memory_order order) BOOST_NOEXCEPT // See the comment in ops_gcc_x86.hpp as to why we're not using mfence here. // We're not using __faststorefence() here because it generates an atomic operation // on [rsp]/[esp] location, which may alias valid data and cause false data dependency. - long dummy; -#if defined(BOOST_ATOMIC_INTERLOCKED_OR) - // May be more efficient as it doesn't require an additional register - BOOST_ATOMIC_INTERLOCKED_OR(&dummy, 0); -#else - BOOST_ATOMIC_INTERLOCKED_EXCHANGE(&dummy, 0); -#endif + boost::uint32_t dummy; + BOOST_ATOMIC_INTERLOCKED_INCREMENT(&dummy); } else if (order != memory_order_relaxed) {