diff --git a/boost/atomic.hpp b/boost/atomic.hpp
index b05234f6..cf07ea5e 100644
--- a/boost/atomic.hpp
+++ b/boost/atomic.hpp
@@ -164,7 +164,7 @@ public:
     bool test_and_set(memory_order order=memory_order_seq_cst)
     {
-        return super::exchange(1, order);
+        return super::exchange(1, order) != 0;
     }
     void clear(memory_order order=memory_order_seq_cst)
     {
diff --git a/boost/atomic/detail/fallback.hpp b/boost/atomic/detail/fallback.hpp
index 71a4dd2c..4c041559 100644
--- a/boost/atomic/detail/fallback.hpp
+++ b/boost/atomic/detail/fallback.hpp
@@ -25,7 +25,7 @@ public:
         detail::spinlock_pool<0>::scoped_lock guard(const_cast<T *>(&i));
         memcpy((void*)&i, &t, sizeof(T));
     }
-    T load(memory_order order=memory_order_seq_cst) volatile const
+    T load(memory_order /*order*/=memory_order_seq_cst) volatile const
     {
         detail::spinlock_pool<0>::scoped_lock guard(const_cast<T *>(&i));
         T tmp;
@@ -35,8 +35,8 @@ public:
     bool compare_exchange_strong(
         T &expected,
         T desired,
-        memory_order success_order,
-        memory_order failure_order) volatile
+        memory_order /*success_order*/,
+        memory_order /*failure_order*/) volatile
     {
         detail::spinlock_pool<0>::scoped_lock guard(const_cast<T *>(&i));
         if (memcmp((void*)&i, &expected, sizeof(T))==0) {
@@ -55,7 +55,7 @@ public:
     {
         return compare_exchange_strong(expected, desired, success_order, failure_order);
     }
-    T exchange(T replacement, memory_order order=memory_order_seq_cst) volatile
+    T exchange(T replacement, memory_order /*order*/=memory_order_seq_cst) volatile
     {
         detail::spinlock_pool<0>::scoped_lock guard(const_cast<T *>(&i));
         T tmp;
diff --git a/boost/atomic/detail/gcc-alpha.hpp b/boost/atomic/detail/gcc-alpha.hpp
index c47b367b..84a54112 100644
--- a/boost/atomic/detail/gcc-alpha.hpp
+++ b/boost/atomic/detail/gcc-alpha.hpp
@@ -68,7 +68,7 @@ static inline void fence_after(memory_order order)
 }
 
 template<>
-static inline void platform_atomic_thread_fence(memory_order order)
+inline void platform_atomic_thread_fence(memory_order order)
 {
     switch(order) {
         case memory_order_acquire:
diff --git a/boost/atomic/detail/gcc-armv6+.hpp b/boost/atomic/detail/gcc-armv6+.hpp
new file mode 100644
index 00000000..cd8609d7
--- /dev/null
+++ b/boost/atomic/detail/gcc-armv6+.hpp
@@ -0,0 +1,299 @@
+#ifndef BOOST_DETAIL_ATOMIC_GCC_ARMV6P_HPP
+#define BOOST_DETAIL_ATOMIC_GCC_ARMV6P_HPP
+
+// Distributed under the Boost Software License, Version 1.0.
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+// Copyright (c) 2009 Helge Bahmann
+// Copyright (c) 2009 Phil Endecott
+// ARM Code by Phil Endecott, based on other architectures.
+
+
+#include <boost/memory_order.hpp>
+#include <boost/atomic/detail/base.hpp>
+#include <boost/atomic/detail/builder.hpp>
+
+// From the ARM Architecture Reference Manual for architecture v6:
+//
+// LDREX{<cond>} <Rd>, [<Rn>]
+// <Rd> Specifies the destination register for the memory word addressed by <Rn>
+// <Rn> Specifies the register containing the address.
+//
+// STREX{<cond>} <Rd>, <Rm>, [<Rn>]
+// <Rd> Specifies the destination register for the returned status value.
+//      0 if the operation updates memory
+//      1 if the operation fails to update memory
+// <Rm> Specifies the register containing the word to be stored to memory.
+// <Rn> Specifies the register containing the address.
+//      Rd must not be the same register as Rm or Rn.
+//
+// ARM v7 is like ARM v6 plus:
+// There are half-word and byte versions of the LDREX and STREX instructions,
+// LDREXH, LDREXB, STREXH and STREXB.
+// There are also double-word versions, LDREXD and STREXD.
+// (Actually it looks like these are available from version 6k onwards.)
+// FIXME these are not yet used; should be mostly a matter of copy-and-paste.
+// I think you can supply an immediate offset to the address.
+//
+// A memory barrier is effected using a "co-processor 15" instruction,
+// though a separate assembler mnemonic is available for it in v7.
+
+namespace boost {
+namespace detail {
+namespace atomic {
+
+
+// "Thumb 1" is a subset of the ARM instruction set that uses a 16-bit encoding. It
+// doesn't include all instructions and in particular it doesn't include the co-processor
+// instruction used for the memory barrier or the load-locked/store-conditional
+// instructions. So, if we're compiling in "Thumb 1" mode, we need to wrap all of our
+// asm blocks with code to temporarily change to ARM mode.
+//
+// You can only change between ARM and Thumb modes when branching using the bx instruction.
+// bx takes an address specified in a register. The least significant bit of the address
+// indicates the mode, so 1 is added to indicate that the destination code is Thumb.
+// A temporary register is needed for the address and is passed as an argument to these
+// macros. It must be one of the "low" registers accessible to Thumb code, specified
+// using the "l" attribute in the asm statement.
+//
+// Architecture v7 introduces "Thumb 2", which does include (almost?) all of the ARM
+// instruction set. So in v7 we don't need to change to ARM mode; we can write "universal
+// assembler" which will assemble to Thumb 2 or ARM code as appropriate. The only thing
+// we need to do to make this "universal" assembler mode work is to insert "IT" instructions
+// to annotate the conditional instructions. These are ignored in other modes (e.g. v6),
+// so they can always be present.
+
+#if defined(__thumb__) && !defined(__ARM_ARCH_7A__)
+// FIXME also other v7 variants.
+#define BOOST_ATOMIC_ARM_ASM_START(TMPREG) "adr " #TMPREG ", 1f\n" "bx " #TMPREG "\n" ".arm\n" ".align 4\n" "1: "
+#define BOOST_ATOMIC_ARM_ASM_END(TMPREG)   "adr " #TMPREG ", 1f + 1\n" "bx " #TMPREG "\n" ".thumb\n" ".align 2\n" "1: "
+
+#else
+// The tmpreg is wasted in this case, which is non-optimal.
+#define BOOST_ATOMIC_ARM_ASM_START(TMPREG)
+#define BOOST_ATOMIC_ARM_ASM_END(TMPREG)
+#endif
+
+
+#if defined(__ARM_ARCH_7A__)
+// FIXME ditto.
+#define BOOST_ATOMIC_ARM_DMB "dmb\n"
+#else
+#define BOOST_ATOMIC_ARM_DMB "mcr\tp15, 0, r0, c7, c10, 5\n"
+#endif
+
+// There is also a "Data Synchronisation Barrier" DSB; this exists in v6 as another co-processor
+// instruction like the above.
+
+
+static inline void fence_before(memory_order order)
+{
+    // FIXME I don't understand enough about barriers to know what this should do.
+    switch(order) {
+        case memory_order_release:
+        case memory_order_acq_rel:
+        case memory_order_seq_cst:
+            int brtmp;
+            __asm__ __volatile__ (
+                BOOST_ATOMIC_ARM_ASM_START(%0)
+                BOOST_ATOMIC_ARM_DMB
+                BOOST_ATOMIC_ARM_ASM_END(%0)
+                : "=&l" (brtmp) :: "memory"
+            );
+        default:;
+    }
+}
+
+static inline void fence_after(memory_order order)
+{
+    // FIXME I don't understand enough about barriers to know what this should do.
+    switch(order) {
+        case memory_order_acquire:
+        case memory_order_acq_rel:
+        case memory_order_seq_cst:
+            int brtmp;
+            __asm__ __volatile__ (
+                BOOST_ATOMIC_ARM_ASM_START(%0)
+                BOOST_ATOMIC_ARM_DMB
+                BOOST_ATOMIC_ARM_ASM_END(%0)
+                : "=&l" (brtmp) :: "memory"
+            );
+        case memory_order_consume:
+            __asm__ __volatile__ ("" ::: "memory");
+        default:;
+    }
+}
+
+#undef BOOST_ATOMIC_ARM_DMB
+
+
+template<typename T>
+class atomic_arm_4 {
+public:
+    typedef T integral_type;
+    explicit atomic_arm_4(T v) : i(v) {}
+    atomic_arm_4() {}
+    T load(memory_order order=memory_order_seq_cst) const volatile
+    {
+        T v=const_cast<const volatile T &>(i);
+        fence_after(order);
+        return v;
+    }
+    void store(T v, memory_order order=memory_order_seq_cst) volatile
+    {
+        fence_before(order);
+        const_cast<volatile T &>(i)=v;
+    }
+    bool compare_exchange_weak(
+        T &expected,
+        T desired,
+        memory_order success_order,
+        memory_order failure_order) volatile
+    {
+        fence_before(success_order);
+        int success;
+        int tmp;
+        __asm__ __volatile__(
+            BOOST_ATOMIC_ARM_ASM_START(%2)
+            "mov %1, #0\n"           // success = 0
+            "ldrex %0, [%3]\n"       // expected' = *(&i)
+            "teq %0, %4\n"           // flags = expected'==expected
+            "ittt eq\n"
+            "strexeq %2, %5, [%3]\n" // if (flags.equal) *(&i) = desired, tmp = !OK
+            "teqeq %2, #0\n"         // if (flags.equal) flags = tmp==0
+            "moveq %1, #1\n"         // if (flags.equal) success = 1
+            BOOST_ATOMIC_ARM_ASM_END(%2)
+            : "=&r" (expected),  // %0
+              "=&r" (success),   // %1
+              "=&l" (tmp)        // %2
+            : "r" (&i),          // %3
+              "r" (expected),    // %4
+              "r" ((int)desired) // %5
+            : "cc"
+        );
+        if (success) fence_after(success_order);
+        else fence_after(failure_order);
+        return success;
+    }
+
+    bool is_lock_free(void) const volatile {return true;}
+protected:
+    inline T fetch_add_var(T c, memory_order order) volatile
+    {
+        fence_before(order);
+        T original, tmp;
+        int tmp2;
+        __asm__ __volatile__(
+            BOOST_ATOMIC_ARM_ASM_START(%2)
+            "1: ldrex %0, [%3]\n"  // original = *(&i)
+            "add %1, %0, %4\n"     // tmp = original + c
+            "strex %2, %1, [%3]\n" // *(&i) = tmp; tmp2 = !OK
+            "teq %2, #0\n"         // flags = tmp2==0
+            "it ne\n"
+            "bne 1b\n"             // if (!flags.equal) goto 1
+            BOOST_ATOMIC_ARM_ASM_END(%2)
+            : "=&r" (original), // %0
+              "=&r" (tmp),      // %1
+              "=&l" (tmp2)      // %2
+            : "r" (&i),         // %3
+              "r" (c)           // %4
+            : "cc"
+        );
+        fence_after(order);
+        return original;
+    }
+    inline T fetch_inc(memory_order order) volatile
+    {
+        fence_before(order);
+        T original, tmp;
+        int tmp2;
+        __asm__ __volatile__(
+            BOOST_ATOMIC_ARM_ASM_START(%2)
+            "1: ldrex %0, [%3]\n"  // original = *(&i)
+            "add %1, %0, #1\n"     // tmp = original + 1
+            "strex %2, %1, [%3]\n" // *(&i) = tmp; tmp2 = !OK
+            "teq %2, #0\n"         // flags = tmp2==0
+            "it ne\n"
+            "bne 1b\n"             // if (!flags.equal) goto 1
+            BOOST_ATOMIC_ARM_ASM_END(%2)
+            : "=&r" (original), // %0
+              "=&r" (tmp),      // %1
+              "=&l" (tmp2)      // %2
+            : "r" (&i)          // %3
+            : "cc"
+        );
+        fence_after(order);
+        return original;
+    }
+    inline T fetch_dec(memory_order order) volatile
+    {
+        fence_before(order);
+        T original, tmp;
+        int tmp2;
+        __asm__ __volatile__(
+            BOOST_ATOMIC_ARM_ASM_START(%2)
+            "1: ldrex %0, [%3]\n"  // original = *(&i)
+            "sub %1, %0, #1\n"     // tmp = original - 1
+            "strex %2, %1, [%3]\n" // *(&i) = tmp; tmp2 = !OK
+            "teq %2, #0\n"         // flags = tmp2==0
+            "it ne\n"
+            "bne 1b\n"             // if (!flags.equal) goto 1
+            BOOST_ATOMIC_ARM_ASM_END(%2)
+            : "=&r" (original), // %0
+              "=&r" (tmp),      // %1
+              "=&l" (tmp2)      // %2
+            : "r" (&i)          // %3
+            : "cc"
+        );
+        fence_after(order);
+        return original;
+    }
+private:
+    T i;
+};
+
+
+// #ifdef _ARM_ARCH_7
+// FIXME TODO can add native byte and halfword version here
+
+
+template<typename T>
+class platform_atomic_integral<T, 4> : public build_atomic_from_typical<build_exchange<atomic_arm_4<T> > > {
+public:
+    typedef build_atomic_from_typical<build_exchange<atomic_arm_4<T> > > super;
+    explicit platform_atomic_integral(T v) : super(v) {}
+    platform_atomic_integral(void) {}
+};
+
+template<typename T>
+class platform_atomic_integral<T, 1>: public build_atomic_from_larger_type<atomic_arm_4<uint32_t>, T> {
+public:
+    typedef build_atomic_from_larger_type<atomic_arm_4<uint32_t>, T> super;
+
+    explicit platform_atomic_integral(T v) : super(v) {}
+    platform_atomic_integral(void) {}
+};
+
+template<typename T>
+class platform_atomic_integral<T, 2>: public build_atomic_from_larger_type<atomic_arm_4<uint32_t>, T> {
+public:
+    typedef build_atomic_from_larger_type<atomic_arm_4<uint32_t>, T> super;
+
+    explicit platform_atomic_integral(T v) : super(v) {}
+    platform_atomic_integral(void) {}
+};
+
+
+
+typedef build_exchange<atomic_arm_4<void *> > platform_atomic_address;
+
+}
+}
+}
+
+#undef BOOST_ATOMIC_ARM_ASM_START
+#undef BOOST_ATOMIC_ARM_ASM_END
+
+
+#endif
diff --git a/boost/atomic/detail/gcc-ppc.hpp b/boost/atomic/detail/gcc-ppc.hpp
index 6b1879c8..19ce2ca7 100644
--- a/boost/atomic/detail/gcc-ppc.hpp
+++ b/boost/atomic/detail/gcc-ppc.hpp
@@ -73,7 +73,7 @@ static inline void fence_after(memory_order order)
 }
 
 template<>
-static inline void platform_atomic_thread_fence(memory_order order)
+inline void platform_atomic_thread_fence(memory_order order)
 {
     switch(order) {
         case memory_order_acquire:
diff --git a/boost/atomic/detail/gcc-x86.hpp b/boost/atomic/detail/gcc-x86.hpp
index 425a4f50..29bc9b8d 100644
--- a/boost/atomic/detail/gcc-x86.hpp
+++ b/boost/atomic/detail/gcc-x86.hpp
@@ -43,7 +43,7 @@ static inline void full_fence(void)
     __asm__ __volatile__("mfence" ::: "memory");
 #else
     /* could use mfence iff i686, but it does not appear to matter much */
-    __asm__ __volatile__("lock addl $0, (%%esp)" ::: "memory");
+    __asm__ __volatile__("lock; addl $0, (%%esp)" ::: "memory");
 #endif
 }
 
@@ -59,7 +59,8 @@ static inline void fence_after_load(memory_order order)
     }
 }
 
-static inline void platform_atomic_thread_fence(memory_order order)
+template<>
+inline void platform_atomic_thread_fence(memory_order order)
 {
     switch(order) {
         case memory_order_seq_cst:
@@ -101,7 +102,7 @@ public:
     {
         fence_before(success_order);
         T prev=expected;
-        __asm__ __volatile__("lock cmpxchgb %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
+        __asm__ __volatile__("lock; cmpxchgb %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
         bool success=(prev==expected);
         if (success) fence_after(success_order);
         else fence_after(failure_order);
@@ -118,12 +119,12 @@ public:
     }
     T exchange(T r, memory_order order=memory_order_seq_cst) volatile
     {
-        __asm__ __volatile__("xchgb %0, %1\n" : "=r" (r) : "m"(i), "0" (r) : "memory");
+        __asm__ __volatile__("xchgb %0, %1\n" : "=q" (r) : "m"(i), "0" (r) : "memory");
         return r;
     }
     T fetch_add(T c, memory_order order=memory_order_seq_cst) volatile
     {
-        __asm__ __volatile__("lock xaddb %0, %1" : "+r" (c), "+m" (i) :: "memory");
+        __asm__ __volatile__("lock; xaddb %0, %1" : "+q" (c), "+m" (i) :: "memory");
         return c;
     }
 
@@ -170,7 +171,7 @@ public:
     {
         fence_before(success_order);
         T prev=expected;
-        __asm__ __volatile__("lock cmpxchgw %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
+        __asm__ __volatile__("lock; cmpxchgw %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
         bool success=(prev==expected);
         if (success) fence_after(success_order);
         else fence_after(failure_order);
@@ -192,7 +193,7 @@ public:
     }
     T fetch_add(T c, memory_order order=memory_order_seq_cst) volatile
     {
-        __asm__ __volatile__("lock xaddw %0, %1" : "+r" (c), "+m" (i) :: "memory");
+        __asm__ __volatile__("lock; xaddw %0, %1" : "+r" (c), "+m" (i) :: "memory");
         return c;
     }
 
@@ -239,7 +240,7 @@ public:
     {
         fence_before(success_order);
         T prev=expected;
-        __asm__ __volatile__("lock cmpxchgl %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
+        __asm__ __volatile__("lock; cmpxchgl %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
         bool success=(prev==expected);
         if (success) fence_after(success_order);
         else fence_after(failure_order);
@@ -261,7 +262,7 @@ public:
     }
     T fetch_add(T c, memory_order order=memory_order_seq_cst) volatile
     {
-        __asm__ __volatile__("lock xaddl %0, %1" : "+r" (c), "+m" (i) :: "memory");
+        __asm__ __volatile__("lock; xaddl %0, %1" : "+r" (c), "+m" (i) :: "memory");
         return c;
     }
 
@@ -309,7 +310,7 @@ public:
     {
         fence_before(success_order);
         T prev=expected;
-        __asm__ __volatile__("lock cmpxchgq %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
+        __asm__ __volatile__("lock; cmpxchgq %1, %2\n" : "=a" (prev) : "q" (desired), "m" (i), "a" (expected) : "memory");
         bool success=(prev==expected);
         if (success) fence_after(success_order);
         else fence_after(failure_order);
@@ -331,7 +332,7 @@ public:
     }
     T fetch_add(T c, memory_order order=memory_order_seq_cst) volatile
     {
-        __asm__ __volatile__("lock xaddq %0, %1" : "+r" (c), "+m" (i) :: "memory");
+        __asm__ __volatile__("lock; xaddq %0, %1" : "+r" (c), "+m" (i) :: "memory");
         return c;
     }
 
@@ -358,10 +359,31 @@ public:
         memory_order success_order,
         memory_order failure_order) volatile
     {
+        long scratch;
         fence_before(success_order);
         T prev=expected;
-        __asm__ __volatile__("lock cmpxchg8b %3\n" :
-            "=A" (prev) : "b" ((long)desired), "c" ((long)(desired>>32)), "m" (i), "0" (prev) : "memory");
+        /* Make sure ebx is saved and restored properly in case
+        this object is compiled as "position independent". Since
+        programmers on x86 tend to forget specifying -DPIC or
+        similar, always assume PIC.
+
+        To make this work uniformly even in the non-PIC case,
+        setup register constraints such that ebx can not be
+        used by accident e.g. as base address for the variable
+        to be modified. Accessing "scratch" should always be okay,
+        as it can only be placed on the stack (and therefore
+        accessed through ebp or esp only).
+
+        In theory, could push/pop ebx onto/off the stack, but movs
+        to a prepared stack slot turn out to be faster. */
+        __asm__ __volatile__(
+            "movl %%ebx, %1\n"
+            "movl %2, %%ebx\n"
+            "lock; cmpxchg8b 0(%4)\n"
+            "movl %1, %%ebx\n"
+            : "=A" (prev), "=m" (scratch)
+            : "D" ((long)desired), "c" ((long)(desired>>32)), "S" (&i), "0" (prev)
+            : "memory");
         bool success=(prev==expected);
         if (success) fence_after(success_order);
         else fence_after(failure_order);
diff --git a/boost/atomic/detail/interlocked.hpp b/boost/atomic/detail/interlocked.hpp
index 91865108..83456a7d 100644
--- a/boost/atomic/detail/interlocked.hpp
+++ b/boost/atomic/detail/interlocked.hpp
@@ -23,7 +23,7 @@ static inline void full_fence(void)
 }
 
 template<>
-static inline void platform_atomic_thread_fence(memory_order order)
+inline void platform_atomic_thread_fence(memory_order order)
 {
     switch(order) {
         case memory_order_seq_cst:
diff --git a/boost/atomic/detail/linux-arm.hpp b/boost/atomic/detail/linux-arm.hpp
new file mode 100644
index 00000000..3137f5f6
--- /dev/null
+++ b/boost/atomic/detail/linux-arm.hpp
@@ -0,0 +1,169 @@
+#ifndef BOOST_DETAIL_ATOMIC_LINUX_ARM_HPP
+#define BOOST_DETAIL_ATOMIC_LINUX_ARM_HPP
+
+// Distributed under the Boost Software License, Version 1.0.
+// See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+// Copyright (c) 2009 Helge Bahmann
+// Copyright (c) 2009 Phil Endecott
+// ARM Code by Phil Endecott, based on other architectures.
+
+#include <boost/memory_order.hpp>
+#include <boost/atomic/detail/base.hpp>
+#include <boost/atomic/detail/builder.hpp>
+
+namespace boost {
+namespace detail {
+namespace atomic {
+
+
+// Different ARM processors have different atomic instructions. In particular,
+// architecture versions before v6 (which are still in widespread use, e.g. the
+// Intel/Marvell XScale chips like the one in the NSLU2) have only atomic swap.
+// On Linux the kernel provides some support that lets us abstract away from
+// these differences: it provides emulated CAS and barrier functions at special
+// addresses that are guaranteed not to be interrupted by the kernel. Using
+// this facility is slightly slower than inline assembler would be, but much
+// faster than a system call.
+//
+// For documentation, see arch/arm/kernel/entry-armv.S in the kernel source
+// (search for "User Helpers").
+
+
+typedef void (kernel_dmb_t)(void);
+#define BOOST_ATOMIC_KERNEL_DMB (*(kernel_dmb_t *)0xffff0fa0)
+
+static inline void fence_before(memory_order order)
+{
+    switch(order) {
+        // FIXME I really don't know which of these cases should call
+        // kernel_dmb() and which shouldn't...
+        case memory_order_consume:
+        case memory_order_release:
+        case memory_order_acq_rel:
+        case memory_order_seq_cst:
+            BOOST_ATOMIC_KERNEL_DMB();
+        default:;
+    }
+}
+
+static inline void fence_after(memory_order order)
+{
+    switch(order) {
+        // FIXME I really don't know which of these cases should call
+        // kernel_dmb() and which shouldn't...
+        case memory_order_acquire:
+        case memory_order_acq_rel:
+        case memory_order_seq_cst:
+            BOOST_ATOMIC_KERNEL_DMB();
+        default:;
+    }
+}
+
+#undef BOOST_ATOMIC_KERNEL_DMB
+
+
+template<typename T>
+class atomic_linux_arm_4 {
+
+    typedef int (kernel_cmpxchg_t)(T oldval, T newval, T *ptr);
+#   define BOOST_ATOMIC_KERNEL_CMPXCHG (*(kernel_cmpxchg_t *)0xffff0fc0)
+    // Returns 0 if *ptr was changed.
+
+public:
+    explicit atomic_linux_arm_4(T v) : i(v) {}
+    atomic_linux_arm_4() {}
+    T load(memory_order order=memory_order_seq_cst) const volatile
+    {
+        T v=const_cast<const volatile T &>(i);
+        fence_after(order);
+        return v;
+    }
+    void store(T v, memory_order order=memory_order_seq_cst) volatile
+    {
+        fence_before(order);
+        const_cast<volatile T &>(i)=v;
+    }
+    bool compare_exchange_strong(
+        T &expected,
+        T desired,
+        memory_order success_order,
+        memory_order failure_order) volatile
+    {
+        // Apparently we can consider kernel_cmpxchg to be strong if it is retried
+        // by the kernel after being interrupted, which I think it is.
+        // Also it seems that when an ll/sc implementation is used the kernel
+        // loops until the store succeeds.
+        bool success = BOOST_ATOMIC_KERNEL_CMPXCHG(expected,desired,&i)==0;
+        if (!success) expected = load(memory_order_relaxed);
+        return success;
+    }
+    bool compare_exchange_weak(
+        T &expected,
+        T desired,
+        memory_order success_order,
+        memory_order failure_order) volatile
+    {
+        return compare_exchange_strong(expected, desired, success_order, failure_order);
+    }
+    T exchange(T replacement, memory_order order=memory_order_seq_cst) volatile
+    {
+        // Copied from build_exchange.
+        T o=load(memory_order_relaxed);
+        do {} while(!compare_exchange_weak(o, replacement, order));
+        return o;
+        // Note that ARM has an atomic swap instruction that we could use here:
+        //   T oldval;
+        //   asm volatile ("swp\t%0, %1, [%2]" : "=&r"(oldval) : "r" (replacement), "r" (&i) : "memory");
+        //   return oldval;
+        // This instruction is deprecated in architecture >= 6. I'm unsure how inefficient
+        // its implementation is on those newer architectures. I don't think this would gain
+        // much since exchange() is not used often.
+    }
+
+    bool is_lock_free(void) const volatile {return true;}
+protected:
+    typedef T integral_type;
+private:
+    T i;
+
+#   undef BOOST_ATOMIC_KERNEL_CMPXCHG
+
+};
+
+template<typename T>
+class platform_atomic_integral<T, 4> : public build_atomic_from_exchange<atomic_linux_arm_4<T> > {
+public:
+    typedef build_atomic_from_exchange<atomic_linux_arm_4<T> > super;
+    explicit platform_atomic_integral(T v) : super(v) {}
+    platform_atomic_integral(void) {}
+};
+
+
+template<typename T>
+class platform_atomic_integral<T, 1> : public build_atomic_from_larger_type<atomic_linux_arm_4<uint32_t>, T > {
+public:
+    typedef build_atomic_from_larger_type<atomic_linux_arm_4<uint32_t>, T> super;
+    explicit platform_atomic_integral(T v) : super(v) {}
+    platform_atomic_integral(void) {}
+};
+
+
+template<typename T>
+class platform_atomic_integral<T, 2> : public build_atomic_from_larger_type<atomic_linux_arm_4<uint32_t>, T > {
+public:
+    typedef build_atomic_from_larger_type<atomic_linux_arm_4<uint32_t>, T> super;
+    explicit platform_atomic_integral(T v) : super(v) {}
+    platform_atomic_integral(void) {}
+};
+
+
+typedef atomic_linux_arm_4<void *> platform_atomic_address;
+
+
+}
+}
+}
+
+#endif
diff --git a/boost/atomic/platform.hpp b/boost/atomic/platform.hpp
index d6bfdd39..eacf0088 100644
--- a/boost/atomic/platform.hpp
+++ b/boost/atomic/platform.hpp
@@ -18,12 +18,25 @@
 
     #include
 
+// This list of ARM architecture versions comes from Apple's arm/arch.h header.
+// I don't know how complete it is.
+#elif defined(__GNUC__) && (defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+        || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
+        || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_7A__))
+
+    #include <boost/atomic/detail/gcc-armv6+.hpp>
+
+#elif defined(__linux__) && defined(__arm__)
+
+    #include <boost/atomic/detail/linux-arm.hpp>
+
 #elif defined(BOOST_USE_WINDOWS_H) || defined(_WIN32_CE) || defined(BOOST_MSVC) || defined(BOOST_INTEL_WIN) || defined(WIN32) || defined(_WIN32) || defined(__WIN32__) || defined(__CYGWIN__)
 
     #include <boost/atomic/detail/interlocked.hpp>
 
 #else
-
+
+    #warning "Using slow fallback atomic implementation"
 
     #include <boost/atomic/detail/fallback.hpp>
 
 #endif
diff --git a/libs/atomic/doc/examples.qbk b/libs/atomic/doc/examples.qbk
index a5ff63b7..4a713ad8 100644
--- a/libs/atomic/doc/examples.qbk
+++ b/libs/atomic/doc/examples.qbk
@@ -45,7 +45,7 @@ soon as the reference counter reaches zero.
 
 Increasing the reference counter can always be done with
 [^memory_order_relaxed]: New references to an object can only
-be formed from an existing reference, an passing an existing
+be formed from an existing reference, and passing an existing
 reference from one thread to another must already provide any
 required synchronization.
 
@@ -75,7 +75,7 @@ soon as the reference counter reaches zero. There may be
 the object from being freed. "True" references may be formed
 from "weak" references unless the object has been deleted
 already.
-FIXME: The point to make here is that for upgrading "weak" to "full"
+FIXME: The point to make here is that upgrading "weak" to "full"
 references requires memory_order_acquire.
 
 [endsect]
@@ -159,7 +159,7 @@ and degrade the performance of other system components.
 
 [section:singleton Double-checked singleton pattern]
 
-The purpose of the ['double-cheked singleton pattern] is to ensure
+The purpose of the ['double-checked singleton pattern] is to ensure
 that at most one instance of a particular object is created. If
 one instance has been created already, access to the existing
 object should be as light-weight as possible.
@@ -227,7 +227,7 @@ the pointer need to be ordered.
 A ['wait-free ring buffer] provides a mechanism for relaying objects
 from one single "producer" thread to one single "consumer" thread without
 any locks. The operations on this data structure are "wait-free" which
-means that each operation finites within a constant number of steps.
+means that each operation finishes within a constant number of steps.
 This makes this data structure suitable for use in hard real-time systems
 or for communication with interrupt/signal handlers.
 
@@ -292,7 +292,7 @@ are either lost or read twice.
 
 Furthermore it must guarantee that read-access
 to a particular object in [^pop] "happens after" it has been
-written in [^push]. This is achieved by writing [^head_]
+written in [^push]. This is achieved by writing [^head_ ]
 with "release" and reading it with "acquire". Conversely
 the implementation also ensures that read access to a particular
 ring element "happens before" before
@@ -313,7 +313,6 @@ retrieved and processed in FIFO order by a single consumer.
 
 [c++]
 
-    // assume that every "T" object has a pointer "T * T::next"
     template<typename T>
     class waitfree_queue {
    public:
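
The examples.qbk hunks above describe the reference-counting discipline (relaxed increments; ordering only on the decrement that may free the object). As an illustration only — not part of the patch, with made-up names (refcounted, add_ref, release) and assuming the usual boost::atomic<int> fetch_add/fetch_sub interface — the idiom looks roughly like this:

    #include <boost/atomic.hpp>

    // Illustrative sketch; names are hypothetical, not from the patch.
    struct refcounted {
        refcounted() : refs(1) {}
        boost::atomic<int> refs;
    };

    inline void add_ref(refcounted *p)
    {
        // A new reference can only be created from an existing one, so the
        // increment needs no ordering beyond atomicity.
        p->refs.fetch_add(1, boost::memory_order_relaxed);
    }

    inline void release(refcounted *p)
    {
        // Acquire-release on the decrement so that all prior writes to *p
        // "happen before" the delete performed by whichever thread drops
        // the last reference.
        if (p->refs.fetch_sub(1, boost::memory_order_acq_rel) == 1)
            delete p;
    }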